File Coverage

Bio/Tools/AmpliconSearch.pm
Criterion Covered Total %
statement 145 155 93.5
branch 56 66 84.8
condition 14 21 66.6
subroutine 20 20 100.0
pod 8 8 100.0
total 243 270 90.0


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::AmpliconSearch
2             #
3             # Copyright Florent Angly
4             #
5             # You may distribute this module under the same terms as perl itself
6              
7              
8             package Bio::Tools::AmpliconSearch;
9              
10 1     1   1040 use strict;
  1         2  
  1         22  
11 1     1   3 use warnings;
  1         1  
  1         25  
12 1     1   3 use Bio::Tools::IUPAC;
  1         1  
  1         14  
13 1     1   240 use Bio::SeqFeature::Amplicon;
  1         2  
  1         26  
14 1     1   614 use Bio::Tools::SeqPattern;
  1         1  
  1         26  
15             # we require Bio::SeqIO
16             # and Bio::SeqFeature::Primer
17              
18 1     1   4 use base qw(Bio::Root::Root);
  1         1  
  1         1143  
19              
20             my $template_str;
21              
22              
23             =head1 NAME
24              
25             Bio::Tools::AmpliconSearch - Find amplicons in a template using degenerate PCR primers
26              
27             =head1 SYNOPSIS
28              
29             use Bio::PrimarySeq;
30             use Bio::Tools::AmpliconSearch;
31              
32             my $template = Bio::PrimarySeq->new(
33             -seq => 'aaaaaCCCCaaaaaaaaaaTTTTTTaaaaaCCACaaaaaTTTTTTaaaaaaaaaa',
34             );
35             my $fwd_primer = Bio::PrimarySeq->new(
36             -seq => 'CCNC',
37             );
38             my $rev_primer = Bio::PrimarySeq->new(
39             -seq => 'AAAAA',
40             );
41              
42             my $search = Bio::Tools::AmpliconSearch->new(
43             -template => $template,
44             -fwd_primer => $fwd_primer,
45             -rev_primer => $rev_primer,
46             );
47            
48             while (my $amplicon = $search->next_amplicon) {
49             print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
50             print $amplicon->seq->seq."\n\n";
51             }
52              
53             # Now change the template (but you could change the primers instead) and look
54             # for amplicons again
55              
56             $template = Bio::PrimarySeq->new(
57             -seq => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
58             );
59             $search->template($template);
60              
61             while (my $amplicon = $search->next_amplicon) {
62             print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
63             print $amplicon->seq->seq."\n\n";
64             }
65              
66             =head1 DESCRIPTION
67              
68             Perform an in silico PCR reaction, i.e. search for amplicons in a given template
69             sequence using the specified degenerate primer.
70              
71             The template sequence is a sequence object, e.g. L, and the primers
72             can be a sequence or a L object and contain ambiguous
73             residues as defined in the IUPAC conventions. The primer sequences are converted
74             into regular expressions using L and the matching regions of
75             the template sequence, i.e. the amplicons, are returned as L
76             objects.
77              
78             AmpliconSearch will look for amplicons on both strands (forward and reverse-
79             complement) of the specified template sequence. If the reverse primer is not
80             provided, an amplicon will be returned and span a match of the forward primer to
81             the end of the template. Similarly, when no forward primer is given, match from
82             the beginning of the template sequence. When several amplicons overlap, only the
83             shortest one to more accurately represent the biases of PCR. Future improvements
84             may include modelling the effects of the number of PCR cycles or temperature on
85             the PCR products.
86              
87             =head1 TODO
88              
89             Future improvements may include:
90              
91             =over
92              
93             =item *
94              
95             Allowing a small number of primer mismatches
96              
97             =item *
98              
99             Reporting all amplicons, including overlapping ones
100              
101             =item *
102              
103             Putting a limit on the length of amplicons, in accordance with the processivity
104             of the polymerase used
105              
106             =back
107              
108             =head1 FEEDBACK
109              
110             =head2 Mailing Lists
111              
112             User feedback is an integral part of the evolution of this and other
113             Bioperl modules. Send your comments and suggestions preferably to one
114             of the Bioperl mailing lists. Your participation is much appreciated.
115              
116             bioperl-l@bioperl.org - General discussion
117             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
118              
119             =head2 Support
120              
121             Please direct usage questions or support issues to the mailing list:
122              
123             I
124              
125             rather than to the module maintainer directly. Many experienced and
126             reponsive experts will be able look at the problem and quickly
127             address it. Please include a thorough description of the problem
128             with code and data examples if at all possible.
129              
130             =head2 Reporting Bugs
131              
132             Report bugs to the Bioperl bug tracking system to help us keep track
133             the bugs and their resolution. Bug reports can be submitted via the
134             web:
135              
136             https://github.com/bioperl/bioperl-live/issues
137              
138             =head1 AUTHOR
139              
140             Florent Angly
141              
142             =head1 APPENDIX
143              
144             The rest of the documentation details each of the object
145             methods. Internal methods are usually preceded with a _
146              
147             =head2 new
148              
149             Title : new
150             Usage : my $search = Bio::Tools::AmpliconSearch->new( );
151             Function : Initialize an amplicon search
152             Args : -template Sequence object for the template sequence. This object
153             will be converted to Bio::Seq if needed in since features
154             (amplicons and primers) will be added to this object.
155             -fwd_primer A sequence object representing the forward primer
156             -rev_primer A sequence object representing the reverse primer
157             -primer_file Read primers from a sequence file. It replaces
158             -fwd_primer and -rev_primer (optional)
159             -attach_primers Whether or not to attach primers to Amplicon objects. Default: 0 (off)
160             Returns : A Bio::Tools::AmpliconSearch object
161              
162             =cut
163              
164             sub new {
165 17     17 1 193 my ($class, @args) = @_;
166 17         46 my $self = $class->SUPER::new(@args);
167 17         62 my ($template, $primer_file, $fwd_primer, $rev_primer, $attach_primers) =
168             $self->_rearrange([qw(TEMPLATE PRIMER_FILE FWD_PRIMER REV_PRIMER ATTACH_PRIMERS)],
169             @args);
170              
171             # Get primers
172 17 100       44 if (defined $primer_file) {
173 3         6 $self->primer_file($primer_file);
174             } else {
175 14   100     42 $self->fwd_primer($fwd_primer || '');
176 14   100     37 $self->rev_primer($rev_primer || '');
177             }
178              
179             # Get template sequence
180 17 100       39 $self->template($template) if defined $template;
181              
182 17 100       37 $self->attach_primers($attach_primers) if defined $attach_primers;
183              
184 17         63 return $self;
185             }
186              
187              
188             =head2 template
189              
190             Title : template
191             Usage : my $template = $search->template;
192             Function : Get/set the template sequence. Setting a new template resets any
193             search in progress.
194             Args : Optional Bio::Seq object
195             Returns : A Bio::Seq object
196              
197             =cut
198              
199             sub template {
200 83     83 1 71 my ($self, $template) = @_;
201 83 100       135 if (defined $template) {
202 17 50 33     59 if ( not(ref $template) || not $template->isa('Bio::PrimarySeqI') ) {
203             # Not a Bio::Seq or Bio::PrimarySeq
204 0         0 $self->throw("Expected a sequence object as input but got a '".ref($template)."'\n");
205             }
206 17 50       51 if (not $template->isa('Bio::SeqI')) {
207             # Convert sequence object to Bio::Seq Seq so that features can be added
208 17         14 my $primary_seq = $template;
209 17         36 $template = Bio::Seq->new();
210 17         30 $template->primary_seq($primary_seq);
211             }
212 17         19 $self->{template} = $template;
213             # Reset search in progress
214 17         19 $template_str = undef;
215             }
216 83         206 return $self->{template};
217             }
218              
219              
220             =head2 fwd_primer
221              
222             Title : fwd_primer
223             Usage : my $primer = $search->fwd_primer;
224             Function : Get/set the forward primer. Setting a new forward primer resets any
225             search in progress.
226             Args : Optional sequence object or primer object or '' to match beginning
227             of sequence.
228             Returns : A sequence object or primer object or undef
229              
230             =cut
231              
232             sub fwd_primer {
233 46     46 1 49 my ($self, $primer) = @_;
234 46 100       65 if (defined $primer) {
235 19         31 $self->_set_primer('fwd', $primer);
236             }
237 46         84 return $self->{fwd_primer};
238             }
239              
240              
241             =head2 rev_primer
242              
243             Title : rev_primer
244             Usage : my $primer = $search->rev_primer;
245             Function : Get/set the reverse primer. Setting a new reverse primer resets any
246             search in progress.
247             Args : Optional sequence object or primer object or '' to match end of
248             sequence.
249             Returns : A sequence object or primer object or undef
250              
251             =cut
252              
253             sub rev_primer {
254 29     29 1 29 my ($self, $primer) = @_;
255 29 100       48 if (defined $primer) {
256 19         23 $self->_set_primer('rev', $primer);
257             }
258 29         52 return $self->{rev_primer};
259             }
260              
261              
262             sub _set_primer {
263             # Save a primer (sequence object) and convert it to regexp. Type is 'fwd' for
264             # the forward primer or 'rev' for the reverse primer.
265 38     38   36 my ($self, $type, $primer) = @_;
266 38         22 my $re;
267 38         35 my $match_rna = 1;
268 38 100       85 if ($primer eq '') {
269 7 100       12 $re = $type eq 'fwd' ? '^' : '$';
270             } else {
271 31 50 66     237 if ( not(ref $primer) || (
      33        
272             not($primer->isa('Bio::PrimarySeqI')) &&
273             not($primer->isa('Bio::SeqFeature::Primer')) ) ) {
274 0         0 $self->throw('Expected a sequence or primer object as input but got a '.ref($primer)."\n");
275             }
276 31         53 $self->{$type.'_primer'} = $primer;
277 31 100       104 my $seq = $primer->isa('Bio::SeqFeature::Primer') ? $primer->seq : $primer;
278 31 100       117 $re = Bio::Tools::IUPAC->new(
279             -seq => $type eq 'fwd' ? $seq : $seq->revcom,
280             )->regexp($match_rna);
281             }
282 38         89 $self->{$type.'_regexp'} = $re;
283             # Reset search in progress
284 38         43 $template_str = undef;
285 38         41 $self->{regexp} = undef;
286 38         74 return $self->{$type.'_primer'};
287             }
288              
289              
290             =head2 primer_file
291              
292             Title : primer_file
293             Usage : my ($fwd, $rev) = $search->primer_file;
294             Function : Get/set a sequence file to read the primer from. The first sequence
295             must be the forward primer, and the second is the optional reverse
296             primer. After reading the file, the primers are set using fwd_primer()
297             and rev_primer() and returned.
298             Args : Sequence file
299             Returns : Array containing forward and reverse primers as sequence objects.
300              
301             =cut
302              
303             sub primer_file {
304 3     3 1 3 my ($self, $primer_file) = @_;
305             # Read primer file and convert primers into regular expressions to catch
306             # amplicons present in the database
307              
308 3 50       6 if (not defined $primer_file) {
309 0         0 $self->throw("Need to provide an input file\n");
310             }
311              
312             # Mandatory first primer
313 3         530 require Bio::SeqIO;
314 3         14 my $in = Bio::SeqIO->new( -file => $primer_file );
315 3         5 my $fwd_primer = $in->next_seq;
316 3 50       6 if (not defined $fwd_primer) {
317 0         0 $self->throw("The file '$primer_file' contains no primers\n");
318             }
319 3         6 $fwd_primer->alphabet('dna'); # Force the alphabet since degenerate primers can look like protein sequences
320              
321             # Optional reverse primers
322 3         4 my $rev_primer = $in->next_seq;
323 3 100       5 if (defined $rev_primer) {
324 2         3 $rev_primer->alphabet('dna');
325             } else {
326 1         2 $rev_primer = '';
327             }
328            
329 3         9 $in->close;
330              
331 3         6 $self->fwd_primer($fwd_primer);
332 3         5 $self->rev_primer($rev_primer);
333              
334 3         9 return ($fwd_primer, $rev_primer);
335             }
336              
337              
338             =head2 attach_primers
339              
340             Title : attach_primers
341             Usage : my $attached = $search->attach_primers;
342             Function : Get/set whether or not to attach primer objects to the amplicon
343             objects.
344             Args : Optional integer (1 for yes, 0 for no)
345             Returns : Integer (1 for yes, 0 for no)
346              
347             =cut
348              
349             sub attach_primers {
350 23     23 1 18 my ($self, $attach) = @_;
351 23 100       32 if (defined $attach) {
352 2         2 $self->{attach_primers} = $attach;
353 2         10 require Bio::SeqFeature::Primer;
354             }
355 23   100     79 return $self->{attach_primers} || 0;
356             }
357              
358              
359             =head2 next_amplicon
360              
361             Title : next_amplicon
362             Usage : my $amplicon = $search->next_amplicon;
363             Function : Get the next amplicon
364             Args : None
365             Returns : A Bio::SeqFeature::Amplicon object
366              
367             =cut
368              
369             sub next_amplicon {
370 40     40 1 1058 my ($self) = @_;
371              
372             # Initialize search
373 40 100       73 if (not defined $template_str) {
374 18         30 $self->_init;
375             }
376              
377 40         66 my $re = $self->_regexp;
378              
379 40         55 my $amplicon;
380 40 100       314 if ($template_str =~ m/$re/g) {
381 21         61 my ($match, $rev_match) = ($1, $2);
382 21 100       26 my $strand = $rev_match ? -1 : 1;
383 21   66     42 $match = $match || $rev_match;
384 21         19 my $end = pos($template_str);
385 21         24 my $start = $end - length($match) + 1;
386 21         34 $amplicon = $self->_attach_amplicon($start, $end, $strand);
387             }
388              
389             # If no more matches. Make sure calls to next_amplicon() will return undef.
390 40 100       58 if (not $amplicon) {
391 19         26 $template_str = '';
392             }
393              
394 40         119 return $amplicon;
395             }
396              
397              
398             sub _init {
399 18     18   12 my ($self) = @_;
400             # Sanity checks
401 18 50       26 if ( not $self->template ) {
402 0         0 $self->throw('Need to provide a template sequence');
403             }
404 18 50 66     34 if ( not($self->fwd_primer) && not($self->rev_primer) ) {
405 0         0 $self->throw('Need to provide at least a primer');
406             }
407             # Set the template sequence string
408 18         22 $template_str = $self->template->seq;
409             # Set the regular expression to match amplicons
410 18         33 $self->_regexp;
411              
412 18         16 return 1;
413             }
414              
415              
416             sub _regexp {
417             # Get the regexp to match amplicon. If the regexp is not set, initialize it.
418 58     58   49 my ($self, $regexp) = @_;
419              
420 58 100       93 if ( not defined $self->{regexp} ) {
421             # Build regexp that matches amplicons on both strands and reports shortest
422             # amplicon when there are several overlapping amplicons
423              
424 17         20 my $fwd_regexp = $self->_fwd_regexp;
425 17         27 my $rev_regexp = $self->_rev_regexp;
426              
427 17         14 my ($fwd_regexp_rc, $basic_fwd_match, $rev_regexp_rc, $basic_rev_match);
428 17 100       26 if ($fwd_regexp eq '^') {
429 1         2 $fwd_regexp_rc = '';
430 1         2 $basic_fwd_match = "(?:.*?$rev_regexp)";
431             } else {
432 16         56 $fwd_regexp_rc = Bio::Tools::SeqPattern->new(
433             -seq => $fwd_regexp,
434             -type => 'dna',
435             )->revcom->str;
436 16         35 $basic_fwd_match = "(?:$fwd_regexp.*?$rev_regexp)";
437             }
438              
439 17 100       22 if ($rev_regexp eq '$') {
440 2         2 $rev_regexp_rc = '';
441 2         4 $basic_rev_match = "(?:.*?$fwd_regexp_rc)";
442             } else {
443 15         37 $rev_regexp_rc = Bio::Tools::SeqPattern->new(
444             -seq => $rev_regexp,
445             -type => 'dna',
446             )->revcom->str;
447 15         28 $basic_rev_match = "(?:$rev_regexp_rc.*?$fwd_regexp_rc)";
448             }
449              
450 17 100       40 my $fwd_exclude = "(?!$basic_rev_match".
451             ($fwd_regexp eq '^' ? '' : "|$fwd_regexp").
452             ")";
453              
454 17 100       51 my $rev_exclude = "(?!$basic_fwd_match".
455             ($rev_regexp eq '$' ? '' : "|$rev_regexp_rc").
456             ')';
457              
458 17         762 $self->{regexp} = qr/
459             ( $fwd_regexp (?:$fwd_exclude.)*? $rev_regexp ) |
460             ( $rev_regexp_rc (?:$rev_exclude.)*? $fwd_regexp_rc )
461             /xi;
462             }
463              
464 58         86 return $self->{regexp};
465             }
466              
467              
468             =head2 annotate_template
469              
470             Title : annotate_template
471             Usage : my $template = $search->annotate_template;
472             Function : Search for all amplicons and attach them to the template.
473             This is equivalent to running:
474             while (my $amplicon = $self->next_amplicon) {
475             # do something
476             }
477             my $annotated = $self->template;
478             Args : None
479             Returns : A Bio::Seq object with attached Bio::SeqFeature::Amplicons (and
480             Bio::SeqFeature::Primers if you set -attach_primers to 1).
481              
482             =cut
483              
484             sub annotate_template {
485 1     1 1 3 my ($self) = @_;
486             # Search all amplicons and attach them to template
487 1         3 1 while $self->next_amplicon;
488             # Return annotated template
489 1         3 return $self->template;
490             }
491              
492              
493             sub _fwd_regexp {
494 17     17   13 my ($self) = @_;
495 17         19 return $self->{fwd_regexp};
496             }
497              
498              
499             sub _rev_regexp {
500 17     17   9 my ($self) = @_;
501 17         21 return $self->{rev_regexp};
502             }
503              
504              
505             sub _attach_amplicon {
506             # Create an amplicon object and attach it to template
507 21     21   22 my ($self, $start, $end, $strand) = @_;
508              
509             # Create Bio::SeqFeature::Amplicon feature and attach it to the template
510 21         50 my $amplicon = Bio::SeqFeature::Amplicon->new(
511             -start => $start,
512             -end => $end,
513             -strand => $strand,
514             -template => $self->template,
515             );
516              
517             # Create Bio::SeqFeature::Primer feature and attach them to the amplicon
518 21 100       47 if ($self->attach_primers) {
519 1         2 for my $type ('fwd', 'rev') {
520 2         2 my ($pstart, $pend, $pstrand, $primer_seq);
521              
522             # Coordinates relative to amplicon
523 2 100       4 if ($type eq 'fwd') {
524             # Forward primer
525 1         1 $primer_seq = $self->fwd_primer;
526 1 50       2 next if not defined $primer_seq;
527 1         1 $pstart = 1;
528 1         3 $pend = $primer_seq->length;
529 1         2 $pstrand = $amplicon->strand;
530             } else {
531             # Optional reverse primer
532 1         2 $primer_seq = $self->rev_primer;
533 1 50       3 next if not defined $primer_seq;
534 0         0 $pstart = $end - $primer_seq->length + 1;
535 0         0 $pend = $end;
536 0         0 $pstrand = -1 * $amplicon->strand;
537             }
538              
539             # Absolute coordinates needed
540 1         1 $pstart += $start - 1;
541 1         1 $pend += $start - 1;
542              
543 1         6 my $primer = Bio::SeqFeature::Primer->new(
544             -start => $pstart,
545             -end => $pend,
546             -strand => $pstrand,
547             -template => $amplicon,
548             );
549              
550             # Attach primer to amplicon
551 1 50       3 if ($type eq 'fwd') {
552 1         3 $amplicon->fwd_primer($primer);
553             } else {
554 0         0 $amplicon->rev_primer($primer);
555             }
556              
557             }
558             }
559              
560 21         30 return $amplicon;
561             }
562              
563              
564             1;