File Coverage

Bio/Tools/SiRNA.pm
Criterion Covered Total %
statement 129 140 92.1
branch 34 60 56.6
condition 12 27 44.4
subroutine 19 19 100.0
pod 5 5 100.0
total 199 251 79.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::SiRNA
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Donald Jackson, donald.jackson@bms.com
7             #
8             # Copyright Bristol-Myers Squibb
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             SiRNA - Perl object for designing small inhibitory RNAs.
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::SiRNA;
21              
22             my $sirna_designer = Bio::Tools::SiRNA->new( -target => $bio_seq,
23             -rules => 'saigo'
24             );
25             my @pairs = $sirna_designer->design;
26              
27             foreach $pair (@pairs) {
28             my $sense_oligo_sequence = $pair->sense->seq;
29             my $antisense_oligo_sequence = $pair->antisense->seq;
30              
31             # print out results
32             print join ("\t", $pair->start, $pair->end, $pair->rank,
33             $sense_oligo_sequence, $antisense_oligo_sequence), "\n";
34             }
35              
36             =head1 DESCRIPTION
37              
38             Package for designing siRNA reagents.
39              
40             Input is a L-compliant object (the target).
41              
42             Output is a list of Bio::SeqFeature::SiRNA::Pair objects, which are
43             added to the feature table of the target sequence. Each
44             Bio::SeqFeature::SiRNA::Pair contains two subfeatures
45             (Bio::SeqFeature::Oligo objects) which correspond to the individual
46             oligos. These objects provide accessors for the information on the
47             individual reagent pairs.
48              
49             This verion of Bio::Tools::SiRNA represents a major change in architecture.
50             Specific 'rulesets' for siRNA selection as developed by various groups are
51             implemented as Bio::Tools::SiRNA::Ruleset objects, which inherit from
52             Bio::Tools::SiRNA. This will make it easier to add new rule sets or modify
53             existing approaches. Currently the Tuschl and Ui-Tei (2004) rules are
54             implemented. For consistency, the Tuschl rules are implemented by default.
55              
56             In addition, this module provides three 'extra' rules which can be added
57             above and beyond any ruleset.
58              
59             =over 3
60              
61             =item 1.
62              
63             SiRNAs that overlap known SNPs (identified as SeqFeatures with
64             primary tag = variation) can be avoided.
65              
66             =item 2.
67              
68             Other regions (with primary tag = 'Excluded') can also be skipped. I
69             use this with Bio::Tools::Run::Mdust to avoid low-complexity regions
70             (must be run separately), but other programs could also be used.
71              
72             =item 3.
73              
74             SiRNAs may also be selected in the 3 prime UTR of a gene by setting
75             $sirna_designer-Einclude_3pr() to true.
76              
77             =back
78              
79             =head2 EXPORT
80              
81             None.
82              
83             =head1 SEE ALSO
84              
85             L, L,
86             L..
87              
88             =head1 FEEDBACK
89              
90             =head2 Mailing Lists
91              
92             User feedback is an integral part of the evolution of this and other
93             Bioperl modules. Send your comments and suggestions preferably to
94             the Bioperl mailing list. Your participation is much appreciated.
95              
96             bioperl-l@bioperl.org - General discussion
97             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
98              
99             =head2 Support
100              
101             Please direct usage questions or support issues to the mailing list:
102              
103             I
104              
105             rather than to the module maintainer directly. Many experienced and
106             reponsive experts will be able look at the problem and quickly
107             address it. Please include a thorough description of the problem
108             with code and data examples if at all possible.
109              
110             =head2 Reporting Bugs
111              
112             Report bugs to the Bioperl bug tracking system to help us keep track
113             of the bugs and their resolution. Bug reports can be submitted via
114             the web:
115              
116             https://github.com/bioperl/bioperl-live/issues
117              
118             =head1 AUTHOR
119              
120             Donald Jackson (donald.jackson@bms.com)
121              
122             =head1 APPENDIX
123              
124             The rest of the documentation details each of the object methods.
125             Internal methods are usually preceded with a _
126              
127             =cut
128              
129             package Bio::Tools::SiRNA;
130              
131 1     1   401 use strict;
  1         2  
  1         22  
132 1     1   3 use warnings;
  1         1  
  1         22  
133              
134 1     1   3 use vars qw($AUTOLOAD);
  1         1  
  1         32  
135              
136 1     1   265 use Bio::Seq::RichSeq;
  1         2  
  1         29  
137 1     1   331 use Bio::SeqFeature::Generic;
  1         1  
  1         24  
138 1     1   285 use Bio::SeqFeature::SiRNA::Oligo;
  1         2  
  1         25  
139 1     1   298 use Bio::SeqFeature::SiRNA::Pair;
  1         2  
  1         29  
140              
141              
142 1     1   4 use base qw(Bio::Root::Root);
  1         0  
  1         1096  
143              
144              
145             our %COMP = ( A => 'T',
146             T => 'A',
147             C => 'G',
148             G => 'C',
149             N => 'N',
150             );
151              
152             our @ARGNAMES = qw(RULES START_PAD END_PAD MIN_GC CUTOFF OLIGOS AVOID_SNPS
153             GSTRING TMPDIR TARGET DEBUG);
154              
155              
156             =head2 new
157              
158             Title : new
159             Usage : my $sirna_designer = Bio::Tools::SiRNA->new();
160             Function : Constructor for designer object
161             Returns : Bio::Tools::SiRNA object
162             Args : target - the target sequence for the SiRNAs as a Bio::Seq::RichSeq
163             start_pad - distance from the CDS start to skip (default 75)
164             end_pad - distance from the CDS end to skip (default 50)
165             include_3pr - set to true to include SiRNAs in the 3prime UTR (default false)
166             rules - rules for selecting siRNAs, currently supporting saigo and tuschl
167             min_gc - minimum GC fraction (NOT percent) (default 0.4)
168             max_gc - maximum GC fraction (NOT percent) (default 0.6)
169             cutoff - worst 'rank' accepted(default 3)
170             avoid_snps - boolean - reject oligos that overlap a variation
171             SeqFeature in the target (default true)
172             gstring - maximum allowed consecutive Gs.
173             Too many can cause problems in synthesis (default 4)
174             Note : All arguments can also be changed/accessed using autoloaded
175             methods such as:
176              
177             my $start_pad = $sirna_designer->start_pad().
178              
179             =cut
180              
181             sub new {
182 1     1 1 698 my ($proto, @args) = @_;
183 1   33     6 my $pkg = ref($proto) || $proto;
184              
185 1         1 my $self = {};
186 1         2 bless ($self, $pkg);
187              
188 1         1 my %args;
189              
190 1         7 @args{@ARGNAMES} = $self->_rearrange(\@ARGNAMES, @args);
191            
192 1 50       3 if ($args{'RULES'}) {
193 0         0 $self->rules($args{'RULES'});
194             }
195              
196 1   50     8 $self->{'start_pad'} = $args{'START_PAD'} || 75; # nt from start to mask
197 1   50     8 $self->{'end_pad'} = $args{'END_PAD'} || 50; # nt from end to mask
198 1   50     5 $self->{'include_3pr'} = $args{'INCLUDE_3PR'} || 0; # look for oligos in 3prime UTR
199 1   50     4 $self->{'min_gc'} = $args{'MIN_GC'} || 0.40;
200 1   50     5 $self->{'max_gc'} = $args{'MAX_GC'} || 0.60;
201 1   50     4 $self->{'cutoff'} = $args{'CUTOFF'} || 3; # highest (worst) rank wanted
202 1         4 $self->{'oligos'} = [];
203             defined($args{'AVOID_SNPS'}) ? $self->{'avoid_snps'} = $args{'AVOID_SNPS'} :
204 1 50       5 $self->{'avoid_snps'} = 1; # (t/f to avoid or include reagents that cover SNPs)
205 1   50     4 $self->{'gstring'} = $args{'GSTRING'} || 4; # maximum allowed consecutive Gs - too many can cause problems in oligo synthesis
206 1   50     9 $self->{'tmpdir'} = $args{'TMPDIR'} || $ENV{'TMPDIR'} || $ENV{'TMP'} || '';
207 1   50     4 $self->{'debug'} = $args{'DEBUG'} || 0;
208              
209 1 50       5 $self->target($args{'TARGET'}) if ($args{'TARGET'});
210              
211 1         3 return $self;
212             }
213              
214              
215             =head2 target
216              
217             Title : target
218             Usage : my $target_seq = $sirna_designer->target(); # get the current target
219             OR
220             $sirna_designer->target($new_target_seq); # set a new target
221             Function : Set/get the target as a Bio::SeqI-compliant object
222             Returns : a Bio::SeqI-compliant object
223             Args : a Bio::SeqI-compliant object (optional)
224              
225             =cut
226              
227             sub target {
228 338     338 1 638 my ($self, $target) = @_;
229              
230 338 100       578 if ($target) {
    50          
231 2 50       9 unless ($target->isa('Bio::SeqI')) {
232 0         0 $self->throw( -class => 'Bio::Root::BadParameter',
233             -text => "Target must be passed as a Bio::Seq object" );
234             }
235 2 100       18 if ($target->can('molecule')) {
236 1 50       1 ( grep { uc($target->molecule) eq $_ } qw(DNA MRNA CDNA)) or
  3         5  
237             $self->throw( -class => 'Bio::Root::BadParameter',
238             -text => "Sequences of type ". $target->molecule. " are not supported"
239             );
240             }
241             else {
242 1 50       4 ($target->alphabet eq 'dna') or
243             $self->throw( -class => 'Bio::Root::BadParameter',
244             -text => "Sequences of alphabet ". $target->alphabet. " are not supported"
245             );
246             }
247            
248 2         4 $self->{'target'} = $target;
249 2         7 return 1;
250              
251             }
252             elsif ($self->{'target'}) {
253 336         836 return $self->{'target'};
254             }
255             else {
256 0         0 $self->throw("Target sequence not defined");
257             }
258             }
259              
260             =head2 rules
261              
262             Title : rules
263             Usage : $sirna->rules('ruleset')
264             Purpose : set/get ruleset to use for selecting SiRNA oligo pairs.
265             Returns : not sure yet
266             Args : a ruleset name (currently supported: Tuschl, Saigo)
267             or a Bio::Tools::SiRNA::RulesetI compliant object
268              
269             =cut
270              
271             sub rules {
272 3     3 1 3 my ($self, $rules) = @_;
273              
274 3 50       9 if ($rules) {
275 0         0 $self->_load_ruleset($rules);
276             }
277             # default: use tuschl rules
278 3 100       8 unless ($self->{_rules}) {
279 1         3 $self->_load_ruleset('tuschl');
280             }
281 3         15 return $self->{_rules};
282             }
283              
284             sub _load_ruleset {
285 1     1   2 my ($self, $ruleset) = @_;
286              
287 1         3 my $rule_module = join('::', ref($self), 'Ruleset', lc($ruleset));
288              
289 1         84 eval "require $rule_module";
290            
291 1 50       5 if ($@) {
292             #warn join("\n", '@INC contains:', @INC, undef);
293 0         0 $self->throw("Unable to load $rule_module: $@");
294 0         0 return;
295             }
296              
297             else {
298 1         3 $self->{_rules} = $rule_module;
299 1         2 bless($self, $rule_module); # recast as subclass
300             }
301            
302 1         1 return 1;
303             }
304              
305             =head2 design
306              
307             Title : design
308             Usage : my @pairs = $sirna_designer->design();
309             Purpose : Design SiRNA oligo pairs.
310             Returns : A list of SiRNA pairs as Bio::SeqFeature::SiRNA::Pair objects
311             Args : none
312              
313             =cut
314              
315             sub design {
316 3     3 1 357 my ($self) = @_;
317              
318 3 50       9 ($self->rules) or $self->throw('Unable to design siRNAs: no rule set specified');
319              
320             # unless ( grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures ) {
321             # $self->_define_target();
322             # }
323              
324 3         8 my @oligos = $self->_get_oligos();
325            
326 3         8 return ( grep { $_->isa('Bio::SeqFeature::SiRNA::Pair') } $self->target->top_SeqFeatures );
  329         534  
327             }
328            
329             sub _define_target {
330 3     3   3 my ($self) = @_;
331 3         4 my ($feat, $cds, $left, $right);
332              
333 3 50       5 my $target = $self->target or
334             $self->throw("Unable to design oligos - no target provided");
335              
336 3 50       15 ($cds) = grep { $_->primary_tag eq 'CDS' } $target->top_SeqFeatures if ($target->can('top_SeqFeatures'));
  14         17  
337            
338 3 100       7 if ($cds) {
339 2         5 $left = $cds->start + $self->start_pad;
340 2 100       7 if (!$self->include_3pr) {
341 1         3 $right = $cds->end - $self->end_pad;
342             }
343             else {
344 1         7 $right = $target->length - $self->end_pad;
345             }
346             }
347             else {
348 1         6 $left = 0 + $self->start_pad;
349 1         4 $right = $target->length - $self->end_pad;
350             }
351              
352              
353             # is there anything left?
354 3 50       10 if (($right - $left) < 20) {
355 0         0 $self->throw("There isn't enough sequence to design oligos. Please reduce start_pad and end_pad or supply more sequence");
356             }
357             # define target region
358 3         18 my $targregion = Bio::SeqFeature::Generic->new( -start => $left,
359             -end => $right,
360             -primary => 'Target' );
361 3         8 $self->target->add_SeqFeature($targregion);
362              
363             # locate excluded regions
364 3         7 my @excluded = grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures;
  17         22  
365              
366 3 50       17 if ($self->avoid_snps) {
367 3         8 my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
  17         20  
368 3         6 push(@excluded, @snps);
369             }
370            
371 3         13 $self->excluded(\@excluded);
372              
373 3         8 return $targregion;
374             }
375              
376             sub _get_targetregion {
377 9     9   12 my ($self) = @_;
378            
379 9         24 my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
  177         204  
380 9   66     30 $targregion ||= $self->_define_target;
381              
382 9 50       16 $self->throw("Target region for SiRNA design not defined") unless ($targregion);
383              
384 9         28 my $seq = $targregion->seq->seq;
385             # but this way I loose start info
386 9         34 my $targstart = $targregion->start;
387              
388 9         20 return ($seq, $targstart);
389             }
390              
391             # MOVE to SiRNA::Ruleset::tuschl
392             # sub _regex {
393             # my ($self, $rank) = @_;
394             # return $PATTERNS{$rank};
395             # }
396              
397             # sub _get_oligos {
398             # # use regular expressions to pull out oligos
399              
400             # my ($self, $rank) = @_;
401             # my $regex = $self->_regex($rank);
402             # my @exclude;
403              
404              
405             # my ($targregion) = grep { $_->primary_tag eq 'Target' } $self->target->top_SeqFeatures;
406             # my $seq = $targregion->seq->seq;
407             # # but this way I loose start info
408             # my $targstart = $targregion->start;
409            
410             # # exclude masked region
411             # push(@exclude, grep { $_->primary_tag eq 'Excluded' } $self->target->top_SeqFeatures);
412              
413             # # add SNP checking
414             # if ($self->avoid_snps) {
415             # my @snps = grep { $_->primary_tag eq 'variation' } $self->target->top_SeqFeatures;
416             # push(@exclude, @snps);
417             # }
418              
419             # while ( $seq =~ /$regex/gi ) {
420             # my $target = $1;
421              
422             # # check for too many Gs (or Cs on the other strand)
423             # next if ( $target =~ /G{ $self->gstring,}/io );
424             # next if ( $target =~ /C{ $self->gstring,}/io );
425             # # skip Ns (for filtering)
426             # next if ( $target =~ /N/i);
427              
428             # my $start = length($`) + $targstart;
429             # my $stop = $start + length($target) -1;
430              
431             # my @gc = ( $target =~ /G|C/gi);
432             # my $fxGC = sprintf("%2.2f", (scalar(@gc) / length($target)));
433             # next if ($fxGC < $self->min_gc);
434             # next if ($fxGC > $self->max_gc);
435              
436             # my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
437             # -end => $stop,
438             # -strand => 1,
439             # -seq => _get_sense($target),
440             # -source_tag => ref($self),
441             # );
442              
443             # my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
444             # -end => $stop,
445             # -strand => -1,
446             # -seq => _get_anti($target),
447             # -source_tag => ref($self),
448             # );
449              
450             # my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank,
451             # -fxGC => $fxGC,
452             # -sense => $sense,
453             # -antisense => $asense,
454             # -source_tag => ref($self),
455             # );
456              
457             # unless ($self->_has_overlap($sirna, \@exclude)) {
458             # $self->target->add_SeqFeature($sirna);
459             # }
460             # }
461             # }
462              
463             =head2 add_oligos
464              
465             Title : add_oligos
466             Usage : $sirna_designer->add_oligos($sequence, $start, $rank);
467             Purpose : Add SiRNA olgos to target Bio::Seq as Bio::SeqFeature::SiRNA::Pair objects
468             Args : Oligo sequence and start position (required), rank/score (optional)
469              
470             =cut
471              
472             sub add_oligos {
473 312     312 1 345 my ($self, $seq, $start, $rank) = @_;
474              
475 312 50       365 ($seq) or throw ('No sequence supplied for add_oligos');
476 312 50       399 (defined $start) or throw ('No start position specified for add_oligos');
477            
478 312         278 my ($end) = $start + length($seq);
479              
480 312         495 my ($sseq) = $self->_get_sense($seq);
481 312         1321 my $sense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $start,
482             -end => ($start + length($sseq)),
483             -strand => 1,
484             -seq => $sseq,
485             -source_tag => ref($self),
486             );
487              
488 312         724 my $aseq = $self->_get_anti($seq);
489 312         1206 my $asense = Bio::SeqFeature::SiRNA::Oligo->new( -start => $end,
490             -end => ($end - length($aseq)),
491             -strand => -1,
492             -seq => $aseq,
493             -source_tag => ref($self),
494             );
495              
496 312         1142 my $sirna = Bio::SeqFeature::SiRNA::Pair->new( -rank => $rank,
497             # -fxGC => $fxGC,
498             -sense => $sense,
499             -antisense => $asense,
500             -source_tag => ref($self),
501             );
502              
503 312 50       1615 unless ($self->_has_overlap($sirna, $self->excluded)) {
504 312         451 $self->target->add_SeqFeature($sirna);
505             }
506             }
507              
508             sub _has_overlap {
509             # flag any pairs that overlap an UNDESIRED feature (eg SNP)
510             # return true if there is overlap, false if not
511              
512 312     312   257 my ($self, $test, $flist) = @_;
513 312 50       529 print STDERR "Checking oligo at ", $test->start, " to ",$test->end, "\n"
514             if ($self->debug);
515            
516 312         553 foreach my $feat (@$flist) {
517 0 0 0     0 if (($test->start <= $feat->end) and ($test->end >= $feat->start)) {
518 0 0       0 print STDERR "Overlaps ", $feat->primary_tag, " at ",
519             $feat->start, " to ", $feat->end, "\n" if ($self->debug);
520 0         0 return 1;
521             }
522             }
523 312         485 return 0; # default - no overlap
524             }
525            
526             # MOVE to SiRNA::Ruleset::tuschl
527            
528             # sub _get_sense {
529             # my ($target) = @_;
530             # # trim off 1st 2 nt to get overhang
531             # $target =~ s/^..//;
532             # # convert T's to U's (transcribe)
533             # $target =~ s/T/U/gi;
534             # # force last 2 nt to be T's
535             # $target =~ s/..$/TT/;
536              
537             # return $target;
538             # }
539              
540             # sub _get_anti {
541             # my ($target) = @_;
542             # my @target = split(//, $target);
543             # my ($nt,@antitarget);
544              
545             # while ($nt = pop @target) {
546             # push(@antitarget, $COMP{$nt});
547             # }
548             # my $anti = join('', @antitarget);
549             # # trim off 1st 2 nt to get overhang
550             # $anti =~ s/^..//;
551             # # convert T's to U's
552             # $anti =~ s/T/U/gi;
553             # # convert last 2 NT's to T
554             # $anti =~ s/..$/TT/;
555              
556             # return $anti;
557             # }
558              
559              
560             sub AUTOLOAD {
561 2047     2047   1655 my ($self, $value) = @_;
562 2047         1493 my $name = $AUTOLOAD;
563 2047         4399 $name =~ s/.+:://;
564              
565 2047 50       3038 return if ($name eq 'DESTROY');
566              
567              
568 2047 100       2248 if (defined $value) {
569 4         9 $self->{$name} = $value;
570             }
571              
572 2047 50       2632 unless (exists $self->{$name}) {
573 0         0 $self->throw("Attribute $name not defined for ". ref($self));
574             }
575              
576 2047         5294 return $self->{$name};
577             }
578              
579             sub _comp {
580 7176     7176   4782 my ($self, $char) = @_;
581              
582 7176 50       7688 return unless ($char);
583 7176         4300 $char = uc($char);
584 7176         12335 return $COMP{ $char };
585             }
586             1;