File Coverage

Bio/Tools/QRNA.pm
Criterion Covered Total %
statement 114 124 91.9
branch 65 74 87.8
condition 3 5 60.0
subroutine 14 14 100.0
pod 7 7 100.0
total 203 224 90.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::QRNA
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::QRNA - A Parser for qrna output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::QRNA;
21             my $parser = Bio::Tools::QRNA->new(-file => $qrnaoutput);
22             while( my $feature = $parser->next_feature ) {
23             # do something here
24             }
25              
26             =head1 DESCRIPTION
27              
28             Parses QRNA output (E.Rivas:
29             http://selab.janelia.org/software.html
30             ftp://selab.janelia.org/pub/software/qrna/).
31              
32             This module is not complete, but currently it packs information from
33             each QRNA alignment into a single Bio::SeqFeature::Generic object.
34              
35             Not all options for QRNA output have been tested or tried. It has
36             been tested on sliding window output (-w -x) and shuffled output (-b
37             or -B).
38              
39             See t/QRNA.t for example usage.
40              
41             At some point we may have more complicated feature object which will
42             support this data rather than forcing most of the information into
43             tag/value pairs in a SeqFeature::Generic.
44              
45             Running with -verbose =E 1 will store extra data in the feature. The
46             entire unparsed entry for a particular feature will be stored as a
47             string in the tag 'entry' it is accessible via:
48              
49             my ($entry) = $f->each_tag_value('entry');
50              
51             The winning model for any given alignment test will be the name stored
52             in the primary_tag field of feature. The bit score will stored in the
53             score field. The logoddpost is available via the a tag/value pair.
54             This example code will show how to print out the score and log odds
55             post for each model.
56              
57             # assuming you got a feature already
58             print "model score logoddspost\n";
59             foreach my $model ( qw(OTH COD RNA) ) {
60             my ($score) = $f->get_tag_values("$model\_score");
61             my ($logoddspost) = $f->get_tag_values("$model\_logoddspost");
62             print "$model $score $logoddspost\n";
63             }
64              
65             The start and end of the alignment for both the query and hit sequence
66             are available through the L interface,
67             specifically L and
68             L. Additionally if you have
69             run QRNA with an input file which has the location of the alignment
70             stored in the FASTA filename as in (ID/START-END) which is the default
71             output format from L produced alignment output,
72             this module will re-number start/end for the two sequences so they are
73             in the actual coordinates of the sequence rather than the relative
74             coordinates of the alignment. You may find the bioperl utillity
75             script search2alnblocks useful in creating your input files for QRNA.
76              
77             Some other words of warning, QRNA uses a 0 based numbering system for
78             sequence locations, Bioperl uses a 1 based system. You'll notice that
79             locations will be +1 they are reported in the raw QRNA output.
80              
81             =head1 FEEDBACK
82              
83             =head2 Mailing Lists
84              
85             User feedback is an integral part of the evolution of this and other
86             Bioperl modules. Send your comments and suggestions preferably to
87             the Bioperl mailing list. Your participation is much appreciated.
88              
89             bioperl-l@bioperl.org - General discussion
90             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
91              
92             =head2 Support
93              
94             Please direct usage questions or support issues to the mailing list:
95              
96             I
97              
98             rather than to the module maintainer directly. Many experienced and
99             reponsive experts will be able look at the problem and quickly
100             address it. Please include a thorough description of the problem
101             with code and data examples if at all possible.
102              
103             =head2 Reporting Bugs
104              
105             Report bugs to the Bioperl bug tracking system to help us keep track
106             of the bugs and their resolution. Bug reports can be submitted via
107             the web:
108              
109             https://github.com/bioperl/bioperl-live/issues
110              
111             =head1 AUTHOR - Jason Stajich
112              
113             Email jason-at-bioperl-dot-org
114              
115             =head1 APPENDIX
116              
117             The rest of the documentation details each of the object methods.
118             Internal methods are usually preceded with a _
119              
120             =cut
121              
122              
123             # Let the code begin...
124              
125              
126             package Bio::Tools::QRNA;
127 1     1   381 use vars qw(@Models);
  1         1  
  1         33  
128 1     1   3 use strict;
  1         1  
  1         15  
129              
130 1     1   319 use Bio::SeqFeature::Generic;
  1         2  
  1         22  
131 1     1   277 use Bio::SeqFeature::FeaturePair;
  1         2  
  1         24  
132              
133 1     1   4 use base qw(Bio::Root::IO Bio::SeqAnalysisParserI);
  1         1  
  1         1516  
134             @Models = qw(OTH COD RNA);
135              
136             =head2 new
137              
138             Title : new
139             Usage : my $obj = Bio::Tools::QRNA->new();
140             Function: Builds a new Bio::Tools::QRNA object
141             Returns : an instance of Bio::Tools::QRNA
142             Args : -fh/-file filehandle/filename standard input for
143             Bio::Root:IO objects
144              
145             =cut
146              
147             =head2 next_feature
148              
149             Title : next_feature
150             Usage : my $feature = $parser->next_feature
151             Function: Get the next QRNA feature
152             Returns :
153             Args :
154              
155              
156             =cut
157              
158             sub next_feature {
159 48     48 1 47 my ($self) = @_;
160 48 100       33 my $f = shift @{$self->{'_parsed_features'} || []};
  48         94  
161 48 100 66     113 if( ! defined $f && $self->_parse_pair ) {
162 47 50       35 $f = shift @{$self->{'_parsed_features'} || []};
  47         93  
163             }
164 48         73 return $f;
165             }
166              
167             sub _parse_pair {
168 48     48   43 my ($self,@args) = @_;
169 48         49 my (@features,%data);
170 48         34 my $seenstart = 0;
171 48         74 while( defined( $_ = $self->_readline) ) {
172 927 100       1173 next if( /^\#\-\-/o );
173 915 100       5340 if( /^\#\s+(qrna)\s+(\S+)\s+\(([^\)]+)\)/o ) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
174 2         4 $self->program_name($1);
175 2         4 $self->program_version($2);
176 2         5 $self->program_date($3);
177             } elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) {
178 2         5 $self->PAM_model($2);
179             } elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) {
180 2         6 $self->RNA_model($2);
181             } elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) {
182 2         3 $self->seq_file($2);
183             } elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) {
184 94 100       125 if( $seenstart ) {
185 46 50       63 if( $data{'alignment_len'} ) {
186 46         85 push @features, $self->_make_feature(\%data);
187             }
188 46         86 $self->_pushback($_);
189 46         52 last;
190             }
191 48         38 $seenstart = 1;
192             } elsif( /^\#/ ) {
193 4         7 next;
194             } elsif( />(\S+)\s+\((\d+)\)/ ) {
195 96 100       58 if( @{$data{'seqs'} || []} == 2 ) {
  96 50       261  
196 0         0 $self->warn( "already seen seqs ".join(' ', ,map { $_->[0] }
197 0         0 @{$data{'seqs'}}). "\n");
  0         0  
198             } else {
199 96         60 push @{$data{'seqs'}}, [$1,$2];
  96         232  
200             }
201             } elsif( /^length alignment:\s+(\d+)\s+\(id\=(\d+(\.\d+)?)\)/o ) {
202            
203 51 100       91 if( $data{'alignment_len'} ) {
204 3         5 push @features, $self->_make_feature(\%data);
205             # reset all the data but the 'seqs' field
206 3         19 %data = ( 'seqs' => $data{'seqs'} );
207             }
208            
209 51 50       85 if( /\(((sre_)?shuffled)\)/ ) {
210 0         0 $data{'shuffled'} = $1;
211             }
212 51         77 $data{'alignment_len'} = $1;
213 51         53 $data{'alignment_pid'} = $2;
214             } elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]\((\d+)\)\s+
215             \-\-\s+\((\S+\s+\S+\s+\S+\s+\S+)\)/ox ) {
216 102         380 $data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6];
217 102         96 @{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7);
  102         387  
218             } elsif( /^winner\s+\=\s+(\S{3})/ ) {
219 51         83 $data{'winning_model'} = $1;
220             } elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) {
221             # QRNA is 0-based
222             # Bioperl is 1 based
223 153         338 $data{'model_location'}->{$1} = [ $2,$3 ];
224             } elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) {
225 102         303 while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) {
226 306         431 my ($model,$score)= ($1,$2);
227 306 100       432 if( $model =~ s/^logoddspost// ) {
228 153         449 $data{'model_scores'}->{'logoddspost'}->{$model} = $score;
229             } else {
230 153         485 $data{'model_scores'}->{'bits'}->{$model} = $score;
231             }
232             }
233             }
234 865         1580 $data{'entry'} .= $_;
235             }
236 48 100       74 if( @features ) {
237 47         26 push @{$self->{'_parsed_features'}}, @features;
  47         66  
238 47         372 return scalar @features;
239             }
240 1         9 return 0;
241             }
242              
243             =head2 PAM_model
244              
245             Title : PAM_model
246             Usage : $obj->PAM_model($newval)
247             Function:
248             Example :
249             Returns : value of PAM_model (a scalar)
250             Args : on set, new value (a scalar or undef, optional)
251              
252              
253             =cut
254              
255             sub PAM_model{
256 3     3 1 3 my $self = shift;
257 3 100       7 return $self->{'PAM_model'} = shift if @_;
258 1         3 return $self->{'PAM_model'};
259             }
260              
261             =head2 RNA_model
262              
263             Title : RNA_model
264             Usage : $obj->RNA_model($newval)
265             Function:
266             Example :
267             Returns : value of RNA_model (a scalar)
268             Args : on set, new value (a scalar or undef, optional)
269              
270              
271             =cut
272              
273             sub RNA_model{
274 3     3 1 4 my $self = shift;
275              
276 3 100       7 return $self->{'RNA_model'} = shift if @_;
277 1         3 return $self->{'RNA_model'};
278             }
279              
280             =head2 seq_file
281              
282             Title : seq_file
283             Usage : $obj->seq_file($newval)
284             Function:
285             Example :
286             Returns : value of seq_file (a scalar)
287             Args : on set, new value (a scalar or undef, optional)
288              
289              
290             =cut
291              
292             sub seq_file{
293 3     3 1 4 my $self = shift;
294              
295 3 100       9 return $self->{'seq_file'} = shift if @_;
296 1         2 return $self->{'seq_file'};
297             }
298              
299              
300             =head2 program_name
301              
302             Title : program_name
303             Usage : $obj->program_name($newval)
304             Function:
305             Example :
306             Returns : value of program_name (a scalar)
307             Args : on set, new value (a scalar or undef, optional)
308              
309              
310             =cut
311              
312             sub program_name{
313 52     52 1 48 my $self = shift;
314              
315 52 100       64 return $self->{'program_name'} = shift if @_;
316 50   50     295 return $self->{'program_name'} || 'qrna';
317             }
318              
319             =head2 program_version
320              
321             Title : program_version
322             Usage : $obj->program_version($newval)
323             Function:
324             Example :
325             Returns : value of program_version (a scalar)
326             Args : on set, new value (a scalar or undef, optional)
327              
328              
329             =cut
330              
331             sub program_version{
332 3     3 1 4 my $self = shift;
333              
334 3 100       7 return $self->{'program_version'} = shift if @_;
335 1         4 return $self->{'program_version'};
336             }
337              
338             =head2 program_date
339              
340             Title : program_date
341             Usage : $obj->program_date($newval)
342             Function:
343             Example :
344             Returns : value of program_date (a scalar)
345             Args : on set, new value (a scalar or undef, optional)
346              
347              
348             =cut
349              
350             sub program_date{
351 3     3 1 3 my $self = shift;
352 3 100       9 return $self->{'program_date'} = shift if @_;
353 1         3 return $self->{'program_date'};
354             }
355              
356             sub _make_feature {
357 49     49   38 my ($self,$data) = @_;
358 49         44 my ($qoffset,$hoffset) = (1,1);
359             # when you run qrna and have produced ID/START-END
360             # formatted input strings we can remap the location
361             # to the original
362              
363             # name is stored as the first entry in the seq array ref
364             my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0],
365 49         65 $data->{'seqs'}->[1]->[0]);
366 49 100       78 if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
367 3         6 ($qid,$qoffset) = ($1,$2);
368             }
369 49 100       63 if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
370 3         4 ($hid,$hoffset) = ($1,$2);
371             }
372              
373 49         97 my $f = Bio::SeqFeature::FeaturePair->new();
374              
375 49         39 my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}};
  49         93  
376             my $qf = Bio::SeqFeature::Generic->new
377             ( -primary_tag => $data->{'winning_model'},
378             -source_tag => $self->program_name,
379 49 100       108 -score => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}},
380             -start => $s+$qoffset,
381             -end => $e+$qoffset,
382             -seq_id => $qid,
383             -strand => ($s < $e ) ? 1 : -1,
384             );
385              
386 49         129 my $hf = Bio::SeqFeature::Generic->new
387             ( -primary_tag => $qf->primary_tag,
388             -source_tag => $qf->source_tag,
389             -score => $qf->score,
390             -seq_id => $hid,
391             -start => $s + $hoffset,
392             -end => $e + $hoffset,
393             -strand => $qf->strand,
394             );
395 49         122 $f->feature1($qf);
396 49         62 $f->feature2($hf);
397 49         79 $f->add_tag_value('alignment_len', $data->{'alignment_len'});
398 49         69 $f->add_tag_value('alignment_pid', $data->{'alignment_pid'});
399             # store the other model scores and data
400 49         58 foreach my $model ( @Models ) {
401 147         269 $f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model});
402 147         304 $f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model});
403 147 50       210 if( ! $data->{'model_location'}->{$model} ) {
404 0 0       0 if( $self->verbose > 0 ) {
405 0         0 $self->debug( $data->{'entry'} );
406             }
407             $self->throw("no location parsed for $model in ",
408 0         0 (map { @$_ } @{$data->{'seqs'}}), " ", $f->start, " ", $f->end);
  0         0  
  0         0  
409             } else {
410             $f->add_tag_value("$model\_positions",
411 147         118 join("..",@{$data->{'model_location'}->{$model} }));
  147         281  
412             }
413             }
414             # probably a better way to store this - as
415             # a seq object perhaps
416 49         41 $f->add_tag_value('seq1', @{$data->{'seqs'}->[0]});
  49         85  
417 49         40 $f->add_tag_value('seq2', @{$data->{'seqs'}->[1]});
  49         72  
418 49 50       76 $f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0;
419 49 50       64 if( $data->{'shuffled'} ) {
420 0         0 $f->add_tag_value('shuffled', $data->{'shuffled'});
421             }
422 49         80 return $f;
423             }
424             1;