File Coverage

Bio/Tools/QRNA.pm
Criterion Covered Total %
statement 114 124 91.9
branch 65 74 87.8
condition 3 5 60.0
subroutine 14 14 100.0
pod 7 7 100.0
total 203 224 90.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::QRNA
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::QRNA - A Parser for qrna output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::QRNA;
21             my $parser = Bio::Tools::QRNA->new(-file => $qrnaoutput);
22             while( my $feature = $parser->next_feature ) {
23             # do something here
24             }
25              
26             =head1 DESCRIPTION
27              
28             Parses QRNA output (E.Rivas:
29             http://selab.janelia.org/software.html
30             ftp://selab.janelia.org/pub/software/qrna/).
31              
32             This module is not complete, but currently it packs information from
33             each QRNA alignment into a single Bio::SeqFeature::Generic object.
34              
35             Not all options for QRNA output have been tested or tried. It has
36             been tested on sliding window output (-w -x) and shuffled output (-b
37             or -B).
38              
39             See t/QRNA.t for example usage.
40              
41             At some point we may have more complicated feature object which will
42             support this data rather than forcing most of the information into
43             tag/value pairs in a SeqFeature::Generic.
44              
45             Running with -verbose =E 1 will store extra data in the feature. The
46             entire unparsed entry for a particular feature will be stored as a
47             string in the tag 'entry' it is accessible via:
48              
49             my ($entry) = $f->each_tag_value('entry');
50              
51             The winning model for any given alignment test will be the name stored
52             in the primary_tag field of feature. The bit score will stored in the
53             score field. The logoddpost is available via the a tag/value pair.
54             This example code will show how to print out the score and log odds
55             post for each model.
56              
57             # assuming you got a feature already
58             print "model score logoddspost\n";
59             foreach my $model ( qw(OTH COD RNA) ) {
60             my ($score) = $f->get_tag_values("$model\_score");
61             my ($logoddspost) = $f->get_tag_values("$model\_logoddspost");
62             print "$model $score $logoddspost\n";
63             }
64              
65             The start and end of the alignment for both the query and hit sequence
66             are available through the L interface,
67             specifically L and
68             L. Additionally if you have
69             run QRNA with an input file which has the location of the alignment
70             stored in the FASTA filename as in (ID/START-END) which is the default
71             output format from L produced alignment output,
72             this module will re-number start/end for the two sequences so they are
73             in the actual coordinates of the sequence rather than the relative
74             coordinates of the alignment. You may find the bioperl utillity
75             script search2alnblocks useful in creating your input files for QRNA.
76              
77             Some other words of warning, QRNA uses a 0 based numbering system for
78             sequence locations, Bioperl uses a 1 based system. You'll notice that
79             locations will be +1 they are reported in the raw QRNA output.
80              
81             =head1 FEEDBACK
82              
83             =head2 Mailing Lists
84              
85             User feedback is an integral part of the evolution of this and other
86             Bioperl modules. Send your comments and suggestions preferably to
87             the Bioperl mailing list. Your participation is much appreciated.
88              
89             bioperl-l@bioperl.org - General discussion
90             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
91              
92             =head2 Support
93              
94             Please direct usage questions or support issues to the mailing list:
95              
96             I
97              
98             rather than to the module maintainer directly. Many experienced and
99             reponsive experts will be able look at the problem and quickly
100             address it. Please include a thorough description of the problem
101             with code and data examples if at all possible.
102              
103             =head2 Reporting Bugs
104              
105             Report bugs to the Bioperl bug tracking system to help us keep track
106             of the bugs and their resolution. Bug reports can be submitted via
107             the web:
108              
109             https://github.com/bioperl/bioperl-live/issues
110              
111             =head1 AUTHOR - Jason Stajich
112              
113             Email jason-at-bioperl-dot-org
114              
115             =head1 APPENDIX
116              
117             The rest of the documentation details each of the object methods.
118             Internal methods are usually preceded with a _
119              
120             =cut
121              
122              
123             # Let the code begin...
124              
125              
126             package Bio::Tools::QRNA;
127 1     1   551 use vars qw(@Models);
  1         2  
  1         37  
128 1     1   5 use strict;
  1         1  
  1         16  
129              
130 1     1   287 use Bio::SeqFeature::Generic;
  1         2  
  1         26  
131 1     1   290 use Bio::SeqFeature::FeaturePair;
  1         3  
  1         28  
132              
133 1     1   5 use base qw(Bio::Root::IO Bio::SeqAnalysisParserI);
  1         2  
  1         1322  
134             @Models = qw(OTH COD RNA);
135              
136             =head2 new
137              
138             Title : new
139             Usage : my $obj = Bio::Tools::QRNA->new();
140             Function: Builds a new Bio::Tools::QRNA object
141             Returns : an instance of Bio::Tools::QRNA
142             Args : -fh/-file filehandle/filename standard input for
143             Bio::Root:IO objects
144              
145             =cut
146              
147             =head2 next_feature
148              
149             Title : next_feature
150             Usage : my $feature = $parser->next_feature
151             Function: Get the next QRNA feature
152             Returns :
153             Args :
154              
155              
156             =cut
157              
158             sub next_feature {
159 48     48 1 77 my ($self) = @_;
160 48 100       47 my $f = shift @{$self->{'_parsed_features'} || []};
  48         93  
161 48 100 66     103 if( ! defined $f && $self->_parse_pair ) {
162 47 50       52 $f = shift @{$self->{'_parsed_features'} || []};
  47         94  
163             }
164 48         92 return $f;
165             }
166              
167             sub _parse_pair {
168 48     48   59 my ($self,@args) = @_;
169 48         48 my (@features,%data);
170 48         48 my $seenstart = 0;
171 48         82 while( defined( $_ = $self->_readline) ) {
172 927 100       1396 next if( /^\#\-\-/o );
173 915 100       4924 if( /^\#\s+(qrna)\s+(\S+)\s+\(([^\)]+)\)/o ) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
174 2         8 $self->program_name($1);
175 2         6 $self->program_version($2);
176 2         5 $self->program_date($3);
177             } elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) {
178 2         5 $self->PAM_model($2);
179             } elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) {
180 2         6 $self->RNA_model($2);
181             } elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) {
182 2         6 $self->seq_file($2);
183             } elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) {
184 94 100       146 if( $seenstart ) {
185 46 50       77 if( $data{'alignment_len'} ) {
186 46         85 push @features, $self->_make_feature(\%data);
187             }
188 46         106 $self->_pushback($_);
189 46         57 last;
190             }
191 48         52 $seenstart = 1;
192             } elsif( /^\#/ ) {
193 4         10 next;
194             } elsif( />(\S+)\s+\((\d+)\)/ ) {
195 96 100       100 if( @{$data{'seqs'} || []} == 2 ) {
  96 50       240  
196 0         0 $self->warn( "already seen seqs ".join(' ', ,map { $_->[0] }
197 0         0 @{$data{'seqs'}}). "\n");
  0         0  
198             } else {
199 96         88 push @{$data{'seqs'}}, [$1,$2];
  96         266  
200             }
201             } elsif( /^length alignment:\s+(\d+)\s+\(id\=(\d+(\.\d+)?)\)/o ) {
202            
203 51 100       87 if( $data{'alignment_len'} ) {
204 3         8 push @features, $self->_make_feature(\%data);
205             # reset all the data but the 'seqs' field
206 3         21 %data = ( 'seqs' => $data{'seqs'} );
207             }
208            
209 51 50       79 if( /\(((sre_)?shuffled)\)/ ) {
210 0         0 $data{'shuffled'} = $1;
211             }
212 51         92 $data{'alignment_len'} = $1;
213 51         71 $data{'alignment_pid'} = $2;
214             } elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]\((\d+)\)\s+
215             \-\-\s+\((\S+\s+\S+\s+\S+\s+\S+)\)/ox ) {
216 102         445 $data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6];
217 102         111 @{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7);
  102         418  
218             } elsif( /^winner\s+\=\s+(\S{3})/ ) {
219 51         107 $data{'winning_model'} = $1;
220             } elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) {
221             # QRNA is 0-based
222             # Bioperl is 1 based
223 153         426 $data{'model_location'}->{$1} = [ $2,$3 ];
224             } elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) {
225 102         345 while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) {
226 306         601 my ($model,$score)= ($1,$2);
227 306 100       554 if( $model =~ s/^logoddspost// ) {
228 153         502 $data{'model_scores'}->{'logoddspost'}->{$model} = $score;
229             } else {
230 153         518 $data{'model_scores'}->{'bits'}->{$model} = $score;
231             }
232             }
233             }
234 865         1814 $data{'entry'} .= $_;
235             }
236 48 100       72 if( @features ) {
237 47         45 push @{$self->{'_parsed_features'}}, @features;
  47         74  
238 47         419 return scalar @features;
239             }
240 1         8 return 0;
241             }
242              
243             =head2 PAM_model
244              
245             Title : PAM_model
246             Usage : $obj->PAM_model($newval)
247             Function:
248             Example :
249             Returns : value of PAM_model (a scalar)
250             Args : on set, new value (a scalar or undef, optional)
251              
252              
253             =cut
254              
255             sub PAM_model{
256 3     3 1 5 my $self = shift;
257 3 100       10 return $self->{'PAM_model'} = shift if @_;
258 1         4 return $self->{'PAM_model'};
259             }
260              
261             =head2 RNA_model
262              
263             Title : RNA_model
264             Usage : $obj->RNA_model($newval)
265             Function:
266             Example :
267             Returns : value of RNA_model (a scalar)
268             Args : on set, new value (a scalar or undef, optional)
269              
270              
271             =cut
272              
273             sub RNA_model{
274 3     3 1 6 my $self = shift;
275              
276 3 100       9 return $self->{'RNA_model'} = shift if @_;
277 1         4 return $self->{'RNA_model'};
278             }
279              
280             =head2 seq_file
281              
282             Title : seq_file
283             Usage : $obj->seq_file($newval)
284             Function:
285             Example :
286             Returns : value of seq_file (a scalar)
287             Args : on set, new value (a scalar or undef, optional)
288              
289              
290             =cut
291              
292             sub seq_file{
293 3     3 1 5 my $self = shift;
294              
295 3 100       8 return $self->{'seq_file'} = shift if @_;
296 1         4 return $self->{'seq_file'};
297             }
298              
299              
300             =head2 program_name
301              
302             Title : program_name
303             Usage : $obj->program_name($newval)
304             Function:
305             Example :
306             Returns : value of program_name (a scalar)
307             Args : on set, new value (a scalar or undef, optional)
308              
309              
310             =cut
311              
312             sub program_name{
313 52     52 1 53 my $self = shift;
314              
315 52 100       74 return $self->{'program_name'} = shift if @_;
316 50   50     311 return $self->{'program_name'} || 'qrna';
317             }
318              
319             =head2 program_version
320              
321             Title : program_version
322             Usage : $obj->program_version($newval)
323             Function:
324             Example :
325             Returns : value of program_version (a scalar)
326             Args : on set, new value (a scalar or undef, optional)
327              
328              
329             =cut
330              
331             sub program_version{
332 3     3 1 4 my $self = shift;
333              
334 3 100       9 return $self->{'program_version'} = shift if @_;
335 1         3 return $self->{'program_version'};
336             }
337              
338             =head2 program_date
339              
340             Title : program_date
341             Usage : $obj->program_date($newval)
342             Function:
343             Example :
344             Returns : value of program_date (a scalar)
345             Args : on set, new value (a scalar or undef, optional)
346              
347              
348             =cut
349              
350             sub program_date{
351 3     3 1 5 my $self = shift;
352 3 100       10 return $self->{'program_date'} = shift if @_;
353 1         4 return $self->{'program_date'};
354             }
355              
356             sub _make_feature {
357 49     49   64 my ($self,$data) = @_;
358 49         57 my ($qoffset,$hoffset) = (1,1);
359             # when you run qrna and have produced ID/START-END
360             # formatted input strings we can remap the location
361             # to the original
362              
363             # name is stored as the first entry in the seq array ref
364             my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0],
365 49         90 $data->{'seqs'}->[1]->[0]);
366 49 100       96 if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
367 3         9 ($qid,$qoffset) = ($1,$2);
368             }
369 49 100       71 if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
370 3         6 ($hid,$hoffset) = ($1,$2);
371             }
372              
373 49         113 my $f = Bio::SeqFeature::FeaturePair->new();
374              
375 49         54 my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}};
  49         132  
376             my $qf = Bio::SeqFeature::Generic->new
377             ( -primary_tag => $data->{'winning_model'},
378             -source_tag => $self->program_name,
379 49 100       98 -score => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}},
380             -start => $s+$qoffset,
381             -end => $e+$qoffset,
382             -seq_id => $qid,
383             -strand => ($s < $e ) ? 1 : -1,
384             );
385              
386 49         113 my $hf = Bio::SeqFeature::Generic->new
387             ( -primary_tag => $qf->primary_tag,
388             -source_tag => $qf->source_tag,
389             -score => $qf->score,
390             -seq_id => $hid,
391             -start => $s + $hoffset,
392             -end => $e + $hoffset,
393             -strand => $qf->strand,
394             );
395 49         158 $f->feature1($qf);
396 49         85 $f->feature2($hf);
397 49         104 $f->add_tag_value('alignment_len', $data->{'alignment_len'});
398 49         91 $f->add_tag_value('alignment_pid', $data->{'alignment_pid'});
399             # store the other model scores and data
400 49         90 foreach my $model ( @Models ) {
401 147         338 $f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model});
402 147         309 $f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model});
403 147 50       237 if( ! $data->{'model_location'}->{$model} ) {
404 0 0       0 if( $self->verbose > 0 ) {
405 0         0 $self->debug( $data->{'entry'} );
406             }
407             $self->throw("no location parsed for $model in ",
408 0         0 (map { @$_ } @{$data->{'seqs'}}), " ", $f->start, " ", $f->end);
  0         0  
  0         0  
409             } else {
410             $f->add_tag_value("$model\_positions",
411 147         157 join("..",@{$data->{'model_location'}->{$model} }));
  147         297  
412             }
413             }
414             # probably a better way to store this - as
415             # a seq object perhaps
416 49         53 $f->add_tag_value('seq1', @{$data->{'seqs'}->[0]});
  49         108  
417 49         55 $f->add_tag_value('seq2', @{$data->{'seqs'}->[1]});
  49         89  
418 49 50       118 $f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0;
419 49 50       83 if( $data->{'shuffled'} ) {
420 0         0 $f->add_tag_value('shuffled', $data->{'shuffled'});
421             }
422 49         87 return $f;
423             }
424             1;