File Coverage

Bio/Tools/QRNA.pm
Criterion Covered Total %
statement 114 124 91.9
branch 65 74 87.8
condition 3 5 60.0
subroutine 14 14 100.0
pod 7 7 100.0
total 203 224 90.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::QRNA
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::QRNA - A Parser for qrna output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::QRNA;
21             my $parser = Bio::Tools::QRNA->new(-file => $qrnaoutput);
22             while( my $feature = $parser->next_feature ) {
23             # do something here
24             }
25              
26             =head1 DESCRIPTION
27              
28             Parses QRNA output (E.Rivas:
29             http://selab.janelia.org/software.html
30             ftp://selab.janelia.org/pub/software/qrna/).
31              
32             This module is not complete, but currently it packs information from
33             each QRNA alignment into a single Bio::SeqFeature::Generic object.
34              
35             Not all options for QRNA output have been tested or tried. It has
36             been tested on sliding window output (-w -x) and shuffled output (-b
37             or -B).
38              
39             See t/QRNA.t for example usage.
40              
41             At some point we may have more complicated feature object which will
42             support this data rather than forcing most of the information into
43             tag/value pairs in a SeqFeature::Generic.
44              
45             Running with -verbose =E 1 will store extra data in the feature. The
46             entire unparsed entry for a particular feature will be stored as a
47             string in the tag 'entry' it is accessible via:
48              
49             my ($entry) = $f->each_tag_value('entry');
50              
51             The winning model for any given alignment test will be the name stored
52             in the primary_tag field of feature. The bit score will stored in the
53             score field. The logoddpost is available via the a tag/value pair.
54             This example code will show how to print out the score and log odds
55             post for each model.
56              
57             # assuming you got a feature already
58             print "model score logoddspost\n";
59             foreach my $model ( qw(OTH COD RNA) ) {
60             my ($score) = $f->get_tag_values("$model\_score");
61             my ($logoddspost) = $f->get_tag_values("$model\_logoddspost");
62             print "$model $score $logoddspost\n";
63             }
64              
65             The start and end of the alignment for both the query and hit sequence
66             are available through the L interface,
67             specifically L and
68             L. Additionally if you have
69             run QRNA with an input file which has the location of the alignment
70             stored in the FASTA filename as in (ID/START-END) which is the default
71             output format from L produced alignment output,
72             this module will re-number start/end for the two sequences so they are
73             in the actual coordinates of the sequence rather than the relative
74             coordinates of the alignment. You may find the bioperl utillity
75             script search2alnblocks useful in creating your input files for QRNA.
76              
77             Some other words of warning, QRNA uses a 0 based numbering system for
78             sequence locations, Bioperl uses a 1 based system. You'll notice that
79             locations will be +1 they are reported in the raw QRNA output.
80              
81             =head1 FEEDBACK
82              
83             =head2 Mailing Lists
84              
85             User feedback is an integral part of the evolution of this and other
86             Bioperl modules. Send your comments and suggestions preferably to
87             the Bioperl mailing list. Your participation is much appreciated.
88              
89             bioperl-l@bioperl.org - General discussion
90             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
91              
92             =head2 Support
93              
94             Please direct usage questions or support issues to the mailing list:
95              
96             I
97              
98             rather than to the module maintainer directly. Many experienced and
99             reponsive experts will be able look at the problem and quickly
100             address it. Please include a thorough description of the problem
101             with code and data examples if at all possible.
102              
103             =head2 Reporting Bugs
104              
105             Report bugs to the Bioperl bug tracking system to help us keep track
106             of the bugs and their resolution. Bug reports can be submitted via
107             the web:
108              
109             https://github.com/bioperl/bioperl-live/issues
110              
111             =head1 AUTHOR - Jason Stajich
112              
113             Email jason-at-bioperl-dot-org
114              
115             =head1 APPENDIX
116              
117             The rest of the documentation details each of the object methods.
118             Internal methods are usually preceded with a _
119              
120             =cut
121              
122              
123             # Let the code begin...
124              
125              
126             package Bio::Tools::QRNA;
127 1     1   495 use vars qw(@Models);
  1         1  
  1         40  
128 1     1   3 use strict;
  1         1  
  1         16  
129              
130 1     1   378 use Bio::SeqFeature::Generic;
  1         2  
  1         28  
131 1     1   295 use Bio::SeqFeature::FeaturePair;
  1         2  
  1         26  
132              
133 1     1   4 use base qw(Bio::Root::IO Bio::SeqAnalysisParserI);
  1         1  
  1         1292  
134             @Models = qw(OTH COD RNA);
135              
136             =head2 new
137              
138             Title : new
139             Usage : my $obj = Bio::Tools::QRNA->new();
140             Function: Builds a new Bio::Tools::QRNA object
141             Returns : an instance of Bio::Tools::QRNA
142             Args : -fh/-file filehandle/filename standard input for
143             Bio::Root:IO objects
144              
145             =cut
146              
147             =head2 next_feature
148              
149             Title : next_feature
150             Usage : my $feature = $parser->next_feature
151             Function: Get the next QRNA feature
152             Returns :
153             Args :
154              
155              
156             =cut
157              
158             sub next_feature {
159 48     48 1 46 my ($self) = @_;
160 48 100       37 my $f = shift @{$self->{'_parsed_features'} || []};
  48         89  
161 48 100 66     106 if( ! defined $f && $self->_parse_pair ) {
162 47 50       35 $f = shift @{$self->{'_parsed_features'} || []};
  47         92  
163             }
164 48         78 return $f;
165             }
166              
167             sub _parse_pair {
168 48     48   41 my ($self,@args) = @_;
169 48         33 my (@features,%data);
170 48         31 my $seenstart = 0;
171 48         85 while( defined( $_ = $self->_readline) ) {
172 927 100       1298 next if( /^\#\-\-/o );
173 915 100       5575 if( /^\#\s+(qrna)\s+(\S+)\s+\(([^\)]+)\)/o ) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
174 2         5 $self->program_name($1);
175 2         5 $self->program_version($2);
176 2         4 $self->program_date($3);
177             } elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) {
178 2         7 $self->PAM_model($2);
179             } elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) {
180 2         6 $self->RNA_model($2);
181             } elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) {
182 2         7 $self->seq_file($2);
183             } elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) {
184 94 100       121 if( $seenstart ) {
185 46 50       71 if( $data{'alignment_len'} ) {
186 46         82 push @features, $self->_make_feature(\%data);
187             }
188 46         91 $self->_pushback($_);
189 46         49 last;
190             }
191 48         47 $seenstart = 1;
192             } elsif( /^\#/ ) {
193 4         8 next;
194             } elsif( />(\S+)\s+\((\d+)\)/ ) {
195 96 100       61 if( @{$data{'seqs'} || []} == 2 ) {
  96 50       256  
196 0         0 $self->warn( "already seen seqs ".join(' ', ,map { $_->[0] }
197 0         0 @{$data{'seqs'}}). "\n");
  0         0  
198             } else {
199 96         66 push @{$data{'seqs'}}, [$1,$2];
  96         256  
200             }
201             } elsif( /^length alignment:\s+(\d+)\s+\(id\=(\d+(\.\d+)?)\)/o ) {
202            
203 51 100       78 if( $data{'alignment_len'} ) {
204 3         7 push @features, $self->_make_feature(\%data);
205             # reset all the data but the 'seqs' field
206 3         23 %data = ( 'seqs' => $data{'seqs'} );
207             }
208            
209 51 50       88 if( /\(((sre_)?shuffled)\)/ ) {
210 0         0 $data{'shuffled'} = $1;
211             }
212 51         88 $data{'alignment_len'} = $1;
213 51         56 $data{'alignment_pid'} = $2;
214             } elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]\((\d+)\)\s+
215             \-\-\s+\((\S+\s+\S+\s+\S+\s+\S+)\)/ox ) {
216 102         391 $data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6];
217 102         83 @{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7);
  102         420  
218             } elsif( /^winner\s+\=\s+(\S{3})/ ) {
219 51         87 $data{'winning_model'} = $1;
220             } elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) {
221             # QRNA is 0-based
222             # Bioperl is 1 based
223 153         386 $data{'model_location'}->{$1} = [ $2,$3 ];
224             } elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) {
225 102         274 while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) {
226 306         409 my ($model,$score)= ($1,$2);
227 306 100       446 if( $model =~ s/^logoddspost// ) {
228 153         435 $data{'model_scores'}->{'logoddspost'}->{$model} = $score;
229             } else {
230 153         498 $data{'model_scores'}->{'bits'}->{$model} = $score;
231             }
232             }
233             }
234 865         1723 $data{'entry'} .= $_;
235             }
236 48 100       75 if( @features ) {
237 47         36 push @{$self->{'_parsed_features'}}, @features;
  47         57  
238 47         409 return scalar @features;
239             }
240 1         9 return 0;
241             }
242              
243             =head2 PAM_model
244              
245             Title : PAM_model
246             Usage : $obj->PAM_model($newval)
247             Function:
248             Example :
249             Returns : value of PAM_model (a scalar)
250             Args : on set, new value (a scalar or undef, optional)
251              
252              
253             =cut
254              
255             sub PAM_model{
256 3     3 1 7 my $self = shift;
257 3 100       11 return $self->{'PAM_model'} = shift if @_;
258 1         4 return $self->{'PAM_model'};
259             }
260              
261             =head2 RNA_model
262              
263             Title : RNA_model
264             Usage : $obj->RNA_model($newval)
265             Function:
266             Example :
267             Returns : value of RNA_model (a scalar)
268             Args : on set, new value (a scalar or undef, optional)
269              
270              
271             =cut
272              
273             sub RNA_model{
274 3     3 1 6 my $self = shift;
275              
276 3 100       14 return $self->{'RNA_model'} = shift if @_;
277 1         4 return $self->{'RNA_model'};
278             }
279              
280             =head2 seq_file
281              
282             Title : seq_file
283             Usage : $obj->seq_file($newval)
284             Function:
285             Example :
286             Returns : value of seq_file (a scalar)
287             Args : on set, new value (a scalar or undef, optional)
288              
289              
290             =cut
291              
292             sub seq_file{
293 3     3 1 4 my $self = shift;
294              
295 3 100       9 return $self->{'seq_file'} = shift if @_;
296 1         4 return $self->{'seq_file'};
297             }
298              
299              
300             =head2 program_name
301              
302             Title : program_name
303             Usage : $obj->program_name($newval)
304             Function:
305             Example :
306             Returns : value of program_name (a scalar)
307             Args : on set, new value (a scalar or undef, optional)
308              
309              
310             =cut
311              
312             sub program_name{
313 52     52 1 37 my $self = shift;
314              
315 52 100       74 return $self->{'program_name'} = shift if @_;
316 50   50     345 return $self->{'program_name'} || 'qrna';
317             }
318              
319             =head2 program_version
320              
321             Title : program_version
322             Usage : $obj->program_version($newval)
323             Function:
324             Example :
325             Returns : value of program_version (a scalar)
326             Args : on set, new value (a scalar or undef, optional)
327              
328              
329             =cut
330              
331             sub program_version{
332 3     3 1 5 my $self = shift;
333              
334 3 100       11 return $self->{'program_version'} = shift if @_;
335 1         5 return $self->{'program_version'};
336             }
337              
338             =head2 program_date
339              
340             Title : program_date
341             Usage : $obj->program_date($newval)
342             Function:
343             Example :
344             Returns : value of program_date (a scalar)
345             Args : on set, new value (a scalar or undef, optional)
346              
347              
348             =cut
349              
350             sub program_date{
351 3     3 1 3 my $self = shift;
352 3 100       12 return $self->{'program_date'} = shift if @_;
353 1         4 return $self->{'program_date'};
354             }
355              
356             sub _make_feature {
357 49     49   44 my ($self,$data) = @_;
358 49         44 my ($qoffset,$hoffset) = (1,1);
359             # when you run qrna and have produced ID/START-END
360             # formatted input strings we can remap the location
361             # to the original
362              
363             # name is stored as the first entry in the seq array ref
364             my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0],
365 49         67 $data->{'seqs'}->[1]->[0]);
366 49 100       73 if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
367 3         6 ($qid,$qoffset) = ($1,$2);
368             }
369 49 100       78 if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
370 3         4 ($hid,$hoffset) = ($1,$2);
371             }
372              
373 49         110 my $f = Bio::SeqFeature::FeaturePair->new();
374              
375 49         47 my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}};
  49         105  
376             my $qf = Bio::SeqFeature::Generic->new
377             ( -primary_tag => $data->{'winning_model'},
378             -source_tag => $self->program_name,
379 49 100       114 -score => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}},
380             -start => $s+$qoffset,
381             -end => $e+$qoffset,
382             -seq_id => $qid,
383             -strand => ($s < $e ) ? 1 : -1,
384             );
385              
386 49         122 my $hf = Bio::SeqFeature::Generic->new
387             ( -primary_tag => $qf->primary_tag,
388             -source_tag => $qf->source_tag,
389             -score => $qf->score,
390             -seq_id => $hid,
391             -start => $s + $hoffset,
392             -end => $e + $hoffset,
393             -strand => $qf->strand,
394             );
395 49         124 $f->feature1($qf);
396 49         77 $f->feature2($hf);
397 49         81 $f->add_tag_value('alignment_len', $data->{'alignment_len'});
398 49         72 $f->add_tag_value('alignment_pid', $data->{'alignment_pid'});
399             # store the other model scores and data
400 49         70 foreach my $model ( @Models ) {
401 147         296 $f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model});
402 147         261 $f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model});
403 147 50       204 if( ! $data->{'model_location'}->{$model} ) {
404 0 0       0 if( $self->verbose > 0 ) {
405 0         0 $self->debug( $data->{'entry'} );
406             }
407             $self->throw("no location parsed for $model in ",
408 0         0 (map { @$_ } @{$data->{'seqs'}}), " ", $f->start, " ", $f->end);
  0         0  
  0         0  
409             } else {
410             $f->add_tag_value("$model\_positions",
411 147         136 join("..",@{$data->{'model_location'}->{$model} }));
  147         302  
412             }
413             }
414             # probably a better way to store this - as
415             # a seq object perhaps
416 49         48 $f->add_tag_value('seq1', @{$data->{'seqs'}->[0]});
  49         92  
417 49         38 $f->add_tag_value('seq2', @{$data->{'seqs'}->[1]});
  49         76  
418 49 50       91 $f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0;
419 49 50       74 if( $data->{'shuffled'} ) {
420 0         0 $f->add_tag_value('shuffled', $data->{'shuffled'});
421             }
422 49         79 return $f;
423             }
424             1;