File Coverage

Bio/Tools/Spidey/Results.pm
Criterion Covered Total %
statement 127 139 91.3
branch 43 52 82.6
condition 2 3 66.6
subroutine 14 15 93.3
pod 9 9 100.0
total 195 218 89.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Spidey::Results
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ryan Golhar
7             #
8             # You may distribute this module under the same terms as perl itself
9              
10             # POD documentation - main docs before the code
11              
12             =head1 NAME
13              
14             Bio::Tools::Spidey::Results - Results of a Spidey run
15              
16             =head1 SYNOPSIS
17              
18             use Bio::Tools::Spidey::Results;
19             my $spidey = Bio::Tools::Spidey::Results->new(-file => 'result.spidey' );
20              
21             # or
22              
23             my $spidey = Bio::Tools::Spidey::Results->new( -fh => \*INPUT );
24              
25             # get the exons before doing anything else
26             my $exonset = $spidey->next_exonset();
27              
28             # parse the results
29             my @exons = $exonset->sub_SeqFeature();
30             print "Total no of Exons: ", scalar(@exons), "\n";
31              
32             print "Genomic sequence length: ", $spidey->genomic_dna_length(), "\n";
33              
34             # $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Spidey::Exons
35             # as sub features
36             print "Delimited on sequence ", $exonset->seq_id(), " from ",
37             $exonset->start(), " to ", $exonset->end(), "\n";
38              
39             foreach my $exon ( $exonset->sub_SeqFeature() ) {
40             # $exon is-a Bio::SeqFeature::FeaturePair
41             print "Exon from ", $exon->start, " to ", $exon->end,
42             " on strand ", $exon->strand(), "\n";
43             # you can get out what it matched using the est_hit attribute
44             my $homol = $exon->est_hit();
45             print "Matched to sequence ", $homol->seq_id,
46             " at ", $homol->start," to ", $homol->end, "\n";
47             }
48              
49             # essential if you gave a filename at initialization (otherwise
50             # the file stays open)
51             $spidey->close();
52              
53             =head1 DESCRIPTION
54              
55             The spidey module provides a parser and results object for spidey
56             output. The spidey results are specialised types of SeqFeatures,
57             meaning you can add them to AnnSeq objects fine, and manipulate them
58             in the "normal" seqfeature manner.
59              
60             The spidey Exon objects are Bio::SeqFeature::FeaturePair inherited
61             objects. The $esthit = $exon-Eest_hit() is the alignment as a
62             feature on the matching object (normally, a cDNA), in which the
63             start/end points are where the hit lies.
64              
65             To make this module work sensibly you need to run
66              
67             spidey -i genomic.fasta -m cDNA.fasta
68              
69             =head1 FEEDBACK
70              
71             =head2 Mailing Lists
72              
73             User feedback is an integral part of the evolution of this and other
74             Bioperl modules. Send your comments and suggestions preferably to one
75             of the Bioperl mailing lists. Your participation is much appreciated.
76              
77             bioperl-l@bioperl.org - General discussion
78             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79              
80             =head2 Support
81              
82             Please direct usage questions or support issues to the mailing list:
83              
84             I
85              
86             rather than to the module maintainer directly. Many experienced and
87             reponsive experts will be able look at the problem and quickly
88             address it. Please include a thorough description of the problem
89             with code and data examples if at all possible.
90              
91             =head2 Reporting Bugs
92              
93             Report bugs to the Bioperl bug tracking system to help us keep track
94             the bugs and their resolution. Bug reports can be submitted via the
95             web:
96              
97             https://github.com/bioperl/bioperl-live/issues
98              
99             =head1 AUTHOR - Ryan Golhar
100              
101             Email golharam@umdnj.edu
102              
103             =head1 APPENDIX
104              
105             The rest of the documentation details each of the object methods.
106             Internal methods are usually preceded with a _
107              
108             =cut
109              
110              
111             # Let the code begin...
112              
113              
114             package Bio::Tools::Spidey::Results;
115 1     1   543 use strict;
  1         2  
  1         26  
116              
117 1     1   4 use File::Basename;
  1         2  
  1         57  
118 1     1   236 use Bio::Root::Root;
  1         2  
  1         32  
119 1     1   247 use Bio::Tools::Spidey::Exon;
  1         3  
  1         36  
120              
121 1     1   5 use base qw(Bio::Tools::AnalysisResult);
  1         2  
  1         253  
122              
123             sub _initialize_state {
124 2     2   4 my($self,@args) = @_;
125              
126             # call the inherited method first
127 2         7 my $make = $self->SUPER::_initialize_state(@args);
128              
129             # my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
130              
131             # delete($self->{'_est_is_first'});
132             # $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
133 2         6 $self->analysis_method("Spidey");
134             }
135              
136             =head2 analysis_method
137              
138             Usage : $spidey->analysis_method();
139             Purpose : Inherited method. Overridden to ensure that the name matches
140             /Spidey/i.
141             Returns : String
142             Argument : n/a
143              
144             =cut
145              
146             #-------------
147             sub analysis_method {
148             #-------------
149 8     8 1 14 my ($self, $method) = @_;
150 8 50 66     22 if($method && ($method !~ /Spidey/i)) {
151 0         0 $self->throw("method $method not supported in " . ref($self));
152             }
153 8         20 return $self->SUPER::analysis_method($method);
154             }
155              
156             =head2 parse_next_alignment
157              
158             Title : parse_next_alignment
159             Usage : @exons = $spidey_result->parse_next_alignment;
160             foreach $exon (@exons) {
161             # do something
162             }
163             Function: Parses the next alignment of the Spidey result file and returns the
164             found exons as an array of Bio::Tools::Spidey::Exon objects. Call
165             this method repeatedly until an empty array is returned to get the
166             results for all alignments.
167             Example :
168             Returns : An array of Bio::Tools::Spidey::Exon objects
169             Args :
170              
171              
172             =cut
173              
174             sub parse_next_alignment {
175 2     2 1 3 my ($self) = @_;
176             # for strand 1 = plus, -1 = minus
177 2         4 my ($started,$version,$strand, $exoncount) = (0,0,0,-1);
178 2         3 my (%seq1props,%seq2props,@exons);
179            
180             # we refer to the properties of each seq by reference
181              
182 2         9 while(defined($_ = $self->_readline())) {
183 164         188 chomp;
184             #
185             # bascially, parse a Spidey result...
186             #
187             # matches: --SPIDEY version 1.40--
188 164 100       747 if( /^--SPIDEY\s+version\s+(\d+\.\d+)--/) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
189 1 50       4 if($started) {
190 0         0 $self->_pushback($_);
191 0         0 return \@exons;
192             }
193 1         2 $version = $1;
194 1 50       6 if ($version != 1.40) {
195 0         0 $self->throw("Spidey parser only designed to work with Spidey v1.40\n");
196             }
197 1         2 $started = 1;
198             } elsif (/^Genomic:\s+(\S+)\s.*,\s+(\d+)\sbp$/ ) {
199             # matches: Genomic: lcl|some_name other information, 1234 bp
200             # $seq1props{'filename'} = $1;
201 1         4 $seq1props{'seqname'} = $1;
202 1         2 $seq1props{'length'} = $2;
203 1         4 $self->genomic_dna_length($seq1props{'length'});
204              
205             } elsif( /^mRNA:\s+(\S+)\s.*,(?:\s+mRNA\s+sequence,)?\s(\d+)\sbp$/ ) {
206             # matches: mRNA:
207             # $seq2props{'filename'} = $1;
208 1         3 $seq2props{'seqname'} = $1;
209 1         3 $seq2props{'length'} = $2;
210              
211             } elsif( /^Strand:/ ) {
212 1 50       2 if (/plus/) {
213 0         0 $strand = 1;
214             } else {
215 1         3 $strand = -1;
216             }
217             } elsif( /^Number of exons: (\d+)/ ) {
218 1         2 $exoncount = $1;
219              
220 1         2 my ($genomic_start, $genomic_stop, $cdna_start, $cdna_stop,
221             $id, $mismatches, $gaps, $splice_donor,
222             $splice_acceptor, $uncertain);
223              
224             # the next $exoncount lines contains information
225             # about the matches of each exon. we should parse
226             # this information here
227              
228 1         3 for (my $ec = 1; $ec <= $exoncount; $ec++) {
229 6 50       80 if (defined($_ = $self->_readline())) {
230 6         10 chomp;
231              
232 6 50       326 if (/^Exon\s$ec[\(\)-]*:\s(\d+)-(\d+)\s\(gen\)\s+(\d+)-(\d+)\s\(mRNA\)\s+id\s([\d\.inf-]+)%\s+mismatches\s(\d+)\s+gaps\s(\d+)\s+splice\ssite\s\(d\s+a\):\s(\d+)\s+(\d+)\s*(\w*)/) {
233 6         18 $genomic_start = $1;
234 6         9 $genomic_stop = $2;
235 6         9 $cdna_start = $3;
236 6         11 $cdna_stop = $4;
237 6         7 $id = $5;
238 6         7 $mismatches = $6;
239 6         7 $gaps = $7;
240 6         6 $splice_donor = $8;
241 6         7 $splice_acceptor = $9;
242 6         8 $uncertain = $10;
243             } else {
244 0         0 $self->throw( "Failed to match anything:\n$_\n");
245             }
246              
247 6         25 my $exon = Bio::Tools::Spidey::Exon->new
248             (-start => $genomic_start,
249             -end => $genomic_stop,
250             -strand => $strand);
251 6         20 $exon->seq_id($seq1props{'seqname'});
252              
253             # feature1 is supposed to be initialized to a Similarity object, but we provide a safety net
254 6 50       8 if ($exon->feature1->can('seqlength')) {
255 6         10 $exon->feature1->seqlength($seq1props{'length'});
256             } else {
257 0         0 $exon->feature1->add_tag_value('seqlength', $seq1props{'length'});
258             }
259              
260             # create and initialize the feature wrapping the 'hit' (the cDNA)
261             my $fea2 = Bio::SeqFeature::Similarity->new
262             (-start => $cdna_start,
263             -end => $cdna_stop,
264             -strand => $strand,
265 6         20 -seq_id => $seq2props{'seqname'},
266             -primary => "aligning_cDNA");
267 6         16 $fea2->seqlength($seq2props{'length'});
268             # store
269 6         16 $exon->est_hit($fea2);
270              
271             # general properties
272 6         12 $exon->source_tag($self->analysis_method());
273 6         15 $exon->percentage_id($5);
274 6         12 $exon->mismatches($6);
275 6         13 $exon->gaps($7);
276 6         12 $exon->donor($8);
277 6         11 $exon->acceptor($9);
278              
279             # push onto array
280 6         20 push(@exons, $exon);
281             } else {
282 0         0 $self->throw("Unexpected end of file reached\n");
283             }
284             }
285             } elsif( /^Number of splice sites:\s+(\d+)/ ) {
286 1         4 $self->splicesites($1);
287             } elsif( /^mRNA coverage:\s+(\d+)%/ ) {
288 1         5 $self->est_coverage($1);
289             } elsif(/^overall percent identity:\s+([\d\.]+)%/ ) {
290 1         4 $self->overall_percentage_id($1);
291             } elsif(/^Missing mRNA ends:\s+(\w+)/ ) {
292 1         4 $self->missing_mrna_ends($1);
293             } elsif( /^Exon (\d+): (\d+)-(\d+) \(gen\)\s+(\d+)-(\d+) \(mRNA\)/ ) {
294 6         10 my ($exon_num, $gen_start, $gen_stop, $cdna_start, $cdna_stop);
295 6         10 $exon_num = $1;
296 6         8 $gen_start = $2;
297 6         7 $gen_stop = $3;
298 6         9 $cdna_start = $4;
299 6         11 $cdna_stop = $5;
300             } elsif( /No alignment found/ ) {
301 0         0 return [];
302             } else {
303             #$self->debug("unmatched $_\n");
304             }
305             }
306             # Typical format:
307             # Exon 1: 36375798-36375691 (gen) 1-108 (mRNA)
308             #
309             #
310             # CCTCTTTTTCTTTGCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
311             # | ||||||||||||||||||||||||||||||||||||||||||||||
312             # ATGTCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
313             # M S G Y I P S Y L D K D E L C V V
314             #
315             #
316             # ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAGGT
317             # ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
318             # ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAG
319             # C G D K A T G Y H Y R C I T C E G C K
320             #
321             #
322             # AAATGGCA
323             #
324 2 100       9 @exons ? return \@exons : return ;
325             }
326              
327             =head2 next_exonset
328              
329             Title : next_exonset
330             Usage : $exonset = $spidey_result->parse_next_exonset;
331             print "Exons start at ", $exonset->start(),
332             "and end at ", $exonset->end(), "\n";
333             for $exon ($exonset->sub_SeqFeature()) {
334             # do something
335             }
336             Function: Parses the next alignment of the Spidey result file and returns the
337             set of exons as a container of features. The container is itself
338             a Bio::SeqFeature::Generic object, with the Bio::Tools::Spidey::Exon
339             objects as sub features. Start, end, and strand of the container
340             will represent the total region covered by the exons of this set.
341              
342             See the documentation of parse_next_alignment() for further
343             reference about parsing and how the information is stored.
344             Example :
345             Returns : An Bio::SeqFeature::Generic object holding Bio::Tools::Spidey::Exon
346             objects as sub features.
347             Args :
348              
349             =cut
350              
351             sub next_exonset {
352 2     2 1 6 my $self = shift;
353 2         3 my $exonset;
354              
355             # get the next array of exons
356 2         4 my $exons = $self->parse_next_alignment();
357 2 100       7 if( ! defined $exons ) {
358 1         6 $self->warn("No exons returned");
359 1         3 return;
360             }
361 1 50       4 if( @$exons == 0 ) {
362 0         0 return Bio::SeqFeature::Generic->new();
363             }
364             # create the container of exons as a feature object itself, with the
365             # data of the first exon for initialization
366 1         7 $exonset = Bio::SeqFeature::Generic->new('-start' => $exons->[0]->start(),
367             '-end' => $exons->[-1]->end(),
368             '-strand' => $exons->[0]->strand(),
369             '-primary' => "ExonSet");
370 1         5 $exonset->source_tag($exons->[0]->source_tag());
371 1         3 $exonset->seq_id($exons->[0]->seq_id());
372             # now add all exons as sub features, with enabling EXPANsion of the region
373             # covered in total
374 1         3 foreach my $exon (@$exons) {
375 6         12 $exonset->add_sub_SeqFeature($exon, 'EXPAND');
376             }
377 1         5 return $exonset;
378             }
379              
380             =head2 next_feature
381              
382             Title : next_feature
383             Usage : while($exonset = $spidey->next_feature()) {
384             # do something
385             }
386             Function: Does the same as L. See there for documentation of
387             the functionality. Call this method repeatedly until FALSE is
388             returned.
389              
390             The returned object is actually a SeqFeatureI implementing object.
391             This method is required for classes implementing the
392             SeqAnalysisParserI interface, and is merely an alias for
393             next_exonset() at present.
394              
395             Example :
396             Returns : A Bio::SeqFeature::Generic object.
397             Args :
398              
399             =cut
400              
401             sub next_feature {
402 0     0 1 0 my ($self,@args) = @_;
403             # even though next_exonset doesn't expect any args (and this method
404             # does neither), we pass on args in order to be prepared if this changes
405             # ever
406 0         0 return $self->next_exonset(@args);
407             }
408              
409             =head2 genomic_dna_length
410              
411             Title : genomic_dna_length
412             Usage : $spidey->genomic_dna_length();
413             Function: Returns the length of the genomic DNA used in this Spidey result
414             Example :
415             Returns : An integer value.
416             Args :
417              
418             =cut
419              
420             sub genomic_dna_length {
421 2     2 1 711 my ($self, @args) = @_;
422 2         3 my $val;
423              
424 2 100       4 if(@args) {
425 1         2 $val = shift(@args);
426 1         3 $self->{'genomic_dna_length'} = $val;
427             } else {
428 1         3 $val = $self->{'genomic_dna_length'};
429             }
430 2         6 return $val;
431             }
432              
433             =head2 splicesites
434              
435             Title : splicesites
436             Usage : $spidey->splicesites();
437             Function: Returns the number of splice sites found in this Spidey result
438             Example :
439             Returns : An integer value.
440             Args :
441              
442             =cut
443              
444             sub splicesites {
445 2     2 1 6 my ($self, @args) = @_;
446 2         3 my $val;
447              
448 2 100       6 if(@args) {
449 1         1 $val = shift(@args);
450 1         3 $self->{'splicesites'} = $val;
451             } else {
452 1         2 $val = $self->{'splicesites'};
453             }
454 2         7 return $val;
455             }
456              
457             =head2 est_coverage
458              
459             Title : est_coverage
460             Usage : $spidey->est_coverage();
461             Function: Returns the percent of est coverage in this Spidey result
462             Example :
463             Returns : An integer value.
464             Args :
465              
466             =cut
467              
468             sub est_coverage {
469 2     2 1 5 my ($self, @args) = @_;
470 2         3 my $val;
471            
472 2 100       4 if(@args) {
473 1         2 $val = shift(@args);
474 1         2 $self->{'est_coverage'} = $val;
475             } else {
476 1         3 $val = $self->{'est_coverage'};
477             }
478 2         7 return $val;
479             }
480              
481             =head2 overall_percentage_id
482              
483             Title : overall_percentage_id
484             Usage : $spidey->overall_percentage_id();
485             Function: Returns the overall percent id in this Spidey result
486             Example :
487             Returns : An float value.
488             Args :
489              
490             =cut
491              
492             sub overall_percentage_id {
493 2     2 1 6 my ($self, @args) = @_;
494 2         3 my $val;
495              
496 2 100       6 if(@args) {
497 1         2 $val = shift(@args);
498 1         2 $self->{'overall_percentage_id'} = $val;
499             } else {
500 1         2 $val = $self->{'overall_percentage_id'};
501             }
502 2         6 return $val;
503             }
504              
505             =head2 missing_mrna_ends
506              
507             Title : missing_mrna_ends
508             Usage : $spidey->missing_mrna_ends();
509             Function: Returns left/right/neither from Spidey
510             Example :
511             Returns : A string value.
512             Args :
513              
514             =cut
515              
516             sub missing_mrna_ends {
517 2     2 1 6 my ($self, @args) = @_;
518 2         3 my $val;
519              
520 2 100       6 if(@args) {
521 1         2 $val = shift(@args);
522 1         2 $self->{'missing_mrna_ends'} = $val;
523             } else {
524 1         3 $val = $self->{'missing_mrna_ends'};
525             }
526 2         6 return $val;
527             }
528              
529             1;