File Coverage

Bio/Tools/Genewise.pm
Criterion Covered Total %
statement 122 128 95.3
branch 27 36 75.0
condition 2 3 66.6
subroutine 18 18 100.0
pod 2 3 66.6
total 171 188 90.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Genewise
3             #
4             # Copyright Fugu Team
5             #
6             # You may distribute this module under the same terms as perl itself
7              
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::Tools::Genewise - Results of one Genewise run
13              
14             =head1 SYNOPSIS
15              
16             use Bio::Tools::Genewise;
17             my $gw = Bio::Tools::Genewise(-file=>"genewise.out");
18              
19             while (my $gene = $gw->next_prediction){
20             my @transcripts = $gene->transcripts;
21             foreach my $t(@transcripts){
22             my @exons = $t->exons;
23             foreach my $e(@exons){
24             print $e->start." ".$e->end."\n";
25             }
26             }
27             }
28              
29             =head1 DESCRIPTION
30              
31             This is the parser for the output of Genewise. It takes either a file
32             handle or a file name and returns a
33             Bio::SeqFeature::Gene::GeneStructure object.
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to one
41             of the Bioperl mailing lists. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             the bugs and their resolution. Bug reports can be submitted via the
61             web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Fugu Team, Jason Stajich
66              
67             Email: fugui@worf.fugu-sg.org
68             Email: jason-at-bioperl.org
69              
70             =head1 APPENDIX
71              
72             The rest of the documentation details each of the object
73             methods. Internal methods are usually preceded with a _
74              
75             =cut
76              
77              
78             # Let the code begin...
79              
80              
81             package Bio::Tools::Genewise;
82 3     3   449 use vars qw($Srctag);
  3         5  
  3         119  
83 3     3   11 use strict;
  3         3  
  3         55  
84 3     3   9 use Symbol;
  3         3  
  3         130  
85              
86 3     3   577 use Bio::Tools::AnalysisResult;
  3         4  
  3         56  
87 3     3   647 use Bio::SeqFeature::Generic;
  3         6  
  3         64  
88 3     3   595 use Bio::SeqFeature::Gene::Exon;
  3         5  
  3         63  
89 3     3   556 use Bio::SeqFeature::FeaturePair;
  3         5  
  3         62  
90 3     3   656 use Bio::SeqFeature::Gene::Transcript;
  3         5  
  3         91  
91 3     3   605 use Bio::SeqFeature::Gene::GeneStructure;
  3         6  
  3         74  
92              
93 3     3   10 use base qw(Bio::Root::Root Bio::Root::IO);
  3         3  
  3         2936  
94             $Srctag = 'genewise';
95              
96             =head2 new
97              
98             Title : new
99             Usage : $obj->new(-file=>"genewise.out");
100             $obj->new(-fh=>\*GW);
101             Function: Constructor for genewise wrapper. Takes either a file or filehandle
102             Example :
103             Returns : Bio::Tools::Genewise object
104              
105             See L
106              
107             =cut
108              
109             sub new {
110 5     5 1 22 my($class,@args) = @_;
111 5         29 my $self = $class->SUPER::new(@args);
112 5         21 $self->_initialize_io(@args);
113 5         11 return $self;
114             }
115              
116             =head2 _get_strand
117              
118             Title : _get_strand
119             Usage : $obj->_get_strand
120             Function: takes start and end values, swap them if start>end and
121             returns end
122             Example :
123             Returns :$start,$end,$strand
124              
125             =cut
126              
127             sub _get_strand {
128 185     185   181 my ($self,$start,$end) = @_;
129 185 50       257 defined($start) || $self->throw("Need a start");
130 185 50       236 defined($end) || $self->throw("Need an end");
131 185         123 my $strand;
132 185 50       347 if ($start > $end) {
133 0         0 my $tmp = $start;
134 0         0 $start = $end;
135 0         0 $end = $tmp;
136 0         0 $strand = -1;
137             }
138             else {
139 185         164 $strand = 1;
140             }
141 185         368 return ($start,$end,$strand);
142             }
143              
144             =head2 _score
145              
146             Title : _score
147             Usage : $obj->_score
148             Function: get/set for score info
149             Returns : a score value
150              
151             =cut
152              
153             sub _score {
154 25     25   24 my $self = shift;
155 25 100       45 return $self->{'_score'} = shift if @_;
156 22         65 return $self->{'_score'};
157             }
158              
159             =head2 _prot_id
160              
161             Title : _prot_id
162             Usage : $obj->_prot_id
163             Function: get/set for protein id
164             Returns :a protein id
165              
166             =cut
167              
168             sub _prot_id {
169 283     283   208 my $self = shift;
170 283 100       347 return $self->{'_prot_id'} = shift if @_;
171 280         575 return $self->{'_prot_id'};
172             }
173              
174             =head2 _target_id
175              
176             Title : _target_id
177             Usage : $obj->_target_id
178             Function: get/set for genomic sequence id
179             Example :
180             Returns :a target id
181              
182             =cut
183              
184             sub _target_id {
185 150     150   115 my $self = shift;
186 150 100       520 return $self->{'_target_id'} = shift if @_;
187 147         552 return $self->{'_target_id'};
188             }
189              
190              
191             =head2 next_prediction
192              
193             Title : next_prediction
194             Usage : while($gene = $genewise->next_prediction()) {
195             # do something
196             }
197             Function: Returns the gene structure prediction of the Genewise result
198             file. Call this method repeatedly until FALSE is returned.
199              
200             Example :
201             Returns : a Bio::SeqFeature::Gene::GeneStructure object
202             Args :
203              
204             See L
205              
206             =cut
207              
208              
209             sub next_prediction {
210 5     5 1 17 my ($self) = @_;
211              
212 5 100       10 unless ( $self->parsed ){
213 3         7 $self->_parse_genes;
214 3         11 $self->parsed(1);
215             }
216 5         6 return shift @{$self->{'_genes'}};
  5         13  
217             }
218              
219             sub parsed {
220 8     8 0 8 my $self = shift;
221 8 50 66     34 return $self->{'_parsed'} = 1 if @_ && $_[0];
222 5         12 return $self->{'_parsed'};
223             }
224            
225             sub _parse_genes {
226 3     3   3 my ($self) = @_;
227 3         4 my (@alignments,@genes);
228 3         10 local ($/) = "//";
229              
230 3         14 while ( defined($_ = $self->_readline) ) {
231 9         32 $self->debug( $_ );
232 9         49 while( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
233 0         0 $alignments[$1] = $2;
234             }
235 9 100       38 if( /Score\s+(\-?\d+(\.\d+)?)/ ) {
236 3         8 $self->_score($1);# unless defined $self->_score;
237             }
238              
239 9 100       33 if( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
240 3         6 $self->_prot_id($1); #unless defined $self->_prot_id;
241             }
242              
243 9 100       22 if( /Target Sequence\s+(\S+)/ ) {
244 3         8 $self->_target_id($1);# unless defined $self->_target_id;
245             }
246            
247 9 100       82 next unless /Gene\s+\d+\n/;
248 3         22 my @genes_txt = split(/Gene\s+\d+\n/,$_);
249 3         6 shift @genes_txt; #remove first empty entry
250 3         5 my $gene_num = 1;
251 3         4 foreach my $gene_txt (@genes_txt) {
252 3         6 my $score = $alignments[$gene_num];
253             # If genewise has assigned a strand to the gene as a whole
254             # overall gene start and end
255 3         14 my ($g_start, $g_end, $type) =
256             $gene_txt =~ m/Gene\s+
257             (\d+)[\s-]+ # start (1-based)
258             (\d+)\s+ # end
259             (?:\[(\w+)\])? #
260             /x;
261 3         3 my $g_strand;
262 3 50       10 my $source_tag = $type ? "$Srctag". "_$type" : $Srctag;
263 3         10 my $genes = Bio::SeqFeature::Gene::GeneStructure->new
264             (-source => $source_tag,
265             -score => $self->_score,
266             );
267 3         8 $genes->add_tag_value('ID', $self->_prot_id.".gene");
268 3         25 my $transcript = Bio::SeqFeature::Gene::Transcript->new
269             (-source => $source_tag,
270             -score => $score);
271 3         9 ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
272 3         7 $genes->strand($g_strand);
273              
274             # grab exon + supporting feature info
275 3         4 my @exons;
276 3 50       65 unless ( @exons = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g ) {
277 0         0 @exons = $gene_txt =~ m/(Exon .+\s+)/g;
278             }
279 3         6 my $nbr = 1;
280              
281             # loop through each exon-supporting feature pair
282 3         6 foreach my $e (@exons){
283 54         296 my ($e_start,$e_end,$phase) =
284             $e =~ m/Exon\s+
285             (\d+)[\s-]+ # start (1 based)
286             (\d+)\s+ # end
287             phase\s+(\d+) # phase
288             /x;
289 54         55 my $e_strand;
290 54         79 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
291             $e_end);
292 54 100       97 $transcript->strand($e_strand) unless $transcript->strand != 0;
293 54         106 my $exon = Bio::SeqFeature::Gene::Exon->new
294             (-seq_id =>$self->_target_id,
295             -source => $source_tag,
296             -start =>$e_start,
297             -end =>$e_end,
298             -score => $score,
299             #-frame => $phase,
300             -strand =>$e_strand);
301              
302 54         105 $exon->add_tag_value('phase',$phase);
303 54         76 $exon->is_coding(1);
304 54 50       81 if( $self->_prot_id ) {
305 54         68 $exon->add_tag_value('Parent',$self->_prot_id);
306             }
307 54         96 $exon->add_tag_value("Exon",$nbr++);
308 54 50       242 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
309 54         137 my ($geno_start,$geno_end,
310             $prot_start, $prot_end) = ($1,$2,$3,$4);
311 54         40 my $prot_strand;
312 54         80 ($prot_start,$prot_end,
313             $prot_strand) = $self->_get_strand($prot_start,$prot_end);
314 54         116 my $pf = Bio::SeqFeature::Generic->new
315             ( -start => $prot_start,
316             -end => $prot_end,
317             -seq_id => $self->_prot_id,
318             -score => $score,
319             -strand => $prot_strand,
320             -source => $source_tag,
321             -primary_tag => 'supporting_protein_feature',);
322 54         57 my $geno_strand;
323 54         86 ($geno_start,$geno_end,
324             $geno_strand) = $self->_get_strand($geno_start,$geno_end);
325 54         115 my $gf = Bio::SeqFeature::Generic->new
326             ( -start => $geno_start,
327             -end => $geno_end,
328             -seq_id => $self->_target_id,
329             -score => $score,
330             -strand => $geno_strand,
331             -source => $source_tag,
332             -primary_tag => 'supporting_genomic_feature',);
333 54         180 my $fp = Bio::SeqFeature::FeaturePair->new
334             (-feature1 =>$gf,
335             -feature2 =>$pf);
336 54         108 $exon->add_tag_value( 'supporting_feature',$fp );
337 54 50       77 if( $self->_prot_id ) {
338 54         66 $exon->add_tag_value('Target','Protein:'.$self->_prot_id);
339 54         78 $exon->add_tag_value('Target',$prot_start);
340 54         66 $exon->add_tag_value('Target',$prot_end);
341             }
342             }
343 54         113 $transcript->add_exon($exon);
344             }
345 3         9 $transcript->seq_id($self->_target_id);
346 3         9 $transcript->add_tag_value('ID', $self->_prot_id);
347 3         7 $transcript->add_tag_value('Parent', $self->_prot_id.".gene");
348 3         17 $genes->add_transcript($transcript);
349 3         8 $genes->seq_id($self->_target_id);
350 3         11 push @genes, $genes;
351 3         26 $gene_num++;
352             }
353             }
354 3         17 $self->{'_genes'} = \@genes;
355             }
356              
357             1;