File Coverage

Bio/Tools/Genewise.pm
Criterion Covered Total %
statement 122 128 95.3
branch 27 36 75.0
condition 2 3 66.6
subroutine 18 18 100.0
pod 2 3 66.6
total 171 188 90.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Genewise
3             #
4             # Copyright Fugu Team
5             #
6             # You may distribute this module under the same terms as perl itself
7              
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::Tools::Genewise - Results of one Genewise run
13              
14             =head1 SYNOPSIS
15              
16             use Bio::Tools::Genewise;
17             my $gw = Bio::Tools::Genewise(-file=>"genewise.out");
18              
19             while (my $gene = $gw->next_prediction){
20             my @transcripts = $gene->transcripts;
21             foreach my $t(@transcripts){
22             my @exons = $t->exons;
23             foreach my $e(@exons){
24             print $e->start." ".$e->end."\n";
25             }
26             }
27             }
28              
29             =head1 DESCRIPTION
30              
31             This is the parser for the output of Genewise. It takes either a file
32             handle or a file name and returns a
33             Bio::SeqFeature::Gene::GeneStructure object.
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to one
41             of the Bioperl mailing lists. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             the bugs and their resolution. Bug reports can be submitted via the
61             web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Fugu Team, Jason Stajich
66              
67             Email: fugui@worf.fugu-sg.org
68             Email: jason-at-bioperl.org
69              
70             =head1 APPENDIX
71              
72             The rest of the documentation details each of the object
73             methods. Internal methods are usually preceded with a _
74              
75             =cut
76              
77              
78             # Let the code begin...
79              
80              
81             package Bio::Tools::Genewise;
82 3     3   363 use vars qw($Srctag);
  3         3  
  3         97  
83 3     3   9 use strict;
  3         3  
  3         40  
84 3     3   6 use Symbol;
  3         4  
  3         118  
85              
86 3     3   522 use Bio::Tools::AnalysisResult;
  3         4  
  3         70  
87 3     3   707 use Bio::SeqFeature::Generic;
  3         5  
  3         67  
88 3     3   574 use Bio::SeqFeature::Gene::Exon;
  3         3  
  3         63  
89 3     3   563 use Bio::SeqFeature::FeaturePair;
  3         4  
  3         59  
90 3     3   639 use Bio::SeqFeature::Gene::Transcript;
  3         4  
  3         70  
91 3     3   629 use Bio::SeqFeature::Gene::GeneStructure;
  3         4  
  3         70  
92              
93 3     3   10 use base qw(Bio::Root::Root Bio::Root::IO);
  3         3  
  3         2843  
94             $Srctag = 'genewise';
95              
96             =head2 new
97              
98             Title : new
99             Usage : $obj->new(-file=>"genewise.out");
100             $obj->new(-fh=>\*GW);
101             Function: Constructor for genewise wrapper. Takes either a file or filehandle
102             Example :
103             Returns : Bio::Tools::Genewise object
104              
105             See L
106              
107             =cut
108              
109             sub new {
110 5     5 1 18 my($class,@args) = @_;
111 5         25 my $self = $class->SUPER::new(@args);
112 5         20 $self->_initialize_io(@args);
113 5         14 return $self;
114             }
115              
116             =head2 _get_strand
117              
118             Title : _get_strand
119             Usage : $obj->_get_strand
120             Function: takes start and end values, swap them if start>end and
121             returns end
122             Example :
123             Returns :$start,$end,$strand
124              
125             =cut
126              
127             sub _get_strand {
128 185     185   190 my ($self,$start,$end) = @_;
129 185 50       247 defined($start) || $self->throw("Need a start");
130 185 50       210 defined($end) || $self->throw("Need an end");
131 185         137 my $strand;
132 185 50       311 if ($start > $end) {
133 0         0 my $tmp = $start;
134 0         0 $start = $end;
135 0         0 $end = $tmp;
136 0         0 $strand = -1;
137             }
138             else {
139 185         153 $strand = 1;
140             }
141 185         358 return ($start,$end,$strand);
142             }
143              
144             =head2 _score
145              
146             Title : _score
147             Usage : $obj->_score
148             Function: get/set for score info
149             Returns : a score value
150              
151             =cut
152              
153             sub _score {
154 25     25   20 my $self = shift;
155 25 100       42 return $self->{'_score'} = shift if @_;
156 22         70 return $self->{'_score'};
157             }
158              
159             =head2 _prot_id
160              
161             Title : _prot_id
162             Usage : $obj->_prot_id
163             Function: get/set for protein id
164             Returns :a protein id
165              
166             =cut
167              
168             sub _prot_id {
169 283     283   203 my $self = shift;
170 283 100       343 return $self->{'_prot_id'} = shift if @_;
171 280         578 return $self->{'_prot_id'};
172             }
173              
174             =head2 _target_id
175              
176             Title : _target_id
177             Usage : $obj->_target_id
178             Function: get/set for genomic sequence id
179             Example :
180             Returns :a target id
181              
182             =cut
183              
184             sub _target_id {
185 150     150   114 my $self = shift;
186 150 100       206 return $self->{'_target_id'} = shift if @_;
187 147         557 return $self->{'_target_id'};
188             }
189              
190              
191             =head2 next_prediction
192              
193             Title : next_prediction
194             Usage : while($gene = $genewise->next_prediction()) {
195             # do something
196             }
197             Function: Returns the gene structure prediction of the Genewise result
198             file. Call this method repeatedly until FALSE is returned.
199              
200             Example :
201             Returns : a Bio::SeqFeature::Gene::GeneStructure object
202             Args :
203              
204             See L
205              
206             =cut
207              
208              
209             sub next_prediction {
210 5     5 1 16 my ($self) = @_;
211              
212 5 100       11 unless ( $self->parsed ){
213 3         7 $self->_parse_genes;
214 3         9 $self->parsed(1);
215             }
216 5         6 return shift @{$self->{'_genes'}};
  5         13  
217             }
218              
219             sub parsed {
220 8     8 0 8 my $self = shift;
221 8 50 66     42 return $self->{'_parsed'} = 1 if @_ && $_[0];
222 5         10 return $self->{'_parsed'};
223             }
224            
225             sub _parse_genes {
226 3     3   3 my ($self) = @_;
227 3         5 my (@alignments,@genes);
228 3         10 local ($/) = "//";
229              
230 3         15 while ( defined($_ = $self->_readline) ) {
231 9         31 $self->debug( $_ );
232 9         52 while( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
233 0         0 $alignments[$1] = $2;
234             }
235 9 100       34 if( /Score\s+(\-?\d+(\.\d+)?)/ ) {
236 3         9 $self->_score($1);# unless defined $self->_score;
237             }
238              
239 9 100       30 if( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
240 3         7 $self->_prot_id($1); #unless defined $self->_prot_id;
241             }
242              
243 9 100       25 if( /Target Sequence\s+(\S+)/ ) {
244 3         7 $self->_target_id($1);# unless defined $self->_target_id;
245             }
246            
247 9 100       76 next unless /Gene\s+\d+\n/;
248 3         23 my @genes_txt = split(/Gene\s+\d+\n/,$_);
249 3         5 shift @genes_txt; #remove first empty entry
250 3         6 my $gene_num = 1;
251 3         5 foreach my $gene_txt (@genes_txt) {
252 3         5 my $score = $alignments[$gene_num];
253             # If genewise has assigned a strand to the gene as a whole
254             # overall gene start and end
255 3         16 my ($g_start, $g_end, $type) =
256             $gene_txt =~ m/Gene\s+
257             (\d+)[\s-]+ # start (1-based)
258             (\d+)\s+ # end
259             (?:\[(\w+)\])? #
260             /x;
261 3         4 my $g_strand;
262 3 50       10 my $source_tag = $type ? "$Srctag". "_$type" : $Srctag;
263 3         12 my $genes = Bio::SeqFeature::Gene::GeneStructure->new
264             (-source => $source_tag,
265             -score => $self->_score,
266             );
267 3         9 $genes->add_tag_value('ID', $self->_prot_id.".gene");
268 3         21 my $transcript = Bio::SeqFeature::Gene::Transcript->new
269             (-source => $source_tag,
270             -score => $score);
271 3         11 ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
272 3         7 $genes->strand($g_strand);
273              
274             # grab exon + supporting feature info
275 3         3 my @exons;
276 3 50       65 unless ( @exons = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g ) {
277 0         0 @exons = $gene_txt =~ m/(Exon .+\s+)/g;
278             }
279 3         5 my $nbr = 1;
280              
281             # loop through each exon-supporting feature pair
282 3         5 foreach my $e (@exons){
283 54         293 my ($e_start,$e_end,$phase) =
284             $e =~ m/Exon\s+
285             (\d+)[\s-]+ # start (1 based)
286             (\d+)\s+ # end
287             phase\s+(\d+) # phase
288             /x;
289 54         48 my $e_strand;
290 54         91 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
291             $e_end);
292 54 100       95 $transcript->strand($e_strand) unless $transcript->strand != 0;
293 54         106 my $exon = Bio::SeqFeature::Gene::Exon->new
294             (-seq_id =>$self->_target_id,
295             -source => $source_tag,
296             -start =>$e_start,
297             -end =>$e_end,
298             -score => $score,
299             #-frame => $phase,
300             -strand =>$e_strand);
301              
302 54         97 $exon->add_tag_value('phase',$phase);
303 54         81 $exon->is_coding(1);
304 54 50       75 if( $self->_prot_id ) {
305 54         61 $exon->add_tag_value('Parent',$self->_prot_id);
306             }
307 54         88 $exon->add_tag_value("Exon",$nbr++);
308 54 50       218 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
309 54         138 my ($geno_start,$geno_end,
310             $prot_start, $prot_end) = ($1,$2,$3,$4);
311 54         38 my $prot_strand;
312 54         78 ($prot_start,$prot_end,
313             $prot_strand) = $self->_get_strand($prot_start,$prot_end);
314 54         110 my $pf = Bio::SeqFeature::Generic->new
315             ( -start => $prot_start,
316             -end => $prot_end,
317             -seq_id => $self->_prot_id,
318             -score => $score,
319             -strand => $prot_strand,
320             -source => $source_tag,
321             -primary_tag => 'supporting_protein_feature',);
322 54         56 my $geno_strand;
323 54         79 ($geno_start,$geno_end,
324             $geno_strand) = $self->_get_strand($geno_start,$geno_end);
325 54         124 my $gf = Bio::SeqFeature::Generic->new
326             ( -start => $geno_start,
327             -end => $geno_end,
328             -seq_id => $self->_target_id,
329             -score => $score,
330             -strand => $geno_strand,
331             -source => $source_tag,
332             -primary_tag => 'supporting_genomic_feature',);
333 54         188 my $fp = Bio::SeqFeature::FeaturePair->new
334             (-feature1 =>$gf,
335             -feature2 =>$pf);
336 54         104 $exon->add_tag_value( 'supporting_feature',$fp );
337 54 50       76 if( $self->_prot_id ) {
338 54         69 $exon->add_tag_value('Target','Protein:'.$self->_prot_id);
339 54         86 $exon->add_tag_value('Target',$prot_start);
340 54         73 $exon->add_tag_value('Target',$prot_end);
341             }
342             }
343 54         109 $transcript->add_exon($exon);
344             }
345 3         8 $transcript->seq_id($self->_target_id);
346 3         11 $transcript->add_tag_value('ID', $self->_prot_id);
347 3         7 $transcript->add_tag_value('Parent', $self->_prot_id.".gene");
348 3         14 $genes->add_transcript($transcript);
349 3         8 $genes->seq_id($self->_target_id);
350 3         6 push @genes, $genes;
351 3         19 $gene_num++;
352             }
353             }
354 3         17 $self->{'_genes'} = \@genes;
355             }
356              
357             1;