File Coverage

Bio/Tools/MZEF.pm
Criterion Covered Total %
statement 80 85 94.1
branch 21 28 75.0
condition 3 6 50.0
subroutine 12 12 100.0
pod 3 3 100.0
total 119 134 88.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::MZEF
3             #
4             # Please direct questions and support issues to <bioperl-l@bioperl.org>
5             #
6             # Cared for by Hilmar Lapp
7             #
8             # Copyright Hilmar Lapp
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::MZEF - Results of one MZEF run
17              
18             =head1 SYNOPSIS
19              
20             $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
21             # filehandle:
22             $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
23             # to indicate that the sequence was reversed prior to feeding it to MZEF
24             # and that you want to have this reflected in the strand() attribute of
25             # the exons, as well as have the coordinates translated to the non-reversed
26             # sequence
27             $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
28             -strand => -1 );
29              
30             # parse the results
31             # note: this class is-a Bio::Tools::AnalysisResult which implements
32             # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
33             while($gene = $mzef->next_prediction()) {
34             # $gene is an instance of Bio::Tools::Prediction::Gene
35              
36             # $gene->exons() returns an array of
37             # Bio::Tools::Prediction::Exon objects
38             # all exons:
39             @exon_arr = $gene->exons();
40              
41             # internal exons only
42             @intrl_exons = $gene->exons('Internal');
43             # note that presently MZEF predicts only internal exons!
44             }
45              
46             # essential if you gave a filename at initialization (otherwise the file
47             # will stay open)
48             $mzef->close();
49              
50             =head1 DESCRIPTION
51              
52             The MZEF module provides a parser for MZEF gene structure prediction
53             output.
54              
55             This module inherits off L<Bio::Tools::AnalysisResult> and therefore
56             implements L<Bio::SeqAnalysisParserI>.
57              
58             =head1 FEEDBACK
59              
60             =head2 Mailing Lists
61              
62             User feedback is an integral part of the evolution of this and other
63             Bioperl modules. Send your comments and suggestions preferably to one
64             of the Bioperl mailing lists. Your participation is much appreciated.
65              
66             bioperl-l@bioperl.org - General discussion
67             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
68              
69             =head2 Support
70              
71             Please direct usage questions or support issues to the mailing list:
72              
73             I<bioperl-l@bioperl.org>
74              
75             rather than to the module maintainer directly. Many experienced and
76             responsive experts will be able to look at the problem and quickly
77             address it. Please include a thorough description of the problem
78             with code and data examples if at all possible.
79              
80             =head2 Reporting Bugs
81              
82             Report bugs to the Bioperl bug tracking system to help us keep track
83             of the bugs and their resolution. Bug reports can be submitted via the
84             web:
85              
86             https://github.com/bioperl/bioperl-live/issues
87              
88             =head1 AUTHOR - Hilmar Lapp
89              
90             Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
91              
92             =head1 APPENDIX
93              
94             The rest of the documentation details each of the object
95             methods. Internal methods are usually preceded with a _
96              
97             =cut
98              
99              
100             # Let the code begin...
101              
102              
103             package Bio::Tools::MZEF;
104 2     2   691 use strict;
  2         4  
  2         53  
105              
106 2     2   8 use Bio::Tools::Prediction::Gene;
  2         3  
  2         33  
107 2     2   7 use Bio::Tools::Prediction::Exon;
  2         3  
  2         39  
108              
109 2     2   7 use base qw(Bio::Tools::AnalysisResult);
  2         3  
  2         1520  
110              
111             sub _initialize_state {
                # Sets up the per-object parser state: runs the parent class
                # initializer, picks the STRAND value out of the constructor
                # arguments (defaulting to 1), and resets the bookkeeping fields
                # used while parsing predictions.
                # NOTE(review): there is no explicit return, so the sub yields the
                # value of its last statement rather than the parent's result --
                # confirm no caller relies on the return value.
112 2     2   5 my($self,@args) = @_;
113              
114             # first call the inherited method!
                # NOTE(review): $make is assigned here but never read in this sub.
115 2         11 my $make = $self->SUPER::_initialize_state(@args);
116              
117             # handle our own parameters
                # Only STRAND is requested; $params receives whatever _rearrange
                # returns after it and is not used afterwards.
118 2         8 my ($strand, $params) =
119             $self->_rearrange([qw(STRAND
120             )],
121             @args);
122              
123             # our private state variables
                # strand defaults to 1 when the caller did not supply one
124 2 50       7 $strand = 1 unless defined($strand);
125 2         5 $self->{'_strand'} = $strand;
126 2         5 $self->{'_preds_parsed'} = 0;
127 2         2 $self->{'_has_cds'} = 0;
128             # array of pre-parsed predictions
129 2         5 $self->{'_preds'} = [];
130             }
131              
132             =head2 analysis_method
133              
134             Usage : $mzef->analysis_method();
135             Purpose : Inherited method. Overridden to ensure that the name matches
136             /mzef/i.
137             Returns : String
138             Argument : n/a
139              
140             =cut
141              
142             #-------------
143             sub analysis_method {
144             #-------------
                # Guards the inherited analysis_method(): a true $method that does
                # not match /mzef/i (case-insensitive) is rejected via throw();
                # anything else, including no argument at all, is handed on to
                # the parent implementation unchanged and its result is returned.
145 2     2 1 7 my ($self, $method) = @_;
146 2 50 33     21 if($method && ($method !~ /mzef/i)) {
147 0         0 $self->throw("method $method not supported in " . ref($self));
148             }
149 2         11 return $self->SUPER::analysis_method($method);
150             }
151              
152             =head2 next_feature
153              
154             Title : next_feature
155             Usage : while($gene = $mzef->next_feature()) {
156             # do something
157             }
158             Function: Returns the next gene structure prediction of the MZEF result
159             file. Call this method repeatedly until FALSE is returned.
160              
161             The returned object is actually a SeqFeatureI implementing object.
162             This method is required for classes implementing the
163             SeqAnalysisParserI interface, and is merely an alias for
164             next_prediction() at present.
165              
166             Note that with the present version of MZEF there will only be one
167             object returned, because MZEF does not predict individual genes
168             but just potential internal exons.
169             Example :
170             Returns : A Bio::Tools::Prediction::Gene object.
171             Args :
172              
173             =cut
174              
175             sub next_feature {
                # Thin alias: forwards to next_prediction() and returns its result.
176 2     2 1 6 my ($self,@args) = @_;
177             # Neither this method nor next_prediction() currently expects any
178             # arguments; they are passed along anyway so that nothing has to change
179             # here if next_prediction() ever starts accepting some.
180 2         7 return $self->next_prediction(@args);
181             }
182              
183             =head2 next_prediction
184              
185             Title : next_prediction
186             Usage : while($gene = $mzef->next_prediction()) {
187             # do something
188             }
189             Function: Returns the next gene structure prediction of the MZEF result
190             file. Call this method repeatedly until FALSE is returned.
191              
192             Note that with the present version of MZEF there will only be one
193             object returned, because MZEF does not predict individual genes
194             but just potential internal exons.
195             Example :
196             Returns : A Bio::Tools::Prediction::Gene object.
197             Args :
198              
199             =cut
200              
201             sub next_prediction {
                # Returns the next queued prediction, parsing the input on first
                # use. Once the queue is empty the result of _prediction() is an
                # empty return (false).
202 3     3 1 248 my ($self) = @_;
                # NOTE(review): $gene is declared but never used in this sub.
203 3         5 my $gene;
204              
205             # if the prediction section hasn't been parsed yet, we do this now
206 3 100       9 $self->_parse_predictions() unless $self->_predictions_parsed();
207              
208             # return the next gene structure (transcript)
209 3         10 return $self->_prediction();
210             }
211              
212             =head2 _parse_predictions
213              
214             Title : _parse_predictions()
215             Usage : $obj->_parse_predictions()
216             Function: Parses the prediction section. Automatically called by
217             next_prediction() if not yet done.
218             Example :
219             Returns :
220              
221             =cut
222              
223             sub _parse_predictions {
                # Reads the input line by line via _readline() until it returns
                # undef. Every line that starts with "<start> - <end>" becomes one
                # exon object added to a single gene object; the "Internal ... MZEF"
                # banner sets the analysis method and the "File_Name: ...
                # Sequence_length: ..." line supplies the sequence length. At the
                # end the gene (if any exon line was seen) is queued and the
                # parsed flag is set.
224 2     2   3 my ($self) = @_;
225 2         3 my ($method); # declared but never assigned or read in this sub
226 2         3 my $exon_tag = "InternalExon";
227 2         3 my $gene;
228             # my $seqname; # name given in output is poorly formatted
229             my $seqlen;
                # NOTE(review): $prednr is never incremented, so the primary tag
                # below is always "GenePrediction1".
230 2         5 my $prednr = 1;
231              
232 2         21 while(defined($_ = $self->_readline())) {
233 68 100       277 if(/^\s*(\d+)\s*-\s*(\d+)\s+/) {
234             # exon or signal
                # the gene container is created lazily on the first exon line
235 46 100       81 if(! defined($gene)) {
236 2         22 $gene = Bio::Tools::Prediction::Gene->new(
237             '-primary' => "GenePrediction$prednr",
238             '-source' => 'MZEF');
239             }
240             # we handle start-end first because may not be space delimited
241             # for large numbers
242 46         121 my ($start,$end) = ($1,$2);
                # strip the coordinates so that only the score columns remain in $_
243 46         158 s/^\s*(\d+)\s*-\s*(\d+)\s+//;
244             # split the rest into fields
245 46         75 chomp();
246             # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
247             # index (coordinates already removed): P=0 Fr1=1 Fr2=2 Fr3=3 Orf=4 3ss=5 Cds=6 5ss=7
248 46         118 my @flds = split(' ', $_);
249             # create the feature object depending on the type of signal --
250             # which is always an (internal) exon for MZEF
251 46         114 my $predobj = Bio::Tools::Prediction::Exon->new();
252             # set common fields
253 46         97 $predobj->source_tag('MZEF');
                # both significance and score are taken from the P column
254 46         98 $predobj->significance($flds[0]);
255 46         103 $predobj->score($flds[0]); # what shall we set as overall score?
256 46         102 $predobj->strand($self->{'_strand'}); # MZEF searches only one
                # Strand 1 keeps the coordinates as reported; any other strand value
                # mirrors them around the sequence length.
                # NOTE(review): $seqlen is only set once a "File_Name: ...
                # Sequence_length:" line has matched (further down in this loop).
                # If that line is absent or comes after the exon lines, the else
                # branch computes from an undefined length -- confirm the input
                # always has the header first. The report shows this branch was
                # never exercised.
257 46 50       78 if($predobj->strand() == 1) {
258 46         93 $predobj->start($start);
259 46         81 $predobj->end($end);
260             } else {
261 0         0 $predobj->start($seqlen-$end+1);
262 0         0 $predobj->end($seqlen-$start+1);
263             }
264             # set scores
265 46         110 $predobj->start_signal_score($flds[5]);
266 46         94 $predobj->end_signal_score($flds[7]);
267 46         91 $predobj->coding_signal_score($flds[6]);
268             # frame -- taken as the zero-based position of the first "1" in the
269             # Orf field; undef when the field contains no "1"
270 46         93 my $frm = index($flds[4], "1");
271 46 50       115 $predobj->frame(($frm < 0) ? undef : $frm);
272 46         87 $predobj->primary_tag($exon_tag);
273 46         89 $predobj->is_coding(1);
274             # add to gene structure (should be done only when start and end
275             # are set, in order to allow for proper expansion of the range)
276 46         100 $gene->add_exon($predobj);
277 46         160 next;
278             }
                # banner line: the captured "MZEF" goes through analysis_method()
279 22 100       34 if(/^\s*Internal .*(MZEF)/) {
280 2         8 $self->analysis_method($1);
281 2         5 next;
282             }
                # header line: only the sequence length ($2) is kept
283 20 100       44 if(/^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
284             # $seqname = $1; # this is too poor currently (file name truncated
285             # to 10 chars) in order to be sensible enough
286 2         6 $seqlen = $2;
287 2         8 next;
288             }
289             }
290             # $gene->seq_id($seqname);
                # nothing is queued when no exon line was seen
291 2 50       15 $self->_add_prediction($gene) if defined($gene);
292 2         5 $self->_predictions_parsed(1);
293             }
294              
295             =head2 _prediction
296              
297             Title : _prediction()
298             Usage : $gene = $obj->_prediction()
299             Function: internal
300             Example :
301             Returns :
302              
303             =cut
304              
305             sub _prediction {
                # Removes and returns the first queued prediction from
                # $self->{'_preds'}. Returns nothing (bare return) when the key is
                # missing or the array is empty.
306 3     3   5 my ($self) = @_;
307              
308 3 100 66     9 return unless(exists($self->{'_preds'}) && @{$self->{'_preds'}});
  3         12  
309 2         3 return shift(@{$self->{'_preds'}});
  2         9  
310             }
311              
312             =head2 _add_prediction
313              
314             Title : _add_prediction()
315             Usage : $obj->_add_prediction($gene)
316             Function: internal
317             Example :
318             Returns :
319              
320             =cut
321              
322             sub _add_prediction {
                # Appends $gene to the end of the $self->{'_preds'} queue, creating
                # the array first if the key does not exist yet.
323 2     2   5 my ($self, $gene) = @_;
324              
325 2 50       7 if(! exists($self->{'_preds'})) {
326 0         0 $self->{'_preds'} = [];
327             }
328 2         4 push(@{$self->{'_preds'}}, $gene);
  2         6  
329             }
330              
331             =head2 _predictions_parsed
332              
333             Title : _predictions_parsed
334             Usage : $obj->_predictions_parsed
335             Function: internal
336             Example :
337             Returns : TRUE or FALSE
338              
339             =cut
340              
341             sub _predictions_parsed {
                # Combined getter/setter for the "predictions already parsed" flag
                # stored in $self->{'_preds_parsed'}; always returns the current
                # value.
342 5     5   9 my ($self, $val) = @_;
343              
                # NOTE(review): the flag is only written when $val is true, so
                # passing 0 (or any false value) cannot clear it again -- confirm
                # whether resetting is meant to be supported.
344 5 100       12 $self->{'_preds_parsed'} = $val if $val;
                # fall back to 0 when the key has never been initialized
345 5 50       12 if(! exists($self->{'_preds_parsed'})) {
346 0         0 $self->{'_preds_parsed'} = 0;
347             }
348 5         17 return $self->{'_preds_parsed'};
349             }
350              
351              
352             1;