File Coverage

Bio/Tools/Phylo/Molphy.pm
Criterion Covered Total %
statement 116 122 95.0
branch 40 46 86.9
condition 3 3 100.0
subroutine 7 7 100.0
pod 2 2 100.0
total 168 180 93.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Phylo::Molphy
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Phylo::Molphy - parser for Molphy output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Phylo::Molphy;
21             my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22             while( my $r = $parser->next_result ) {
23             # r is a Bio::Tools::Phylo::Molphy::Result object
24              
25             # print the model name
26             print $r->model, "\n";
27              
28             # get the substitution matrix
29             # this is a hash of 3letter aa codes -> 3letter aa codes representing
30             # substitution rate
31             my $smat = $r->substitution_matrix;
32             print "Arg -> Gln substitution rate is %d\n",
33             $smat->{'Arg'}->{'Gln'}, "\n";
34              
35             # get the transition probablity matrix
36             # this is a hash of 3letter aa codes -> 3letter aa codes representing
37             # transition probabilty
38             my $tmat = $r->transition_probability_matrix;
39             print "Arg -> Gln transition probablity is %.2f\n",
40             $tmat->{'Arg'}->{'Gln'}, "\n";
41              
42             # get the frequency for each of the residues
43             my $rfreqs = $r->residue_frequencies;
44              
45             foreach my $residue ( keys %{$rfreqs} ) {
46             printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47             $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
48             }
49              
50             my @trees;
51             while( my $t = $r->next_tree ) {
52             push @trees, $t;
53             }
54              
55             print "search space is ", $r->search_space, "\n",
56             "1st tree score is ", $trees[0]->score, "\n";
57              
58             # writing to STDOUT, use -file => '>filename' to specify a file
59             my $out = Bio::TreeIO->new(-format => "newick");
60             $out->write_tree($trees[0]); # writing only the 1st tree
61             }
62              
63             =head1 DESCRIPTION
64              
65             A parser for Molphy output (protml,dnaml)
66              
67             =head1 FEEDBACK
68              
69             =head2 Mailing Lists
70              
71             User feedback is an integral part of the evolution of this and other
72             Bioperl modules. Send your comments and suggestions preferably to
73             the Bioperl mailing list. Your participation is much appreciated.
74              
75             bioperl-l@bioperl.org - General discussion
76             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77              
78             =head2 Support
79              
80             Please direct usage questions or support issues to the mailing list:
81              
82             I
83              
84             rather than to the module maintainer directly. Many experienced and
85             reponsive experts will be able look at the problem and quickly
86             address it. Please include a thorough description of the problem
87             with code and data examples if at all possible.
88              
89             =head2 Reporting Bugs
90              
91             Report bugs to the Bioperl bug tracking system to help us keep track
92             of the bugs and their resolution. Bug reports can be submitted via the
93             web:
94              
95             https://github.com/bioperl/bioperl-live/issues
96              
97             =head1 AUTHOR - Jason Stajich
98              
99             Email jason-at-bioperl.org
100              
101             =head1 APPENDIX
102              
103             The rest of the documentation details each of the object methods.
104             Internal methods are usually preceded with a _
105              
106             =cut
107              
108              
109             # Let the code begin...
110              
111              
112             package Bio::Tools::Phylo::Molphy;
113 1     1   378 use strict;
  1         1  
  1         22  
114              
115 1     1   281 use Bio::Tools::Phylo::Molphy::Result;
  1         2  
  1         23  
116 1     1   232 use Bio::TreeIO;
  1         1  
  1         23  
117 1     1   5 use IO::String;
  1         1  
  1         20  
118              
119 1     1   3 use base qw(Bio::Root::Root Bio::Root::IO);
  1         1  
  1         875  
120              
121             =head2 new
122              
123             Title : new
124             Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
125             Function: Builds a new Bio::Tools::Phylo::Molphy object
126             Returns : Bio::Tools::Phylo::Molphy
127             Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
128              
129              
130             =cut
131              
132             sub new {
133 2     2 1 4 my($class,@args) = @_;
134              
135 2         11 my $self = $class->SUPER::new(@args);
136 2         9 $self->_initialize_io(@args);
137              
138 2         5 return $self;
139             }
140              
141             =head2 next_result
142              
143             Title : next_result
144             Usage : my $r = $molphy->next_result
145             Function: Get the next result set from parser data
146             Returns : Bio::Tools::Phylo::Molphy::Result object
147             Args : none
148              
149              
150             =cut
151              
152             sub next_result{
153 2     2 1 572 my ($self) = @_;
154              
155             # A little statemachine for the parser here
156 2         6 my ($state,$transition_ct,
157             @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
158 2         3 my ( %subst_matrix, @treelines, @treedata, %frequencies);
159 0         0 my ( $treenum,$possible_trees, $model);
160 0         0 my ($trans_type,$trans_amount);
161 2         2 my $parsed = 0;
162 2         8 while( defined ( $_ = $self->_readline()) ) {
163 82         51 $parsed = 1;
164 82 100       245 if( /^Relative Substitution Rate Matrix/ ) {
    100          
    100          
    100          
    100          
    100          
165 1 50       4 if( %subst_matrix ) {
166 0         0 $self->_pushback($_);
167 0         0 last;
168             }
169 1         2 $state = 0;
170 1         1 my ( @tempdata);
171 1         2 @resloc = ();
172 1         3 while( defined ($_ = $self->_readline) ) {
173 21 100       45 last if (/^\s+$/);
174             # remove leading/trailing spaces
175 20         35 s/^\s+//;
176 20         72 s/\s+$//;
177 20         106 my @data = split;
178 20         13 my $i = 0;
179 20         18 for my $l ( @data ) {
180 400 100       434 if( $l =~ /\D+/ ) {
181 20         21 push @resloc, $l;
182             }
183 400         251 $i++;
184             }
185 20         41 push @tempdata, \@data;
186             }
187 1         2 my $i = 0;
188 1         2 for my $row ( @tempdata ) {
189 20         11 my $j = 0;
190 20         19 for my $col ( @$row ) {
191 400 100       318 if( $i == $j ) {
192             # empty string for diagonals
193 20         24 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
194             } else {
195 380         397 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
196             }
197 400         255 $j++;
198             }
199 20         28 $i++;
200             }
201             } elsif( /^Transition Probability Matrix/ ) {
202 2 50       9 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
203 2         2 $state = 1;
204 2         6 my $newtrans_type = "$3-$1";
205 2         3 $trans_amount = $1;
206 2 100       3 if( defined $trans_type ) {
207             # finish processing the transition_matrix
208 1         2 my $i =0;
209 1         2 foreach my $row ( @transition_matrix ) {
210 20         13 my $j = 0;
211 20         16 foreach my $col ( @$row ) {
212 400         410 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
213 400         256 $j++;
214             }
215 20         14 $i++;
216             }
217             }
218 2         2 $trans_type = $newtrans_type;
219 2         2 $transition_ct = 0;
220 2         20 @transition_matrix = ();
221             }
222             } elsif ( /Acid Frequencies/ ) {
223 1         2 $state = 0;
224 1         3 $self->_readline(); # skip the next line
225 1         3 while( defined( $_ = $self->_readline) ) {
226 21 100       34 unless( /^\s+/) {
227 1         8 $self->_pushback($_);
228 1         3 last;
229             }
230 20         30 s/^\s+//;
231 20         38 s/\s+$//;
232 20         36 my ($index,$res,$model,$data) = split;
233 20         53 $frequencies{$res} = [ $model,$data];
234             }
235             } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
236 2         8 my @save = ($1,$2,$3);
237             # finish processing the transition_matrix
238 2         3 my $i =0;
239 2         3 foreach my $row ( @transition_matrix ) {
240 20         14 my $j = 0;
241 20         14 foreach my $col ( @$row ) {
242 400         451 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
243 400         226 $j++;
244             }
245 20         15 $i++;
246             }
247 2 50       5 if( defined $treenum ) {
248 0         0 $self->_pushback($_);
249 0         0 last;
250             }
251            
252 2         2 $state = 2;
253 2         3 ($treenum,$possible_trees, $model) = @save;
254 2         9 $model =~ s/\s+/ /g;
255             } elsif( $state == 1 ) {
256 65 100 100     209 next if( /^\s+$/ || /^\s+Ala/);
257 61         83 s/^\s+//;
258 61         187 s/\s+$//;
259 61 100       80 if( $trans_type eq '1PAM-1.0e7' ) {
    50          
260             # because the matrix is split up into 2-10 column sets
261 40         18 push @{$transition_matrix[$transition_ct++]}, split ;
  40         136  
262 40 100       113 $transition_ct = 0 if $transition_ct % 20 == 0;
263             } elsif( $trans_type eq '1PAM-1.0e5' ) {
264             # because the matrix is split up into 2-10 column sets
265 21         90 my ($res,@row) = split;
266 21 100       33 next if $transition_ct >= 20; # skip last
267 20         14 push @{$transition_matrix[$transition_ct++]}, @row;
  20         116  
268             }
269             } elsif( $state == 2 ) {
270 10 100       29 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
271 5         15 push @treedata, [ $1,$2];
272             }
273             # save this for the end so that we can
274             # be efficient and only open one tree parser
275 10         21 push @treelines, $_;
276             }
277             }
278             # waiting till the end to do this, is it better
279 2         45 my @trees;
280 2 50       4 if( @treelines ) {
281 2         16 my $strdat = IO::String->new(join('',@treelines));
282 2         110 my $treeio = Bio::TreeIO->new(-fh => $strdat,
283             -format => 'newick');
284 2         4 while( my $tree = $treeio->next_tree ) {
285 10 100       14 if( @treedata ) {
286 5         4 my $dat = shift @treedata;
287             # set the associated information
288 5         12 $tree->id($dat->[0]);
289 5         26 $tree->score($dat->[1]);
290             }
291 10         41 push @trees, $tree;
292             }
293             }
294 2 50       19 return unless( $parsed );
295 2         25 my $result = Bio::Tools::Phylo::Molphy::Result->new
296             (-trees => \@trees,
297             -substitution_matrix => \%subst_matrix,
298             -frequencies => \%frequencies,
299             -model => $model,
300             -search_space => $possible_trees,
301             );
302 2         10 while( my ($type,$mat) = each %transition_mat ) {
303 2         6 $result->transition_probability_matrix( $type,$mat);
304             }
305 2         47 $result;
306             }
307              
308             1;