File Coverage

Bio/Tools/Phylo/Molphy.pm
Criterion Covered Total %
statement 116 122 95.0
branch 40 46 86.9
condition 3 3 100.0
subroutine 7 7 100.0
pod 2 2 100.0
total 168 180 93.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Phylo::Molphy
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Phylo::Molphy - parser for Molphy output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Phylo::Molphy;
21             my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22             while( my $r = $parser->next_result ) {
23             # r is a Bio::Tools::Phylo::Molphy::Result object
24              
25             # print the model name
26             print $r->model, "\n";
27              
28             # get the substitution matrix
29             # this is a hash of 3letter aa codes -> 3letter aa codes representing
30             # substitution rate
31             my $smat = $r->substitution_matrix;
32             print "Arg -> Gln substitution rate is %d\n",
33             $smat->{'Arg'}->{'Gln'}, "\n";
34              
35             # get the transition probablity matrix
36             # this is a hash of 3letter aa codes -> 3letter aa codes representing
37             # transition probabilty
38             my $tmat = $r->transition_probability_matrix;
39             print "Arg -> Gln transition probablity is %.2f\n",
40             $tmat->{'Arg'}->{'Gln'}, "\n";
41              
42             # get the frequency for each of the residues
43             my $rfreqs = $r->residue_frequencies;
44              
45             foreach my $residue ( keys %{$rfreqs} ) {
46             printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47             $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
48             }
49              
50             my @trees;
51             while( my $t = $r->next_tree ) {
52             push @trees, $t;
53             }
54              
55             print "search space is ", $r->search_space, "\n",
56             "1st tree score is ", $trees[0]->score, "\n";
57              
58             # writing to STDOUT, use -file => '>filename' to specify a file
59             my $out = Bio::TreeIO->new(-format => "newick");
60             $out->write_tree($trees[0]); # writing only the 1st tree
61             }
62              
63             =head1 DESCRIPTION
64              
65             A parser for Molphy output (protml,dnaml)
66              
67             =head1 FEEDBACK
68              
69             =head2 Mailing Lists
70              
71             User feedback is an integral part of the evolution of this and other
72             Bioperl modules. Send your comments and suggestions preferably to
73             the Bioperl mailing list. Your participation is much appreciated.
74              
75             bioperl-l@bioperl.org - General discussion
76             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77              
78             =head2 Support
79              
80             Please direct usage questions or support issues to the mailing list:
81              
82             I
83              
84             rather than to the module maintainer directly. Many experienced and
85             reponsive experts will be able look at the problem and quickly
86             address it. Please include a thorough description of the problem
87             with code and data examples if at all possible.
88              
89             =head2 Reporting Bugs
90              
91             Report bugs to the Bioperl bug tracking system to help us keep track
92             of the bugs and their resolution. Bug reports can be submitted via the
93             web:
94              
95             https://github.com/bioperl/bioperl-live/issues
96              
97             =head1 AUTHOR - Jason Stajich
98              
99             Email jason-at-bioperl.org
100              
101             =head1 APPENDIX
102              
103             The rest of the documentation details each of the object methods.
104             Internal methods are usually preceded with a _
105              
106             =cut
107              
108              
109             # Let the code begin...
110              
111              
112             package Bio::Tools::Phylo::Molphy;
113 1     1   472 use strict;
  1         2  
  1         23  
114              
115 1     1   327 use Bio::Tools::Phylo::Molphy::Result;
  1         1  
  1         29  
116 1     1   252 use Bio::TreeIO;
  1         3  
  1         29  
117 1     1   4 use IO::String;
  1         1  
  1         21  
118              
119 1     1   3 use base qw(Bio::Root::Root Bio::Root::IO);
  1         1  
  1         920  
120              
121             =head2 new
122              
123             Title : new
124             Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
125             Function: Builds a new Bio::Tools::Phylo::Molphy object
126             Returns : Bio::Tools::Phylo::Molphy
127             Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
128              
129              
130             =cut
131              
132             sub new {
133 2     2 1 4 my($class,@args) = @_;
134              
135 2         11 my $self = $class->SUPER::new(@args);
136 2         7 $self->_initialize_io(@args);
137              
138 2         6 return $self;
139             }
140              
141             =head2 next_result
142              
143             Title : next_result
144             Usage : my $r = $molphy->next_result
145             Function: Get the next result set from parser data
146             Returns : Bio::Tools::Phylo::Molphy::Result object
147             Args : none
148              
149              
150             =cut
151              
152             sub next_result{
153 2     2 1 371 my ($self) = @_;
154              
155             # A little statemachine for the parser here
156 2         4 my ($state,$transition_ct,
157             @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
158 2         2 my ( %subst_matrix, @treelines, @treedata, %frequencies);
159 0         0 my ( $treenum,$possible_trees, $model);
160 0         0 my ($trans_type,$trans_amount);
161 2         2 my $parsed = 0;
162 2         8 while( defined ( $_ = $self->_readline()) ) {
163 82         52 $parsed = 1;
164 82 100       249 if( /^Relative Substitution Rate Matrix/ ) {
    100          
    100          
    100          
    100          
    100          
165 1 50       4 if( %subst_matrix ) {
166 0         0 $self->_pushback($_);
167 0         0 last;
168             }
169 1         1 $state = 0;
170 1         2 my ( @tempdata);
171 1         2 @resloc = ();
172 1         2 while( defined ($_ = $self->_readline) ) {
173 21 100       44 last if (/^\s+$/);
174             # remove leading/trailing spaces
175 20         35 s/^\s+//;
176 20         70 s/\s+$//;
177 20         107 my @data = split;
178 20         16 my $i = 0;
179 20         21 for my $l ( @data ) {
180 400 100       466 if( $l =~ /\D+/ ) {
181 20         27 push @resloc, $l;
182             }
183 400         246 $i++;
184             }
185 20         44 push @tempdata, \@data;
186             }
187 1         2 my $i = 0;
188 1         2 for my $row ( @tempdata ) {
189 20         15 my $j = 0;
190 20         14 for my $col ( @$row ) {
191 400 100       358 if( $i == $j ) {
192             # empty string for diagonals
193 20         26 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
194             } else {
195 380         429 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
196             }
197 400         249 $j++;
198             }
199 20         32 $i++;
200             }
201             } elsif( /^Transition Probability Matrix/ ) {
202 2 50       9 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
203 2         2 $state = 1;
204 2         7 my $newtrans_type = "$3-$1";
205 2         2 $trans_amount = $1;
206 2 100       4 if( defined $trans_type ) {
207             # finish processing the transition_matrix
208 1         2 my $i =0;
209 1         2 foreach my $row ( @transition_matrix ) {
210 20         9 my $j = 0;
211 20         17 foreach my $col ( @$row ) {
212 400         449 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
213 400         244 $j++;
214             }
215 20         18 $i++;
216             }
217             }
218 2         3 $trans_type = $newtrans_type;
219 2         3 $transition_ct = 0;
220 2         31 @transition_matrix = ();
221             }
222             } elsif ( /Acid Frequencies/ ) {
223 1         2 $state = 0;
224 1         2 $self->_readline(); # skip the next line
225 1         2 while( defined( $_ = $self->_readline) ) {
226 21 100       38 unless( /^\s+/) {
227 1         6 $self->_pushback($_);
228 1         3 last;
229             }
230 20         27 s/^\s+//;
231 20         39 s/\s+$//;
232 20         39 my ($index,$res,$model,$data) = split;
233 20         48 $frequencies{$res} = [ $model,$data];
234             }
235             } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
236 2         8 my @save = ($1,$2,$3);
237             # finish processing the transition_matrix
238 2         2 my $i =0;
239 2         4 foreach my $row ( @transition_matrix ) {
240 20         15 my $j = 0;
241 20         12 foreach my $col ( @$row ) {
242 400         460 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
243 400         237 $j++;
244             }
245 20         15 $i++;
246             }
247 2 50       5 if( defined $treenum ) {
248 0         0 $self->_pushback($_);
249 0         0 last;
250             }
251            
252 2         1 $state = 2;
253 2         4 ($treenum,$possible_trees, $model) = @save;
254 2         8 $model =~ s/\s+/ /g;
255             } elsif( $state == 1 ) {
256 65 100 100     222 next if( /^\s+$/ || /^\s+Ala/);
257 61         83 s/^\s+//;
258 61         196 s/\s+$//;
259 61 100       87 if( $trans_type eq '1PAM-1.0e7' ) {
    50          
260             # because the matrix is split up into 2-10 column sets
261 40         18 push @{$transition_matrix[$transition_ct++]}, split ;
  40         156  
262 40 100       95 $transition_ct = 0 if $transition_ct % 20 == 0;
263             } elsif( $trans_type eq '1PAM-1.0e5' ) {
264             # because the matrix is split up into 2-10 column sets
265 21         92 my ($res,@row) = split;
266 21 100       36 next if $transition_ct >= 20; # skip last
267 20         17 push @{$transition_matrix[$transition_ct++]}, @row;
  20         121  
268             }
269             } elsif( $state == 2 ) {
270 10 100       26 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
271 5         10 push @treedata, [ $1,$2];
272             }
273             # save this for the end so that we can
274             # be efficient and only open one tree parser
275 10         19 push @treelines, $_;
276             }
277             }
278             # waiting till the end to do this, is it better
279 2         40 my @trees;
280 2 50       6 if( @treelines ) {
281 2         12 my $strdat = IO::String->new(join('',@treelines));
282 2         73 my $treeio = Bio::TreeIO->new(-fh => $strdat,
283             -format => 'newick');
284 2         4 while( my $tree = $treeio->next_tree ) {
285 10 100       18 if( @treedata ) {
286 5         4 my $dat = shift @treedata;
287             # set the associated information
288 5         10 $tree->id($dat->[0]);
289 5         18 $tree->score($dat->[1]);
290             }
291 10         22 push @trees, $tree;
292             }
293             }
294 2 50       16 return unless( $parsed );
295 2         23 my $result = Bio::Tools::Phylo::Molphy::Result->new
296             (-trees => \@trees,
297             -substitution_matrix => \%subst_matrix,
298             -frequencies => \%frequencies,
299             -model => $model,
300             -search_space => $possible_trees,
301             );
302 2         8 while( my ($type,$mat) = each %transition_mat ) {
303 2         5 $result->transition_probability_matrix( $type,$mat);
304             }
305 2         26 $result;
306             }
307              
308             1;