File Coverage

Bio/Tools/Phylo/Molphy.pm
Criterion Covered Total %
statement 118 122 96.7
branch 40 46 86.9
condition 3 3 100.0
subroutine 7 7 100.0
pod 2 2 100.0
total 170 180 94.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Phylo::Molphy
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Phylo::Molphy - parser for Molphy output
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Phylo::Molphy;
21             my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22             while( my $r = $parser->next_result ) {
23             # r is a Bio::Tools::Phylo::Molphy::Result object
24              
25             # print the model name
26             print $r->model, "\n";
27              
28             # get the substitution matrix
29             # this is a hash of 3letter aa codes -> 3letter aa codes representing
30             # substitution rate
31             my $smat = $r->substitution_matrix;
32             print "Arg -> Gln substitution rate is %d\n",
33             $smat->{'Arg'}->{'Gln'}, "\n";
34              
35             # get the transition probablity matrix
36             # this is a hash of 3letter aa codes -> 3letter aa codes representing
37             # transition probabilty
38             my $tmat = $r->transition_probability_matrix;
39             print "Arg -> Gln transition probablity is %.2f\n",
40             $tmat->{'Arg'}->{'Gln'}, "\n";
41              
42             # get the frequency for each of the residues
43             my $rfreqs = $r->residue_frequencies;
44              
45             foreach my $residue ( keys %{$rfreqs} ) {
46             printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47             $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
48             }
49              
50             my @trees;
51             while( my $t = $r->next_tree ) {
52             push @trees, $t;
53             }
54              
55             print "search space is ", $r->search_space, "\n",
56             "1st tree score is ", $trees[0]->score, "\n";
57              
58             # writing to STDOUT, use -file => '>filename' to specify a file
59             my $out = Bio::TreeIO->new(-format => "newick");
60             $out->write_tree($trees[0]); # writing only the 1st tree
61             }
62              
63             =head1 DESCRIPTION
64              
65             A parser for Molphy output (protml,dnaml)
66              
67             =head1 FEEDBACK
68              
69             =head2 Mailing Lists
70              
71             User feedback is an integral part of the evolution of this and other
72             Bioperl modules. Send your comments and suggestions preferably to
73             the Bioperl mailing list. Your participation is much appreciated.
74              
75             bioperl-l@bioperl.org - General discussion
76             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77              
78             =head2 Support
79              
80             Please direct usage questions or support issues to the mailing list:
81              
82             I
83              
84             rather than to the module maintainer directly. Many experienced and
85             reponsive experts will be able look at the problem and quickly
86             address it. Please include a thorough description of the problem
87             with code and data examples if at all possible.
88              
89             =head2 Reporting Bugs
90              
91             Report bugs to the Bioperl bug tracking system to help us keep track
92             of the bugs and their resolution. Bug reports can be submitted via the
93             web:
94              
95             https://github.com/bioperl/bioperl-live/issues
96              
97             =head1 AUTHOR - Jason Stajich
98              
99             Email jason-at-bioperl.org
100              
101             =head1 APPENDIX
102              
103             The rest of the documentation details each of the object methods.
104             Internal methods are usually preceded with a _
105              
106             =cut
107              
108              
109             # Let the code begin...
110              
111              
112             package Bio::Tools::Phylo::Molphy;
113 1     1   455 use strict;
  1         2  
  1         24  
114              
115 1     1   241 use Bio::Tools::Phylo::Molphy::Result;
  1         2  
  1         29  
116 1     1   241 use Bio::TreeIO;
  1         3  
  1         29  
117 1     1   5 use IO::String;
  1         2  
  1         22  
118              
119 1     1   3 use base qw(Bio::Root::Root Bio::Root::IO);
  1         2  
  1         995  
120              
121             =head2 new
122              
123             Title : new
124             Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
125             Function: Builds a new Bio::Tools::Phylo::Molphy object
126             Returns : Bio::Tools::Phylo::Molphy
127             Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
128              
129              
130             =cut
131              
132             sub new {
133 2     2 1 7 my($class,@args) = @_;
134              
135 2         11 my $self = $class->SUPER::new(@args);
136 2         9 $self->_initialize_io(@args);
137              
138 2         10 return $self;
139             }
140              
141             =head2 next_result
142              
143             Title : next_result
144             Usage : my $r = $molphy->next_result
145             Function: Get the next result set from parser data
146             Returns : Bio::Tools::Phylo::Molphy::Result object
147             Args : none
148              
149              
150             =cut
151              
152             sub next_result{
153 2     2 1 630 my ($self) = @_;
154              
155             # A little statemachine for the parser here
156 2         7 my ($state,$transition_ct,
157             @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
158 2         6 my ( %subst_matrix, @treelines, @treedata, %frequencies);
159 2         0 my ( $treenum,$possible_trees, $model);
160 2         0 my ($trans_type,$trans_amount);
161 2         2 my $parsed = 0;
162 2         8 while( defined ( $_ = $self->_readline()) ) {
163 82         85 $parsed = 1;
164 82 100       226 if( /^Relative Substitution Rate Matrix/ ) {
    100          
    100          
    100          
    100          
    100          
165 1 50       4 if( %subst_matrix ) {
166 0         0 $self->_pushback($_);
167 0         0 last;
168             }
169 1         2 $state = 0;
170 1         2 my ( @tempdata);
171 1         2 @resloc = ();
172 1         2 while( defined ($_ = $self->_readline) ) {
173 21 100       53 last if (/^\s+$/);
174             # remove leading/trailing spaces
175 20         40 s/^\s+//;
176 20         83 s/\s+$//;
177 20         141 my @data = split;
178 20         23 my $i = 0;
179 20         24 for my $l ( @data ) {
180 400 100       492 if( $l =~ /\D+/ ) {
181 20         27 push @resloc, $l;
182             }
183 400         343 $i++;
184             }
185 20         42 push @tempdata, \@data;
186             }
187 1         2 my $i = 0;
188 1         2 for my $row ( @tempdata ) {
189 20         15 my $j = 0;
190 20         22 for my $col ( @$row ) {
191 400 100       410 if( $i == $j ) {
192             # empty string for diagonals
193 20         29 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
194             } else {
195 380         486 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
196             }
197 400         374 $j++;
198             }
199 20         35 $i++;
200             }
201             } elsif( /^Transition Probability Matrix/ ) {
202 2 50       10 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
203 2         3 $state = 1;
204 2         8 my $newtrans_type = "$3-$1";
205 2         4 $trans_amount = $1;
206 2 100       4 if( defined $trans_type ) {
207             # finish processing the transition_matrix
208 1         2 my $i =0;
209 1         2 foreach my $row ( @transition_matrix ) {
210 20         19 my $j = 0;
211 20         18 foreach my $col ( @$row ) {
212 400         517 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
213 400         346 $j++;
214             }
215 20         21 $i++;
216             }
217             }
218 2         3 $trans_type = $newtrans_type;
219 2         2 $transition_ct = 0;
220 2         20 @transition_matrix = ();
221             }
222             } elsif ( /Acid Frequencies/ ) {
223 1         1 $state = 0;
224 1         3 $self->_readline(); # skip the next line
225 1         3 while( defined( $_ = $self->_readline) ) {
226 21 100       63 unless( /^\s+/) {
227 1         16 $self->_pushback($_);
228 1         3 last;
229             }
230 20         40 s/^\s+//;
231 20         53 s/\s+$//;
232 20         50 my ($index,$res,$model,$data) = split;
233 20         58 $frequencies{$res} = [ $model,$data];
234             }
235             } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
236 2         11 my @save = ($1,$2,$3);
237             # finish processing the transition_matrix
238 2         3 my $i =0;
239 2         4 foreach my $row ( @transition_matrix ) {
240 20         18 my $j = 0;
241 20         20 foreach my $col ( @$row ) {
242 400         524 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
243 400         347 $j++;
244             }
245 20         18 $i++;
246             }
247 2 50       6 if( defined $treenum ) {
248 0         0 $self->_pushback($_);
249 0         0 last;
250             }
251            
252 2         3 $state = 2;
253 2         4 ($treenum,$possible_trees, $model) = @save;
254 2         8 $model =~ s/\s+/ /g;
255             } elsif( $state == 1 ) {
256 65 100 100     204 next if( /^\s+$/ || /^\s+Ala/);
257 61         111 s/^\s+//;
258 61         215 s/\s+$//;
259 61 100       96 if( $trans_type eq '1PAM-1.0e7' ) {
    50          
260             # because the matrix is split up into 2-10 column sets
261 40         32 push @{$transition_matrix[$transition_ct++]}, split ;
  40         153  
262 40 100       103 $transition_ct = 0 if $transition_ct % 20 == 0;
263             } elsif( $trans_type eq '1PAM-1.0e5' ) {
264             # because the matrix is split up into 2-10 column sets
265 21         84 my ($res,@row) = split;
266 21 100       35 next if $transition_ct >= 20; # skip last
267 20         18 push @{$transition_matrix[$transition_ct++]}, @row;
  20         128  
268             }
269             } elsif( $state == 2 ) {
270 10 100       35 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
271 5         19 push @treedata, [ $1,$2];
272             }
273             # save this for the end so that we can
274             # be efficient and only open one tree parser
275 10         47 push @treelines, $_;
276             }
277             }
278             # waiting till the end to do this, is it better
279 2         3 my @trees;
280 2 50       4 if( @treelines ) {
281 2         19 my $strdat = IO::String->new(join('',@treelines));
282 2         131 my $treeio = Bio::TreeIO->new(-fh => $strdat,
283             -format => 'newick');
284 2         7 while( my $tree = $treeio->next_tree ) {
285 10 100       22 if( @treedata ) {
286 5         6 my $dat = shift @treedata;
287             # set the associated information
288 5         12 $tree->id($dat->[0]);
289 5         9 $tree->score($dat->[1]);
290             }
291 10         26 push @trees, $tree;
292             }
293             }
294 2 50       22 return unless( $parsed );
295 2         26 my $result = Bio::Tools::Phylo::Molphy::Result->new
296             (-trees => \@trees,
297             -substitution_matrix => \%subst_matrix,
298             -frequencies => \%frequencies,
299             -model => $model,
300             -search_space => $possible_trees,
301             );
302 2         13 while( my ($type,$mat) = each %transition_mat ) {
303 2         6 $result->transition_probability_matrix( $type,$mat);
304             }
305 2         56 $result;
306             }
307              
308             1;