File Coverage

blib/lib/Bio/Tools/Run/Phylo/Molphy/ProtML.pm
Criterion Covered Total %
statement 69 166 41.5
branch 16 68 23.5
condition 4 30 13.3
subroutine 13 19 68.4
pod 10 10 100.0
total 112 293 38.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::Molphy::ProtML
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Phylo::Molphy::ProtML - A wrapper for the Molphy pkg app ProtML
17              
18             =head1 SYNOPSIS
19              
20             use Bio::AlignIO;
21             use Bio::TreeIO;
22             use Bio::Tools::Run::Phylo::Molphy::ProtML;
23              
24             my %args = ( 'models' => 'jtt',
25             'search' => 'quick',
26             'other' => [ '-information', '-w'] );
27             my $verbose = 0; # change to 1 if you want some debugging output
28             my $protml = Bio::Tools::Run::Phylo::Molphy::ProtML->new(-verbose => $verbose,
29             -flags => \%args);
30              
31             die("cannot find the protml executable") unless $protml->executable;
32              
33             # read in a previously built protein alignment
34             my $in = Bio::AlignIO->new(-format => 'clustalw',
35             -file => 't/data/cel-cbr-fam.aln');
36             my $aln = $in->next_aln;
37             $protml->alignment($aln);
38              
39             my ($rc,$results) = $protml->run();
40              
41             # This may be a bit of overkill, but it is possible we could
42             # have a bunch of results and $results is a
43             # Bio::Tools::Phylo::Molphy object
44              
45             my $r = $results->next_result;
46             # $r is a Bio::Tools::Phylo::Molphy::Result object
47              
48             my @trees;
49             while( my $t = $r->next_tree ) {
50             push @trees, $t;
51             }
52              
53             print "search space is ", $r->search_space, "\n";
54             "1st tree score is ", $tree[0]->score, "\n";
55              
56             my $out = Bio::TreeIO->new(-file => ">saved_MLtrees.tre",
57             -format => "newick");
58             $out->write_tree($tree[0]);
59             $out = undef;
60              
61             =head1 DESCRIPTION
62              
63             This is a wrapper for the exe from the Molphy (MOLecular
64             PHYlogenetics) package by Jun Adachi & Masami Hasegawa. The software
65             can be downloaded from L.
66             Note that PHYLIP (Joe Felsenstein) also provides a version of protml
67             which this module is currently NOT prepared to handle. Use the package
68             available directly from MOLPHY authors if you want to use the module
69             in its present implementation (extensions are welcomed!).
70              
71             The main components are the protml and nucml executables which are
72             used to build maximum likelihood (ML) phylogenetic trees based on
73             either protein or nucleotide sequences.
74              
75             Here are the valid input parameters, we have added a longhand version
76             of the parameters to help you understand what each one does. Either
77             the longhand or the original Molphy parameter will work.
78              
79             Bioperl Molphy Description
80             Longhand parameter
81             Model (one of these):
82             ---------------
83             jtt j Jones, Taylor & Thornton (1992)
84             jtt-f jf JTT w/ frequencies
85             dayhoff d Dahoff et al. (1978)
86             dayhoff-f d dayhoff w/ frequencies
87             mtrev24 m mtREV24 Adachi & Hasegwa (1995)
88             mtrev24-f mf mtREV24 w/ frequencies
89             poisson p Poisson
90             proportional pf Proportional
91             rsr r Relative Substitution Rate
92             rsr-f rf RSR w/ frequencies
93             frequencies f data frequencies
94              
95             Search Strategy (one of these):
96             ----------------
97             usertrees u User trees (must also supply a tree)
98             rearrangement R Local rearrangement
99             lbp RX Local boostrap prob
100             exhaustive e Exhaustive search
101             star s Star decomposition search (may not be ML)
102             quick q Quick Add OTU search (may not be ML)
103             distance D ML Distance matrix --> NJDIST (need to supply
104             NJDIST tree)
105              
106             Others (can be some or all of these):
107             ---------------
108             norell-bp b No RELL-BP
109             minimumevolution M Minimum evolution
110              
111             sequential S Sequence is in Sequential format
112             _OR_
113             interleaved I Sequence is in Interleaved format
114              
115             verbose v Verbose messages directed to STDERR
116             information i Output some information (tree vals)
117             w More some extra information (transition
118             matricies, etc)
119              
120              
121             =head1 FEEDBACK
122              
123             =head2 Mailing Lists
124              
125             User feedback is an integral part of the evolution of this and other
126             Bioperl modules. Send your comments and suggestions preferably to
127             the Bioperl mailing list. Your participation is much appreciated.
128              
129             bioperl-l@bioperl.org - General discussion
130             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
131              
132             =head2 Support
133              
134             Please direct usage questions or support issues to the mailing list:
135              
136             I
137              
138             rather than to the module maintainer directly. Many experienced and
139             reponsive experts will be able look at the problem and quickly
140             address it. Please include a thorough description of the problem
141             with code and data examples if at all possible.
142              
143             =head2 Reporting Bugs
144              
145             Report bugs to the Bioperl bug tracking system to help us keep track
146             of the bugs and their resolution. Bug reports can be submitted via the
147             web:
148              
149             http://redmine.open-bio.org/projects/bioperl/
150              
151             =head1 AUTHOR - Jason Stajich
152              
153             Email jason-AT-bioperl_DOT_org
154              
155             =head1 CONTRIBUTORS
156              
157             Additional contributors names and emails here
158              
159             =head1 APPENDIX
160              
161             The rest of the documentation details each of the object methods.
162             Internal methods are usually preceded with a _
163              
164             =cut
165              
166              
167             # Let the code begin...
168              
169              
170             package Bio::Tools::Run::Phylo::Molphy::ProtML;
171 1     1   152935 use vars qw(@ISA $PROGRAMNAME $PROGRAM $MINNAMELEN %VALIDVALUES %VALIDFLAGS);
  1         2  
  1         65  
172 1     1   5 use strict;
  1         2  
  1         18  
173 1     1   274 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         23  
174 1     1   6 use Bio::Tools::Phylo::Molphy;
  1         2  
  1         16  
175 1     1   330 use Bio::AlignIO;
  1         64688  
  1         36  
176 1     1   8 use Bio::TreeIO;
  1         2  
  1         17  
177 1     1   4 use Bio::Root::Root;
  1         2  
  1         209  
178              
179             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase );
180              
181             BEGIN {
182 1     1   3 $MINNAMELEN = 25;
183              
184 1         20 %VALIDFLAGS = (
185             'models' => { # models
186             jtt => 'j', # Jones, Taylor & Thornton (1992)
187             'jtt-f' => 'jf', # jtt w/ frequencies
188             dayhoff => 'd', # Dahoff et al. (1978)
189             'dayhoff-f' => 'df', # dayhoff w/ frequencies
190             mtrev24 => 'm', # Adachi & Hasegwa (1995)
191             'mtrev24-f' => 'mf', # mtREV24 w/ frequencies
192             poisson => 'p', # Poisson
193             proportional => 'pf', # Proportional
194             rsr => 'r', # Relative Substitution Rate
195             'rsr-f' => 'rf', # RSR w/ frequencies
196             frequencies => 'f', # data frequencies
197             },
198             'search' => { # search strategy
199             usertrees => 'u', # must also supply tree
200             rearrangement => 'R', # local rearrangement
201             lbp => 'RX', # local boostrap prob
202             exhaustive => 'e', # exhaustive
203             star => 's', # star decomposition search (may not be ML)
204             quick => 'q', # quick add OTU search (may not be ML)
205             distance => 'D', # ML Distance matrix --> NJDIST
206             },
207             'others' => { # others
208             'norell-bp' => 'b',
209             sequential => 'S', # sequential format
210             interleaved => 'I', # interleaved format
211             minimumevolution => 'M', # minimum evolution
212             verbose => 'v', # verbose to stderr
213             information => 'i', # output some information
214             w => 'w', # some extra information
215             }
216             );
217             # this will allow for each of the parameters to also accept the original
218             # protML params
219 1         2 my @toadd;
220 1         4 foreach my $type ( keys %VALIDFLAGS ) {
221 3         5 my @keys = keys %{ $VALIDFLAGS{$type} };
  3         9  
222 3         5 for my $k ( @keys ) {
223 25         36 my $v = $VALIDFLAGS{$type}->{$k};
224 25         45 $VALIDFLAGS{$type}->{$v} = $v;
225             }
226             }
227 0         0 %VALIDVALUES = (num_retained => sub { my $a = shift;
228 0 0       0 if( $a =~ /^\d+$/) {
229 0         0 return 'n';
230             }}, # should be a number
231 0         0 percent_retained => sub { my $a = shift;
232 0 0 0     0 if( $a =~ /^\d+$/ &&
      0        
233             $a >= 0 && $a <= 100) {
234 0         0 return 'P';
235             }}
236 1         1121 );
237              
238              
239             }
240              
241             =head2 program_name
242              
243             Title : program_name
244             Usage : >program_name()
245             Function: holds the program name
246             Returns: string
247             Args : None
248              
249             =cut
250              
251             sub program_name {
252 6     6 1 30 return 'protml';
253             }
254              
255             =head2 program_dir
256              
257             Title : program_dir
258             Usage : ->program_dir()
259             Function: returns the program directory, obtained from ENV variable.
260             Returns: string
261             Args :
262              
263             =cut
264              
265             sub program_dir {
266 3 50   3 1 13 return Bio::Root::IO->catfile($ENV{MOLPHYDIR}) if $ENV{MOLPHYDIR};
267             }
268              
269             =head2 new
270              
271             Title : new
272             Usage : my $obj = Bio::Tools::Run::Phylo::Molphy::ProtML->new();
273             Function: Builds a new Bio::Tools::Run::Phylo::Molphy::ProtML object
274             Returns : Bio::Tools::Run::Phylo::Molphy::ProtML
275             Args : -alignment => the Bio::Align::AlignI object
276             -save_tempfiles => boolean to save the generated tempfiles and
277             NOT cleanup after onesself (default FALSE)
278             -tree => the Bio::Tree::TreeI object
279             -params => a hashref of PAML parameters (all passed to
280             set_parameter)
281             -executable => where the protml executable resides
282              
283             See also: L, L
284              
285              
286             =cut
287              
288             sub new {
289 1     1 1 100 my($class,@args) = @_;
290              
291 1         12 my $self = $class->SUPER::new(@args);
292 1         41 $self->{'_protmlparams'} = {};
293 1         2 $self->{'_protmlflags'} = {};
294              
295 1         9 my ($aln, $tree, $st, $flags, $params,
296             $exe) = $self->_rearrange([qw(ALIGNMENT TREE SAVE_TEMPFILES
297             FLAGS PARAMS EXECUTABLE)],
298             @args);
299 1 50       34 defined $aln && $self->alignment($aln);
300 1 50       4 defined $tree && $self->tree($tree );
301 1 50       3 defined $st && $self->save_tempfiles($st);
302 1 50       3 defined $exe && $self->executable($exe);
303 1 50       5 if( defined $flags ) {
304 1 50       6 if( ref($flags) !~ /HASH/i ) {
305 0         0 $self->warn("Must provide a valid hash ref for parameter -FLAGS");
306             } else {
307 1         4 foreach my $type ( keys %$flags ) {
308 3 100       9 if( $type =~ /other/i ) {
309 1         1 foreach my $flag ( @{$flags->{$type}} ) {
  1         3  
310 2         4 $self->set_flag('others', $flag) ;
311             }
312             } else {
313 2         6 $self->set_flag($type, $flags->{$type}) ;
314             }
315             }
316             }
317             }
318 1 50       3 if( defined $params ) {
319 0 0       0 if( ref($flags) !~ /HASH/i ) {
320 0         0 $self->warn("Must provide a valid hash ref for parameter -FLAGS");
321             } else {
322 0         0 map { $self->set_parameter($_, $$params{$_}) } keys %$params;
  0         0  
323             }
324             }
325 1         3 return $self;
326             }
327              
328              
329             =head2 run
330              
331             Title : run
332             Usage : $protml->run();
333             Function: run the protml analysis using the default or updated parameters
334             the alignment parameter must have been set
335             Returns : Bio::Tools::Phylo::Molphy
336             Args :
337              
338              
339             =cut
340              
341             sub run {
342 0     0 1 0 my ($self) = @_;
343              
344 0 0       0 unless ( $self->save_tempfiles ) {
345 0         0 $self->cleanup();
346             }
347              
348 0         0 my $align = $self->alignment();
349 0 0       0 if( ! $align ) {
350 0         0 $self->warn("must have provided a valid alignment object");
351 0         0 return -1;
352             }
353 0 0       0 if( $align->get_seq_by_pos(1)->alphabet ne 'protein' ) {
354 0         0 $self->warn("Must have provided a valid protein alignment");
355 0         0 return -1;
356             }
357              
358 0         0 my %params = $self->get_parameters;
359 0         0 my %flags = $self->get_flags();
360 0         0 my $cmdstring = $self->executable;
361              
362 0 0       0 if( ! defined $flags{'search'} ) {
363 0         0 $self->warn("Must have set a valid 'search' flag to run protml this is one of ".join(",", keys %{$VALIDFLAGS{'search'}}));
  0         0  
364 0         0 return;
365             }
366 0         0 my $tree = $self->tree;
367              
368 0         0 for my $t ( keys %flags ) {
369 0 0       0 if( $t eq 'others' ) {
370 0         0 $cmdstring .= " " . join(" ", map { '-'.$_ } keys %{$flags{$t}});
  0         0  
  0         0  
371             } else {
372 0 0       0 next if $flags{$t} eq 'u';
373 0         0 $cmdstring .= " -".$flags{$t};
374             }
375             }
376              
377 0         0 while( my ($param,$val) = each %params ) {
378 0         0 $cmdstring .= " \-$param $val";
379             }
380 0         0 my ($tmpdir) = $self->tempdir();
381 0 0       0 my ($tempseqFH,$tempseqfile) = $self->io->tempfile
382             ('DIR' => $tmpdir,
383             UNLINK => ($self->save_tempfiles ? 0 : 1));
384            
385 0 0       0 my $alnout = Bio::AlignIO->new('-format' => 'phylip',
386             '-fh' => $tempseqFH,
387             '-interleaved' => 0,
388             '-idlinebreak' => 1,
389             '-idlength' => $MINNAMELEN > $align->maxdisplayname_length() ? $MINNAMELEN : $align->maxdisplayname_length() +1);
390              
391 0         0 $alnout->write_aln($align);
392 0         0 $alnout->close();
393 0         0 $alnout = undef;
394 0         0 close($tempseqFH);
395 0         0 $tempseqFH = undef;
396 0         0 $cmdstring .= " $tempseqfile";
397 0 0 0     0 if( $tree && defined $flags{'search'} eq 'u' ) {
398 0 0       0 my ($temptreeFH,$temptreefile) = $self->io->tempfile
399             ('DIR' => $tmpdir,
400             UNLINK => ($self->save_tempfiles ? 0 : 1));
401 0         0 my $treeout = Bio::TreeIO->new('-format' => 'newick',
402             '-fh' => $temptreeFH);
403 0         0 $treeout->write_tree($tree);
404 0         0 $treeout->close();
405 0         0 close($temptreeFH);
406 0         0 $cmdstring .= " $temptreefile";
407             }
408 0         0 $self->debug( "cmdstring is $cmdstring\n");
409              
410 0 0       0 unless( open(PROTML, "$cmdstring |") ) {
411 0         0 $self->warn("Cannot run $cmdstring");
412 0         0 return undef;
413             }
414 0         0 my $parser= Bio::Tools::Phylo::Molphy->new(-fh => \*PROTML);
415 0         0 return (1,$parser);
416             }
417              
418             =head2 alignment
419              
420             Title : alignment
421             Usage : $protml->align($aln);
422             Function: Get/Set the Bio::Align::AlignI object
423             Returns : Bio::Align::AlignI object
424             Args : [optional] Bio::Align::AlignI
425             Comment : We could potentially add support for running directly on a file
426             but we shall keep it simple
427             See also : L, L
428              
429             =cut
430              
431             sub alignment{
432 0     0 1 0 my ($self,$aln) = @_;
433 0 0       0 if( defined $aln ) {
434 0 0 0     0 if( !ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) {
435 0         0 $self->warn("Must specify a valid Bio::Align::AlignI object to the alignment function");
436 0         0 return undef;
437             }
438 0         0 $self->{'_alignment'} = $aln;
439             }
440 0         0 return $self->{'_alignment'};
441             }
442              
443             =head2 tree
444              
445             Title : tree
446             Usage : $protml->tree($tree, %params);
447             Function: Get/Set the Bio::Tree::TreeI object
448             Returns : Bio::Tree::TreeI
449             Args : [optional] $tree => Bio::Tree::TreeI,
450              
451             Comment : We could potentially add support for running directly on a file
452             but we shall keep it simple
453             See also : L
454              
455             =cut
456              
457             sub tree {
458 0     0 1 0 my ($self, $tree, %params) = @_;
459 0 0       0 if( defined $tree ) {
460 0 0 0     0 if( ! ref($tree) || ! $tree->isa('Bio::Tree::TreeI') ) {
461 0         0 $self->warn("Must specify a valid Bio::Tree::TreeI object to the alignment function");
462             }
463 0         0 $self->{'_tree'} = $tree;
464             }
465 0         0 return $self->{'_tree'};
466             }
467              
468             =head2 get_flags
469              
470             Title : get_flags
471             Usage : my @params = $protml->get_flags();
472             Function: returns the list of flags
473             Returns : array of flag names coded in the way that
474             Args : none
475              
476              
477             =cut
478              
479             sub get_flags{
480 0     0 1 0 my ($self) = @_;
481             # we're returning a copy of this
482 0         0 return %{ $self->{'_protmlflags'} };
  0         0  
483             }
484              
485              
486             =head2 set_flag
487              
488             Title : set_flag
489             Usage : $protml->set_parameter($type,$val);
490             Function: Sets a protml parameter, will be validated against
491             the valid values as set in the %VALIDVALUES class variable.
492             The checks can be ignored if one turns off param checks like this:
493             $protml->no_param_checks(1)
494             Returns : boolean if set was success, if verbose is set to -1
495             then no warning will be reported
496             Args : $type => name of the parameter
497             This can be one of 'search', 'model', 'other'
498             $value => flag value
499             See also: L
500              
501             =cut
502              
503             sub set_flag{
504 4     4 1 16 my ($self,$type,$param) = @_;
505 4         8 $type = lc($type);
506 4         14 while( substr($type,0,1) eq '-') { # handle multiple '-'
507 0         0 substr($type,0,1,'');
508             }
509              
510 4 50 33     15 if( ! defined $type ||
511             ! defined $param ) {
512 0         0 $self->debug("Must supply a type and param when setting flag");
513 0         0 return 0;
514             }
515              
516 4 50       8 if( ! $VALIDFLAGS{$type} ) {
517 0         0 $self->warn("$type is an unrecognized type");
518             }
519 4         8 $param = lc($param);
520              
521 4         9 while( substr($param,0,1) eq '-') { # handle multiple '-'
522 2         4 substr($param,0,1,'');
523             }
524              
525 4 50 33     13 if(! $self->no_param_checks && ! defined $VALIDFLAGS{$type}->{$param} ) {
526 0         0 $self->warn("unknown flag ($type) $param will not be set unless you force by setting no_param_checks to true");
527 0         0 return 0;
528             }
529 4 100       8 if($type eq 'others' ) {
530 2   33     7 $self->{'_protmlflags'}->{$type}->{$VALIDFLAGS{$type}->{$param} || $param} = 1;
531             } else {
532 2   33     6 $self->{'_protmlflags'}->{$type} = $VALIDFLAGS{$type}->{$param} || $param;
533             }
534 4         9 return 1;
535             }
536              
537              
538             =head2 get_parameters
539              
540             Title : get_parameters
541             Usage : my %params = $protml->get_parameters();
542             Function: returns the list of parameters as a hash
543             Returns : associative array keyed on parameter names
544             Args : none
545              
546              
547             =cut
548              
549             sub get_parameters{
550 0     0 1 0 my ($self) = @_;
551             # we're returning a copy of this
552 0         0 return %{ $self->{'_protmlparams'} };
  0         0  
553             }
554              
555              
556             =head2 set_parameter
557              
558             Title : set_parameter
559             Usage : $protml->set_parameter($param,$val);
560             Function: Sets a protml parameter, will be validated against
561             the valid values as set in the %VALIDVALUES class variable.
562             The checks can be ignored if one turns off param checks like this:
563             $protml->no_param_checks(1)
564             Returns : boolean if set was success, if verbose is set to -1
565             then no warning will be reported
566             Args : $param => name of the parameter
567             $value => value to set the parameter to
568             See also: L
569              
570             =cut
571              
572             sub set_parameter{
573 0     0 1 0 my ($self,$param,$value) = @_;
574 0         0 $param = lc($param);
575 0         0 $param =~ s/^\-//;
576 0 0 0     0 if(! $self->no_param_checks && ! defined $VALIDVALUES{$param} ) {
577 0         0 $self->warn("unknown parameter $param will not be set unless you force by setting no_param_checks to true");
578 0         0 return 0;
579             }
580              
581 0         0 my $paramflag = $VALIDVALUES{$param}->($value);
582 0 0       0 if( $paramflag ) {
583 0         0 $self->{'_protmlparams'}->{$paramflag} = $value;
584             } else {
585 0         0 print "value $value was not valid for param $param\n";
586 0         0 return 0;
587             }
588 0         0 return 1;
589             }
590              
591             =head1 Bio::Tools::Run::WrapperBase methods
592              
593             =cut
594              
595             =head2 no_param_checks
596              
597             Title : no_param_checks
598             Usage : $obj->no_param_checks($newval)
599             Function: Boolean flag as to whether or not we should
600             trust the sanity checks for parameter values
601             Returns : value of no_param_checks
602             Args : newvalue (optional)
603              
604              
605             =cut
606              
607             =head2 save_tempfiles
608              
609             Title : save_tempfiles
610             Usage : $obj->save_tempfiles($newval)
611             Function:
612             Returns : value of save_tempfiles
613             Args : newvalue (optional)
614              
615              
616             =cut
617              
618             =head2 outfile_name
619              
620             Title : outfile_name
621             Usage : my $outfile = $protml->outfile_name();
622             Function: Get/Set the name of the output file for this run
623             (if you wanted to do something special)
624             Returns : string
625             Args : [optional] string to set value to
626              
627              
628             =cut
629              
630              
631             =head2 tempdir
632              
633             Title : tempdir
634             Usage : my $tmpdir = $self->tempdir();
635             Function: Retrieve a temporary directory name (which is created)
636             Returns : string which is the name of the temporary directory
637             Args : none
638              
639              
640             =cut
641              
642             =head2 cleanup
643              
644             Title : cleanup
645             Usage : $protml->cleanup();
646             Function: Will cleanup the tempdir directory after a PAML run
647             Returns : none
648             Args : none
649              
650              
651             =cut
652              
653             =head2 io
654              
655             Title : io
656             Usage : $obj->io($newval)
657             Function: Gets a L object
658             Returns : L
659             Args : none
660              
661              
662             =cut
663              
664              
665             sub DESTROY {
666 1     1   3507 my $self= shift;
667 1 50       11 unless ( $self->save_tempfiles ) {
668 1         13 $self->cleanup();
669             }
670 1         6 $self->SUPER::DESTROY();
671             }
672             1;