File Coverage

blib/lib/Bio/Tools/Run/Phylo/Phylip/Consense.pm
Criterion Covered Total %
statement 38 160 23.7
branch 2 56 3.5
condition 0 8 0.0
subroutine 12 20 60.0
pod 6 6 100.0
total 58 250 23.2


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Phylo::Phylip::Consense
2             #
3             # Created by
4             #
5             # Shawn Hoon
6             #
7             # You may distribute this module under the same terms as perl itself
8              
9             # POD documentation - main docs before the code
10              
11             =head1 NAME
12              
13             Bio::Tools::Run::Phylo::Phylip::Consense - Wrapper for the phylip
14             program Consense
15              
16             =head1 SYNOPSIS
17              
18              
19             use Bio::Tools::Run::Phylo::Phylip::Consense;
20             use Bio::Tools::Run::Phylo::Phylip::SeqBoot;
21             use Bio::Tools::Run::Phylo::Phylip::ProtDist;
22             use Bio::Tools::Run::Phylo::Phylip::Neighbor;
23             use Bio::Tools::Run::Phylo::Phylip::DrawTree;
24              
25              
26             #first get an alignment
27             my $aio= Bio::AlignIO->new(-file=>$ARGV[0],-format=>"clustalw");
28             my $aln = $aio->next_aln;
29              
30             # To prevent truncation of sequence names by PHYLIP runs, use set_displayname_safe
31             my ($aln_safe, $ref_name)=$aln->set_displayname_safe();
32              
33             #next use seqboot to generate multiple aligments
34             my @params = ('datatype'=>'SEQUENCE','replicates'=>10);
35             my $seqboot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
36              
37             my $aln_ref= $seqboot_factory->run($aln);
38              
39             Or, for long sequence names:
40              
41             my $aln_ref= $seqboot_factory->run($aln_safe);
42              
43             #next build distance matrices and construct trees
44             my $pd_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new();
45             my $ne_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new();
46              
47             foreach my $a (@{$aln_ref}){
48             my $mat = $pd_factory->create_distance_matrix($a);
49             push @tree, $ne_factory->create_tree($mat);
50             }
51              
52             #now use consense to get a final tree
53             my $con_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new();
54              
55             #you may set outgroup either by the number representing the order in
56             #which species are entered or by the name of the species
57              
58             $con_factory->outgroup(1);
59             $con_factory->outgroup('HUMAN');
60              
61             my $tree = $con_factory->run(\@tree);
62              
63             # Restore original sequence names, after ALL phylip runs:
64             my @nodes = $tree->get_nodes();
65             foreach my $nd (@nodes){
66             $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf;
67             }
68              
69             #now draw the tree
70             my $draw_factory = Bio::Tools::Run::Phylo::Phylip::DrawTree->new();
71             my $image_filename = $draw_factory->draw_tree($tree);
72              
73             =head1 DESCRIPTION
74              
75             Wrapper for phylip consense program
76              
77             Taken from phylip documentation...
78              
79             CONSENSE reads a file of computer-readable trees and prints out
80             (and may also write out onto a file) a consensus tree. At the moment
81             it carries out a family of consensus tree methods called the M[l] methods
82             (Margush and McMorris, 1981). These include strict consensus
83             and majority rule consensus. Basically the consensus tree consists of monophyletic
84             groups that occur as often as possible in the data.
85              
86             More documentation on using Consense and setting parameters may be found
87             in the phylip package.
88              
89             VERSION Support
90              
91             This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
92             this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.
93              
94             =head1 PARAMETERS FOR Consense
95              
96             =head2 TYPE
97              
98             Title : TYPE
99             Description : (optional)
100             Only available in phylip v3.6
101              
102             This program supports 3 types of consensus generation
103              
104             MRe : Majority Rule (extended) Any set of species that
105             appears in more than 50% of the trees is included.
106             The program then considers the other sets of species
107             in order of the frequency with which they have appeared,
108             adding to the consensus tree any which are compatible
109             with it until
110              
111             STRICT: A set of species must appear in all input trees to be
112             included in the strict consensus tree.
113              
114             MR : A set of species is included in the consensus tree
115             if it is present in more than half of the input trees.
116              
117             Ml : The user is asked for a fraction between 0.5 and 1, and
118             the program then includes in the consensus tree any set
119             of species that occurs among the input trees more than
120             that fraction of then time. The Strict consensus and the
121             Majority Rule consensus are extreme cases of the M[l] consensus,
122             being for fractions of 1 and 0.5 respectively
123              
124             usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-type=>"Ml 0.7");
125              
126              
127             Defaults to MRe
128              
129             =head2 ROOTED
130              
131             Title: ROOTED
132             Description: (optional)
133              
134             toggles between the default assumption that the input trees are unrooted trees and
135             the selection that specifies that the tree is to be treated as a rooted tree and not
136             re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be
137             re-rooted automatically at the first species encountered on the first tree
138             (or at a species designated by the Outgroup option)
139              
140             usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-rooted=>1);
141              
142             Defaults to unrooted
143              
144             =head2 OUTGROUP
145              
146             Title : OUTGROUP
147             Description : (optional)
148              
149             It is in effect only if the Rooted option selection is not in effect.
150             The trees will be re-rooted with a species of your choosing.
151              
152             usage my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-outgroup=>2);
153              
154             Defaults to first species encountered.
155              
156             =head1 FEEDBACK
157              
158             =head2 Mailing Lists
159              
160             User feedback is an integral part of the evolution of this and other
161             Bioperl modules. Send your comments and suggestions preferably to one
162             of the Bioperl mailing lists. Your participation is much appreciated.
163              
164             bioperl-l@bioperl.org - General discussion
165             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
166              
167             =head2 Support
168              
169             Please direct usage questions or support issues to the mailing list:
170              
171             I
172              
173             rather than to the module maintainer directly. Many experienced and
174             reponsive experts will be able look at the problem and quickly
175             address it. Please include a thorough description of the problem
176             with code and data examples if at all possible.
177              
178             =head2 Reporting Bugs
179              
180             Report bugs to the Bioperl bug tracking system to help us keep track
181             the bugs and their resolution. Bug reports can be submitted via the
182             web:
183              
184             http://redmine.open-bio.org/projects/bioperl/
185              
186             =head1 AUTHOR - Shawn Hoon
187              
188             Email shawnh@fugu-sg.org
189              
190             =head1 APPENDIX
191              
192             The rest of the documentation details each of the object
193             methods. Internal methods are usually preceded with a _
194              
195             =cut
196              
197             #'
198              
199            
200             package Bio::Tools::Run::Phylo::Phylip::Consense;
201              
202 1         70 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
203             @CONSENSE_PARAMS @OTHER_SWITCHES
204 1     1   101690 %OK_FIELD);
  1         1  
205 1     1   3 use strict;
  1         1  
  1         15  
206 1     1   629 use Bio::SimpleAlign;
  1         80394  
  1         40  
207 1     1   449 use Bio::TreeIO;
  1         13827  
  1         31  
208 1     1   428 use Bio::Tools::Run::Phylo::Phylip::Base;
  1         3  
  1         31  
209 1     1   5 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
  1         0  
  1         110  
210 1     1   5 use IO::String;
  1         1  
  1         20  
211 1     1   2 use Cwd;
  1         2  
  1         83  
212              
213              
214             # inherit from Phylip::Base which has some methods for dealing with
215             # Phylip specifics
216             @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
217              
218             # You will need to enable the Consense program. This
219             # can be done in (at least) 3 ways:
220             #
221             # 1. define an environmental variable PHYLIPDIR:
222             # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
223             #
224             # 2. include a definition of an environmental variable PHYLIPDIR in
225             # every script that will use Consense.pm.
226             # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
227             #
228             # 3. You can set the path to the program through doing:
229             # my @params('executable'=>'/usr/local/bin/consense');
230             # my $Consense_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(@params);
231             #
232              
233              
234             BEGIN {
235 1     1   2 @CONSENSE_PARAMS = qw(TYPE OUTGROUP ROOTED);
236 1         2 @OTHER_SWITCHES = qw(QUIET);
237 1         1 foreach my $attr(@CONSENSE_PARAMS,@OTHER_SWITCHES) {
238 4         1055 $OK_FIELD{$attr}++;
239             }
240             }
241              
242             =head2 program_name
243              
244             Title : program_name
245             Usage : $obj->program_name()
246             Function: holds the program name
247             Returns: string
248             Args : None
249              
250             =cut
251              
252             sub program_name {
253 6     6 1 24 return 'consense';
254             }
255              
256             =head2 program_dir
257              
258             Title : program_dir
259             Usage : ->program_dir()
260             Function: returns the program directory, obtained from ENV variable.
261             Returns: string
262             Args :
263              
264             =cut
265              
266             sub program_dir {
267 3 50   3 1 10 return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};
268             }
269              
270             sub new {
271 1     1 1 77 my ($class,@args) = @_;
272 1         9 my $self = $class->SUPER::new(@args);
273            
274 1         31 my ($attr, $value);
275 1         4 while (@args) {
276 1         2 $attr = shift @args;
277 1         1 $value = shift @args;
278            
279 1 50       6 next if( $attr =~ /^-/ ); # don't want named parameters
280 0 0       0 if ($attr =~/PROGRAM/i) {
281 0         0 $self->executable($value);
282 0         0 next;
283             }
284 0 0       0 if ($attr =~ /IDLENGTH/i){
285 0         0 $self->idlength($value);
286 0         0 next;
287             }
288 0         0 $self->$attr($value);
289             }
290 1         3 return $self;
291             }
292              
293             sub AUTOLOAD {
294 0     0     my $self = shift;
295 0           my $attr = $AUTOLOAD;
296 0           $attr =~ s/.*:://;
297 0           $attr = uc $attr;
298 0 0         $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
299 0 0         $self->{$attr} = shift if @_;
300 0           return $self->{$attr};
301             }
302              
303             =head2 idlength
304              
305             Title : idlength
306             Usage : $obj->idlength ($newval)
307             Function:
308             Returns : value of idlength
309             Args : newvalue (optional)
310              
311              
312             =cut
313              
314             sub idlength{
315 0     0 1   my $self = shift;
316 0 0         if( @_ ) {
317 0           my $value = shift;
318 0           $self->{'idlength'} = $value;
319             }
320 0           return $self->{'idlength'};
321              
322             }
323              
324              
325             =head2 run
326              
327             Title : run
328             Usage :
329             $inputfilename = 't/data/prot.treefile';
330             $tree= $Consense_factory->run($inputfilename);
331             or
332              
333             $tree= $consense_factory->run(\@tree);
334              
335             Function: Create bootstrap sets of alignments
336             Example :
337             Returns : a L
338             Args : either a file containing trees in newick format
339             or an array ref of L
340              
341             Throws an exception if argument is not either a string (eg a
342             filename) or a Bio::Tree::TreeI object. If
343             argument is string, throws exception if file corresponding to string
344             name can not be found.
345              
346             =cut
347              
348             sub run{
349              
350 0     0 1   my ($self,$input) = @_;
351 0           my ($infilename);
352              
353             # Create input file pointer
354 0           $infilename = $self->_setinput($input);
355 0 0         if (!$infilename) {
356 0           $self->throw("Problems setting up for Consense. Probably bad input data in $input !");
357             }
358            
359             # Create parameter string to pass to Consense program
360 0           my $param_string = $self->_setparams();
361             # run Consense
362 0           my $aln = $self->_run($infilename,$param_string);
363             }
364              
365             #################################################
366              
367             =head2 _run
368              
369             Title : _run
370             Usage : Internal function, not to be called directly
371             Function: makes actual system call to Consense program
372             Example :
373             Returns : an array ref of
374             Args : Name of a file containing a set of tree in newick format
375             and a parameter string to be passed to Consense
376              
377              
378             =cut
379              
380             sub _run {
381 0     0     my ($self,$infile,$param_string) = @_;
382 0           my $instring;
383 0           my $curpath = cwd;
384 0 0         unless( File::Spec->file_name_is_absolute($infile) ) {
385 0           $infile = $self->io->catfile($curpath,$infile);
386             }
387 0           my $tmpdir = $self->tempdir;
388 0           chdir($self->tempdir);
389             # open a pipe to run Consense to bypass interactive menus
390 0 0 0       if ($self->quiet() || $self->verbose() < 0) {
391 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
392 0           open(Consense,"| ".$self->executable .">$null");
393             }
394             else {
395 0           open(Consense,"| ".$self->executable);
396             }
397 0           $instring = $infile."\n".$param_string;
398 0           $self->debug( "Program ".$self->executable." $instring\n");
399 0           print Consense $instring;
400 0           close(Consense);
401            
402             # get the results
403 0           my $outfile = $self->io->catfile($self->tempdir,$self->treefile);
404 0           chdir($curpath);
405            
406 0 0         $self->throw("Consense did not create files correctly ($outfile)")
407             unless (-e $outfile);
408              
409             #parse the alignments
410            
411 0           my @aln;
412 0           my $tio = Bio::TreeIO->new(-file=>$outfile,-format=>"newick");
413 0           my $tree = $tio->next_tree;
414              
415             # Clean up the temporary files created along the way...
416 0 0         unlink $outfile unless $self->save_tempfiles;
417            
418 0           return $tree;
419             }
420              
421             sub _set_names_from_tree {
422 0     0     my ($self,$tree) = @_;
423 0           my $newick;
424 0           my $ios = IO::String->new($newick);
425 0           my $tio = Bio::TreeIO->new(-fh=>$ios,-format=>'newick');
426 0           $tio->write_tree($tree);
427 0           my @names = $newick=~/(\w+):\d+/g;
428 0           my %names;
429 0           for(my $i=0; $i < $#names; $i++){
430 0           $names{$names[$i]} = $i+1;
431             }
432 0           $self->names(\%names);
433              
434 0           return;
435             }
436              
437              
438             =head2 _setinput()
439              
440             Title : _setinput
441             Usage : Internal function, not to be called directly
442             Function: Create input file for Consense program
443             Example :
444             Returns : name of file containing a trees in newick format
445             Args : an array ref of Bio::Tree::Tree object or input file name
446              
447              
448             =cut
449              
450             sub _setinput {
451 0     0     my ($self, $input) = @_;
452 0           my ($alnfilename,$tfh);
453              
454             # a phy formatted alignment file
455 0 0         unless (ref $input) {
456             # check that file exists or throw
457 0           $alnfilename= $input;
458 0 0         unless (-e $input) {return 0;}
  0            
459 0           my $tio = Bio::TreeIO->new(-file=>$alnfilename,-format=>'newick');
460 0           my $tree = $tio->next_tree;
461 0           $self->_set_names_from_tree($tree);
462 0           return $alnfilename;
463             }
464              
465             # $input may be a SimpleAlign Object
466 0 0         my @input = ref($input) eq "ARRAY" ? @{$input} : ($input);
  0            
467 0           ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
468 0           my $treeIO = Bio::TreeIO->new(-fh => $tfh,
469             -format=>'newick');
470              
471 0           foreach my $tree(@input){
472 0 0         $tree->isa('Bio::Tree::TreeI') || $self->throw('Expected a Bio::TreeI object');
473 0           $treeIO->write_tree($tree);
474             }
475             #get the species names in order, using the first one
476 0           $self->_set_names_from_tree($input[0]);
477 0           $treeIO->close();
478 0           close($tfh);
479 0           undef $tfh;
480 0           return $alnfilename;
481             }
482              
483             =head2 names()
484              
485             Title : names
486             Usage : $tree->names(\%names)
487             Function: get/set for a hash ref for storing names in matrix
488             with rank as values.
489             Example :
490             Returns : hash reference
491             Args : hash reference
492              
493             =cut
494              
495             sub names {
496 0     0 1   my ($self,$name) = @_;
497 0 0         if($name){
498 0           $self->{'_names'} = $name;
499             }
500 0           return $self->{'_names'};
501             }
502              
503             =head2 _setparams()
504              
505             Title : _setparams
506             Usage : Internal function, not to be called directly
507             Function: Create parameter inputs for Consense program
508             Example :
509             Returns : parameter string to be passed to Consense
510             Args : name of calling object
511              
512             =cut
513              
514             sub _setparams {
515 0     0     my ($attr, $value, $self);
516              
517             #do nothing for now
518 0           $self = shift;
519 0           my $param_string = "";
520 0           my $rooted = 0;
521              
522             #for case where type is Ml
523 0           my $Ml = 0;
524 0           my $frac = 0.5;
525 0           my %menu = %{$Menu{$self->version}->{'CONSENSE'}};
  0            
526              
527 0           foreach my $attr ( @CONSENSE_PARAMS) {
528 0           $value = $self->$attr();
529 0 0         next unless (defined $value);
530 0 0         if ($attr =~/ROOTED/i){
    0          
    0          
531 0           $rooted = 1;
532 0           $param_string .= $menu{'ROOTED'};
533             }
534             elsif($attr =~/OUTGROUP/i){
535 0 0         if($rooted == 1){
536 0           $self->warn("Outgroup option cannot be used with a rooted tree");
537 0           next;
538             }
539 0 0         if($value !~/^\d+$/){ # is a name
540 0           my %names = %{$self->names};
  0            
541 0 0         $names{$value} || $self->throw("Outgroup $value not found");
542 0           $value = $names{$value};
543             }
544 0           $param_string .=$menu{'OUTGROUP'}."$value\n";
545             }
546             elsif($attr=~/TYPE/i){
547 0 0         if($value=~/Ml/i){
548 0           ($value,$frac) = split(/\s+/,$value);
549             #default if not given
550 0   0       $frac ||= 0.5;
551 0 0 0       if($frac <= 0.5 || $frac > 1){
552 0           $self->warn("fraction given is out of range 0.5
553 0           $frac = 0.5;
554             }
555 0           $Ml=1;
556             }
557 0           $param_string.=$menu{'TYPE'}{uc $value};
558             }
559             else {
560 0           $param_string.=$menu{uc $attr};
561             }
562             }
563 0           $param_string .= $menu{'SUBMIT'};
564 0 0         if($Ml){
565 0           $param_string.=$frac."\n";
566             }
567              
568 0           return $param_string;
569             }
570              
571              
572              
573             =head1 Bio::Tools::Run::Wrapper methods
574              
575             =cut
576              
577             =head2 no_param_checks
578              
579             Title : no_param_checks
580             Usage : $obj->no_param_checks($newval)
581             Function: Boolean flag as to whether or not we should
582             trust the sanity checks for parameter values
583             Returns : value of no_param_checks
584             Args : newvalue (optional)
585              
586              
587             =cut
588              
589             =head2 save_tempfiles
590              
591             Title : save_tempfiles
592             Usage : $obj->save_tempfiles($newval)
593             Function:
594             Returns : value of save_tempfiles
595             Args : newvalue (optional)
596              
597              
598             =cut
599              
600             =head2 outfile_name
601              
602             Title : outfile_name
603             Usage : my $outfile = $Consense->outfile_name();
604             Function: Get/Set the name of the output file for this run
605             (if you wanted to do something special)
606             Returns : string
607             Args : [optional] string to set value to
608              
609              
610             =cut
611              
612              
613             =head2 tempdir
614              
615             Title : tempdir
616             Usage : my $tmpdir = $self->tempdir();
617             Function: Retrieve a temporary directory name (which is created)
618             Returns : string which is the name of the temporary directory
619             Args : none
620              
621              
622             =cut
623              
624             =head2 cleanup
625              
626             Title : cleanup
627             Usage : $codeml->cleanup();
628             Function: Will cleanup the tempdir directory after a Consense run
629             Returns : none
630             Args : none
631              
632              
633             =cut
634              
635             =head2 io
636              
637             Title : io
638             Usage : $obj->io($newval)
639             Function: Gets a L object
640             Returns : L
641             Args : none
642              
643              
644             =cut
645              
646             1; # Needed to keep compiler happy