File Coverage

blib/lib/Bio/Tools/Run/Phylo/Njtree/Best.pm
Criterion Covered Total %
statement 42 183 22.9
branch 6 74 8.1
condition 0 23 0.0
subroutine 13 22 59.0
pod 10 10 100.0
total 71 312 22.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::Njtree::Best
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Albert Vilella
7             #
8             # Copyright Albert Vilella
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Phylo::Njtree::Best - Wrapper around the Njtree (Njtree/phyml) best program.
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::Phylo::Njtree::Best;
21             use Bio::AlignIO;
22             use Bio::TreeIO;
23              
24             my $alignio = Bio::AlignIO->new(-format => 'fasta',
25             -file => 't/data/njtree_aln2.nucl.mfa');
26              
27             my $aln = $alignio->next_aln;
28              
29             my $treeio = Bio::TreeIO->new(
30             -format => 'nhx', -file => 't/data/species_tree_njtree.nh');
31              
32             my $tree = $treeio->next_tree;
33              
34             my $njtree_best = Bio::Tools::Run::Phylo::Njtree::Best->new();
35             $njtree_best->alignment($aln);
36             $njtree_best->tree($tree);
37             my $nhx_tree = $njtree_best->run();
38              
39             =head1 DESCRIPTION
40              
41             This is a wrapper around the best program of Njtree by Li Heng. See
42             http://treesoft.sourceforge.net/njtree.shtml for more information.
43              
44             Wrapper for the calculation of a reconciled phylogenetic tree with
45             inferred duplication tags from amultiple sequence alignment and a
46             species tree using NJTREE.
47              
48             =head2 Helping the module find your executable
49              
50             You will need to enable NJTREEDIR to find the njtree program. This can be
51             done in (at least) three ways:
52              
53             1. Make sure the njtree executable is in your path (i.e.
54             'which njtree' returns a valid program
55             2. define an environmental variable NJTREEDIR which points to a
56             directory containing the 'njtree' app:
57             In bash
58             export NJTREEDIR=/home/progs/treesoft/njtree or
59             In csh/tcsh
60             setenv NJTREEDIR /home/progs/treesoft/njtree
61              
62             3. include a definition of an environmental variable NJTREEDIR
63             in every script that will
64             BEGIN {$ENV{NJTREEDIR} = '/home/progs/treesoft/njtree'; }
65             use Bio::Tools::Run::Phylo::Njtree::Best;
66              
67             =head1 FEEDBACK
68              
69             =head2 Mailing Lists
70              
71             User feedback is an integral part of the evolution of this and other
72             Bioperl modules. Send your comments and suggestions preferably to one
73             of the Bioperl mailing lists. Your participation is much appreciated.
74              
75             bioperl-l@bioperl.org - General discussion
76             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77              
78             =head2 Support
79              
80             Please direct usage questions or support issues to the mailing list:
81              
82             I
83              
84             rather than to the module maintainer directly. Many experienced and
85             reponsive experts will be able look at the problem and quickly
86             address it. Please include a thorough description of the problem
87             with code and data examples if at all possible.
88              
89             =head2 Reporting Bugs
90              
91             Report bugs to the Bioperl bug tracking system to help us keep track
92             the bugs and their resolution. Bug reports can be submitted via the web:
93              
94             http://redmine.open-bio.org/projects/bioperl/
95              
96             =head1 AUTHOR - Albert Vilella
97              
98             Email avilella-at-gmail-dot-com
99              
100             =head1 APPENDIX
101              
102             The rest of the documentation details each of the object
103             methods. Internal methods are usually preceded with a _
104              
105             =cut
106              
107             package Bio::Tools::Run::Phylo::Njtree::Best;
108              
109 1         66 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM
110 1     1   121770 @NJTREE_BEST_PARAMS @NJTREE_BEST_SWITCHES %OK_FIELD);
  1         2  
111              
112 1     1   6 use strict;
  1         1  
  1         18  
113 1     1   451 use Bio::SimpleAlign;
  1         59474  
  1         34  
114 1     1   288 use Bio::AlignIO;
  1         4029  
  1         24  
115 1     1   242 use Bio::TreeIO;
  1         12937  
  1         25  
116 1     1   7 use Bio::Root::Root;
  1         2  
  1         14  
117 1     1   4 use Bio::Root::IO;
  1         2  
  1         16  
118 1     1   307 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         60  
119             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
120              
121             BEGIN {
122            
123 1     1   4 @NJTREE_BEST_PARAMS = qw(C p F c k a d l L b);
124 1         2 @NJTREE_BEST_SWITCHES = qw(P S A r D s g N);
125             # Authorize attribute fields
126 1         3 foreach my $attr ( @NJTREE_BEST_PARAMS, @NJTREE_BEST_SWITCHES ) {
127 18         1216 $OK_FIELD{$attr}++; }
128              
129             }
130              
131             =head2 program_name
132              
133             Title : program_name
134             Usage : $factory->program_name()
135             Function: holds the program name
136             Returns: string
137             Args : None
138              
139             =cut
140              
141             sub program_name {
142 6     6 1 29 return 'njtree';
143             }
144              
145             =head2 program_dir
146              
147             Title : program_dir
148             Usage : $factory->program_dir(@params)
149             Function: returns the program directory, obtained from ENV variable.
150             Returns: string
151             Args :
152              
153             =cut
154              
155             sub program_dir {
156 3 50   3 1 20 return Bio::Root::IO->catfile($ENV{NJTREEDIR}) if $ENV{NJTREEDIR};
157             }
158              
159              
160             =head2 new
161              
162             Title : new
163             Usage : my $njtree_best = Bio::Tools::Run::Phylo::Njtree::Best->new();
164             Function: Builds a new Bio::Tools::Run::Phylo::Njtree::Best
165             Returns : Bio::Tools::Run::Phylo::Njtree::Best
166             Args : -alignment => the Bio::Align::AlignI object
167             -tree => the Bio::Tree::TreeI object
168             -save_tempfiles => boolean to save the generated tempfiles and
169             NOT cleanup after onesself (default FALSE)
170             -executable => where the njtree executable resides
171              
172             See also: L, L
173              
174             =cut
175              
176             sub new {
177 1     1 1 77 my($class,@args) = @_;
178              
179 1         11 my $self = $class->SUPER::new(@args);
180 1         17 my ($aln, $tree, $st, $exe,
181             $ubl) = $self->_rearrange([qw(ALIGNMENT TREE SAVE_TEMPFILES
182             EXECUTABLE)],
183             @args);
184 1 50       9 defined $aln && $self->alignment($aln);
185 1 50       3 defined $tree && $self->tree($tree);
186 1 50       3 defined $st && $self->save_tempfiles($st);
187 1 50       2 defined $exe && $self->executable($exe);
188              
189 1         5 return $self;
190             }
191              
192             =head2 prepare
193              
194             Title : prepare
195             Usage : my $rundir = $njtree_best->prepare();
196             Function: prepare the njtree_best analysis using the default or updated parameters
197             the alignment parameter and species tree must have been set
198             Returns : value of rundir
199             Args : L object,
200             L object [optional]
201              
202             =cut
203              
204             sub prepare {
205 0     0 1 0 my ($self,$aln,$tree) = @_;
206              
207 0 0       0 unless ( $self->save_tempfiles ) {
208             # brush so we don't get plaque buildup ;)
209 0         0 $self->cleanup();
210             }
211 0 0       0 $tree = $self->tree unless $tree;
212 0 0       0 $aln = $self->alignment unless $aln;
213 0 0       0 if( ! $aln ) {
214 0         0 $self->warn("Must have supplied a valid alignment file in order to run njtree_best");
215 0         0 return 0;
216             }
217 0 0       0 if( ! $tree ) {
218 0         0 $self->warn("Must have supplied a valid species tree file in order to run njtree_best");
219 0         0 return 0;
220             }
221 0         0 my ($tempdir) = $self->tempdir();
222 0         0 my $tempalnFH;
223 0 0 0     0 if( ! ref($aln) && -e $aln ) {
224 0         0 $self->{_tempalnfile} = $aln;
225             } else {
226 0 0       0 ($tempalnFH,$self->{_tempalnfile}) = $self->io->tempfile
227             ('-dir' => $tempdir,
228             UNLINK => ($self->save_tempfiles ? 0 : 1));
229 0         0 my $alnout = Bio::AlignIO->new('-format' => 'fasta',
230             '-fh' => $tempalnFH);
231 0         0 $aln->set_displayname_flat(1);
232 0         0 $alnout->write_aln($aln);
233 0         0 $alnout->close();
234 0         0 undef $alnout;
235 0         0 close($tempalnFH);
236             }
237              
238 0         0 my ($temptreeFH);
239 0 0 0     0 if( ! ref($tree) && -e $tree ) {
240 0         0 $self->{_temptreefile} = $tree;
241             } else {
242 0 0       0 ($temptreeFH,$self->{_temptreefile}) = $self->io->tempfile
243             ('-dir' => $tempdir,
244             UNLINK => ($self->save_tempfiles ? 0 : 1));
245              
246 0         0 my $treeout = Bio::TreeIO->new('-format' => 'newick',
247             '-fh' => $temptreeFH);
248 0         0 $treeout->write_tree($tree);
249 0         0 $treeout->close();
250 0         0 close($temptreeFH);
251             }
252 0         0 $self->{_prepared} = 1;
253 0         0 $self->{_njtree_best_params} = $self->_setparams();
254              
255 0         0 return $tempdir;
256             }
257              
258              
259             =head2 run
260              
261             Title : run
262             Usage : my $nhx_tree = $njtree_best->run();
263             Function: run the njtree_best analysis using the default or updated parameters
264             the alignment parameter must have been set
265             Returns : L object [optional]
266             Args : L object
267             L object
268              
269              
270             =cut
271              
272             sub run {
273 0     0 1 0 my ($self,$aln,$tree) = @_;
274              
275 0 0       0 $self->prepare($aln,$tree) unless (defined($self->{_prepared}));
276 0         0 my ($rc,$nhx_tree) = (1);
277 0         0 my ($tmpdir) = $self->tempdir();
278 0         0 my $outfile = $self->outfile_name;
279             {
280 0         0 my $commandstring;
  0         0  
281             my $exit_status;
282             #./njtree best [other_params] -f species_file.nh -p tree -o inputfile.best.nhx inputfile.nucl.mfa
283 0         0 my $njtree_executable = $self->executable;
284 0         0 $commandstring = $njtree_executable." best ";
285 0         0 $commandstring .= $self->{_njtree_best_params};
286 0         0 $commandstring .= " -f $self->{_temptreefile} -p tree -o ";
287 0 0       0 unless ($self->outfile_name ) {
288 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
289 0         0 close($tfh);
290 0         0 undef $tfh;
291 0         0 $self->outfile_name($outfile);
292             }
293 0         0 $commandstring .= $self->outfile_name;
294 0         0 $commandstring .= " $self->{_tempalnfile} ";
295              
296 0 0 0     0 $self->throw("unable to find or run executable for 'njtree'")
      0        
297             unless $njtree_executable && -e $njtree_executable && -x _;
298              
299 0 0       0 open(RUN, "$commandstring |")
300             or $self->throw("Cannot run $commandstring");
301              
302 0         0 my @output = ;
303 0         0 $exit_status = close(RUN);
304 0         0 $self->error_string(join('',@output));
305 0 0 0     0 if( (grep { /^\[ /io } @output) || !$exit_status) {
  0         0  
306 0         0 $self->warn("There was an error - see error_string for the program output");
307 0         0 $rc = 0;
308             }
309 0         0 eval {
310 0         0 $nhx_tree = Bio::TreeIO->new(-file => "$tmpdir/$outfile",
311             -format => 'nhx');
312             };
313 0 0       0 if( $@ ) {
314 0         0 $self->warn($self->error_string);
315             }
316             }
317 0 0       0 unless ( $self->save_tempfiles ) {
318 0         0 $self->cleanup();
319             }
320 0         0 return ($rc,$nhx_tree);
321             }
322              
323              
324             sub AUTOLOAD {
325 0     0   0 my $self = shift;
326 0         0 my $attr = $AUTOLOAD;
327 0         0 $attr =~ s/.*:://;
328 0         0 $attr = $attr;
329             # aliasing
330 0 0       0 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
331              
332 0 0       0 $self->{$attr} = shift if @_;
333 0         0 return $self->{$attr};
334             }
335              
336             =head2 error_string
337              
338             Title : error_string
339             Usage : $obj->error_string($newval)
340             Function: Where the output from the last analysus run is stored.
341             Returns : value of error_string
342             Args : newvalue (optional)
343              
344              
345             =cut
346              
347             sub error_string {
348 0     0 1 0 my ($self,$value) = @_;
349 0 0       0 if( defined $value) {
350 0         0 $self->{'error_string'} = $value;
351             }
352 0         0 return $self->{'error_string'};
353              
354             }
355              
356              
357             =head2 version
358              
359             Title : version
360             Usage : exit if $prog->version() < 1.8
361             Function: Determine the version number of the program
362             Example :
363             Returns : float or undef
364             Args : none
365              
366             =cut
367              
368             sub version {
369 0     0 1 0 my ($self) = @_;
370 0         0 my $exe;
371 0 0       0 return undef unless $exe = $self->executable;
372 0         0 my $string = `$exe 2>&1` ;
373              
374 0         0 $string =~ /Version\:\s+(\d+.\d+.\d+)/m;
375 0   0     0 return $1 || undef;
376             }
377              
378              
379             =head2 alignment
380              
381             Title : alignment
382             Usage : $njtree_best->align($aln);
383             Function: Get/Set the L object
384             Returns : L object
385             Args : [optional] L
386             Comment : We could potentially add support for running directly on a file
387             but we shall keep it simple
388             See also: L
389              
390             =cut
391              
392             sub alignment {
393 0     0 1 0 my ($self,$aln) = @_;
394              
395 0 0       0 if( defined $aln ) {
396 0 0 0     0 if( -e $aln ) {
    0          
397 0         0 $self->{'_alignment'} = $aln;
398             } elsif( !ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) {
399 0         0 $self->warn("Must specify a valid Bio::Align::AlignI object to the alignment function not $aln");
400 0         0 return undef;
401             } else {
402 0         0 $self->{'_alignment'} = $aln;
403             }
404             }
405 0         0 return $self->{'_alignment'};
406             }
407              
408             =head2 tree
409              
410             Title : tree
411             Usage : $njtree_best->tree($tree, %params);
412             Function: Get/Set the L object
413             Returns : L
414             Args : [optional] $tree => L,
415             [optional] %parameters => hash of tree-specific parameters
416              
417             Comment : We could potentially add support for running directly on a file
418             but we shall keep it simple
419             See also: L
420              
421             =cut
422              
423             sub tree {
424 0     0 1 0 my ($self, $tree, %params) = @_;
425 0 0       0 if( defined $tree ) {
426 0 0 0     0 if( ! ref($tree) || ! $tree->isa('Bio::Tree::TreeI') ) {
427 0         0 $self->warn("Must specify a valid Bio::Tree::TreeI object to the alignment function");
428             }
429 0         0 $self->{'_tree'} = $tree;
430             }
431 0         0 return $self->{'_tree'};
432             }
433              
434             =head2 check_names
435              
436             Title : check_names
437             Usage :
438             Function:
439             Example :
440             Returns :
441             Args :
442              
443              
444             =cut
445              
446             sub check_names {
447 0     0 1 0 my $self = shift;
448              
449 0         0 my $tree = $self->tree;
450 0         0 my $aln = $self->alignment;
451 0 0       0 if( ! $aln ) {
452 0         0 $self->warn("must have supplied a valid alignment file in order to run njtree_best");
453 0         0 return 0;
454             }
455 0 0       0 if( ! $tree ) {
456 0         0 $self->warn("must have supplied a valid species tree file in order to run njtree_best");
457 0         0 return 0;
458             }
459 0         0 foreach my $leaf ($tree->get_leaf_nodes) {
460 0         0 my $id = $leaf->id;
461 0         0 $id =~ s/\-\*.+//; # njtree does not consider anything after a \-\*
462 0         0 $self->{_treeids}{$id} = 1;
463             }
464 0         0 foreach my $seq ($aln->each_seq) {
465 0         0 my $id = $seq->id;
466 0         0 $id =~ s/.+\_//; # njtree only looks at the right side of the \_
467 0         0 $self->{_alnids}{$id} = 1;
468             }
469 0         0 foreach my $alnid (keys %{$self->{_alnids}}) {
  0         0  
470 0 0       0 $self->{_unmappedids}{$alnid} = 1 unless (defined($self->{_treeids}{$alnid}));
471             }
472 0 0       0 if (defined($self->{_unmappedids})) {
473 0         0 my $count = scalar(keys%{$self->{_unmappedids}});
  0         0  
474 0         0 my $unmapped = join(",",keys %{$self->{_unmappedids}});
  0         0  
475 0         0 $self->warn("$count unmapped ids between the aln and the tree $unmapped");
476             }
477             }
478              
479              
480              
481             =head2 _setparams
482              
483             Title : _setparams
484             Usage : Internal function, not to be called directly
485             Function: Create parameter inputs for njtree_best program
486             Example :
487             Returns : parameter string to be passed to njtree_best
488             during align or profile_align
489             Args : name of calling object
490              
491             =cut
492              
493             sub _setparams {
494 0     0   0 my ($self) = @_;
495 0         0 my ($attr, $value,$param_string);
496 0         0 $param_string = '';
497 0         0 my $laststr;
498 0         0 for $attr ( @NJTREE_BEST_PARAMS ) {
499 0         0 $value = $self->$attr();
500 0 0       0 next unless (defined $value);
501 0         0 my $attr_key = $attr;
502 0         0 $attr_key = ' -'.$attr_key;
503 0         0 $param_string .= $attr_key .' '.$value;
504              
505             }
506 0         0 for $attr ( @NJTREE_BEST_SWITCHES) {
507 0         0 $value = $self->$attr();
508 0 0       0 next unless ($value);
509 0         0 my $attr_key = $attr;
510 0         0 $attr_key = ' -'.$attr_key;
511 0         0 $param_string .= $attr_key ;
512             }
513              
514 0         0 return $param_string;
515             }
516              
517              
518             =head1 Bio::Tools::Run::BaseWrapper methods
519              
520             =cut
521              
522             =head2 save_tempfiles
523              
524             Title : save_tempfiles
525             Usage : $obj->save_tempfiles($newval)
526             Function:
527             Returns : value of save_tempfiles
528             Args : newvalue (optional)
529              
530              
531             =cut
532              
533             =head2 outfile_name
534              
535             Title : outfile_name
536             Usage : my $outfile = $njtree_best->outfile_name();
537             Function: Get/Set the name of the output file for this run
538             (if you wanted to do something special)
539             Returns : string
540             Args : [optional] string to set value to
541              
542              
543             =cut
544              
545              
546             =head2 tempdir
547              
548             Title : tempdir
549             Usage : my $tmpdir = $self->tempdir();
550             Function: Retrieve a temporary directory name (which is created)
551             Returns : string which is the name of the temporary directory
552             Args : none
553              
554              
555             =cut
556              
557             =head2 cleanup
558              
559             Title : cleanup
560             Usage : $njtree_best->cleanup();
561             Function: Will cleanup the tempdir directory
562             Returns : none
563             Args : none
564              
565              
566             =cut
567              
568             =head2 io
569              
570             Title : io
571             Usage : $obj->io($newval)
572             Function: Gets a L object
573             Returns : L
574             Args : none
575              
576              
577             =cut
578              
579             sub DESTROY {
580 1     1   1395 my $self= shift;
581 1 50       9 unless ( $self->save_tempfiles ) {
582 1         10 $self->cleanup();
583             }
584 1         5 $self->SUPER::DESTROY();
585             }
586              
587             1; # Needed to keep compiler happy