File Coverage

blib/lib/Bio/Tools/Run/Phylo/PAML/Codeml.pm
Criterion Covered Total %
statement 65 173 37.5
branch 19 94 20.2
condition 4 32 12.5
subroutine 15 23 65.2
pod 13 13 100.0
total 116 335 34.6


line stmt bran cond sub pod time code
1             # $Id$
2             #
3             # BioPerl module for Bio::Tools::Run::Phylo::PAML::Codeml
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Jason Stajich
8             #
9             # Copyright Jason Stajich
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Tools::Run::Phylo::PAML::Codeml - Wrapper aroud the PAML program codeml
18              
19             =head1 SYNOPSIS
20              
21             use Bio::Tools::Run::Phylo::PAML::Codeml;
22             use Bio::AlignIO;
23              
24             my $alignio = Bio::AlignIO->new(-format => 'phylip',
25             -file => 't/data/gf-s85.phylip');
26              
27             my $aln = $alignio->next_aln;
28              
29             my $codeml = Bio::Tools::Run::Phylo::PAML::Codeml->new();
30             $codeml->alignment($aln);
31             my ($rc,$parser) = $codeml->run();
32             my $result = $parser->next_result;
33             my $MLmatrix = $result->get_MLmatrix();
34             print "Ka = ", $MLmatrix->[0]->[1]->{'dN'},"\n";
35             print "Ks = ", $MLmatrix->[0]->[1]->{'dS'},"\n";
36             print "Ka/Ks = ", $MLmatrix->[0]->[1]->{'omega'},"\n";
37              
38             =head1 DESCRIPTION
39              
40             This is a wrapper around the codeml program of PAML (Phylogenetic
41             Analysis by Maximum Likelihood) package of Ziheng Yang. See
42             http://abacus.gene.ucl.ac.uk/software/paml.html for more information.
43              
44             This module is more about generating the properl codeml.ctl file and
45             will run the program in a separate temporary directory to avoid
46             creating temp files all over the place.
47              
48             =head1 FEEDBACK
49              
50             =head2 Mailing Lists
51              
52             User feedback is an integral part of the evolution of this and other
53             Bioperl modules. Send your comments and suggestions preferably to
54             the Bioperl mailing list. Your participation is much appreciated.
55              
56             bioperl-l@bioperl.org - General discussion
57             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58              
59             =head2 Support
60              
61             Please direct usage questions or support issues to the mailing list:
62              
63             I
64              
65             rather than to the module maintainer directly. Many experienced and
66             reponsive experts will be able look at the problem and quickly
67             address it. Please include a thorough description of the problem
68             with code and data examples if at all possible.
69              
70             =head2 Reporting Bugs
71              
72             Report bugs to the Bioperl bug tracking system to help us keep track
73             of the bugs and their resolution. Bug reports can be submitted via the
74             web:
75              
76             http://redmine.open-bio.org/projects/bioperl/
77              
78             =head1 AUTHOR - Jason Stajich
79              
80             Email jason-at-bioperl-dot-org
81              
82             =head1 CONTRIBUTORS
83              
84             Additional contributors names and emails here
85              
86             =head1 APPENDIX
87              
88             The rest of the documentation details each of the object methods.
89             Internal methods are usually preceded with a _
90              
91             =cut
92              
93              
94             # Let the code begin...
95              
96              
97             package Bio::Tools::Run::Phylo::PAML::Codeml;
98 1     1   177712 use vars qw(@ISA %VALIDVALUES $MINNAMELEN $PROGRAMNAME $PROGRAM);
  1         2  
  1         66  
99 1     1   4 use strict;
  1         1  
  1         15  
100 1     1   2 use Bio::Root::Root;
  1         1  
  1         17  
101 1     1   430 use Bio::AlignIO;
  1         39854  
  1         25  
102 1     1   6 use Bio::TreeIO;
  1         2  
  1         16  
103 1     1   379 use Bio::Tools::Run::WrapperBase;
  1         1  
  1         22  
104 1     1   6 use Bio::Tools::Phylo::PAML;
  1         1  
  1         16  
105 1     1   3 use Cwd;
  1         1  
  1         271  
106              
107             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
108              
109             =head2 Default Values
110              
111             Valid and default values for codeml programs are listed below. The
112             default values are always the first one listed. These descriptions
113             are essentially lifted from the example codeml.ctl file and pamlDOC
114             documentation provided by the author.
115              
116             B specifies the equilibrium codon frequencies in codon
117             substitution model. These frequencies can be assumed to be equal (1/61
118             each for the standard genetic code, B = 0), calculated from
119             the average nucleotide frequencies (B = 1), from the average
120             nucleotide frequencies at the three codon positions (B = 2),
121             or used as free parameters (B = 3). The number of parameters
122             involved in those models of codon frequencies is 0, 3, 9, and 60
123             (under the universal code), for B = 0, 1, 2, and 3
124             respectively.
125              
126             B specifies whether equal amino acid distances are assumed (=
127             0) or Grantham's matrix is used (= 1) (Yang et al. 1998).
128              
129             B = -2 performs ML estimation of dS and dN in pairwise
130             comparisons. The program will collect estimates of dS and dN into the
131             files 2ML.dS and 2ML.dN. Since many users seem interested in looking
132             at dN /dS ratios among lineages, examination of the tree shapes
133             indicated by branch lengths calculated from the two rates may be
134             interesting although the analysis is ad hoc. If your species names
135             have no more than 10 characters, you can use the output distance
136             matrices as input to Phylip programs such as neighbor without
137             change. Otherwise you need to edit the files to cut the names short.
138              
139             B concerns assumptions about the dN/dS rate ratios among
140             branches (Yang 1998; Yang and Nielsen 1998). B =0 means a single
141             dN/dS ratio for all lineages (branches), 1 means one ratio for each
142             branch (free ratio model), and 2 means arbitrary number of rations
143             (such as the 2-ratios or 3-ratios models. with B =2, you may
144             specify the omega ratios for the branches using branch labels (read
145             about the tree structure file in the document). This option seems
146             rather easy to use. Otherwise, the program will ask the user to input
147             a branch mark for the dN/dS ratio assumed for each branch. This should
148             be an integral number between 0 to k - 1 if k different dN/dS ratios
149             (omega_0 - omega_k - 1) are assumed for the branches of the
150             tree. B note basically, doing this interactively is not going
151             to work very well, so this module is really focused around using the 0
152             or 1 parameters. Read the program documentation if you'd like some more
153             detailed instructions.
154              
155             B specifies models that allow the dN/dS ratio (omega) to vary
156             among sites (Nielsen and Yang 1998, Yang et al. 2000) B = m
157             corresponds to model Mm in Yang et al (2000). The variable B
158             is used to specify the number of categories in the omega distribution
159             under some models. The values of ncatG() used to perform our
160             analyses are 3 for M3 (discrete), 5 for M4 (freq), 10 for the
161             continuous distributions (M5: gamma, M6: 2gamma, M7: beta, M8:beta&w,
162             M9:beta&gamma, M10: beta&gamma+1, M11:beta&normal>1, and
163             M12:0&2normal>1, M13:3normal>0). This means M8 will have 11 site
164             classes (10 from the beta distribution plus 1 additional class). The
165             posterior probabilities for site classes as well as the expected omega
166             values for sites are listed in the file rst, which may be useful to
167             pinpoint sites under positive selection, if they exist.
168              
169             To make it easy to run several B models in one go, the
170             executable L can be used,
171             which asks you how many and which models to run at the start of the
172             program. The number of categories used will then match those used in
173             Yang et al(2000).
174              
175             As noted in that paper, some of the models are hard to use, in
176             particular, M12 and M13. Recommended models are 0 (one-ratio), 1
177             (neutral), 2 (selection), 3 (discrete), 7 (beta), and 8
178             (beta&omega ). Some of the models like M2 and M8 are noted to be
179             prone to the problem of multiple local optima. You are advised to run
180             the program at least twice, once with a starting omega value E1 and a
181             second time with a value E1, and use the results corresponding to the
182             highest likelihood. The continuous neutral and selection models of
183             Nielsen and Yang (1998) are not implemented in the program.
184              
185              
186             B for genetic code and these correspond to 1-11 in the genbank
187             transl table.
188             0:universal code
189             1:mamalian mt
190             2:yeast mt
191             3:mold mt,
192             4:invertebrate mt
193             5:ciliate nuclear
194             6:echinoderm mt
195             7:euplotid mt
196             8:alternative yeast nu.
197             9:ascidian mt
198             10:blepharisma nu
199              
200             B For codon sequences, ancestral reconstruction is not
201             implemented for the models of variable dN/dS ratios among sites. The
202             output under codon-based models usually shows the encoded amino acid
203             for each codon. The output under "Prob of best character at each node,
204             listed by site" has two posterior probabilities for each node at each
205             codon (amino acid) site. The first is for the best codon. The second,
206             in parentheses, is for the most likely amino acid under the codon
207             substitution model. This is a sum of posterior probabilities across
208             synonymous codons. In theory it is possible although rare for the most
209             likely amino acid not to match the most likely codon.
210              
211             B for codon sequences (seqtype = 1): The codon frequencies in
212             each sequence are counted and listed in a genetic code table, together
213             with their sums across species. Each table contains six or fewer
214             species. For data of multiple genes (option G in the sequence file),
215             codon frequencies in each gene (summed over species) are also
216             listed. The nucleotide distributions at the three codon positions are
217             also listed. The method of Nei and Gojobori (1986) is used to
218             calculate the number of synonymous substitutions per synonymous site
219             (dS ) and the number of nonsynonymous substitutions per nonsynonymous
220             site (dN ) and their ratio (dN /dS ). These are used to construct
221             initial estimates of branch lengths for the likelihood analysis but
222             are not MLEs themselves. Note that the estimates of these quantities
223             for the a- and b-globin genes shown in Table 2 of Goldman and Yang
224             (1994), calculated using the MEGA package (Kumar et al., 1993), are
225             not accurate.
226              
227             Results of ancestral reconstructions (B = 1) are collected
228             in the file rst. Under models of variable dN/dS ratios among sites (NSsites models),
229             the posterior probabilities for site classes as well as positively
230             selected sites are listed in rst.
231              
232             INCOMPLETE DOCUMENTATION OF ALL METHODS
233              
234             =cut
235              
236             BEGIN {
237              
238 1     1   3 $MINNAMELEN = 25;
239 1 50       6 $PROGRAMNAME = 'codeml' . ($^O =~ /mswin/i ?'.exe':'');
240 1 50       5 if( defined $ENV{'PAMLDIR'} ) {
241 0 0       0 $PROGRAM = Bio::Root::IO->catfile($ENV{'PAMLDIR'},$PROGRAMNAME). ($^O =~ /mswin/i ?'.exe':'');;
242             }
243            
244             # valid values for parameters, the default one is always
245             # the first one in the array
246             # much of the documentation here is lifted directly from the codeml.ctl
247             # example file provided with the package
248             %VALIDVALUES = (
249 1         1284 'outfile' => 'mlc',
250             'noisy' => [ 0..3,9],
251             'verbose' => [ 1,0,2], # 0:concise, 1:detailed, 2:too much
252              
253             # (runmode) 0:user tree, 1:semi-autmatic, 2:automatic
254             # 3:stepwise addition, 4,5:PerturbationNNI
255             # -2:pairwise
256             'runmode' => [ -2, 0..5],
257              
258             'seqtype' => [ 1..3], # 1:codons, 2:AAs, 3:codons->AAs
259              
260             'CodonFreq' => [ 2, 0,1,3,4,5,6,7], # 0:1/61 each, 1:F1X4,
261             # 2:F3X4, 3:codon table
262              
263             # (aaDist) 0:equal, +:geometric, -:linear,
264             # 1-6:G1974,Miyata, c,p,v,a
265             'aaDist' => [ 0,'+','-', 1..6],
266              
267             # (aaRatefile) only used for aa seqs
268             # with model=empirical(_F)
269             # default is usually 'wag.dat', also
270             # dayhoff.dat, jones.dat, mtmam.dat, or your own
271             'aaRatefile' => 'wag.dat',
272              
273             # (model) models for codons
274             # 0: one, 1:b, 2:2 or more dN/dS ratios for branches
275             'model' => [0..3,7],
276            
277             # (NSsites) number of S sites
278             # 0: one w;1:neutral;2:selection; 3:discrete;4:freqs;
279             # 5:gamma;6:2gamma;7:beta;8:beta&w;9:betaγ
280             # 10:beta&gamma+1; 11:beta&normal>1; 12:0&2normal>1;
281             # 13:3normal>0
282             'NSsites' => [0..13],
283              
284             # (icode) genetic code
285             # 0:universal code
286             # 1:mamalian mt
287             # 2:yeast mt
288             # 3:mold mt,
289             # 4:invertebrate mt
290             # 5:ciliate nuclear
291             # 6:echinoderm mt
292             # 7:euplotid mt
293             # 8:alternative yeast nu.
294             # 9:ascidian mt
295             #10:blepharisma nu
296             # these correspond to 1-11 in the genbank transl table
297            
298             'icode' => [ 0..10],
299            
300             'Mgene' => [0,1], # 0:rates, 1:separate
301            
302             'fix_kappa'=> [0,1], # 0:estimate kappa, 1:fix kappa
303             'kappa' => '2', # initial or fixed kappa
304             'fix_omega'=> [0,1], # 0: estimate omega, 1: fix omega
305             'omega' => '1', # initial or fixed omega for
306             # codons or codon-base AAs
307             'fix_alpha'=> [1,0], # 0: estimate gamma shape param
308             # 1: fix it at alpha
309             'alpha' => '0.', # initial or fixed alpha
310             # 0: infinity (constant rate)
311             'Malpha' => [0,1], # different alphas for genes
312             'ncatG' => [1..10], # number of categories in
313             # dG of NSsites models
314              
315             # (clock)
316             # 0: no clock, 1: global clock, 2: local clock
317             # 3: TipDate
318             'clock' => [0..3],
319             # (getSE) Standard Error:
320             # 0:don't want them, 1: want S.E.
321             'getSE' => [0,1],
322             # (RateAncestor)
323             # 0,1,2 rates (alpha>0) or
324             # ancestral states (1 or 2)
325             'RateAncestor' => [1,0,2],
326             'Small_Diff' => '.5e-6',
327             # (cleandata) remove sites with ambiguity data
328             # 1: yes, 0:no
329             'cleandata' => [0,1],
330             # this is the number of datasets in
331             # the file - we would need to change
332             # our api to allow >1 alignment object
333             # to be referenced at time
334             'ndata' => 1,
335             # (method)
336             # 0: simultaneous,1: 1 branch at a time
337             'method' => [0,1],
338              
339             # allow branch lengths to be fixed
340             # 0 ignore
341             # -1 use random starting points
342             # 1 use the branch lengths in initial ML iteration
343             # 2 branch lengths are fixed
344             'fix_blength' => [0,-1,1,2],
345             );
346             }
347              
348             =head2 program_name
349              
350             Title : program_name
351             Usage : $factory->program_name()
352             Function: holds the program name
353             Returns: string
354             Args : None
355              
356             =cut
357              
358             sub program_name {
359 6     6 1 19 return 'codeml';
360             }
361              
362             =head2 program_dir
363              
364             Title : program_dir
365             Usage : ->program_dir()
366             Function: returns the program directory, obtained from ENV variable.
367             Returns: string
368             Args :
369              
370             =cut
371              
372             sub program_dir {
373 3 50   3 1 13 return Bio::Root::IO->catfile($ENV{PAMLDIR}) if $ENV{PAMLDIR};
374             }
375              
376              
377             =head2 new
378              
379             Title : new
380             Usage : my $obj = Bio::Tools::Run::Phylo::PAML::Codeml->new();
381             Function: Builds a new Bio::Tools::Run::Phylo::PAML::Codeml object
382             Returns : Bio::Tools::Run::Phylo::PAML::Codeml
383             Args : -alignment => the Bio::Align::AlignI object
384             -save_tempfiles => boolean to save the generated tempfiles and
385             NOT cleanup after onesself (default FALSE)
386             -tree => the Bio::Tree::TreeI object
387             -branchlengths => 0: ignore any branch lengths found on the tree
388             1: use as initial values
389             2: fix branch lengths
390             -params => a hashref of PAML parameters (all passed to set_parameter)
391             -executable => where the codeml executable resides
392              
393             See also: L, L
394              
395             =cut
396              
397             sub new {
398 1     1 1 967 my($class,@args) = @_;
399              
400 1         7 my $self = $class->SUPER::new(@args);
401 1         21 $self->{'_branchLengths'} = 0;
402 1         6 my ($aln, $tree, $st, $params, $exe,
403             $ubl) = $self->_rearrange([qw(ALIGNMENT TREE SAVE_TEMPFILES
404             PARAMS EXECUTABLE BRANCHLENGTHS)],
405             @args);
406 1 50       24 defined $aln && $self->alignment($aln);
407 1 50 0     3 defined $tree && $self->tree($tree, branchLengths => ($ubl || 0) );
408 1 50       2 defined $st && $self->save_tempfiles($st);
409 1 50       2 defined $exe && $self->executable($exe);
410              
411 1         4 $self->set_default_parameters();
412 1 50       3 if( defined $params ) {
413 1 50       3 if( ref($params) !~ /HASH/i ) {
414 0         0 $self->warn("Must provide a valid hash ref for parameter -FLAGS");
415             } else {
416 1         3 map { $self->set_parameter($_, $$params{$_}) } keys %$params;
  8         9  
417             }
418             }
419 1         3 return $self;
420             }
421              
422              
423             =head2 prepare
424              
425             Title : prepare
426             Usage : my $rundir = $codeml->prepare($aln);
427             Function: prepare the codeml analysis using the default or updated parameters
428             the alignment parameter must have been set
429             Returns : value of rundir
430             Args : L object,
431             L object [optional]
432              
433             =cut
434              
435             sub prepare{
436 0     0 1 0 my ($self,$aln,$tree) = @_;
437 0 0       0 unless ( $self->save_tempfiles ) {
438             # brush so we don't get plaque buildup ;)
439 0         0 $self->cleanup();
440             }
441 0 0       0 $tree = $self->tree unless $tree;
442 0 0       0 $aln = $self->alignment unless $aln;
443 0 0       0 if( ! $aln ) {
444 0         0 $self->warn("must have supplied a valid alignment file in order to run codeml");
445 0         0 return 0;
446             }
447 0         0 my ($tempdir) = $self->tempdir();
448 0         0 my ($tempseqFH,$tempseqfile);
449 0 0 0     0 if( ! ref($aln) && -e $aln ) {
450 0         0 $tempseqfile = $aln;
451             } else {
452 0 0       0 ($tempseqFH,$tempseqfile) = $self->io->tempfile
453             ('-dir' => $tempdir,
454             UNLINK => ($self->save_tempfiles ? 0 : 1));
455 0 0       0 my $alnout = Bio::AlignIO->new('-format' => 'phylip',
456             '-fh' => $tempseqFH,
457             '-interleaved' => 0,
458             '-idlength' => $MINNAMELEN > $aln->maxdisplayname_length() ? $MINNAMELEN : $aln->maxdisplayname_length() +1);
459            
460 0         0 $alnout->write_aln($aln);
461 0         0 $alnout->close();
462 0         0 undef $alnout;
463 0         0 close($tempseqFH);
464             }
465             # now let's print the codeml.ctl file.
466             # many of the these programs are finicky about what the filename is
467             # and won't even run without the properly named file. Ack
468            
469 0         0 my $codeml_ctl = "$tempdir/codeml.ctl";
470 0 0       0 open(CODEML, ">$codeml_ctl") or $self->throw("cannot open $codeml_ctl for writing");
471 0         0 print CODEML "seqfile = $tempseqfile\n";
472 0         0 my $outfile = $self->outfile_name;
473 0         0 print CODEML "outfile = $outfile\n";
474              
475 0 0       0 if( $tree ) {
476 0         0 my ($temptreeFH,$temptreefile);
477 0 0 0     0 if( ! ref($tree) && -e $tree ) {
478 0         0 $temptreefile = $tree;
479             } else {
480 0 0       0 ($temptreeFH,$temptreefile) = $self->io->tempfile
481             ('-dir' => $tempdir,
482             UNLINK => ($self->save_tempfiles ? 0 : 1));
483            
484 0         0 my $treeout = Bio::TreeIO->new('-format' => 'newick',
485             '-fh' => $temptreeFH);
486 0         0 $treeout->write_tree($tree);
487 0         0 $treeout->close();
488 0         0 close($temptreeFH);
489             }
490 0         0 print CODEML "treefile = $temptreefile\n";
491             }
492              
493 0         0 my %params = $self->get_parameters;
494 0         0 while( my ($param,$val) = each %params ) {
495 0 0       0 next if $param eq 'outfile';
496 0         0 print CODEML "$param = $val\n";
497             }
498 0         0 close(CODEML);
499             # my ($rc,$parser) = (1);
500             # {
501             # my $cwd = cwd();
502             # my $exit_status;
503             # chdir($tempdir);
504             # }
505 0         0 return $tempdir;
506             }
507              
508              
509              
510             =head2 run
511              
512             Title : run
513             Usage : my ($rc,$parser) = $codeml->run($aln,$tree);
514             Function: run the codeml analysis using the default or updated parameters
515             the alignment parameter must have been set
516             Returns : Return code, L
517             Args : L object,
518             L object [optional]
519              
520              
521             =cut
522              
523             sub run {
524 0     0 1 0 my ($self) = shift;;
525 0         0 my $outfile = $self->outfile_name;
526 0         0 my $tmpdir = $self->prepare(@_);
527            
528 0         0 my ($rc,$parser) = (1);
529             {
530 0         0 my $cwd = cwd();
  0         0  
531 0         0 my $exit_status;
532 0         0 chdir($tmpdir);
533 0         0 my $codemlexe = $self->executable();
534 0 0 0     0 $self->throw("unable to find or run executable for 'codeml'") unless $codemlexe && -e $codemlexe && -x _;
      0        
535 0         0 my $run;
536 0 0       0 if( $self->{'_branchLengths'} ) {
537 0 0       0 open($run, "echo $self->{'_branchLengths'} | $codemlexe |") or $self->throw("Cannot open exe $codemlexe");
538             } else {
539 0 0       0 open($run, "$codemlexe |") or $self->throw("Cannot open exe $codemlexe");
540             }
541 0         0 my @output = <$run>;
542 0         0 $exit_status = close($run);
543 0         0 $self->error_string(join('',@output));
544 0 0 0     0 if( (grep { /\berr(or)?: /io } @output) || !$exit_status) {
  0         0  
545 0         0 $self->warn("There was an error - see error_string for the program output");
546 0         0 $rc = 0;
547             }
548 0         0 eval {
549 0         0 $parser = Bio::Tools::Phylo::PAML->new(-file => "$tmpdir/$outfile",
550             -verbose => $self->verbose,
551             -dir => "$tmpdir");
552             };
553 0 0       0 if( $@ ) {
554 0         0 $self->warn($self->error_string);
555             }
556 0         0 chdir($cwd);
557             }
558 0         0 return ($rc,$parser);
559             }
560              
561             =head2 error_string
562              
563             Title : error_string
564             Usage : $obj->error_string($newval)
565             Function: Where the output from the last analysus run is stored.
566             Returns : value of error_string
567             Args : newvalue (optional)
568              
569              
570             =cut
571              
572             sub error_string{
573 0     0 1 0 my ($self,$value) = @_;
574 0 0       0 if( defined $value) {
575 0         0 $self->{'error_string'} = $value;
576             }
577 0         0 return $self->{'error_string'};
578              
579             }
580              
581             =head2 alignment
582              
583             Title : alignment
584             Usage : $codeml->align($aln);
585             Function: Get/Set the L object
586             Returns : L object
587             Args : [optional] L
588             Comment : We could potentially add support for running directly on a file
589             but we shall keep it simple
590             See also: L
591              
592             =cut
593              
594             sub alignment{
595 0     0 1 0 my ($self,$aln) = @_;
596              
597 0 0       0 if( defined $aln ) {
598 0 0 0     0 if( -e $aln ) {
    0          
599 0         0 $self->{'_alignment'} = $aln;
600             } elsif( !ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) {
601 0         0 $self->warn("Must specify a valid Bio::Align::AlignI object to the alignment function not $aln");
602 0         0 return undef;
603             } else {
604 0         0 $self->{'_alignment'} = $aln;
605             }
606             }
607 0         0 return $self->{'_alignment'};
608             }
609              
610             =head2 tree
611              
612             Title : tree
613             Usage : $codeml->tree($tree, %params);
614             Function: Get/Set the L object
615             Returns : L
616             Args : [optional] $tree => L,
617             [optional] %parameters => hash of tree-specific parameters:
618             branchLengths: 0, 1 or 2
619             out
620              
621             Comment : We could potentially add support for running directly on a file
622             but we shall keep it simple
623             See also: L
624              
625             =cut
626              
627             sub tree {
628 0     0 1 0 my ($self, $tree, %params) = @_;
629 0 0       0 if( defined $tree ) {
630 0 0 0     0 if( ! ref($tree) || ! $tree->isa('Bio::Tree::TreeI') ) {
631 0         0 $self->warn("Must specify a valid Bio::Tree::TreeI object to the alignment function");
632             }
633 0         0 $self->{'_tree'} = $tree;
634 0 0       0 if ( defined $params{'_branchLengths'} ) {
635 0         0 my $ubl = $params{'_branchLengths'};
636 0 0       0 if ($ubl !~ m/^(0|1|2)$/) {
637 0         0 $self->throw("The branchLengths parameter to tree() must be 0 (ignore), 1 (initial values) or 2 (fixed values) only");
638             }
639 0         0 $self->{'_branchLengths'} = $ubl;
640             }
641             }
642 0         0 return $self->{'_tree'};
643             }
644              
645             =head2 get_parameters
646              
647             Title : get_parameters
648             Usage : my %params = $self->get_parameters();
649             Function: returns the list of parameters as a hash
650             Returns : associative array keyed on parameter names
651             Args : none
652              
653              
654             =cut
655              
656             sub get_parameters{
657 0     0 1 0 my ($self) = @_;
658             # we're returning a copy of this
659 0         0 return %{ $self->{'_codemlparams'} };
  0         0  
660             }
661              
662              
663             =head2 set_parameter
664              
665             Title : set_parameter
666             Usage : $codeml->set_parameter($param,$val);
667             Function: Sets a codeml parameter, will be validated against
668             the valid values as set in the %VALIDVALUES class variable.
669             The checks can be ignored if one turns off param checks like this:
670             $codeml->no_param_checks(1)
671             Returns : boolean if set was success, if verbose is set to -1
672             then no warning will be reported
673             Args : $param => name of the parameter
674             $value => value to set the parameter to
675             See also: L
676              
677             =cut
678              
679             sub set_parameter{
680 8     8 1 10 my ($self,$param,$value) = @_;
681 8 50 33     15 unless (defined $self->{'no_param_checks'} && $self->{'no_param_checks'} == 1) {
682 8 50       12 if ( ! defined $VALIDVALUES{$param} ) {
683 0         0 $self->warn("unknown parameter $param will not be set unless you force by setting no_param_checks to true");
684 0         0 return 0;
685             }
686 8 100 66     17 if ( ref( $VALIDVALUES{$param}) =~ /ARRAY/i &&
687 5         14 scalar @{$VALIDVALUES{$param}} > 0 ) {
688            
689 5 50       1 unless ( grep { $value eq $_ } @{ $VALIDVALUES{$param} } ) {
  37         42  
  5         7  
690 0         0 $self->warn("parameter $param specified value $value is not recognized, please see the documentation and the code for this module or set the no_param_checks to a true value");
691 0         0 return 0;
692             }
693             }
694             }
695 8         9 $self->{'_codemlparams'}->{$param} = $value;
696 8         8 return 1;
697             }
698              
699             =head2 set_default_parameters
700              
701             Title : set_default_parameters
702             Usage : $codeml->set_default_parameters(0);
703             Function: (Re)set the default parameters from the defaults
704             (the first value in each array in the
705             %VALIDVALUES class variable)
706             Returns : none
707             Args : boolean: keep existing parameter values
708              
709              
710             =cut
711              
712             sub set_default_parameters{
713 1     1 1 1 my ($self,$keepold) = @_;
714 1 50       3 $keepold = 0 unless defined $keepold;
715            
716 1         6 while( my ($param,$val) = each %VALIDVALUES ) {
717             # skip if we want to keep old values and it is already set
718 28 50 33     39 next if( defined $self->{'_codemlparams'}->{$param} && $keepold);
719 28 100       42 if(ref($val)=~/ARRAY/i ) {
720 21         48 $self->{'_codemlparams'}->{$param} = $val->[0];
721             } else {
722 7         14 $self->{'_codemlparams'}->{$param} = $val;
723             }
724             }
725             }
726              
727              
728             =head1 Bio::Tools::Run::WrapperBase methods
729              
730             =cut
731              
732             =head2 no_param_checks
733              
734             Title : no_param_checks
735             Usage : $obj->no_param_checks($newval)
736             Function: Boolean flag as to whether or not we should
737             trust the sanity checks for parameter values
738             Returns : value of no_param_checks
739             Args : newvalue (optional)
740              
741              
742             =cut
743              
744             sub no_param_checks{
745 0     0 1 0 my ($self,$value) = @_;
746 0 0       0 if( defined $value) {
747 0         0 $self->{'no_param_checks'} = $value;
748             }
749 0         0 return $self->{'no_param_checks'};
750             }
751              
752              
753             =head2 save_tempfiles
754              
755             Title : save_tempfiles
756             Usage : $obj->save_tempfiles($newval)
757             Function:
758             Returns : value of save_tempfiles
759             Args : newvalue (optional)
760              
761              
762             =cut
763              
764             =head2 outfile_name
765              
766             Title : outfile_name
767             Usage : my $outfile = $codeml->outfile_name();
768             Function: Get/Set the name of the output file for this run
769             (if you wanted to do something special)
770             Returns : string
771             Args : [optional] string to set value to
772              
773              
774             =cut
775              
776             sub outfile_name {
777 0     0 1 0 my $self = shift;
778 0 0       0 if( @_ ) {
779 0         0 return $self->{'_codemlparams'}->{'outfile'} = shift @_;
780             }
781 0 0       0 unless (defined $self->{'_codemlparams'}->{'outfile'}) {
782 0         0 $self->{'_codemlparams'}->{'outfile'} = 'mlc';
783             }
784 0         0 return $self->{'_codemlparams'}->{'outfile'};
785             }
786              
787             =head2 tempdir
788              
789             Title : tempdir
790             Usage : my $tmpdir = $self->tempdir();
791             Function: Retrieve a temporary directory name (which is created)
792             Returns : string which is the name of the temporary directory
793             Args : none
794              
795              
796             =cut
797              
798             =head2 cleanup
799              
800             Title : cleanup
801             Usage : $codeml->cleanup();
802             Function: Will cleanup the tempdir directory after a PAML run
803             Returns : none
804             Args : none
805              
806              
807             =cut
808              
809             =head2 io
810              
811             Title : io
812             Usage : $obj->io($newval)
813             Function: Gets a L object
814             Returns : L
815             Args : none
816              
817              
818             =cut
819              
820             sub DESTROY {
821 1     1   8277 my $self= shift;
822 1 50       10 unless ( $self->save_tempfiles ) {
823 1         10 $self->cleanup();
824             }
825 1         6 $self->SUPER::DESTROY();
826             }
827              
828             1;