File Coverage

blib/lib/Bio/Tools/Run/Phylo/Phast/PhyloFit.pm
Criterion Covered Total %
statement 18 61 29.5
branch 0 10 0.0
condition 0 7 0.0
subroutine 6 12 50.0
pod 4 4 100.0
total 28 94 29.7


line stmt bran cond sub pod time code
1             # $Id$
2             #
3             # BioPerl module for Bio::Tools::Run::Phylo::Phast::PhyloFit
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Sendu Bala
8             #
9             # Copyright Sendu Bala
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Tools::Run::Phylo::Phast::PhyloFit - Wrapper for phyloFit
18              
19             =head1 SYNOPSIS
20              
21             use Bio::Tools::Run::Phylo::Phast::PhyloFit;
22              
23             # Make a PhyloFit factory
24             $factory = Bio::Tools::Run::Phylo::Phast::PhastCons->new();
25              
26             # Generate an init.mod file for use by phastCons
27             my $init_file = $factory->run($alignment, $tree);
28              
29             =head1 DESCRIPTION
30              
31             This is a wrapper for running the phyloFit application by Adam Siepel. You
32             can get details here: http://compgen.bscb.cornell.edu/~acs/software.html
33              
34             Currently the interface is extremely simplified. Only the --tree form of usage
35             is allowed (not --init-model), which means a tree must be supplied with the
36             alignment (to run()). You can try supplying normal phyloFit arguments to new(),
37             or calling arg-named methods (excluding initial hyphens and converting others
38             to underscores, eg. $factory-Egaps_as_bases(1) to set the --gaps-as-bases arg).
39              
40             WARNING: the API may change in the future to allow for greater flexability and
41             access to more phyloFit features.
42              
43              
44             You will need to enable this PhyloFit wrapper to find the phast programs (at
45             least phyloFit itself).
46             This can be done in (at least) three ways:
47              
48             1. Make sure the phyloFit executable is in your path.
49             2. Define an environmental variable PHASTDIR which is a
50             directory which contains the phyloFit application:
51             In bash:
52              
53             export PHASTDIR=/home/username/phast/bin
54              
55             In csh/tcsh:
56              
57             setenv PHASTDIR /home/username/phast/bin
58              
59             3. Include a definition of an environmental variable PHASTDIR in
60             every script that will use this PhyloFit wrapper module, e.g.:
61              
62             BEGIN { $ENV{PHASTDIR} = '/home/username/phast/bin' }
63             use Bio::Tools::Run::Phylo::Phast::PhyloFit;
64              
65             =head1 FEEDBACK
66              
67             =head2 Mailing Lists
68              
69             User feedback is an integral part of the evolution of this and other
70             Bioperl modules. Send your comments and suggestions preferably to
71             the Bioperl mailing list. Your participation is much appreciated.
72              
73             bioperl-l@bioperl.org - General discussion
74             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
75              
76             =head2 Support
77              
78             Please direct usage questions or support issues to the mailing list:
79              
80             I
81              
82             rather than to the module maintainer directly. Many experienced and
83             reponsive experts will be able look at the problem and quickly
84             address it. Please include a thorough description of the problem
85             with code and data examples if at all possible.
86              
87             =head2 Reporting Bugs
88              
89             Report bugs to the Bioperl bug tracking system to help us keep track
90             of the bugs and their resolution. Bug reports can be submitted via
91             the web:
92              
93             http://redmine.open-bio.org/projects/bioperl/
94              
95             =head1 AUTHOR - Sendu Bala
96              
97             Email bix@sendu.me.uk
98              
99             =head1 APPENDIX
100              
101             The rest of the documentation details each of the object methods.
102             Internal methods are usually preceded with a _
103              
104             =cut
105              
106             package Bio::Tools::Run::Phylo::Phast::PhyloFit;
107 1     1   5 use strict;
  1         1  
  1         23  
108              
109 1     1   4 use Cwd;
  1         2  
  1         39  
110 1     1   4 use File::Spec;
  1         2  
  1         16  
111 1     1   4 use Bio::AlignIO;
  1         2  
  1         17  
112 1     1   4 use Bio::TreeIO;
  1         1  
  1         27  
113              
114 1     1   4 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
  1         2  
  1         302  
115              
116             our $PROGRAM_NAME = 'phyloFit';
117             our $PROGRAM_DIR = $ENV{'PHASTDIR'};
118              
119             # methods and their synonyms from the phastCons args we support
120             our %PARAMS = (subst_mod => 's',
121             min_informative => 'I',
122             precision => 'p',
123             log => 'l',
124             ancestor => 'A',
125             nrates => 'k',
126             alpha => 'a',
127             rate_constants => 'K',
128             features => 'g',
129             catmap => 'c',
130             do_cats => 'C',
131             reverse_groups => 'R');
132              
133             our %SWITCHES = (gaps_as_bases => 'G',
134             quiet => 'q',
135             EM => 'E',
136             init_random => 'r',
137             estimate_freqs => 'F',
138             markov => 'N',
139             non_overlapping => 'V');
140              
141             # just to be explicit, args we don't support (yet) or we handle ourselves
142             our %UNSUPPORTED = (msa_format => 'i',
143             out_root => 'o',
144             tree => 't',
145             help => 'h',
146             lnl => 'L',
147             init_model => 'M',
148             scale_only => 'B',
149             scale_subtree => 'S',
150             no_freqs => 'f',
151             no_rates => 'n',
152             post_probs => 'P',
153             expected_subs => 'X',
154             expected_total_subs => 'Z',
155             column_probs => 'U',
156             windows => 'w',
157             windows_explicit => 'v');
158              
159             =head2 program_name
160              
161             Title : program_name
162             Usage : $factory>program_name()
163             Function: holds the program name
164             Returns : string
165             Args : None
166              
167             =cut
168              
169             sub program_name {
170 0     0 1   return $PROGRAM_NAME;
171             }
172              
173             =head2 program_dir
174              
175             Title : program_dir
176             Usage : $factory->program_dir(@params)
177             Function: returns the program directory, obtained from ENV variable.
178             Returns : string
179             Args : None
180              
181             =cut
182              
183             sub program_dir {
184 0     0 1   return $PROGRAM_DIR;
185             }
186              
187             =head2 new
188              
189             Title : new
190             Usage : $factory = Bio::Tools::Run::Phylo::Phast::PhyloFit->new()
191             Function: creates a new PhyloFit factory
192             Returns : Bio::Tools::Run::Phylo::Phast::PhyloFit
193             Args : Most options understood by phastCons can be supplied as key =>
194             value pairs. Options that don't normally take a value
195             should be given a value of 1. You can type the keys as you would on
196             the command line (eg. '--gaps-as-bases' => 1) or with only a single
197             hyphen to start and internal hyphens converted to underscores (eg.
198             -gaps_as_bases => 1) to avoid having to quote the key.
199              
200             These options can NOT be used with this wrapper currently:
201             msa_format / i
202             out_root / o
203             tree / t
204             help / h
205             lnl / L
206             init_model / M
207             scale_only / B
208             scale_subtree / S
209             no_freqs / f
210             no_rates / n
211             post_probs / P
212             expected_subs / X
213             expected_total_subs / Z
214             column_probs / U
215             windows / w
216             windows_explicit / v
217              
218             =cut
219              
220             sub new {
221 0     0 1   my ($class, @args) = @_;
222 0           my $self = $class->SUPER::new(@args);
223            
224 0           $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
225 0           (map { $_ => $SWITCHES{$_} } keys %SWITCHES)},
  0            
226             -create => 1);
227            
228 0           return $self;
229             }
230              
231             =head2 run
232              
233             Title : run
234             Usage : $result = $factory->run($fasta_align_file, $newick_tree_file);
235             -or-
236             $result = $factory->run($align_object, $tree_object);
237             -or-
238             $result = $factory->run($align_object, $db_taxonomy_object);
239             Function: Runs phyloFit on an alignment.
240             Returns : filename of init.mod file produced
241             Args : The first argument represents an alignment, the second argument
242             a species tree.
243             The alignment can be provided as a multi-fasta format alignment
244             filename, or a Bio::Align::AlignI compliant object (eg. a
245             Bio::SimpleAlign).
246             The species tree can be provided as a newick format tree filename
247             or a Bio::Tree::TreeI compliant object. Alternatively a
248             Bio::DB::Taxonomy object can be supplied, in which case the species
249             tree will be generated by using the alignment sequence names as
250             species names and looking for those in the supplied database.
251              
252             In all cases, the alignment sequence names must correspond to node
253             ids in the species tree. Multi-word species names should be joined
254             with underscores to form the sequence names, eg. Homo_sapiens
255              
256             =cut
257              
258             sub run {
259 0     0 1   my ($self, $aln, $tree) = @_;
260            
261 0 0 0       ($aln && $tree) || $self->throw("alignment and tree must be supplied");
262 0           $self->_alignment($aln);
263 0           $tree = $self->_tree($tree);
264            
265 0           $tree->force_binary;
266            
267             # adjust tree node ids to convert spaces to underscores (eg. if tree
268             # generated from taxonomy)
269 0           foreach my $node ($tree->get_leaf_nodes) {
270 0           my $id = $node->id;
271 0           $id =~ s/ /_/g;
272 0           $node->id($id);
273             }
274            
275             # check node and seq names match
276 0           $self->_check_names;
277            
278 0           return $self->_run;
279             }
280              
281             sub _run {
282 0     0     my $self = shift;
283            
284 0   0       my $exe = $self->executable || return;
285            
286             # cd to a temp dir
287 0           my $temp_dir = $self->tempdir;
288 0           my $cwd = Cwd->cwd();
289 0 0         chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
290            
291 0           my $aln_file = $self->_write_alignment;
292 0           my $tree_file = $self->_write_tree;
293            
294             #...phyloFit --tree "(human,(mouse,rat))" --msa-format FASTA --out-root init alignment.fa
295 0           my $command = $exe.$self->_setparams($aln_file, $tree_file);
296 0           $self->debug("phyloFit command = $command\n");
297 0 0         system($command) && $self->throw("phyloFit call ($command) crashed: $?");
298            
299             # cd back again
300 0 0         chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
301            
302 0           return File::Spec->catfile($temp_dir, 'init.mod');
303             }
304              
305             =head2 _setparams
306              
307             Title : _setparams
308             Usage : Internal function, not to be called directly
309             Function: Creates a string of params to be used in the command string
310             Returns : string of params
311             Args : alignment and tree file names
312              
313             =cut
314              
315             sub _setparams {
316 0     0     my ($self, $aln_file, $tree_file) = @_;
317            
318 0           my $param_string = ' --tree '.$tree_file;
319 0           $param_string .= ' --msa-format FASTA';
320 0           $param_string .= ' --out-root init';
321            
322             # --min-informative defaults to 50, but must not be greater than the number
323             # of bases in the alignment
324 0           my $aln = $self->_alignment;
325 0           my $length = $aln->length;
326 0   0       my $min_informative = $self->min_informative || 50;
327 0 0         if ($length < $min_informative) {
328 0           $self->min_informative($length);
329             }
330            
331 0           $param_string .= $self->SUPER::_setparams(-params => [keys %PARAMS],
332             -switches => [keys %SWITCHES],
333             -double_dash => 1,
334             -underscore_to_dash => 1);
335 0           $param_string .= ' '.$aln_file;
336            
337 0           return $param_string;
338             }
339              
340             1;