File Coverage

blib/lib/Bio/Tools/Run/Phylo/Gerp.pm
Criterion Covered Total %
statement 34 77 44.1
branch 2 24 8.3
condition 1 12 8.3
subroutine 12 15 80.0
pod 4 4 100.0
total 53 132 40.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::Gerp
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Gerp - Wrapper for GERP
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::Phylo::Gerp;
21              
22             # Make a Gerp factory
23             $factory = Bio::Tools::Run::Phylo::Gerp->new();
24              
25             # Run Gerp with an alignment and tree file
26             my $parser = $factory->run($alignfilename, $treefilename);
27              
28             # or with alignment object and tree object (which needs branch lengths)
29             $parser = $factory->run($bio_simplalign, $bio_tree_tree);
30              
31             # (mixtures of the above are possible)
32              
33             # look at the results
34             while (my $feat = $parser->next_result) {
35             my $start = $feat->start;
36             my $end = $feat->end;
37             my $rs_score = $feat->score;
38             my $p_value = ($feat->annotation->get_Annotations('p-value'))[0]->value;
39             }
40              
41             =head1 DESCRIPTION
42              
43             This is a wrapper for running the GERP (v2) programs 'gerpcol' and 'gerpelem' by
44             Eugene Davydov (originally Gregory M. Cooper et al.). You can get details here:
45             http://mendel.stanford.edu/sidowlab/. GERP can be used for phylogenetic
46             footprinting/ shadowing (it finds 'constrained elements in multiple
47             alignments').
48              
49             You can try supplying normal gerpcol/gerpelem command-line arguments to new(),
50             eg. $factory-Enew(-e =E 0.05) or calling arg-named methods, eg.
51             $factory-Ee(0.05). The filename-related args (t, f, x) are handled internally
52             by the run() method. This wrapper currently only supports running GERP on a
53             single alignment at a time (ie. F isn't used at all, nor are multiple fs
54             possible).
55              
56              
57             You will need to enable this GERP wrapper to find the GERP executables.
58             This can be done in (at least) three ways:
59              
60             1. Make sure gerpcol and gerpelem are in your path.
61             2. Define an environmental variable GERPDIR which is a
62             directory which contains the GERP executables:
63             In bash:
64              
65             export GERPDIR=/home/username/gerp/
66              
67             In csh/tcsh:
68              
69             setenv GERPDIR /home/username/gerp
70              
71             3. Include a definition of an environmental variable GERPDIR in
72             every script that will use this GERP wrapper module, e.g.:
73              
74             BEGIN { $ENV{GERPDIR} = '/home/username/gerp/' }
75             use Bio::Tools::Run::Phylo::Gerp;
76              
77             =head1 FEEDBACK
78              
79             =head2 Mailing Lists
80              
81             User feedback is an integral part of the evolution of this and other
82             Bioperl modules. Send your comments and suggestions preferably to
83             the Bioperl mailing list. Your participation is much appreciated.
84              
85             bioperl-l@bioperl.org - General discussion
86             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87              
88             =head2 Support
89              
90             Please direct usage questions or support issues to the mailing list:
91              
92             I
93              
94             rather than to the module maintainer directly. Many experienced and
95             reponsive experts will be able look at the problem and quickly
96             address it. Please include a thorough description of the problem
97             with code and data examples if at all possible.
98              
99             =head2 Reporting Bugs
100              
101             Report bugs to the Bioperl bug tracking system to help us keep track
102             of the bugs and their resolution. Bug reports can be submitted via
103             the web:
104              
105             http://redmine.open-bio.org/projects/bioperl/
106              
107             =head1 AUTHOR - Sendu Bala
108              
109             Email bix@sendu.me.uk
110              
111             =head1 APPENDIX
112              
113             The rest of the documentation details each of the object methods.
114             Internal methods are usually preceded with a _
115              
116             =cut
117              
118             package Bio::Tools::Run::Phylo::Gerp;
119 1     1   130640 use strict;
  1         3  
  1         40  
120              
121 1     1   6 use Cwd;
  1         3  
  1         84  
122 1     1   7 use File::Spec;
  1         1  
  1         29  
123 1     1   6 use File::Basename;
  1         1  
  1         94  
124 1     1   734 use Bio::AlignIO;
  1         101296  
  1         26  
125 1     1   367 use Bio::TreeIO;
  1         14714  
  1         30  
126 1     1   479 use Bio::Tools::Phylo::Gerp;
  1         372  
  1         23  
127              
128 1     1   5 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
  1         1  
  1         567  
129              
130             our $PROGRAM_NAME = 'gerpcol';
131             our $PROGRAM_DIR;
132              
133             # methods for the gerp args we support
134             our @COLPARAMS = qw(r n s);
135             our @ELEMPARAMS = qw(l L t d p b a c r e);
136             our @SWITCHES = qw(v);
137              
138             # just to be explicit, args we don't support (yet) or we handle ourselves
139             our @UNSUPPORTED = qw(h t f F x);
140              
141             BEGIN {
142             # lets add all the gerp executables to the path
143 1     1   2 $PROGRAM_DIR = $ENV{'GERPDIR'};
144 1 50       560 $ENV{PATH} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR;
145             }
146              
147             =head2 program_name
148              
149             Title : program_name
150             Usage : $factory>program_name()
151             Function: holds the program name
152             Returns : string
153             Args : None
154              
155             =cut
156              
157             sub program_name {
158 7     7 1 6 my $self = shift;
159 7 50       12 if (@_) { $self->{program_name} = shift }
  0         0  
160 7   33     40 return $self->{program_name} || $PROGRAM_NAME;
161             }
162              
163             =head2 program_dir
164              
165             Title : program_dir
166             Usage : $factory->program_dir(@params)
167             Function: returns the program directory, obtained from ENV variable.
168             Returns : string
169             Args : None
170              
171             =cut
172              
173             sub program_dir {
174 4     4 1 16 return $PROGRAM_DIR;
175             }
176              
177             =head2 new
178              
179             Title : new
180             Usage : $factory = Bio::Tools::Run::Phylo::Gerp->new()
181             Function: creates a new GERP factory
182             Returns : Bio::Tools::Run::Phylo::Gerp
183             Args : Most options understood by GERP can be supplied as key =>
184             value pairs.
185              
186             These options can NOT be used with this wrapper:
187             h, t, f, F and x
188              
189             =cut
190              
191             sub new {
192 1     1 1 1016 my ($class, @args) = @_;
193 1         16 my $self = $class->SUPER::new(@args);
194            
195 1         45 $self->_set_from_args(\@args, -methods => [@COLPARAMS, @ELEMPARAMS,
196             @SWITCHES, 'quiet'],
197             -create => 1);
198            
199 1         7 return $self;
200             }
201              
202             =head2 run
203              
204             Title : run
205             Usage : $parser = $factory->run($align_file, $tree_file);
206             -or-
207             $parser = $factory->run($align_object, $tree_object);
208             Function: Runs GERP on an alignment.
209             Returns : Bio::Tools::Phylo::Gerp parser object, containing the results
210             Args : The first argument represents an alignment, the second argument
211             a phylogenetic tree with branch lengths.
212             The alignment can be provided as a MAF format alignment
213             filename, or a Bio::Align::AlignI compliant object (eg. a
214             Bio::SimpleAlign).
215             The species tree can be provided as a newick format tree filename
216             or a Bio::Tree::TreeI compliant object.
217              
218             In all cases, the alignment sequence names must correspond to node
219             ids in the tree. Multi-word species names should have the
220             spaces replaced with underscores (eg. Homo_sapiens)
221              
222             =cut
223              
224             sub run {
225 0     0 1   my ($self, $aln, $tree) = @_;
226 0   0       $self->_alignment($aln || $self->throw("An alignment must be supplied"));
227 0   0       $self->_tree($tree || $self->throw("A phylo tree must be supplied"));
228            
229             # check node and seq names match
230 0           $self->_check_names;
231            
232 0           return $self->_run;
233             }
234              
235             sub _run {
236 0     0     my $self = shift;
237            
238 0 0         $self->executable || return;
239            
240             # cd to a temp dir
241 0           my $temp_dir = $self->tempdir;
242 0           my $cwd = Cwd->cwd();
243 0 0         chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
244            
245 0           foreach my $prog ('gerpcol', 'gerpelem') {
246 0           delete $self->{'_pathtoexe'};
247 0           $self->program_name($prog);
248 0   0       my $exe = $self->executable || $self->throw("'$prog' executable not found");
249            
250 0           my $command = $exe.$self->_setparams($prog);
251 0           $self->debug("gerp command = $command\n");
252            
253             #eval {
254             # local $SIG{ALRM} = sub { die "alarm\n" };
255             # alarm 60;
256             # system($command) && $self->throw("gerp call ($command) failed: $! | $?");
257             # alarm 0;
258             #};
259             #die if $@ && $@ ne "alarm\n";
260             #if ($@) {
261             # die "Gerp timed out\n";
262             #}
263             #
264             # system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir");
265            
266 0 0         open(my $pipe, "$command |") || $self->throw("gerp call ($command) failed to start: $? | $!");
267 0           my $error = '';
268 0           my $warning = '';
269 0           while (<$pipe>) {
270 0 0         if ($self->quiet) {
271 0           $error .= $_;
272 0 0         $warning .= $_ if /warning/i;
273             }
274             else {
275 0           print;
276             }
277             }
278 0 0         close($pipe) || ($error ? $self->throw("gerp call ($command) failed: $error") : $self->throw("gerp call ($command) crashed: $?"));
    0          
279            
280             # (throws most likely due to seg fault in gerpelem when ~25000 entries
281             # in rates file, not much I can do about it!)
282            
283 0 0         $self->warn("GERP: ".$warning) if $warning;
284             }
285            
286             #system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir");
287            
288 0           my $result_file = $self->{align_base}.'.rates.elems';
289 0           my $parser = Bio::Tools::Phylo::Gerp->new(-file => $result_file);
290            
291             # cd back again
292 0 0         chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
293            
294 0           return $parser;
295             }
296              
297             =head2 _setparams
298              
299             Title : _setparams
300             Usage : Internal function, not to be called directly
301             Function: Creates a string of params to be used in the command string
302             Returns : string of params
303             Args : none
304              
305             =cut
306              
307             sub _setparams {
308 0     0     my ($self, $prog) = @_;
309            
310 0           my $param_string;
311 0 0         if ($prog eq 'gerpcol') {
312 0           my $align_file = $self->_write_alignment;
313 0           $param_string .= ' -f '.$align_file;
314 0           $self->{align_base} = basename($align_file);
315 0           $param_string .= ' -t '.$self->_write_tree;
316 0           $param_string .= $self->SUPER::_setparams(-params => \@COLPARAMS,
317             -switches => \@SWITCHES,
318             -dash => 1);
319             }
320             else {
321 0           $param_string .= ' -f '.$self->{align_base}.'.rates';
322 0           $param_string .= $self->SUPER::_setparams(-params => \@ELEMPARAMS,
323             -switches => \@SWITCHES,
324             -dash => 1);
325             }
326            
327 0           $param_string .= " 2>&1";
328            
329 0           return $param_string;
330             }
331              
332             1;