File Coverage

blib/lib/Bio/Tools/Run/Phylo/Gumby.pm
Criterion Covered Total %
statement 19 21 90.4
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 26 28 92.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Phylo::Gumby
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Phylo::Gumby - Wrapper for gumby
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::Phylo::Gumby;
21              
22             # Make a Gumby factory
23             $factory = Bio::Tools::Run::Phylo::Gumby->new();
24              
25             # Run gumby with an alignment and tree file
26             my @results = $factory->run($alignfilename, $treefilename);
27              
28             # or with alignment object and tree objects
29             @results = $factory->run($bio_simplalign, $bio_tree_tree);
30              
31             # or with alignment object and Bio::DB::Taxonomy object
32             @results = $factory->run($bio_simplalign, $bio_db_taxonomy);
33              
34             # specify the positions of exons in (at least) one of the alignment sequences
35             # to get better results
36             $factory->econs(1);
37             $factory->annots($gff_filename);
38             @results = $factory->run($alignfilename, $treefilename);
39              
40             # or using feature objects
41             $factory->annots(@bio_seqfeature_objects);
42             @results = $factory->run($alignfilename, $treefilename);
43              
44             # (mixtures of all the above are possible)
45              
46             # look at the results
47             foreach my $feat (@results) {
48             my $seq_id = $feat->seq_id;
49             my $start = $feat->start;
50             my $end = $feat->end;
51             my $score = $feat->score;
52             my ($pvalue) = $feat->get_tag_values('pvalue');
53             my ($kind) = $feat->get_tag_values('kind'); # 'all', 'exon' or 'nonexon'
54             }
55              
56             =head1 DESCRIPTION
57              
58             This is a wrapper for running the gumby application by Shyam Prabhakar. You
59             can get details here: http://pga.lbl.gov/gumby/. Gumby is used for phylogenetic
60             footprinting/ shadowing.
61              
62             You can try supplying normal gumby command-line arguments to new(), eg.
63              
64             $factory->new(-ratio => 2);
65              
66             or calling arg-named methods (excluding the initial hyphen, eg.
67              
68             $factory->econs(1);
69              
70             to set the -econs arg).
71              
72              
73             You will need to enable this Gumby wrapper to find the gumby program.
74             This can be done in (at least) three ways:
75              
76             1. Make sure the gumby executable is in your path.
77             2. Define an environmental variable GUMBYDIR which is a
78             directory which contains the gumby application:
79             In bash:
80              
81             export GUMBYDIR=/home/username/gumby/
82              
83             In csh/tcsh:
84              
85             setenv GUMBYDIR /home/username/gumby
86              
87             3. Include a definition of an environmental variable GUMBYDIR in
88             every script that will use this Gumby wrapper module, e.g.:
89              
90             BEGIN { $ENV{GUMBYDIR} = '/home/username/gumby/' }
91             use Bio::Tools::Run::Phylo::Gumby;
92              
93             =head1 FEEDBACK
94              
95             =head2 Mailing Lists
96              
97             User feedback is an integral part of the evolution of this and other
98             Bioperl modules. Send your comments and suggestions preferably to
99             the Bioperl mailing list. Your participation is much appreciated.
100              
101             bioperl-l@bioperl.org - General discussion
102             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
103              
104             =head2 Support
105              
106             Please direct usage questions or support issues to the mailing list:
107              
108             I
109              
110             rather than to the module maintainer directly. Many experienced and
111             reponsive experts will be able look at the problem and quickly
112             address it. Please include a thorough description of the problem
113             with code and data examples if at all possible.
114              
115             =head2 Reporting Bugs
116              
117             Report bugs to the Bioperl bug tracking system to help us keep track
118             of the bugs and their resolution. Bug reports can be submitted via
119             the web:
120              
121             http://redmine.open-bio.org/projects/bioperl/
122              
123             =head1 AUTHOR - Sendu Bala
124              
125             Email bix@sendu.me.uk
126              
127             =head1 APPENDIX
128              
129             The rest of the documentation details each of the object methods.
130             Internal methods are usually preceded with a _
131              
132             =cut
133              
134             package Bio::Tools::Run::Phylo::Gumby;
135 1     1   98617 use strict;
  1         1  
  1         22  
136              
137 1     1   2 use Cwd;
  1         1  
  1         41  
138 1     1   3 use File::Spec;
  1         1  
  1         13  
139 1     1   419 use Bio::AlignIO;
  1         84100  
  1         25  
140 1     1   374 use Bio::TreeIO;
  1         13192  
  1         38  
141 1     1   6 use Bio::Tools::GFF;
  1         0  
  1         16  
142 1     1   415 use Bio::Tools::Phylo::Gumby;
  0            
  0            
143              
144             use base qw(Bio::Tools::Run::Phylo::PhyloBase);
145              
146             our $PROGRAM_NAME = 'gumby';
147             our $PROGRAM_DIR = $ENV{'GUMBYDIR'};
148              
149             # methods for the gumby args we support
150             our @PARAMS = qw(annots ratio base plen prob);
151             our @SWITCHES = qw(econs);
152              
153             # just to be explicit, args we don't support (yet) or we handle ourselves
154             our @UNSUPPORTED = qw(o minseq blklen);
155              
156             =head2 program_name
157              
158             Title : program_name
159             Usage : $factory>program_name()
160             Function: holds the program name
161             Returns : string
162             Args : None
163              
164             =cut
165              
166             sub program_name {
167             return $PROGRAM_NAME;
168             }
169              
170             =head2 program_dir
171              
172             Title : program_dir
173             Usage : $factory->program_dir(@params)
174             Function: returns the program directory, obtained from ENV variable.
175             Returns : string
176             Args : None
177              
178             =cut
179              
180             sub program_dir {
181             return $PROGRAM_DIR;
182             }
183              
184             =head2 new
185              
186             Title : new
187             Usage : $factory = Bio::Tools::Run::Phylo::Gumby->new()
188             Function: creates a new Gumby factory
189             Returns : Bio::Tools::Run::Phylo::Gumby
190             Args : Most options understood by gumby can be supplied as key =>
191             value pairs.
192              
193             These options can NOT be used with this wrapper:
194             o
195             minseq
196             blklen
197              
198             =cut
199              
200             sub new {
201             my ($class, @args) = @_;
202             my $self = $class->SUPER::new(@args);
203            
204             $self->_set_from_args(\@args, -methods => [@PARAMS, @SWITCHES, 'quiet'],
205             -create => 1);
206            
207             return $self;
208             }
209              
210             =head2 annots
211              
212             Title : annots
213             Usage : $factory->annots(@gff_filenames)
214             Function: Specify annotation files for gumby to use
215             Returns : string of absolute filepaths to gff files
216             Args : list of gff filenames (can be relative), where the first column
217             corresponds to a sequence id from the alignment that will be supplied
218             to run()
219             OR
220             list of Bio::SeqFeatureI objects, which have seq_id() values that
221             will correspond to the sequence ids from the alignment that will
222             be supplied to run() (the objects will be grouped by seq_id and
223             output to gff files for use by gumby; filepaths to those tempfiles
224             will be returned). Note that all features must have source, seq_id
225             and primary_tag set or none will be used.
226              
227             NB: feature coordinates must be relative to the parts of the
228             sequences in the alignment you will supply, as though numbering
229             started at 1 for each each sequence in the alignment. There is
230             currently no automatic correction for this.
231              
232             =cut
233              
234             sub annots {
235             my $self = shift;
236             if (@_) {
237             my @files;
238             my %feats;
239             foreach my $thing (@_) {
240             if (ref($thing) && $thing->isa('Bio::SeqFeatureI')) {
241             my $seq_id = $thing->seq_id || $self->throw("Supplied a feature with no seq_id");
242             push(@{$feats{$seq_id}}, $thing);
243             }
244             elsif (-e $thing) {
245             push(@files, File::Spec->rel2abs($thing));
246             }
247             else {
248             $self->throw("'$thing' was not a Bio::SeqFeatureI or a file");
249             }
250             }
251            
252             if (keys %feats) {
253             my $temp_dir = $self->tempdir;
254            
255             while (my ($seq_id, $feats) = each %feats) {
256             my $temp_file = File::Spec->catfile($temp_dir, $seq_id.'.gff');
257             $temp_file = File::Spec->rel2abs($temp_file);
258             my $gffout = Bio::Tools::GFF->new(-file => ">$temp_file", -gff_version => 2);
259             $gffout->write_feature(@{$feats});
260             push(@files, $temp_file);
261             }
262             }
263            
264             $self->{annots} = \@files;
265             }
266            
267             if (defined $self->{annots}) {
268             return join(' ', @{$self->{annots}});
269             }
270             return;
271             }
272              
273             =head2 run
274              
275             Title : run
276             Usage : $result = $factory->run($fasta_align_file, $newick_tree_file);
277             -or-
278             $result = $factory->run($align_object, $tree_object);
279             -or-
280             $result = $factory->run($align_object, $db_taxonomy_object);
281             Function: Runs gumby on an alignment.
282             Returns : list of Bio::SeqFeature::Annotated (one per prediction and sequence)
283             Args : The first argument represents an alignment, the second argument
284             a species tree.
285             The alignment can be provided as a multi-fasta format alignment
286             filename, or a Bio::Align::AlignI compliant object (eg. a
287             Bio::SimpleAlign).
288             The species tree can be provided as a newick format tree filename
289             or a Bio::Tree::TreeI compliant object. Alternatively a
290             Bio::DB::Taxonomy object can be supplied, in which case the species
291             tree will be generated by using the alignment sequence names as
292             species names and looking for those in the supplied database.
293              
294             In all cases, the alignment sequence names must correspond to node
295             ids in the species tree. Multi-word species names should have the
296             spaces removed to form the sequence names, eg. Homosapiens.
297             Underscores may also be used for either or both of sequence and node
298             ids ('Homo_sapiens'), but underscores will be removed internally.
299              
300             NB: Gumby treats each sequence in the alignment as starting at
301             position 1. This method returns results with the coordinates
302             corrected so they match the coordinates of your input alignment. Eg.
303             if 'Homo_sapiens' sequence had the range 20..60 in your alignment,
304             the first Gumby result might be 1..5 which is corrected to 20..24.
305              
306             =cut
307              
308             sub run {
309             my ($self, $aln, $tree) = @_;
310            
311             ($aln && $tree) || $self->throw("alignment and tree must be supplied");
312             $aln = $self->_alignment($aln);
313             $tree = $self->_tree($tree);
314            
315             $tree->force_binary;
316            
317             # adjust seq & node ids to remove spaces and underscores (eg. if tree
318             # generated from taxonomy/ user input bad names)
319             foreach my $thing ($tree->get_leaf_nodes, $aln->each_seq) {
320             my $id = $thing->id;
321             $id =~ s/_aligned//; #*** dubious custom-handling for the allowed case of mlagan adding _aligned to id (according to gumby author)
322             $id =~ s/[ _]//g;
323             $thing->id($id);
324             }
325             my $new_aln = $aln->new;
326             foreach my $seq ($aln->each_seq) {
327             $new_aln->add_seq($seq);
328             }
329             $self->_alignment($new_aln);
330            
331             #*** at some stage we want to revert the ids back to original...
332            
333             # check node and seq names match
334             $self->_check_names;
335            
336             return $self->_run;
337             }
338              
339             sub _run {
340             my $self = shift;
341            
342             my $exe = $self->executable || return;
343            
344             # cd to a temp dir
345             my $temp_dir = $self->tempdir;
346             my $cwd = Cwd->cwd();
347             chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
348            
349             my $tree_file = 'tree_file';
350             my $aln_file = $self->_write_alignment;
351            
352             # generate a gumby-friendly tree file
353             my $tree = $self->_tree;
354             $tree = $tree->simplify_to_leaves_string;
355             open(my $tfhandle, '>', $tree_file) || $self->throw("Could not write to tree file '$tree_file'");
356             print $tfhandle $tree, "\n";
357             close($tfhandle);
358            
359             my $command = $exe.$self->_setparams($aln_file, $tree_file);
360             $self->debug("gumby command = $command\n");
361            
362             open(PIPE, "$command |") || $self->throw("gumby call ($command) failed to start: $? | $!");
363             my $error = '';
364             while () {
365             print unless $self->quiet;
366             if (/^ERROR: (.+)/ || /^mbgumbel\(\): (.+)/) {
367             $error .= $1;
368             }
369             }
370             close(PIPE) || ($error ? $self->warn("gumby call ($command) failed: $error") : $self->throw("gumby call ($command) crashed: $?"));
371            
372             my $aln = $self->_alignment();
373             my %offsets;
374             foreach my $seq ($aln->each_seq) {
375             $offsets{lc($seq->id)} = $seq->start - 1;
376             }
377            
378             my @feats = ();
379             foreach my $file ('out_all.align', 'out_exon.align', 'out_nonexon.align') {
380             -e $file || next;
381             my $parser = Bio::Tools::Phylo::Gumby->new(-file => $file);
382            
383             while (my @results = $parser->next_result) {
384             foreach my $result (@results) {
385             my $this_adjust = $offsets{lc($result->seq_id)};
386             $result->start($result->start + $this_adjust);
387             $result->end($result->end + $this_adjust);
388             }
389             push(@feats, @results);
390             }
391            
392             unlink($file);
393             }
394            
395             # cd back again
396             chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
397            
398             return @feats;
399             }
400              
401             =head2 _setparams
402              
403             Title : _setparams
404             Usage : Internal function, not to be called directly
405             Function: Creates a string of params to be used in the command string
406             Returns : string of params
407             Args : alignment and tree file names
408              
409             =cut
410              
411             sub _setparams {
412             my ($self, $aln_file, $tree_file) = @_;
413            
414             my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
415             my $param_string = ' '.$tree_file;
416             $param_string .= ' '.$aln_file;
417             $param_string .= $self->SUPER::_setparams(-params => \@PARAMS,
418             -switches => \@SWITCHES,
419             -dash => 1,);
420             $param_string .= ' -o out';
421             $param_string .= ' 2>&1';
422             $param_string .= " 1>$null" if $self->quiet;
423            
424             return $param_string;
425             }
426              
427             1;