File Coverage

blib/lib/Bio/Tools/Run/FootPrinter.pm
Criterion Covered Total %
statement 48 138 34.7
branch 7 60 11.6
condition 1 13 7.6
subroutine 13 17 76.4
pod 5 5 100.0
total 74 233 31.7


line stmt bran cond sub pod time code
1             # BioPerl module for FootPrinter
2             #
3             # Copyright Shawn Hoon
4             #
5             # You may distribute this module under the same terms as perl itself
6              
7             # POD documentation - main docs before the code
8              
9             =head1 NAME
10              
11             Bio::Tools::Run::FootPrinter - wrapper for the FootPrinter program
12              
13             =head1 SYNOPSIS
14              
15             use Bio::Tools::Run::FootPrinter;
16              
17             my @params = (size => 10,
18             max_mutations_per_branch => 4,
19             sequence_type => 'upstream',
20             subregion_size => 30,
21             position_change_cost => 5,
22             triplet_filtering => 1,
23             pair_filtering => 1,
24             post_filtering => 1,
25             inversion_cost => 1,
26             max_mutations => 4,
27             tree => "~/software/FootPrinter2.0/tree_of_life" );
28              
29             my $fp = Bio::Tools::Run::FootPrinter->new(@params, -verbose => 1);
30              
31             my $sio = Bio::SeqIO->new(-file => "seq.fa", -format => "fasta");
32              
33             while (my $seq = $sio->next_seq){
34             push @seq, $seq;
35             }
36             my @fp = $fp->run(@seq);
37              
38             foreach my $result(@fp){
39             print "***************\n".$result->seq_id."\n";
40             foreach my $feat($result->sub_SeqFeature){
41             print $feat->start."\t".$feat->end."\t".$feat->seq->seq."\n";
42             }
43             }
44              
45             =head1 DESCRIPTION
46              
47             From the FootPrinter manual:
48              
49             FootPrinter is a program that performs phylogenetic footprinting.
50             It takes as input a set of unaligned orthologous sequences from various
51             species, together with a phylogenetic tree relating these species.
52             It then searches for short regions of the sequences that are highly conserved,
53             according to a parsimony criterion.
54              
55             The regions identified are good candidates for regulatory elements.
56             By default, the program searches for regions that are well conserved across
57             all of the input sequences, but this can be relaxed to
58             find regions conserved in only a subset of the species
59              
60             =head2 About Footprinter
61              
62             Written by Mathieu Blanchette and Martin Tompa. Available here:
63              
64             http://www.mcb.mcgill.ca/~blanchem/FootPrinter2.1.tar.gz
65              
66              
67             =head2 Running Footprinter
68              
69             To run FootPrinter, you will need to set the environment variable
70             FOOTPRINTER_DIR to where the binary is located (even if the executable
71             is in your path). For example:
72              
73             setenv FOOTPRINTER_DIR /usr/local/bin/FootPrinter2.0/
74              
75              
76             =head2 Available Parameters
77              
78             PARAM VALUES DESCRIPTION
79             ------------------------------------------------------------------------
80             tree REQUIRED, Tree in Newick Format
81             to evaluate parsimony score
82             REQUIRED unless tree_of_life
83             exists in FOOTPRINTER_DIR
84             sequence_type upstream Default upstream
85             downstream
86             other
87             size 4-16 Specifies the size of the motifs sought
88             max_mutations 0-20 maximum parsimony score allowed for the motifs
89             max_mutations_per_branch 0-20 Allows at most a fixed number of mutations per
90             branch of the tree
91             losses files give span constraints so that the motifs
92             reported are statistically significant
93             Example files
94             universal([6-9]|1[0-2])(loose|tight)?.config
95             come with FootPrinter2.0.
96             Install these in FOOTPRINTER_DIR and use by
97             setting "losses" to "somewhat significant",
98             "significant", or "very significant". Do not
99             set loss_cost.
100             loss_cost 0-20 a cost associated with losing a motif along some
101             branch of the tre
102             subregion_size 1-infinity penalize motifs whose position in the sequences
103             varies too much
104             position_change_cost 0-20 Cost for changing subregion
105             triplet_filtering 1/0 pre-filtering step that removes from consideration
106             any substring that does not have a sufficiently good
107             pair of matching substrings in some pair of the other
108             input sequences
109             pair_filtering 1/0 Same as triplet filtering, but looks only for one match
110             per other sequence
111             post_filtering 1/0 when used in conjunction with the triplet filtering
112             option, this often significantly speeds up the program,
113             while still garanteeing optimal results
114             indel_cost 1-5 insertions and deletions will be allowed in the motifs
115             sought, at the given cost
116             inversion_cost 1-5 This option allows for motifs to undergo inversions,
117             at the given cost. An inversion reverse-complements
118             the motif.
119             details 1/0 Shows some of the details about the progress of the
120             computation
121             html 1/0 produce html output (never deleted)
122             ps 1/0 produce postscript output (never deleted)
123              
124             =head1 FEEDBACK
125              
126             =head2 Mailing Lists
127              
128             User feedback is an integral part of the evolution of this and other
129             Bioperl modules. Send your comments and suggestions preferably to one
130             of the Bioperl mailing lists. Your participation is much appreciated.
131              
132             bioperl-l@bioperl.org - General discussion
133             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
134              
135             =head2 Support
136              
137             Please direct usage questions or support issues to the mailing list:
138              
139             I
140              
141             rather than to the module maintainer directly. Many experienced and
142             reponsive experts will be able look at the problem and quickly
143             address it. Please include a thorough description of the problem
144             with code and data examples if at all possible.
145              
146             =head2 Reporting Bugs
147              
148             Report bugs to the Bioperl bug tracking system to help us keep track
149             the bugs and their resolution. Bug reports can be submitted via the
150             web:
151              
152             http://redmine.open-bio.org/projects/bioperl/
153              
154             =head1 AUTHOR - Shawn Hoon
155              
156             Email shawnh@fugu-sg.org
157              
158             =head1 APPENDIX
159              
160             The rest of the documentation details each of the object
161             methods. Internal methods are usually preceded with a "_".
162              
163             =cut
164              
165             package Bio::Tools::Run::FootPrinter;
166              
167 1         91 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
168 1     1   103033 @FP_SWITCHES @FP_PARAMS @OTHER_SWITCHES %OK_FIELD);
  1         2  
169              
170 1     1   4 use strict;
  1         1  
  1         16  
171 1     1   2 use Cwd;
  1         1  
  1         85  
172 1     1   705 use Bio::Root::Root;
  1         17758  
  1         29  
173 1     1   391 use Bio::Tools::Run::WrapperBase;
  1         1  
  1         69  
174 1     1   449 use Bio::Tools::FootPrinter;
  1         54300  
  1         29  
175 1     1   546 use Bio::SeqIO;
  1         9437  
  1         73  
176              
177             # Let the code begin...
178              
179             @ISA = qw(Bio::Tools::Run::WrapperBase);
180              
181             BEGIN {
182 1     1   3 @FP_PARAMS = qw(SEQUENCE_TYPE SIZE MAX_MUTATIONS MAX_MUTATIONS_PER_BRANCH
183             LOSSES LOSS_COST TREE PROGRAM SUBREGION_SIZE POSITION_CHANGE_COST
184             INDEL_COST INVERSION_COST );
185 1         2 @FP_SWITCHES = qw(TRIPLET_FILTERING PAIR_FILTERING POST_FILTERING DETAILS);
186 1         2 @OTHER_SWITCHES = qw(QUIET HTML PS);
187              
188             # Authorize attribute fields
189 1         3 foreach my $attr ( @FP_PARAMS, @FP_SWITCHES,
190 19         988 @OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
191             }
192              
193             =head2 program_name
194              
195             Title : program_name
196             Usage : $factory>program_name()
197             Function: holds the program name
198             Returns: string
199             Args : None
200              
201             =cut
202              
203             sub program_name {
204 6     6 1 25 return 'FootPrinter';
205             }
206              
207             =head2 program_dir
208              
209             Title : program_dir
210             Usage : $factory->program_dir(@params)
211             Function: returns the program directory, obtained from ENV variable.
212             Returns: string
213             Args :
214              
215             =cut
216              
217             sub program_dir {
218 3 50   3 1 10 return Bio::Root::IO->catfile($ENV{FOOTPRINTER_DIR}) if $ENV{FOOTPRINTER_DIR};
219             }
220              
221             =head2 executable
222              
223             Title : executable
224             Usage : my $exe = $footprinter->executable('FootPrinter');
225             Function: Finds the full path to the 'FootPrinter' executable
226             Returns : string representing the full path to the exe
227             Args : [optional] name of executable to set path to
228             [optional] boolean flag whether or not warn when exe is not found
229              
230             =cut
231              
232             sub executable {
233 1     1 1 55 my $self = shift;
234 1   0     7 my $exe = $self->SUPER::executable(@_) || return;
235            
236             # even if its executable, we still need the environment variable to have
237             # been set
238 0 0       0 if (! $ENV{FOOTPRINTER_DIR}) {
239 0         0 $self->warn("Environment variable FOOTPRINTER_DIR must be set, even if the FootPrinter executable is in your path");
240 0         0 return;
241             }
242            
243 0         0 return $exe;
244             }
245              
246             sub AUTOLOAD {
247 12     12   9 my $self = shift;
248 12         8 my $attr = $AUTOLOAD;
249 12         28 $attr =~ s/.*:://;
250 12         9 $attr = uc $attr;
251 12 50       25 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
252 12 100       18 $self->{$attr} = shift if @_;
253 12         25 return $self->{$attr};
254             }
255              
256             =head2 new
257              
258             Title : new
259             Usage : $rm->new($seq)
260             Function: creates a new wrapper
261             Returns: Bio::Tools::Run::FootPrinter
262             Args : self
263              
264             =cut
265              
266             sub new {
267 1     1 1 110 my ($class, @args) = @_;
268 1         12 my $self = $class->SUPER::new(@args);
269 1         78 my ($attr, $value);
270 1         4 while (@args) {
271 11         9 $attr = shift @args;
272 11         8 $value = shift @args;
273 11 50       15 next if( $attr =~ /^-/ ); # don't want named parameters
274 11         59 $self->$attr($value);
275             }
276              
277 1 50 33     12 if(!$self->tree && -e $ENV{FOOTPRINTER_DIR}."/tree_of_life"){
278 0         0 $self->tree($ENV{FOOTPRINTER_DIR}."/tree_of_life");
279             }
280              
281 1 50       3 unless($self->tree){
282 0         0 $self->debug("Phylogenetic tree not provided. FootPrinter won't be able to run without it. use \$fp->tree to set the tree file");
283             }
284              
285 1         2 return $self;
286             }
287              
288             =head2 run
289              
290             Title : run
291             Usage : $fp->run(@seq)
292             Function: carry out FootPrinter
293             Example :
294             Returns : An array of SeqFeatures
295             Args : An array of Bio::PrimarySeqI compliant object
296             At least 2 are needed.
297              
298             =cut
299              
300             sub run {
301 0     0 1   my ($self,@seq) = @_;
302              
303             #need at least 2 for comparative genomics duh.
304 0 0         $#seq > 0 || $self->throw("Need at least two sequences");
305 0 0         $self->tree || $self->throw("Need to specify a phylogenetic tree using -tree option");
306              
307 0           my $infile = $self->_setinput(@seq);
308              
309 0           my $param_string = $self->_setparams();
310 0           my @footprint_feats = $self->_run($infile,$self->tree,$param_string);
311 0           return @footprint_feats;
312              
313             }
314              
315             =head2 _run
316              
317             Title : _run
318             Usage : $fp->_run ($filename,$param_string)
319             Function: internal function that runs FootPrinter
320             Example :
321             Returns : an array of features
322             Args : the filename to the input sequence, filename to phylo tree
323             and the parameter string
324              
325             =cut
326              
327             sub _run {
328 0     0     my ($self,$infile,$tree,$param_string) = @_;
329 0           my $instring;
330 0   0       my $exe = $self->executable || return;
331 0           $self->debug( "Program ".$self->executable."\n");
332              
333 0           my $outfile = $infile.".seq.txt";
334 0           my $cmd_str = $self->executable. " $infile $tree $param_string";
335 0           $self->debug("FootPrinter command = $cmd_str");
336              
337 0 0         if ($self->verbose <=0){
338 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
339 0           $cmd_str.= " >&$null > $null";
340             }
341            
342             # will do brute-force clean up of junk files generated by FootPrinter
343 0           my $cwd = cwd();
344 0 0         opendir(my $cwd_dir, $cwd) || $self->throw("Could not open the current directory '$cwd'!");
345 0           my %ok_files;
346 0           foreach my $thing (readdir($cwd_dir)) {
347 0 0         if ($thing =~ /^mlc\./) {
348 0           $ok_files{$thing} = 1;
349             }
350             }
351 0           closedir($cwd_dir);
352            
353 0           my $status = system($cmd_str);
354 0 0 0       $self->throw("FootPrinter Call($cmd_str) crashed: $?\n")
355             unless $status == 0 || $status==256;
356            
357 0 0         unless (open (FP, $outfile)) {
358 0           $self->throw("Cannot open FootPrinter outfile for parsing");
359             }
360            
361 0           my $fp_parser = Bio::Tools::FootPrinter->new(-fh=>\*FP);
362 0           my @fp_feat;
363 0           while(my $fp_feat = $fp_parser->next_feature){
364 0           push @fp_feat, $fp_feat;
365             }
366              
367 0 0         unless( $self->save_tempfiles ) {
368 0           unlink $outfile;
369 0           unlink $infile; # is this dangerous??
370 0           unlink "$infile.order.txt"; # is this dangerous??
371            
372 0 0         opendir($cwd_dir, $cwd) || $self->throw("Could not open the current directory '$cwd'!");
373 0           foreach my $thing (readdir($cwd_dir)) {
374 0 0         if ($thing =~ /^mlc\./) {
375 0 0         unlink($thing) unless $ok_files{$thing};
376             }
377             }
378 0           closedir($cwd_dir);
379            
380 0           $self->cleanup();
381             }
382              
383 0           return @fp_feat;
384             }
385              
386             =head2 _setparams()
387              
388             Title : _setparams
389             Usage : Internal function, not to be called directly
390             Function: Create parameter inputs for FootPrinter program
391             Example :
392             Returns : parameter string to be passed to FootPrinter
393             Args : name of calling object
394              
395             =cut
396              
397             sub _setparams {
398 0     0     my ($attr, $value, $self);
399              
400 0           $self = shift;
401              
402 0           my $param_string = "";
403 0           for $attr ( @FP_PARAMS ) {
404 0           $value = $self->$attr();
405 0 0         next if $attr=~/TREE/i;
406 0 0         next unless (defined $value);
407              
408 0           my $attr_key = lc $attr; #put params in format expected by dba
409 0 0 0       if ($attr_key eq 'losses' && $value =~ /^\s*(somewhat|very)?\s*significant\s*$/) {
410 0           $value = "$ENV{FOOTPRINTER_DIR}/universal".$self->size();
411 0 0         if (defined $1) {
412 0 0         if ($1 eq 'somewhat') {
413 0           $value .= 'loose';
414             } else { # $1 eq 'very'
415 0           $value .= 'tight';
416             }
417             }
418 0           $value .= '.config';
419 0 0         -f $value or $self->throw("universal losses file $value does not exist");
420             }
421 0           $attr_key = ' -'.$attr_key;
422 0           $param_string .= $attr_key.' '.$value;
423             }
424              
425 0           for $attr ( @FP_SWITCHES) {
426 0           $value = $self->$attr();
427 0 0         next unless ($value);
428 0           my $attr_key = lc $attr; #put switches in format expected by dba
429 0           $attr_key = ' -'.$attr_key;
430 0           $param_string .= $attr_key ;
431             }
432              
433 0 0         $self->html() or $param_string .= " -no_html";
434 0 0         $self->ps() or $param_string .= " -no_ps";
435              
436 0           return $param_string;
437             }
438              
439              
440             =head2 _setinput()
441              
442             Title : _setinput
443             Usage : Internal function, not to be called directly
444             Function: writes input sequence to file and return the file name
445             Example :
446             Returns : string
447             Args : a Bio::PrimarySeqI compliant object
448              
449             =cut
450              
451             sub _setinput {
452 0     0     my ($self,@seq) = @_;
453 0           my ($tfh1,$outfile1);
454 0           $outfile1 = $self->outfile_name();
455 0 0         if (defined $outfile1) {
456 0           open($tfh1,">$outfile1");
457             } else {
458 0           ($tfh1,$outfile1) = $self->io->tempfile(-dir=>$self->tempdir);
459             }
460 0           my $out1 = Bio::SeqIO->new('-fh' => $tfh1,
461             '-format' => 'Fasta');
462 0           foreach my $seq(@seq){
463 0 0         $seq->isa("Bio::PrimarySeqI") || $self->throw("Need a Bio::PrimarySeq compliant object for FootPrinter");
464 0           $out1->write_seq($seq);
465             }
466 0           $out1->close(); # close the SeqIO object
467 0           close($tfh1); # close the fh explicitly (just in case)
468 0           undef($tfh1); # really get rid of it (just in case)
469 0           return ($outfile1);
470             }
471              
472              
473              
474             =head1 Bio::Tools::Run::Wrapper methods
475              
476             =cut
477              
478             =head2 no_param_checks
479              
480             Title : no_param_checks
481             Usage : $obj->no_param_checks($newval)
482             Function: Boolean flag as to whether or not we should
483             trust the sanity checks for parameter values
484             Returns : value of no_param_checks
485             Args : newvalue (optional)
486              
487              
488             =cut
489              
490             =head2 save_tempfiles
491              
492             Title : save_tempfiles
493             Usage : $obj->save_tempfiles($newval)
494             Function:
495             Returns : value of save_tempfiles
496             Args : newvalue (optional)
497              
498              
499             =cut
500              
501             =head2 outfile_name
502              
503             Title : outfile_name
504             Usage : my $outfile = $codeml->outfile_name();
505             Function: Get/Set the name of the output file for this run
506             (if you wanted to do something special)
507             Returns : string
508             Args : [optional] string to set value to
509              
510              
511             =cut
512              
513              
514             =head2 tempdir
515              
516             Title : tempdir
517             Usage : my $tmpdir = $self->tempdir();
518             Function: Retrieve a temporary directory name (which is created)
519             Returns : string which is the name of the temporary directory
520             Args : none
521              
522              
523             =cut
524              
525             =head2 cleanup
526              
527             Title : cleanup
528             Usage : $codeml->cleanup();
529             Function: Will cleanup the tempdir directory
530             Returns : none
531             Args : none
532              
533              
534             =cut
535              
536             =head2 io
537              
538             Title : io
539             Usage : $obj->io($newval)
540             Function: Gets a L object
541             Returns : L
542             Args : none
543              
544              
545             =cut
546              
547             1;