File Coverage

blib/lib/Bio/Tools/Run/Phylo/Phylip/SeqBoot.pm
Criterion Covered Total %
statement 58 136 42.6
branch 9 52 17.3
condition 0 3 0.0
subroutine 15 19 78.9
pod 5 5 100.0
total 87 215 40.4


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Phylo::Phylip::SeqBoot
2             #
3             # Created by
4             #
5             # Shawn Hoon
6             #
7             # You may distribute this module under the same terms as perl itself
8              
9             # POD documentation - main docs before the code
10              
11             =head1 NAME
12              
13             Bio::Tools::Run::Phylo::Phylip::SeqBoot - Wrapper for the phylip
14             program SeqBoot
15              
16             =head1 SYNOPSIS
17              
18             #Create a SimpleAlign object
19             @params = ('ktuple' => 2, 'matrix' => 'BLOSUM');
20             $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
21             $inputfilename = 't/data/cysprot.fa';
22             $aln = $factory->align($inputfilename); # $aln is a SimpleAlign object.
23              
24             # Use seqboot to generate bootstap alignments
25             my @params = ('datatype'=>'SEQUENCE','replicates'=>100);
26             my $seq = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
27              
28             my $aln_ref = $seq->run($aln);
29              
30             my $aio = Bio::AlignIO->new(-file=>">alignment.bootstrap",-format=>"phylip");
31             foreach my $ai(@{$aln_ref}){
32             $aio->write_aln($ai);
33             }
34              
35             # To prevent PHYLIP from truncating sequence names:
36             # Step 1. Shelf the original names:
37             my ($aln_safe, $ref_name)= # $aln_safe has serial names
38             $aln->set_displayname_safe(); # $ref_name holds orginal names
39             # Step 2. Run PHYLIP programs:
40             $aln_ref = $seq->run($aln_safe); # Use $aln_safe instead of $aln
41             # Step 3. Retrieve orgininal names
42             $aio = Bio::AlignIO->new(
43             -file=>">alignment.bootstrap",
44             -format=>"fasta"); # FASTA output to view full names
45             foreach my $ai(@{$aln_ref}){
46             my $new_aln=$ai->restore_displayname($ref_name); # Restore names
47             $aio->write_aln($new_aln);
48             }
49              
50             =head1 DESCRIPTION
51              
52             Wrapper for seqboot from the phylip package by Joseph Felsentein.
53              
54             Taken from phylip doc...
55              
56             "SEQBOOT is a general boostrapping tool. It is intended to allow you to
57             generate multiple data sets that are resampled versions of the input data set.
58             SEQBOOT can handle molecular sequences, binary characters,
59             restriction sites, or gene frequencies."
60              
61             More documentation on using seqboot and setting parameters may be found
62             in the phylip package.
63              
64             VERSION Support
65             This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
66             this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.
67              
68             =head1 PARAMETERS FOR SEQBOOT
69              
70             =head2 MODEL
71              
72             Title : DATATYPE
73             Description : (optional)
74              
75             This program supports 3 different datatypes
76             SEQUENCE: Molecular Sequences
77             MORPH : Discrete Morphological Characters
78             REST : Restriction Sites
79             GENEFREQ: Gene Frequencies
80              
81             Defaults to SEQUENCE
82              
83             =head2 PERMUTE
84              
85             Title: PERMUTE
86             Description: (optional)
87              
88             3 different resampling methods are available:
89              
90             BOOTSTRAP : creating a new data set by sampling N
91             characters randomly with replacement The
92             resulting data set has the same size as the
93             original, but some characters have been left
94             out and others are duplicated
95              
96             JACKKNIFE : Delete-half-jackknifing. It involves sampling
97             a random half of the characters, and
98             including them in the data but dropping the
99             others The resulting data sets are half the
100             size of the original, and no characters are
101             duplicated.
102              
103             PERMUTE : Permuting species within characters. It
104             involves permuting the columns of the data
105             matrix separately. This produces data matrices
106             that have the same number and kinds of
107             characters but no taxonomic structure.
108              
109             Defaults to BOOTSTRAP
110              
111             =head2 REPLICATES
112              
113             Title : REPLICATES
114             Description : (optional)
115              
116             This options allows the user to set the number of
117             replicate data sets. Most statisticians would be
118             happiest with 1000 to 10,000 replicates in a
119             bootstrap, but 100 gives a good rough picture
120              
121             Defaults to 100
122              
123             =head2 ALLELES
124              
125             Title : ALLELES
126             Description : (optional)
127              
128             This option is to be used with gene frequencies datatype
129             option to specify that all alleles at each locus are in
130             the input file.
131              
132             Defaults to NULL
133              
134             =head1 FEEDBACK
135              
136             =head2 Mailing Lists
137              
138             User feedback is an integral part of the evolution of this and other
139             Bioperl modules. Send your comments and suggestions preferably to one
140             of the Bioperl mailing lists. Your participation is much appreciated.
141              
142             bioperl-l@bioperl.org - General discussion
143             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
144              
145             =head2 Support
146              
147             Please direct usage questions or support issues to the mailing list:
148              
149             I
150              
151             rather than to the module maintainer directly. Many experienced and
152             reponsive experts will be able look at the problem and quickly
153             address it. Please include a thorough description of the problem
154             with code and data examples if at all possible.
155              
156             =head2 Reporting Bugs
157              
158             Report bugs to the Bioperl bug tracking system to help us keep track
159             the bugs and their resolution. Bug reports can be submitted via the
160             web:
161              
162             http://redmine.open-bio.org/projects/bioperl/
163              
164             =head1 AUTHOR - Shawn Hoon
165              
166             Email shawnh@fugu-sg.org
167              
168             =head1 APPENDIX
169              
170             The rest of the documentation details each of the object
171             methods. Internal methods are usually preceded with a _
172              
173             =cut
174              
175             #'
176              
177            
178             package Bio::Tools::Run::Phylo::Phylip::SeqBoot;
179              
180 1         68 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
181             @SEQBOOT_PARAMS @OTHER_SWITCHES
182 1     1   101039 %OK_FIELD);
  1         2  
183 1     1   3 use strict;
  1         1  
  1         15  
184 1     1   630 use Bio::SimpleAlign;
  1         83322  
  1         61  
185 1     1   551 use Bio::AlignIO;
  1         4322  
  1         22  
186 1     1   413 use Bio::TreeIO;
  1         14074  
  1         29  
187 1     1   411 use Bio::Tools::Run::Phylo::Phylip::Base;
  1         1  
  1         25  
188 1     1   5 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
  1         1  
  1         157  
189 1     1   420 use Bio::Matrix::PhylipDist;
  1         1779  
  1         22  
190 1     1   4 use Cwd;
  1         2  
  1         88  
191              
192              
193             # inherit from Phylip::Base which has some methods for dealing with
194             # Phylip specifics
195             @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
196              
197             # You will need to enable the SeqBoot program. This
198             # can be done in (at least) 3 ways:
199             #
200             # 1. define an environmental variable PHYLIPDIR:
201             # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
202             #
203             # 2. include a definition of an environmental variable CLUSTALDIR in
204             # every script that will use Clustal.pm.
205             # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
206             #
207             # 3. You can set the path to the program through doing:
208             # my @params('executable'=>'/usr/local/bin/seqboot');
209             # my $SeqBoot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
210             #
211              
212              
213             BEGIN {
214 1     1   3 @SEQBOOT_PARAMS = qw(DATATYPE PERMUTE BLOCKSIZE REPLICATES READWEIGHTS READCAT);
215 1         1 @OTHER_SWITCHES = qw(QUIET);
216 1         2 foreach my $attr(@SEQBOOT_PARAMS,@OTHER_SWITCHES) {
217 7         915 $OK_FIELD{$attr}++;
218             }
219             }
220              
221             =head2 program_name
222              
223             Title : program_name
224             Usage : >program_name()
225             Function: holds the program name
226             Returns: string
227             Args : None
228              
229             =cut
230              
231             sub program_name {
232 6     6 1 22 return 'seqboot';
233             }
234              
235             =head2 program_dir
236              
237             Title : program_dir
238             Usage : ->program_dir()
239             Function: returns the program directory, obtained from ENV variable.
240             Returns: string
241             Args :
242              
243             =cut
244              
245             sub program_dir {
246 3 50   3 1 11 return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};
247             }
248              
249             sub new {
250 1     1 1 89 my ($class,@args) = @_;
251 1         15 my $self = $class->SUPER::new(@args);
252            
253 1         35 my ($attr, $value);
254 1         5 while (@args) {
255 4         4 $attr = shift @args;
256 4         5 $value = shift @args;
257 4 100       10 next if( $attr =~ /^-/ ); # don't want named parameters
258 3 50       7 if ($attr =~/PROGRAM/i) {
259 0         0 $self->executable($value);
260 0         0 next;
261             }
262 3 100       7 if ($attr =~ /IDLENGTH/i){
263 1         3 $self->idlength($value);
264 1         2 next;
265             }
266 2         12 $self->$attr($value);
267             }
268 1         2 return $self;
269             }
270              
271             sub AUTOLOAD {
272 2     2   3 my $self = shift;
273 2         4 my $attr = $AUTOLOAD;
274 2         6 $attr =~ s/.*:://;
275 2         3 $attr = uc $attr;
276 2 50       6 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
277 2 50       5 $self->{$attr} = shift if @_;
278 2         5 return $self->{$attr};
279             }
280              
281             =head2 idlength
282              
283             Title : idlength
284             Usage : $obj->idlength ($newval)
285             Function:
286             Returns : value of idlength
287             Args : newvalue (optional)
288              
289              
290             =cut
291              
292             sub idlength{
293 1     1 1 2 my $self = shift;
294 1 50       2 if( @_ ) {
295 1         2 my $value = shift;
296 1         1 $self->{'idlength'} = $value;
297             }
298 1         2 return $self->{'idlength'};
299              
300             }
301              
302              
303             =head2 run
304              
305             Title : run
306             Usage :
307             $inputfilename = 't/data/prot.phy';
308             $matrix= $seqboot_factory->run($inputfilename);
309             or
310             $seq_array_ref = \@seq_array; @seq_array is array of Seq objs
311             $aln = $clustalw_factory->align($seq_array_ref);
312             $aln_ref = $SeqBootfactory->run($aln);
313              
314             Function: Create bootstrap sets of alignments
315             Example :
316             Returns : an array ref of L
317             Args : Name of a file containing a multiple alignment in Phylip format
318             or an SimpleAlign object
319              
320             Throws an exception if argument is not either a string (eg a
321             filename) or a Bio::SimpleAlign object. If
322             argument is string, throws exception if file corresponding to string
323             name can not be found.
324              
325             =cut
326              
327             sub run{
328              
329 0     0 1   my ($self,$input) = @_;
330 0           my ($infilename);
331              
332             # Create input file pointer
333 0           $infilename = $self->_setinput($input);
334 0 0         if (!$infilename) {$self->throw("Problems setting up for seqboot. Probably bad input data in $input !");}
  0            
335              
336             # Create parameter string to pass to SeqBoot program
337 0           my $param_string = $self->_setparams();
338             # run SeqBoot
339 0           my $aln = $self->_run($infilename,$param_string);
340 0           return $aln;
341             }
342              
343             #################################################
344              
345             =head2 _run
346              
347             Title : _run
348             Usage : Internal function, not to be called directly
349             Function: makes actual system call to SeqBoot program
350             Example :
351             Returns : an array ref of
352             Args : Name of a file containing a set of multiple alignments in Phylip format
353             and a parameter string to be passed to SeqBoot
354              
355              
356             =cut
357              
358             sub _run {
359 0     0     my ($self,$infile,$param_string) = @_;
360 0           my $instring;
361 0           my $curpath = cwd;
362 0 0         unless( File::Spec->file_name_is_absolute($infile) ) {
363 0           $infile = $self->io->catfile($curpath,$infile);
364             }
365             #odd random seed
366 0           my $rand = (2 * int(rand(10000)) + 1);
367 0 0         if ($self->version == 3.5){
368 0           $instring = $infile."\n$rand\n$param_string";
369             }
370             else {
371 0           $instring = $infile."\n$param_string$rand\n";
372             }
373 0           $self->debug( "Program ".$self->executable." $instring\n");
374            
375 0           chdir($self->tempdir);
376             #open a pipe to run SeqBoot to bypass interactive menus
377 0 0 0       if ($self->quiet() || $self->verbose() < 0) {
378 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
379 0           open(SeqBoot,"|".$self->executable .">$null");
380             }
381             else {
382 0           open(SeqBoot,"|".$self->executable);
383             }
384 0           print SeqBoot $instring;
385 0           close(SeqBoot);
386            
387             # get the results
388 0           my $outfile = $self->io->catfile($self->tempdir,$self->outfile);
389 0           chdir($curpath);
390 0 0         $self->throw("SeqBoot did not create files correctly ($outfile)")
391             unless (-e $outfile);
392            
393             #parse the alignments
394 0           my @aln;
395             my @parse_params;
396              
397 0 0         push @parse_params, ('-interleaved' => 1) if $self->version == 3.6;
398 0           my $aio = Bio::AlignIO->new(-file=>$outfile,-format=>"phylip",
399             @parse_params);
400 0           while (my $aln = $aio->next_aln){
401 0           push @aln, $aln;
402             }
403              
404             # Clean up the temporary files created along the way...
405 0 0         unlink $outfile unless $self->save_tempfiles;
406            
407 0           return \@aln;
408             }
409              
410              
411             =head2 _setinput()
412              
413             Title : _setinput
414             Usage : Internal function, not to be called directly
415             Function: Create input file for SeqBoot program
416             Example :
417             Returns : name of file containing a multiple alignment in Phylip format
418             Args : SimpleAlign object reference or input file name
419              
420              
421             =cut
422              
423             sub _setinput {
424 0     0     my ($self, $input) = @_;
425 0           my ($alnfilename,$tfh);
426            
427             # a phy formatted alignment file
428 0 0         unless (ref $input) {
429             # check that file exists or throw
430 0           $alnfilename= $input;
431 0 0         unless (-e $input) {return 0;}
  0            
432 0           return $alnfilename;
433             }
434 0 0         my @input = ref($input) eq 'ARRAY' ? @{$input}: ($input);
  0            
435              
436 0           ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
437 0           my $alnIO = Bio::AlignIO->new(-fh => $tfh,
438             -format=>'phylip',
439             -idlength=>$self->idlength());
440 0           foreach my $input(@input){
441             # $input should be a Bio::Align::AlignI
442 0 0         $input->isa("Bio::Align::AlignI") || $self->throw("Expecting a Bio::Align::AlignI object");
443             # Open temporary file for both reading & writing of BioSeq array
444 0           $alnIO->write_aln($input);
445             }
446 0           $alnIO->close();
447 0           close($tfh);
448 0           return $alnfilename;
449             }
450              
451             =head2 _setparams()
452              
453             Title : _setparams
454             Usage : Internal function, not to be called directly
455             Function: Create parameter inputs for SeqBoot program
456             Example :
457             Returns : parameter string to be passed to SeqBoot
458             Args : name of calling object
459              
460             =cut
461              
462             sub _setparams {
463 0     0     my ($attr, $value, $self);
464              
465             #do nothing for now
466 0           $self = shift;
467 0           my $param_string = "";
468 0           my $cat = 0;
469 0           my $gene_freq = 0;
470 0           my %menu = %{$Menu{$self->version}->{'SEQBOOT'}};
  0            
471              
472 0           foreach my $attr ( @SEQBOOT_PARAMS) {
473 0           $value = $self->$attr();
474 0 0         next unless (defined $value);
475 0 0         if ($attr =~/REPLICATES/i){
    0          
476 0 0         if( $value !~ /(\d+(\.\d+)?)/ ) {
477 0           $self->warn("Expected a number in $attr\n");
478 0           next;
479             }
480 0           $param_string .= $menu{'REPLICATES'}."$value\n";
481             }
482             elsif($attr=~/DATATYPE/i){
483 0 0         $gene_freq = 1 if $value =~/GENEFREQ/i;
484 0           $param_string .= $menu{'DATATYPE'}{uc $value};
485             }
486             else {
487 0 0         if($attr =~/ALLELES/i){
488 0 0         if(!$gene_freq){
489 0           $self->warn("Alleles options only be used with alleles option");
490 0           return;
491             }
492 0           $param_string .=$menu{uc $attr};
493             }
494             }
495             }
496 0           $param_string .= $menu{'SUBMIT'};
497              
498 0           return $param_string;
499             }
500              
501              
502              
503             =head1 Bio::Tools::Run::Wrapper methods
504              
505             =cut
506              
507             =head2 no_param_checks
508              
509             Title : no_param_checks
510             Usage : $obj->no_param_checks($newval)
511             Function: Boolean flag as to whether or not we should
512             trust the sanity checks for parameter values
513             Returns : value of no_param_checks
514             Args : newvalue (optional)
515              
516              
517             =cut
518              
519             =head2 save_tempfiles
520              
521             Title : save_tempfiles
522             Usage : $obj->save_tempfiles($newval)
523             Function:
524             Returns : value of save_tempfiles
525             Args : newvalue (optional)
526              
527              
528             =cut
529              
530             =head2 outfile_name
531              
532             Title : outfile_name
533             Usage : my $outfile = $SeqBoot->outfile_name();
534             Function: Get/Set the name of the output file for this run
535             (if you wanted to do something special)
536             Returns : string
537             Args : [optional] string to set value to
538              
539              
540             =cut
541              
542              
543             =head2 tempdir
544              
545             Title : tempdir
546             Usage : my $tmpdir = $self->tempdir();
547             Function: Retrieve a temporary directory name (which is created)
548             Returns : string which is the name of the temporary directory
549             Args : none
550              
551              
552             =cut
553              
554             =head2 cleanup
555              
556             Title : cleanup
557             Usage : $codeml->cleanup();
558             Function: Will cleanup the tempdir directory after a SeqBoot run
559             Returns : none
560             Args : none
561              
562              
563             =cut
564              
565             =head2 io
566              
567             Title : io
568             Usage : $obj->io($newval)
569             Function: Gets a L object
570             Returns : L
571             Args : none
572              
573              
574             =cut
575              
576             1; # Needed to keep compiler happy