File Coverage

blib/lib/Bio/Tools/Run/Alignment/StandAloneFasta.pm
Criterion Covered Total %
statement 58 182 31.8
branch 13 96 13.5
condition 5 27 18.5
subroutine 14 21 66.6
pod 8 8 100.0
total 98 334 29.3


line stmt bran cond sub pod time code
1             #StandAloneFasta.pm v1.00 2002/11/01
2             #
3             #Bioperl module for Bio::Tools::Run::Alignment::StandAloneFasta
4             #
5             # Written by Tiequan Zhang
6             # Please direct questions and support issues to
7             #
8             # Cared for by Shawn Hoon
9             # Copyright Tiequan Zhang
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Tools::Run::Alignment::StandAloneFasta - Object for the local
18             execution of the Fasta3 programs ((t)fasta3, (t)fastx3, (t)fasty3
19             ssearch3)
20              
21             =head1 SYNOPSIS
22              
23             #!/usr/bin/perl
24             use Bio::Tools::Run::Alignment::StandAloneFasta;
25             use Bio::SeqIO;
26             use strict;
27             my @arg=(
28             'b' =>'15',
29             'O' =>'resultfile',
30             'H'=>'',
31             'program'=>'fasta34'
32             );
33              
34             my $factory=Bio::Tools::Run::Alignment::StandAloneFasta->new(@arg);
35             $factory->ktup(1);
36              
37             $factory->library('p');
38              
39             #print result file name
40             print $factory->O;
41              
42              
43             my @fastreport=$factory->run($ARGV[0]);
44              
45             foreach (@fastreport) {
46             print "Parsed fasta report:\n";
47             my $result = $_->next_result;
48             while( my $hit = $result->next_hit()) {
49             print "\thit name: ", $hit->name(), "\n";
50             while( my $hsp = $hit->next_hsp()) {
51             print "E: ", $hsp->evalue(), "frac_identical: ",
52             $hsp->frac_identical(), "\n";
53             }
54             }
55             }
56              
57             #pass in seq objects
58             my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>"fasta");
59             my $seq = $sio->next_seq;
60             my @fastreport=$factory->run($ARGV[0]);
61              
62             =head1 DESCRIPTION
63              
64             This wrapper works with version 3 of the FASTA program package (see
65             W. R. Pearson and D. J. Lipman (1988), "Improved Tools for Biological
66             Sequence Analysis", PNAS 85:2444-2448 (Pearson and Lipman, 1988);
67             W. R. Pearson (1996) "Effective protein sequence comparison"
68             Meth. Enzymol. 266:227-258 (Pearson, 1996); Pearson et. al. (1997)
69             Genomics 46:24-36 (Zhang et al., 1997); Pearson, (1999) Meth. in
70             Molecular Biology 132:185-219 (Pearson, 1999). Version 3 of the FASTA
71             packages contains many programs for searching DNA and protein
72             databases and one program (prss3) for evaluating statistical
73             significance from randomly shuffled sequences.
74              
75             Fasta is available at ftp://ftp.virginia.edu/pub/fasta
76              
77             =head1 FEEDBACK
78              
79             =head2 Mailing Lists
80              
81             User feedback is an integral part of the evolution of this and other
82             Bioperl modules. Send your comments and suggestions preferably to one
83             of the Bioperl mailing lists. Your participation is much appreciated.
84              
85             bioperl-l@bioperl.org - General discussion
86             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87              
88             =head2 Support
89              
90             Please direct usage questions or support issues to the mailing list:
91              
92             I
93              
94             rather than to the module maintainer directly. Many experienced and
95             reponsive experts will be able look at the problem and quickly
96             address it. Please include a thorough description of the problem
97             with code and data examples if at all possible.
98              
99             =head2 Reporting Bugs
100              
101             Report bugs to the Bioperl bug tracking system to help us keep track
102             the bugs and their resolution. Bug reports can be submitted via the
103             web:
104              
105             http://redmine.open-bio.org/projects/bioperl/
106              
107             =head1 AUTHOR - Tiequan Zhang
108              
109             Adapted for bioperl by Shawn Hoon
110             Enhanced by Jason Stajich
111              
112             Email tqzhang1973@yahoo.com
113             shawnh@fugu-sg.org
114             jason-at-bioperl.org
115              
116             =head1 Appendix
117              
118             The rest of the documendation details each of the object
119             methods. Internal methods are preceded with a underscore
120              
121             =cut
122              
123             package Bio::Tools::Run::Alignment::StandAloneFasta;
124              
125 1         83 use vars qw ($AUTOLOAD @ISA $library %parameters
126 1     1   109971 $ktup @FASTA_PARAMS %OK_FIELD @OTHER_PARAMS);
  1         2  
127 1     1   6 use strict;
  1         1  
  1         19  
128              
129 1     1   277 use Bio::Root::Root;
  1         17606  
  1         44  
130 1     1   10 use Bio::Root::IO;
  1         2  
  1         20  
131 1     1   405 use Bio::Seq;
  1         28449  
  1         61  
132 1     1   420 use Bio::SeqIO;
  1         20658  
  1         37  
133 1     1   324 use Bio::SearchIO;
  1         7792  
  1         54  
134 1     1   341 use Bio::Tools::Run::WrapperBase;
  1         4  
  1         100  
135              
136             BEGIN {
137 1     1   8 @FASTA_PARAMS=qw(a A b c E d f g h H i j l L M m n O o p Q q r R s S w x y z);
138 1         2 @OTHER_PARAMS =qw(program output database);
139 1         2 foreach my $att (@FASTA_PARAMS, @OTHER_PARAMS) {$OK_FIELD{$att}++;}
  33         66  
140 1         2 $ktup=2;
141 1         1803 %parameters=('H' => '',
142             'q' => '',
143             'm' =>'1', 'O' =>'');
144            
145             }
146              
147             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
148              
149             sub new {
150 1     1 1 719 my ($caller,@args)=@_;
151             #chained new
152 1         14 my $self = $caller->SUPER::new(@args);
153 1         49 while(@args){
154 6         11 my $attr = shift @args;
155 6         11 my $value = shift @args;
156 6 100 66     29 next if ($attr=~/^-/ || ! $attr);
157 5         39 $self->$attr($value);
158             }
159 1         14 return $self;
160             }
161              
162             sub AUTOLOAD {
163 7     7   14 my $self = shift;
164 7         11 my $attr = $AUTOLOAD;
165 7         28 $attr =~ s/.*:://;
166 7 50       21 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
167 7 100       20 $self->{$attr} = shift if @_;
168 7         27 return $self->{$attr};
169             }
170              
171              
172             =head2 program_name
173              
174             Title : program_name
175             Usage : $factory->program_name()
176             Function: holds the program name
177             Returns: string
178             Args : None
179              
180             =cut
181              
182             sub program_name {
183 2     2 1 3 my ($self) = shift;
184 2         8 return $self->program(@_);
185             }
186              
187             =head2 executable
188              
189             Title : executable
190             Usage : my $exe = $blastfactory->executable('blastall');
191             Function: Finds the full path to the 'codeml' executable
192             Returns : string representing the full path to the exe
193             Args : [optional] name of executable to set path to
194             [optional] boolean flag whether or not warn when exe is not found
195              
196              
197             =cut
198              
199             sub executable {
200 1     1 1 540 my ($self, $exename, $exe,$warn) = @_;
201 1 50       4 $exename = 'fasta34' unless defined $exename;
202              
203 1 50 33     11 if( defined $exe && -x $exe ) {
204 0         0 $self->{'_pathtoexe'}->{$exename} = $exe;
205             }
206 1 50       5 unless( defined $self->{'_pathtoexe'}->{$exename} ) {
207 1         9 my $f = $self->program_path($exename);
208 1 50 33     21 $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e $f && -x $f );
209            
210             # This is how I meant to split up these conditionals --jason
211             # if exe is null we will execute this (handle the case where
212             # PROGRAMDIR pointed to something invalid)
213 1 50       4 unless( $exe ) { # we didn't find it in that last conditional
214 1 50 33     8 if( ($exe = $self->io->exists_exe($exename)) && -x $exe ) {
215 0         0 $self->{'_pathtoexe'}->{$exename} = $exe;
216             } else {
217 1 50       272 $self->warn("Cannot find executable for $exename") if $warn;
218 1         4 $self->{'_pathtoexe'}->{$exename} = undef;
219             }
220             }
221             }
222 1         3 return $self->{'_pathtoexe'}->{$exename};
223             }
224              
225             =head2 program_dir
226              
227             Title : program_dir
228             Usage : $factory->program_dir(@params)
229             Function: returns the program directory, obtained from ENV variable.
230             Returns: string
231             Args :
232              
233             =cut
234              
235             sub program_dir {
236 1 50   1 1 6 return Bio::Root::IO->catfile($ENV{FASTADIR}) if $ENV{FASTADIR};
237             }
238              
239             =head2 run
240              
241             Title : run
242              
243             Usage : my @fasta_object = $factory->($input,$onefile);
244             where $factory is the name of executable FASTA program;
245             $input is file name containing the sequences in the format
246             of fasta or Bio::Seq object or array of Bio::Seq object;
247             $onefile is 0 if you want to save the outputs to different files
248             default: outputs are saved in one file
249              
250             Function: Attempts to run an executable FASTA program
251             and return array of fasta objects containing the fasta report
252             Returns : aray of fasta report object
253             If the user specify the output file(s),
254             the raw fasta report will be saved
255             Args : sequence object OR array reference of sequence objects
256             filename of file containing fasta formatted sequences
257              
258             =cut
259              
260              
261             sub run {
262 0     0 1   my ($self,$input,$onefile)=@_;
263 0           local * FASTARUN;
264              
265 0           $self->io->_io_cleanup;
266 0   0       my $program = $self->executable($self->program_name) ||
267             $self->throw("FASTA program not found or not executable.\n");
268             # You should specify a library file
269 0 0         $self->throw("You didn't choose library.\n") unless ( $library);
270            
271 0           my @seqs = $self->_setinput($input);
272 0 0         return 0 unless (@seqs);
273              
274 0           my @fastobj;
275 0           my ($fhout, $tempoutfile)=$self->io->tempfile(-dir=>$self->tempdir);
276              
277 0           my $outfile=$self->O();
278              
279             # The outputs from executable FASTA program will
280             # be saved into different files if $onefile is 0,
281             # else will be concatenated into one file
282 0   0       my $onfile = (!defined $onefile || $onefile =~ /^0$/);
283              
284 0 0         unless( $onfile ) {
285 0           my $count=0;
286              
287             # do some fancy stuff here to test if we are running fasta34
288             # with mlib so we just pass in a single file rather than
289             # running fasta N times
290             # (not implemented yet)
291            
292 0           foreach my $seq (@seqs){
293 0           $count++;
294             # Decide if the output will be saved into a temporary file
295 0 0         if( $outfile ) {
296 0           $self->O(sprintf("%s_%d",$outfile,$count));
297             }
298            
299 0           my ($fhinput,$teminputfile)=
300             $self->io->tempfile(-dir=>$self->tempdir);
301              
302 0           my $temp = Bio::SeqIO->new(-fh=>$fhinput, '-format'=>'Fasta');
303 0           $temp->write_seq($seq);
304 0           $temp->close();
305 0           close $fhinput;
306 0           undef $fhinput;
307 0           my $para= $self->_setparams;
308              
309 0           $para .=" $teminputfile $library $ktup";
310 0           $para ="$program $para";
311 0           my $object;
312 0 0         unless( $outfile ) {
313 0 0         open(FASTARUN, "$para |") || $self->throw($@);
314 0           $object = Bio::SearchIO->new(-fh=>\*FASTARUN,
315             -format=>"fasta");
316             } else {
317 0 0         if ( $self->verbose() < 0) {
318 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
319 0           $para .= " >$null 2>$null";
320             } else {
321 0           $self->debug("Going to execute: $para");
322             }
323 0           my $status = system($para);
324 0 0         $self->throw("$para crashed: $?\n" )unless ($status==0);
325 0           $object = Bio::SearchIO->new(-file=>$self->O,
326             -format=>"fasta");
327             }
328 0           push @fastobj, $object;
329             }
330             } else {
331 0 0         if ($outfile){
332 0 0         open (FILE, ">$outfile") or $self->throw("can't use $outfile:$!");
333 0           close(FILE);
334             }
335            
336 0           foreach my $seq (@seqs){
337 0           my ($fhinput,$teminputfile)=$self->io->tempfile(-dir=>$self->tempdir);
338 0           my $temp=Bio::SeqIO->new(-fh=>$fhinput, '-format'=>'fasta');
339 0           $temp->write_seq($seq);
340 0           $temp->close();
341 0           close $fhinput;
342 0           undef $fhinput;
343            
344 0 0         $self->O($tempoutfile) if( $outfile );
345 0           my $para= $self->_setparams;
346 0           $para .= " $teminputfile $library $ktup";
347 0           $para ="$program $para";
348 0           my $object;
349 0 0         unless( $outfile ) {
350 0 0         open(FASTARUN, "$para |") || $self->throw($@);
351 0           $object=Bio::SearchIO->new(-fh=>\*FASTARUN,
352             -format=>"fasta");
353             } else {
354 0 0         if ( $self->verbose() < 0) {
355 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
356 0           $para .= " >$null 2>$null";
357             } else {
358 0           $self->debug("Going to execute: $para");
359             }
360 0           my $status = system($para);
361 0 0         $self->throw("$para crashed: $?\n" )unless ($status==0);
362 0           $object = Bio::SearchIO->new(-file=>$self->O,
363             -format=>"fasta");
364             }
365 0           push @fastobj, $object;
366            
367             # The output in the temporary file
368             # will be saved at the end of $outfile
369 0 0         if($outfile){
370 0 0         open (FHOUT, $tempoutfile) or die("can't open the $tempoutfile file");
371 0 0         open (FH, ">>$outfile") or die("can't use the $outfile file");
372 0           print FH ();
373 0           close (FHOUT);
374 0           close (FH);
375             }
376             }
377             }
378 0           return @fastobj;
379             }
380              
381             =head2 library
382              
383             Title : library
384             Usage : my $lb = $self->library
385             Function: Fetch or set the name of the library to search against
386             Returns : The name of the library
387             Args : No argument if user wants to fetch the name of library file;
388             A letter or a string of letter preceded by %;
389             (e.g. P or %pn, the letter is the character in the third field
390             of any line of fastlibs file ) or the name of library file
391             (if environmental variable FASTLIBS is not set);
392             if user wants to set the name of library file to search against
393              
394             =cut
395              
396             sub library {
397 0     0 1   my($self,$lb)=@_;
398 0 0         return $library if (!defined($lb));
399              
400 0 0 0       if ( ($lb =~ /^%[a-zA-Z]+$/)||($lb=~ /^[a-zA-Z]$/)){
401 0 0         if(! defined $ENV{'FASTLIBS'} ){
402 0           $self->throw("abbrv. list request but FASTLIBS undefined, cannot use $lb");
403             }
404             } else {
405 0 0         unless ( -e $lb){
406 0           $self->throw("cannot open $lb library");
407             }
408             }
409              
410 0           return $library=$lb;
411             }
412              
413             *database = \&library;
414              
415             =head2 output
416              
417             Title : output
418             Usage : $obj->output($newval)
419             Function: The output directory if we want to use this
420             Example :
421             Returns : value of output (a scalar)
422             Args : on set, new value (a scalar or undef, optional)
423              
424              
425             =cut
426              
427             sub output{
428 0     0 1   my $self = shift;
429              
430 0 0         return $self->{'output'} = shift if @_;
431 0           return $self->{'output'};
432             }
433              
434             =head2 ktup
435              
436             Title : ktup
437             Usage : my $ktup = $self->ktup
438             Function: Fetch or set the ktup value for executable FASTA programs
439             Example :
440             Returns : The value of ktup if defined, else undef is returned
441             Args : No argument if user want to fetch ktup value; A integer value between 1-6 if user want to set the
442             ktup value
443              
444             =cut
445              
446             sub ktup {
447 0     0 1   my($self,$k)=@_;
448 0 0         if(!defined($k)){return $ktup;}
  0            
449 0 0         if ($k =~ /^[1-6]$/){
450 0           $ktup=$k;
451 0           return $ktup
452             } else {
453 0           $self->warn("You should set the ktup value between 1-6. The FASTA program will decide your default ktup value.");
454 0           return $ktup= undef;
455             }
456             }
457              
458             =head2 _setinput
459              
460             Title : _setinput
461             Usage : Internal function, not to be called directly
462             Function: Create input file(s) for Blast executable
463             Example :
464             Returns : array of Bio::Seq object reference
465             Args : Seq object reference or input file name
466              
467             =cut
468              
469             sub _setinput {
470              
471 0     0     my ($self, $input) = @_;
472              
473 0 0         if( ! defined $input ) {
474 0           $self->throw("Calling fasta program with no input");
475             }
476              
477 0           my @seqs;
478 0 0         if( ! ref $input ) {
    0          
    0          
479 0 0         if( -e $input ) {
480 0           my $seqio = Bio::SeqIO->new(-format => 'fasta', -file => $input);
481 0           while( my $seq = $seqio->next_seq ) {
482 0           push @seqs, $seq;
483             }
484             } else {
485 0           $self->throw("Input $input was not a valid filename");
486             }
487             } elsif( ref($input) =~ /ARRAY/i ) {
488 0           foreach ( @$input ) {
489 0 0 0       if( ref($_) && $_->isa('Bio::PrimarySeqI') ) {
490 0           push @seqs, $_;
491             } else {
492 0           $self->warn("Trying to add a " . ref($_) ." but expected a Bio::PrimarySeqI");
493             }
494             }
495 0 0         if( ! @seqs) {
496 0           $self->throw("Did not pass in valid input -- no sequence objects found");
497             }
498             } elsif( $input->isa('Bio::PrimarySeqI') ) {
499 0           push @seqs, $input;
500             }
501 0           return @seqs;
502             }
503              
504             =head2 _exist
505              
506             Title : _exist
507             Usage : Internal function, not to be called directly
508             Function: Determine whether a executable FASTA program can be found
509             Cf. the DESCRIPTION section of this POD for how to make sure
510             for your FASTA installation to be found. This method checks for
511             existence of the blastall executable in the path.
512             Returns : 1 if FASTA program found at expected location, 0 otherwise.
513             Args : none
514              
515             =cut
516              
517             sub _exist {
518 0     0     my $exe = shift @_;
519              
520 0 0         return 0 unless($exe =~ /fast|ssearch/);
521              
522 0 0         $exe .='.exe' if ($^O =~ /mswin/i);
523              
524 0           my $f;
525            
526 0   0       return ($f=Bio::Root::IO->exists_exe($exe))&&(-x $f);
527             }
528              
529             =head2 _setparams
530              
531             Title : _setparams
532             Usage : Internal function, not to be called directly
533             Function: Create parameter inputs for FASTA executable
534             Returns : part of parameter string to be passed to FASTA program
535             Args : none
536              
537             =cut
538              
539             sub _setparams {
540 0     0     my ($self,$attr,$value);
541 0           $self = shift;
542 0           my $para = "";
543 0           foreach my $attr(@FASTA_PARAMS) {
544 0           $value = $self->$attr();
545 0 0         next unless (defined $value);
546 0           $para .=" -$attr $value";
547             }
548 0           $para .= " -q ";
549 0           return $para;
550             }
551              
552             1;
553             __END__