File Coverage

blib/lib/Bio/Tools/Run/Alignment/Probcons.pm
Criterion Covered Total %
statement 49 156 31.4
branch 2 58 3.4
condition 1 13 7.6
subroutine 15 23 65.2
pod 8 8 100.0
total 75 258 29.0


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Alignment::Probcons
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Albert Vilella
6             #
7             #
8             #
9             # You may distribute this module under the same terms as perl itself
10             #
11             # POD documentation - main docs before the code
12              
13             =head1 NAME
14              
15             Bio::Tools::Run::Alignment::Probcons - Object for the calculation of an
16             iterative multiple sequence alignment from a set of unaligned
17             sequences or alignments using the Probcons program
18              
19             =head1 SYNOPSIS
20              
21             # Build a muscle alignment factory
22             $factory = Bio::Tools::Run::Alignment::Probcons->new(@params);
23              
24             # Pass the factory a list of sequences to be aligned.
25             $inputfilename = 't/cysprot.fa';
26             # $aln is a SimpleAlign object.
27             $aln = $factory->align($inputfilename);
28              
29             # or where @seq_array is an array of Bio::Seq objects
30             $seq_array_ref = \@seq_array;
31             $aln = $factory->align($seq_array_ref);
32              
33             # Or one can pass the factory a pair of (sub)alignments
34             #to be aligned against each other, e.g.:
35              
36             #There are various additional options and input formats available.
37             #See the DESCRIPTION section that follows for additional details.
38              
39             #To run probcons with training, try something like:
40              
41             #First round to generate train.params
42             $factory = Bio::Tools::Run::Alignment::Probcons->new
43             (
44             'iterative-refinement' => '1000',
45             'consistency' => '5',
46             'pre-training' => '20',
47             'emissions' => '',
48             'verbose' => '',
49             'train' => "$dir/$subdir/$outdir/train.params",
50             );
51             $factory->outfile_name("$dir/$subdir/$outdir/train.params");
52              
53             #Second round to use train.params to get a high qual alignment
54              
55             $seq_array_ref = \@seq_array;
56             $aln = $factory->align($seq_array_ref);
57             $aln = '';
58             $factory = '';
59              
60             $factory = Bio::Tools::Run::Alignment::Probcons->new
61             (
62             'iterative-refinement' => '1000',
63             'consistency' => '5',
64             'pre-training' => '20',
65             'verbose' => '',
66             'paramfile' => "$dir/$subdir/$outdir/train.params",
67             );
68             $factory->outfile_name("$dir/$subdir/$outdir/outfile.afa");
69             $aln = $factory->align($seq_array_ref);
70              
71             =head1 DESCRIPTION
72              
73             Probcons is a Probabilistic Consistency-based Multiple Alignment of
74             Amino Acid Sequences. You can get it and see information about it at this URL
75             http://probcons.stanford.edu/
76              
77              
78             =head2 Helping the module find your executable
79              
80             You will need to enable Probcons to find the probcons program. This can be
81             done in (at least) three ways:
82              
83             1. Make sure the probcons executable is in your path (i.e.
84             'which probcons' returns a valid program
85             2. define an environmental variable PROBCONSDIR which points to a
86             directory containing the 'probcons' app:
87             In bash
88             export PROBCONSDIR=/home/progs/probcons or
89             In csh/tcsh
90             setenv PROBCONSDIR /home/progs/probcons
91              
92             3. include a definition of an environmental variable PROBCONSDIR
93             in every script that will
94             BEGIN {$ENV{PROBCONSDIR} = '/home/progs/probcons'; }
95             use Bio::Tools::Run::Alignment::Probcons;
96              
97             =head1 FEEDBACK
98              
99             =head2 Mailing Lists
100              
101             User feedback is an integral part of the evolution of this and other
102             Bioperl modules. Send your comments and suggestions preferably to one
103             of the Bioperl mailing lists. Your participation is much appreciated.
104              
105             bioperl-l@bioperl.org - General discussion
106             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
107              
108             =head2 Support
109              
110             Please direct usage questions or support issues to the mailing list:
111              
112             I
113              
114             rather than to the module maintainer directly. Many experienced and
115             reponsive experts will be able look at the problem and quickly
116             address it. Please include a thorough description of the problem
117             with code and data examples if at all possible.
118              
119             =head2 Reporting Bugs
120              
121             Report bugs to the Bioperl bug tracking system to help us keep track
122             the bugs and their resolution. Bug reports can be submitted via the web:
123              
124             http://redmine.open-bio.org/projects/bioperl/
125              
126             =head1 AUTHOR - Jason Stajich
127              
128             Email jason-at-bioperl-dot-org
129              
130             =head1 APPENDIX
131              
132             The rest of the documentation details each of the object
133             methods. Internal methods are usually preceded with a _
134              
135             =cut
136              
137             package Bio::Tools::Run::Alignment::Probcons;
138              
139 1         71 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
140             @PROBCONS_PARAMS @PROBCONS_SWITCHES @OTHER_SWITCHES
141             %OK_FIELD
142 1     1   101227 );
  1         2  
143 1     1   4 use strict;
  1         1  
  1         14  
144 1     1   548 use Bio::Seq;
  1         42984  
  1         24  
145 1     1   484 use Bio::SeqIO;
  1         18815  
  1         32  
146 1     1   620 use Bio::SimpleAlign;
  1         26525  
  1         44  
147 1     1   541 use Bio::AlignIO;
  1         1002  
  1         23  
148 1     1   5 use Bio::Root::Root;
  1         1  
  1         13  
149 1     1   3 use Bio::Root::IO;
  1         1  
  1         15  
150 1     1   401 use Bio::Factory::ApplicationFactoryI;
  1         105  
  1         18  
151 1     1   402 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         69  
152             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
153             Bio::Factory::ApplicationFactoryI);
154              
155              
156             BEGIN {
157 1     1   3 %DEFAULTS = ( 'AFORMAT' => 'fasta' );
158 1         3 @PROBCONS_PARAMS = qw (CONSISTENCY ITERATIVE-REFINEMENT
159             PRE-TRAINING ANNOT TRAIN PARAMFILE MATRIXFILE
160             CLUSTALW PAIRS VITERBI VERBOSE EMISSIONS);
161             #FIXME: Last line are switches, dunno how to set them,
162             #gave as params
163 1         0 @PROBCONS_SWITCHES = qw();
164 1         2 @OTHER_SWITCHES = qw();
165              
166             # Authorize attribute fields
167 1         2 foreach my $attr ( @PROBCONS_PARAMS, @OTHER_SWITCHES ) {
168 12         976 $OK_FIELD{$attr}++; }
169             }
170              
171             =head2 program_name
172              
173             Title : program_name
174             Usage : $factory->program_name()
175             Function: holds the program name
176             Returns: string
177             Args : None
178              
179             =cut
180              
181             sub program_name {
182 6     6 1 25 return 'probcons';
183             }
184              
185             =head2 program_dir
186              
187             Title : program_dir
188             Usage : $factory->program_dir(@params)
189             Function: returns the program directory, obtained from ENV variable.
190             Returns: string
191             Args :
192              
193             =cut
194              
195             sub program_dir {
196 3 50   3 1 11 return Bio::Root::IO->catfile($ENV{PROBCONSDIR}) if $ENV{PROBCONSDIR};
197             }
198              
199             =head2 new
200              
201             Title : new
202             Usage : my $probcons = Bio::Tools::Run::Alignment::Probcons->new();
203             Function: Constructor
204             Returns : Bio::Tools::Run::Alignment::Probcons
205             Args : -outfile_name => $outname
206              
207              
208             =cut
209              
210             sub new{
211 1     1 1 81 my ($class,@args) = @_;
212 1         10 my $self = $class->SUPER::new(@args);
213 1         20 my ($on) = $self->SUPER::_rearrange([qw(OUTFILE_NAME)], @args);
214 1   50     16 $self->outfile_name($on || '');
215 1         1 my ($attr, $value);
216 1         4 $self->aformat($DEFAULTS{'AFORMAT'});
217            
218 1         4 while ( @args) {
219 0         0 $attr = shift @args;
220 0         0 $value = shift @args;
221 0 0       0 next if( $attr =~ /^-/); # don't want named parameters
222 0         0 $self->$attr($value);
223 0 0       0 if ($attr =~ /verbose/i) {
224 0         0 $self->{verbose_set} = 1;
225             }
226             }
227 1         2 return $self;
228             }
229              
230             sub AUTOLOAD {
231 0     0   0 my $self = shift;
232 0         0 my $attr = $AUTOLOAD;
233 0         0 $attr =~ s/.*:://;
234 0         0 $attr = uc $attr;
235             # aliasing
236 0 0       0 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
237              
238 0 0       0 $self->{$attr} = shift if @_;
239 0         0 return $self->{$attr};
240             }
241              
242             =head2 error_string
243              
244             Title : error_string
245             Usage : $obj->error_string($newval)
246             Function: Where the output from the last analysus run is stored.
247             Returns : value of error_string
248             Args : newvalue (optional)
249              
250              
251             =cut
252              
253             sub error_string{
254 0     0 1 0 my ($self,$value) = @_;
255 0 0       0 if( defined $value) {
256 0         0 $self->{'error_string'} = $value;
257             }
258 0         0 return $self->{'error_string'};
259              
260             }
261              
262             =head2 version
263              
264             Title : version
265             Usage : exit if $prog->version() < 1.8
266             Function: Determine the version number of the program
267             Example :
268             Returns : float or undef
269             Args : none
270              
271             =cut
272              
273             sub version {
274 0     0 1 0 my ($self) = @_;
275 0         0 my $exe;
276 0 0       0 return undef unless $exe = $self->executable;
277 0         0 my $string = `$exe 2>&1` ;
278             #PROBCONS version 1.09 - align multiple protein sequences and print to standard output
279 0         0 $string =~ /PROBCONS\s+version\s+(\d+\.\d+)/m;
280 0   0     0 return $1 || undef;
281             }
282              
283             =head2 run
284              
285             Title : run
286             Usage : my $output = $application->run(\@seqs);
287             Function: Generic run of an application
288             Returns : Bio::SimpleAlign object
289             Args : Arrayref of Bio::PrimarySeqI objects or
290             a filename to run on
291              
292             =cut
293              
294             sub run {
295 0     0 1 0 my $self = shift;
296 0         0 return $self->align(shift);
297             }
298              
299             =head2 align
300              
301             Title : align
302             Usage :
303             $inputfilename = 't/data/cysprot.fa';
304             $aln = $factory->align($inputfilename);
305             or
306             $seq_array_ref = \@seq_array;
307             # @seq_array is array of Seq objs
308             $aln = $factory->align($seq_array_ref);
309             Function: Perform a multiple sequence alignment
310             Returns : Reference to a SimpleAlign object containing the
311             sequence alignment.
312             Args : Name of a file containing a set of unaligned fasta sequences
313             or else an array of references to Bio::Seq objects.
314              
315             Throws an exception if argument is not either a string (eg a
316             filename) or a reference to an array of Bio::Seq objects. If
317             argument is string, throws exception if file corresponding to string
318             name can not be found. If argument is Bio::Seq array, throws
319             exception if less than two sequence objects are in array.
320              
321             =cut
322              
323             sub align {
324 0     0 1 0 my ($self,$input) = @_;
325             # Create input file pointer
326 0         0 $self->io->_io_cleanup();
327 0         0 my ($infilename) = $self->_setinput($input);
328 0 0       0 if (! $infilename) {
329 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
330             }
331              
332 0         0 my $param_string = $self->_setparams($infilename);
333              
334             # run probcons
335 0         0 return &_run($self, $param_string);
336             }
337              
338             =head2 _run
339              
340             Title : _run
341             Usage : Internal function, not to be called directly
342             Function: makes actual system call to probcons program
343             Example :
344             Returns : nothing; probcons output is written to a
345             temporary file OR specified output file
346             Args : Name of a file containing a set of unaligned fasta sequences
347             and hash of parameters to be passed to probcons
348              
349              
350             =cut
351              
352             sub _run {
353 0     0   0 my ($self, $params) = @_;
354 0         0 my $commandstring = $self->executable." $params";
355            
356 0         0 $self->debug( "probcons command = $commandstring \n");
357              
358 0         0 my $status = system($commandstring);
359 0 0       0 if ($status) {
360 0         0 $self->warn( "Probcons call crashed: $? [command $commandstring]\n");
361 0         0 return;
362             }
363            
364 0         0 my $outfile = $self->outfile_name();
365 0 0 0     0 if (-e $outfile || -z $outfile) {
366 0         0 my $in = Bio::AlignIO->new('-file' => $outfile,
367             '-format' => $self->aformat);
368 0         0 my $aln = $in->next_aln();
369 0         0 return $aln;
370             }
371 0         0 return; # some modes of operation do not generate an output alignment
372             }
373              
374              
375             =head2 _setinput
376              
377             Title : _setinput
378             Usage : Internal function, not to be called directly
379             Function: Create input file for probcons program
380             Example :
381             Returns : name of file containing probcons data input AND
382             Args : Arrayref of Seqs or input file name
383              
384              
385             =cut
386              
387             sub _setinput {
388 0     0   0 my ($self,$input) = @_;
389 0         0 my ($infilename, $seq, $temp, $tfh);
390 0 0       0 if (! ref $input) {
    0          
391             # check that file exists or throw
392 0         0 $infilename = $input;
393 0 0       0 unless (-e $input) {return 0;}
  0         0  
394             # let's peek and guess
395 0 0       0 open(IN,$infilename) || $self->throw("Cannot open $infilename");
396 0         0 my $header;
397 0         0 while( defined ($header = ) ) {
398 0 0       0 last if $header !~ /^\s+$/;
399             }
400 0         0 close(IN);
401 0 0       0 if ( $header !~ /^>\s*\S+/ ){
402 0         0 $self->throw("Need to provide a FASTA format file to probcons!");
403             }
404 0         0 return ($infilename);
405             } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
406             # array of BioSeq objects...
407             # Open temporary file for both reading & writing of array
408 0         0 ($tfh,$infilename) = $self->io->tempfile();
409 0 0       0 if( ! ref($input->[0]) ) {
    0          
410 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
411 0         0 return undef;
412             } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
413 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
414             '-format' => 'fasta');
415 0         0 my $ct = 1;
416 0         0 foreach $seq (@$input) {
417 0 0 0     0 return 0 unless ( ref($seq) &&
418             $seq->isa("Bio::PrimarySeqI") );
419 0 0 0     0 if( ! defined $seq->display_id ||
420             $seq->display_id =~ /^\s+$/) {
421 0         0 $seq->display_id( "Seq".$ct++);
422             }
423 0         0 $temp->write_seq($seq);
424             }
425 0         0 $temp->close();
426 0         0 undef $temp;
427 0         0 close($tfh);
428 0         0 $tfh = undef;
429             } else {
430 0         0 $self->warn( "got an array ref with 1st entry ".
431             $input->[0].
432             " and don't know what to do with it\n");
433             }
434 0         0 return ($infilename);
435             } else {
436 0         0 $self->warn("Got $input and don't know what to do with it\n");
437             }
438 0         0 return 0;
439             }
440              
441              
442             =head2 _setparams
443              
444             Title : _setparams
445             Usage : Internal function, not to be called directly
446             Function: Create parameter inputs for probcons program
447             Example :
448             Returns : parameter string to be passed to probcons
449             during align or profile_align
450             Args : name of calling object
451              
452             =cut
453              
454             sub _setparams {
455 0     0   0 my ($self, $infilename) = @_;
456 0         0 my ($attr, $value,$param_string);
457 0         0 $param_string = '';
458 0         0 my $laststr;
459 0         0 for $attr ( @PROBCONS_PARAMS ) {
460 0         0 $value = $self->$attr();
461 0 0       0 next unless (defined $value);
462 0         0 my $attr_key = lc $attr;
463 0 0       0 $attr_key = ' --'.$attr_key unless ($attr eq 'ANNOT');
464 0 0       0 $attr_key = ' -'.$attr_key if ($attr eq 'ANNOT');
465 0         0 $param_string .= $attr_key .' '.$value;
466              
467             }
468 0 0       0 if ($self->{verbose_set}) {
469 0         0 $param_string .= ' --verbose';
470             }
471 0         0 for $attr ( @PROBCONS_SWITCHES) {
472 0         0 $value = $self->$attr();
473 0 0       0 next unless ($value);
474 0         0 my $attr_key = lc $attr; #put switches in format expected by tcoffee
475 0         0 $attr_key = ' -'.$attr_key;
476 0         0 $param_string .= $attr_key ;
477             }
478             # Set default output file if no explicit output file selected
479 0 0       0 unless ($self->outfile_name ) {
480 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
481 0         0 close($tfh);
482 0         0 undef $tfh;
483 0         0 $self->outfile_name($outfile);
484             }
485             #FIXME: This may be only for *nixes. Double check in other OSes
486 0         0 $param_string .= " $infilename > ".$self->outfile_name;
487            
488 0 0       0 if ($self->verbose < 0) {
489 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
490 0         0 $param_string .= " 2> $null";
491             }
492 0         0 return $param_string;
493             }
494              
495             =head2 aformat
496              
497             Title : aformat
498             Usage : my $alignmentformat = $self->aformat();
499             Function: Get/Set alignment format
500             Returns : string
501             Args : string
502              
503              
504             =cut
505              
506             sub aformat{
507 1     1 1 2 my $self = shift;
508 1 50       4 $self->{'_aformat'} = shift if @_;
509 1         1 return $self->{'_aformat'};
510             }
511              
512             =head1 Bio::Tools::Run::BaseWrapper methods
513              
514             =cut
515              
516             =head2 no_param_checks
517              
518             Title : no_param_checks
519             Usage : $obj->no_param_checks($newval)
520             Function: Boolean flag as to whether or not we should
521             trust the sanity checks for parameter values
522             Returns : value of no_param_checks
523             Args : newvalue (optional)
524              
525              
526             =cut
527              
528             =head2 save_tempfiles
529              
530             Title : save_tempfiles
531             Usage : $obj->save_tempfiles($newval)
532             Function:
533             Returns : value of save_tempfiles
534             Args : newvalue (optional)
535              
536              
537             =cut
538              
539             =head2 outfile_name
540              
541             Title : outfile_name
542             Usage : my $outfile = $probcons->outfile_name();
543             Function: Get/Set the name of the output file for this run
544             (if you wanted to do something special)
545             Returns : string
546             Args : [optional] string to set value to
547              
548              
549             =cut
550              
551              
552             =head2 tempdir
553              
554             Title : tempdir
555             Usage : my $tmpdir = $self->tempdir();
556             Function: Retrieve a temporary directory name (which is created)
557             Returns : string which is the name of the temporary directory
558             Args : none
559              
560              
561             =cut
562              
563             =head2 cleanup
564              
565             Title : cleanup
566             Usage : $probcons->cleanup();
567             Function: Will cleanup the tempdir directory
568             Returns : none
569             Args : none
570              
571              
572             =cut
573              
574             =head2 io
575              
576             Title : io
577             Usage : $obj->io($newval)
578             Function: Gets a L object
579             Returns : L
580             Args : none
581              
582              
583             =cut
584              
585             1; # Needed to keep compiler happy