File Coverage

blib/lib/Bio/Tools/Run/Alignment/Kalign.pm
Criterion Covered Total %
statement 58 159 36.4
branch 5 56 8.9
condition 1 16 6.2
subroutine 16 23 69.5
pod 8 8 100.0
total 88 262 33.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Alignment::Kalign
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Albert Vilella
7             #
8             # Copyright Albert Vilella
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Alignment::Kalign - Object for the calculation of an
17             iterative multiple sequence alignment from a set of unaligned
18             sequences or alignments using the KALIGN program
19              
20             =head1 SYNOPSIS
21              
22             # Build a kalign alignment factory
23             $factory = Bio::Tools::Run::Alignment::Kalign->new(@params);
24              
25             # Pass the factory a list of sequences to be aligned.
26             $inputfilename = 't/cysprot.fa';
27             # $aln is a SimpleAlign object.
28             $aln = $factory->align($inputfilename);
29              
30             # or where @seq_array is an array of Bio::Seq objects
31             $seq_array_ref = \@seq_array;
32             $aln = $factory->align($seq_array_ref);
33              
34             # Or one can pass the factory a pair of (sub)alignments
35             #to be aligned against each other, e.g.:
36              
37             #There are various additional options and input formats available.
38             #See the DESCRIPTION section that follows for additional details.
39              
40             =head1 DESCRIPTION
41              
42             Please cite:
43              
44             Timo Lassmann and Erik L.L. Sonnhammer (2005)
45             Kalign - an accurate and fast multiple sequence alignment algorithm.
46             BMC Bioinformatics 6:298
47              
48             http://msa.cgb.ki.se/downloads/kalign/current.tar.gz
49              
50              
51             =head2 Helping the module find your executable
52              
53             You will need to enable Kalign to find the kalign program. This can be
54             done in (at least) three ways:
55              
56             1. Make sure the kalign executable is in your path (i.e.
57             'which kalign' returns a valid program
58             2. define an environmental variable KALIGNDIR which points to a
59             directory containing the 'kalign' app:
60             In bash
61             export KALIGNDIR=/home/progs/kalign or
62             In csh/tcsh
63             setenv KALIGNDIR /home/progs/kalign
64              
65             3. include a definition of an environmental variable KALIGNDIR
66             in every script that will
67             BEGIN {$ENV{KALIGNDIR} = '/home/progs/kalign'; }
68             use Bio::Tools::Run::Alignment::Kalign;
69              
70             =head1 FEEDBACK
71              
72             =head2 Mailing Lists
73              
74             User feedback is an integral part of the evolution of this and other
75             Bioperl modules. Send your comments and suggestions preferably to one
76             of the Bioperl mailing lists. Your participation is much appreciated.
77              
78             bioperl-l@bioperl.org - General discussion
79             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
80              
81             =head2 Support
82              
83             Please direct usage questions or support issues to the mailing list:
84              
85             I
86              
87             rather than to the module maintainer directly. Many experienced and
88             reponsive experts will be able look at the problem and quickly
89             address it. Please include a thorough description of the problem
90             with code and data examples if at all possible.
91              
92             =head2 Reporting Bugs
93              
94             Report bugs to the Bioperl bug tracking system to help us keep track
95             the bugs and their resolution. Bug reports can be submitted via the web:
96              
97             http://redmine.open-bio.org/projects/bioperl/
98              
99             =head1 AUTHOR - Albert Vilella
100              
101             Email idontlikespam@hotmail.com
102              
103             =head1 APPENDIX
104              
105             The rest of the documentation details each of the object
106             methods. Internal methods are usually preceded with a _
107              
108             =cut
109              
110             package Bio::Tools::Run::Alignment::Kalign;
111              
112 1         71 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
113             @KALIGN_PARAMS @KALIGN_SWITCHES %OK_FIELD
114 1     1   117116 );
  1         2  
115 1     1   4 use strict;
  1         2  
  1         16  
116 1     1   361 use Bio::Seq;
  1         41871  
  1         27  
117 1     1   318 use Bio::SeqIO;
  1         17275  
  1         32  
118 1     1   451 use Bio::SimpleAlign;
  1         25742  
  1         45  
119 1     1   427 use Bio::AlignIO;
  1         1176  
  1         24  
120 1     1   6 use Bio::Root::Root;
  1         2  
  1         12  
121 1     1   4 use Bio::Root::IO;
  1         2  
  1         15  
122 1     1   245 use Bio::Factory::ApplicationFactoryI;
  1         123  
  1         21  
123 1     1   305 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         70  
124             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
125             Bio::Factory::ApplicationFactoryI);
126              
127              
128             BEGIN {
129 1     1   6 %DEFAULTS = ( 'AFORMAT' => 'fasta' );
130 1         3 @KALIGN_PARAMS = qw(IN OUT GAPOPEN GAPEXTENSION TERMINAL_GAP_EXTENSION_PENALTY MATRIX_BONUS
131             SORT FEATURE DISTANCE TREE ZCUTOFF FORMAT
132             MAXMB MAXHOURS MAXITERS);
133 1         2 @KALIGN_SWITCHES = qw(QUIET);
134              
135             # Authorize attribute fields
136 1         2 foreach my $attr ( @KALIGN_PARAMS, @KALIGN_SWITCHES ) {
137 16         1155 $OK_FIELD{$attr}++; }
138             }
139              
140             =head2 program_name
141              
142             Title : program_name
143             Usage : $factory->program_name()
144             Function: holds the program name
145             Returns: string
146             Args : None
147              
148             =cut
149              
150             sub program_name {
151 6     6 1 24 return 'kalign';
152             }
153              
154             =head2 program_dir
155              
156             Title : program_dir
157             Usage : $factory->program_dir(@params)
158             Function: returns the program directory, obtained from ENV variable.
159             Returns: string
160             Args :
161              
162             =cut
163              
164             sub program_dir {
165 3 50   3 1 11 return Bio::Root::IO->catfile($ENV{KALIGNDIR}) if $ENV{KALIGNDIR};
166             }
167              
168             =head2 new
169              
170             Title : new
171             Usage : my $kalign = Bio::Tools::Run::Alignment::Kalign->new();
172             Function: Constructor
173             Returns : Bio::Tools::Run::Alignment::Kalign
174             Args : -outfile_name => $outname
175              
176              
177             =cut
178              
179             sub new {
180 1     1 1 83 my ($class,@args) = @_;
181 1         3 my( @kalign_args, @obj_args);
182 1         4 while( my $arg = shift @args ) {
183 0 0       0 if( $arg =~ /^-/ ) {
184 0         0 push @obj_args, $arg, shift @args;
185             } else {
186 0         0 push @kalign_args,$arg, shift @args;
187             }
188             }
189 1         10 my $self = $class->SUPER::new(@obj_args);
190            
191 1         19 my ($on) = $self->_rearrange([qw(OUTFILE_NAME)],@obj_args);
192            
193 1   50     18 $self->outfile_name($on || '');
194 1         2 my ($attr, $value);
195             # FIXME: only tested with fasta output format right now...
196 1         9 $self->aformat($DEFAULTS{'AFORMAT'});
197              
198 1         3 while ( @kalign_args) {
199 0         0 $attr = shift @kalign_args;
200 0         0 $value = shift @kalign_args;
201 0 0       0 next if( $attr =~ /^-/); # don't want named parameters
202 0         0 $self->$attr($value);
203             }
204            
205 1 50       9 if( defined $self->out ) {
206 0         0 $self->outfile_name($self->out);
207             }
208 1         3 return $self;
209             }
210              
211             sub AUTOLOAD {
212 1     1   2 my $self = shift;
213 1         2 my $attr = $AUTOLOAD;
214 1         5 $attr =~ s/.*:://;
215 1         3 $attr = uc $attr;
216             # aliasing
217 1 50       4 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
218              
219 1 50       2 $self->{$attr} = shift if @_;
220 1         4 return $self->{$attr};
221             }
222              
223             =head2 error_string
224              
225             Title : error_string
226             Usage : $obj->error_string($newval)
227             Function: Where the output from the last analysus run is stored.
228             Returns : value of error_string
229             Args : newvalue (optional)
230              
231              
232             =cut
233              
234             sub error_string{
235 0     0 1 0 my ($self,$value) = @_;
236 0 0       0 if( defined $value) {
237 0         0 $self->{'error_string'} = $value;
238             }
239 0         0 return $self->{'error_string'};
240              
241             }
242              
243             =head2 version
244              
245             Title : version
246             Usage : exit if $prog->version() < 2
247             Function: Determine the version number of the program
248             Example :
249             Returns : float or undef
250             Args : none
251              
252             =cut
253              
254             sub version {
255 0     0 1 0 my ($self) = @_;
256 0         0 my $exe;
257             # Kalign version 2.01, Copyright (C) 2004, 2005, 2006 Timo Lassmann
258 0 0       0 return undef unless $exe = $self->executable;
259 0         0 my $string = `$exe 2>&1` ;
260 0         0 $string =~ /Kalign\s+version\s+(\d+\.\d+)/m;
261 0   0     0 return $1 || undef;
262             }
263              
264             =head2 run
265              
266             Title : run
267             Usage : my $output = $application->run(\@seqs);
268             Function: Generic run of an application
269             Returns : Bio::SimpleAlign object
270             Args : Arrayref of Bio::PrimarySeqI objects or
271             a filename to run on
272              
273             =cut
274              
275             sub run {
276 0     0 1 0 my $self = shift;
277 0         0 return $self->align(shift);
278             }
279              
280             =head2 align
281              
282             Title : align
283             Usage :
284             $inputfilename = 't/data/cysprot.fa';
285             $aln = $factory->align($inputfilename);
286             or
287             $seq_array_ref = \@seq_array;
288             # @seq_array is array of Seq objs
289             $aln = $factory->align($seq_array_ref);
290             Function: Perform a multiple sequence alignment
291             Returns : Reference to a SimpleAlign object containing the
292             sequence alignment.
293             Args : Name of a file containing a set of unaligned fasta sequences
294             or else an array of references to Bio::Seq objects.
295              
296             Throws an exception if argument is not either a string (eg a
297             filename) or a reference to an array of Bio::Seq objects. If
298             argument is string, throws exception if file corresponding to string
299             name can not be found. If argument is Bio::Seq array, throws
300             exception if less than two sequence objects are in array.
301              
302             =cut
303              
304             sub align {
305 0     0 1 0 my ($self,$input) = @_;
306             # Create input file pointer
307 0         0 $self->io->_io_cleanup();
308 0         0 my $infilename;
309 0 0       0 if( defined $input ) {
    0          
310 0         0 $infilename = $self->_setinput($input);
311             } elsif( defined $self->in ) {
312 0         0 $infilename = $self->_setinput($self->in);
313             } else {
314 0         0 $self->throw("No inputdata provided\n");
315             }
316 0 0       0 if (! $infilename) {
317 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
318             }
319              
320 0         0 my $param_string = $self->_setparams();
321              
322             # run kalign
323 0         0 return &_run($self, $infilename, $param_string);
324             }
325              
326             =head2 _run
327              
328             Title : _run
329             Usage : Internal function, not to be called directly
330             Function: makes actual system call to kalign program
331             Example :
332             Returns : nothing; kalign output is written to a
333             temporary file OR specified output file
334             Args : Name of a file containing a set of unaligned fasta sequences
335             and hash of parameters to be passed to kalign
336              
337              
338             =cut
339              
340             sub _run {
341 0     0   0 my ($self,$infilename,$params) = @_;
342 0         0 my $commandstring = $self->executable." -in $infilename $params";
343            
344 0         0 $self->debug( "kalign command = $commandstring \n");
345              
346 0         0 my $status = system($commandstring);
347 0         0 my $outfile = $self->outfile_name();
348 0 0 0     0 if( !-e $outfile || -z $outfile ) {
349 0         0 $self->warn( "Kalign call crashed: $? [command $commandstring]\n");
350 0         0 return undef;
351             }
352              
353 0         0 my $in = Bio::AlignIO->new('-file' => $outfile,
354             '-format' => $self->aformat);
355 0         0 my $aln = $in->next_aln();
356 0         0 return $aln;
357             }
358              
359              
360             =head2 _setinput
361              
362             Title : _setinput
363             Usage : Internal function, not to be called directly
364             Function: Create input file for kalign program
365             Example :
366             Returns : name of file containing kalign data input AND
367             Args : Arrayref of Seqs or input file name
368              
369              
370             =cut
371              
372             sub _setinput {
373 0     0   0 my ($self,$input) = @_;
374 0         0 my ($infilename, $seq, $temp, $tfh);
375 0 0       0 if (! ref $input) {
    0          
376             # check that file exists or throw
377 0         0 $infilename = $input;
378 0 0       0 unless (-e $input) {return 0;}
  0         0  
379             # let's peek and guess
380 0 0       0 open(IN,$infilename) || $self->throw("Cannot open $infilename");
381 0         0 my $header;
382 0         0 while( defined ($header = ) ) {
383 0 0       0 last if $header !~ /^\s+$/;
384             }
385 0         0 close(IN);
386 0 0       0 if ( $header !~ /^>\s*\S+/ ){
387 0         0 $self->throw("Need to provide a FASTA format file to kalign!");
388             }
389 0         0 return ($infilename);
390             } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
391             # array of BioSeq objects...
392             # Open temporary file for both reading & writing of array
393 0         0 ($tfh,$infilename) = $self->io->tempfile();
394 0 0       0 if( ! ref($input->[0]) ) {
    0          
395 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
396 0         0 return undef;
397             } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
398 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
399             '-format' => 'fasta');
400 0         0 my $ct = 1;
401 0         0 foreach $seq (@$input) {
402 0 0 0     0 return 0 unless ( ref($seq) &&
403             $seq->isa("Bio::PrimarySeqI") );
404 0 0 0     0 if( ! defined $seq->display_id ||
405             $seq->display_id =~ /^\s+$/) {
406 0         0 $seq->display_id( "Seq".$ct++);
407             }
408 0         0 $temp->write_seq($seq);
409             }
410 0         0 $temp->close();
411 0         0 undef $temp;
412 0         0 close($tfh);
413 0         0 $tfh = undef;
414             } else {
415 0         0 $self->warn( "got an array ref with 1st entry ".
416             $input->[0].
417             " and don't know what to do with it\n");
418             }
419 0         0 return ($infilename);
420             } else {
421 0         0 $self->warn("Got $input and don't know what to do with it\n");
422             }
423 0         0 return 0;
424             }
425              
426              
427             =head2 _setparams
428              
429             Title : _setparams
430             Usage : Internal function, not to be called directly
431             Function: Create parameter inputs for kalign program
432             Example :
433             Returns : parameter string to be passed to kalign
434             during align or profile_align
435             Args : name of calling object
436              
437             =cut
438              
439             sub _setparams {
440 0     0   0 my ($self) = @_;
441 0         0 my ($attr, $value,$param_string);
442 0         0 $param_string = '';
443 0         0 my $laststr;
444 0         0 for $attr ( @KALIGN_PARAMS ) {
445 0         0 $value = $self->$attr();
446 0 0       0 next unless (defined $value);
447 0         0 my $attr_key = lc $attr;
448 0         0 $attr_key = ' -'.$attr_key;
449 0         0 $param_string .= $attr_key .' '.$value;
450              
451             }
452 0         0 for $attr ( @KALIGN_SWITCHES) {
453 0         0 $value = $self->$attr();
454 0 0       0 next unless ($value);
455 0         0 my $attr_key = lc $attr; #put switches in format expected by tcoffee
456 0         0 $attr_key = ' -'.$attr_key;
457 0         0 $param_string .= $attr_key ;
458             }
459              
460             # Set default output file if no explicit output file selected
461 0 0       0 unless ($self->outfile_name ) {
462 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
463 0         0 close($tfh);
464 0         0 undef $tfh;
465 0         0 $self->outfile_name($outfile);
466             }
467 0         0 $param_string .= " -out ".$self->outfile_name;
468            
469 0 0 0     0 if ($self->quiet() || $self->verbose < 0) {
470 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
471 0         0 $param_string .= " 2> $null";
472             }
473 0         0 return $param_string;
474             }
475              
476             =head2 aformat
477              
478             Title : aformat
479             Usage : my $alignmentformat = $self->aformat();
480             Function: Get/Set alignment format
481             Returns : string
482             Args : string
483              
484              
485             =cut
486              
487             sub aformat{
488 1     1 1 2 my $self = shift;
489 1 50       5 $self->{'_aformat'} = shift if @_;
490 1         2 return $self->{'_aformat'};
491             }
492              
493             =head1 Bio::Tools::Run::BaseWrapper methods
494              
495             =cut
496              
497             =head2 no_param_checks
498              
499             Title : no_param_checks
500             Usage : $obj->no_param_checks($newval)
501             Function: Boolean flag as to whether or not we should
502             trust the sanity checks for parameter values
503             Returns : value of no_param_checks
504             Args : newvalue (optional)
505              
506              
507             =cut
508              
509             =head2 save_tempfiles
510              
511             Title : save_tempfiles
512             Usage : $obj->save_tempfiles($newval)
513             Function:
514             Returns : value of save_tempfiles
515             Args : newvalue (optional)
516              
517              
518             =cut
519              
520             =head2 outfile_name
521              
522             Title : outfile_name
523             Usage : my $outfile = $kalign->outfile_name();
524             Function: Get/Set the name of the output file for this run
525             (if you wanted to do something special)
526             Returns : string
527             Args : [optional] string to set value to
528              
529              
530             =cut
531              
532              
533             =head2 tempdir
534              
535             Title : tempdir
536             Usage : my $tmpdir = $self->tempdir();
537             Function: Retrieve a temporary directory name (which is created)
538             Returns : string which is the name of the temporary directory
539             Args : none
540              
541              
542             =cut
543              
544             =head2 cleanup
545              
546             Title : cleanup
547             Usage : $kalign->cleanup();
548             Function: Will cleanup the tempdir directory
549             Returns : none
550             Args : none
551              
552              
553             =cut
554              
555             =head2 io
556              
557             Title : io
558             Usage : $obj->io($newval)
559             Function: Gets a L object
560             Returns : L
561             Args : none
562              
563              
564             =cut
565              
566             1; # Needed to keep compiler happy