File Coverage

blib/lib/Bio/Tools/Run/Alignment/TCoffee.pm
Criterion Covered Total %
statement 71 258 27.5
branch 12 102 11.7
condition 0 41 0.0
subroutine 19 27 70.3
pod 10 10 100.0
total 112 438 25.5


line stmt bran cond sub pod time code
1             package Bio::Tools::Run::Alignment::TCoffee;
2             $Bio::Tools::Run::Alignment::TCoffee::VERSION = '1.7.2';
3 1     1   186956 use utf8;
  1         4  
  1         9  
4 1     1   43 use strict;
  1         3  
  1         30  
5 1     1   9 use warnings;
  1         5  
  1         60  
6              
7 1         137 use vars qw($AUTOLOAD @ISA $PROGRAM_NAME $PROGRAM_DIR %DEFAULTS
8             @TCOFFEE_PARAMS @TCOFFEE_SWITCHES @OTHER_SWITCHES %OK_FIELD
9 1     1   9 );
  1         4  
10              
11 1     1   9 use Cwd;
  1         4  
  1         83  
12 1     1   8 use Bio::Seq;
  1         3  
  1         36  
13 1     1   8 use Bio::SeqIO;
  1         4  
  1         30  
14 1     1   8 use Bio::SimpleAlign;
  1         3  
  1         36  
15 1     1   7 use Bio::AlignIO;
  1         9  
  1         37  
16 1     1   15 use Bio::Root::IO;
  1         3  
  1         38  
17 1     1   378 use Bio::Factory::ApplicationFactoryI;
  1         174  
  1         34  
18 1     1   444 use Bio::Tools::Run::WrapperBase;
  1         2033  
  1         227  
19              
20             @ISA = qw(Bio::Tools::Run::WrapperBase
21             Bio::Factory::ApplicationFactoryI);
22              
23             # ABSTRACT: Object for the calculation of a multiple sequence alignment from a set of unaligned sequences or alignments using the TCoffee program
24             # AUTHOR: Jason Stajich
25             # AUTHOR: Peter Schattner
26             # OWNER: Jason Stajich
27             # OWNER: Peter Schattner
28             # LICENSE: Perl_5
29              
30              
31             # You will need to enable TCoffee to find the tcoffee program. This can be done
32             # in (at least) twp ways:
33             # 1. define an environmental variable TCOFFEE:
34             # export TCOFFEEDIR=/home/progs/tcoffee or
35             # 2. include a definition of an environmental variable TCOFFEEDIR
36             # in every script that will
37             # use Bio::Tools::Run::Alignment::TCoffee.pm.
38             # BEGIN {$ENV{TCOFFEEDIR} = '/home/progs/tcoffee'; }
39              
40             BEGIN {
41 1     1   5 $PROGRAM_NAME = 't_coffee';
42 1         5 $PROGRAM_DIR = $ENV{'TCOFFEEDIR'};
43 1         7 %DEFAULTS = ( 'MATRIX' => 'blosum',
44             'OUTPUT' => 'clustalw',
45             'AFORMAT'=> 'msf',
46             'METHODS' => [qw(lalign_id_pair clustalw_pair)]
47             );
48              
49              
50 1         7 @TCOFFEE_PARAMS = qw(IN TYPE PARAMETERS DO_NORMALISE EXTEND
51             DP_MODE KTUPLE NDIAGS DIAG_MODE SIM_MATRIX
52             MATRIX GAPOPEN GAPEXT COSMETIC_PENALTY TG_MODE
53             WEIGHT SEQ_TO_ALIGN NEWTREE USETREE TREE_MODE
54             OUTFILE OUTPUT CASE CPU OUT_LIB OUTORDER SEQNOS
55             RUN_NAME CONVERT
56             );
57              
58 1         4 @TCOFFEE_SWITCHES = qw(QUICKTREE);
59              
60 1         4 @OTHER_SWITCHES = qw(QUIET ALIGN KEEPDND);
61              
62             # Authorize attribute fields
63 1         4 foreach my $attr ( @TCOFFEE_PARAMS, @TCOFFEE_SWITCHES, @OTHER_SWITCHES ) {
64 33         3431 $OK_FIELD{$attr}++; }
65             }
66              
67              
68             sub program_name {
69 6     6 1 413 return $PROGRAM_NAME;
70             }
71              
72              
73             sub program_dir {
74 3     3 1 95 return $PROGRAM_DIR;
75             }
76              
77             sub new {
78 1     1 1 128 my ($class,@args) = @_;
79 1         17 my $self = $class->SUPER::new(@args);
80 1         13 my ($attr, $value);
81              
82 1         5 while (@args) {
83 0         0 $attr = shift @args;
84 0         0 $value = shift @args;
85 0 0       0 next if( $attr =~ /^-/); # don't want named parameters
86 0         0 $self->$attr($value);
87             }
88 1 50       14 $self->matrix($DEFAULTS{'MATRIX'}) unless( $self->matrix );
89 1 50       63 $self->output($DEFAULTS{'OUTPUT'}) unless( $self->output );
90 1 50       4 $self->methods($DEFAULTS{'METHODS'}) unless $self->methods;
91             # $self->aformat($DEFAULTS{'AFORMAT'}) unless $self->aformat;
92              
93 1         3 return $self;
94             }
95              
96             sub AUTOLOAD {
97 7     7   3766 my $self = shift;
98 7         17 my $attr = $AUTOLOAD;
99 7         41 $attr =~ s/.*:://;
100 7         23 $attr = uc $attr;
101             # aliasing
102 7 50       22 $attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME';
103 7 50       26 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
104              
105 7 100       24 $self->{$attr} = shift if @_;
106 7         99 return $self->{$attr};
107             }
108              
109              
110             sub error_string{
111 0     0 1 0 my ($self,$value) = @_;
112 0 0       0 if( defined $value) {
113 0         0 $self->{'error_string'} = $value;
114             }
115 0         0 return $self->{'error_string'};
116              
117             }
118              
119              
120             sub version {
121 1     1 1 649 my ($self) = @_;
122 1         5 my $exe;
123 1 0       10 return undef unless $exe = $self->executable;
124 0         0 my $string = `$exe -quiet=stdout 2>&1` ;
125 0         0 $string =~ /Version_([\d.]+)/;
126 0   0     0 return $1 || undef;
127              
128             }
129              
130              
131             sub run{
132 0     0 1 0 my ($self,@args) = @_;
133 0         0 my ($type,$seq,$profile) = $self->_rearrange([qw(TYPE
134             SEQ
135             PROFILE)],
136             @args);
137 0 0       0 if( $type =~ /align/i ) {
    0          
138 0         0 return $self->align($seq);
139             } elsif( $type =~ /profile/i ) {
140 0         0 return $self->profile_align($profile,$seq);
141             } else {
142 0         0 $self->warn("unrecognized alignment type $type\n");
143             }
144 0         0 return undef;
145             }
146              
147              
148             sub align {
149 0     0 1 0 my ($self,$input) = @_;
150             # Create input file pointer
151 0         0 $self->io->_io_cleanup();
152 0         0 my ($infilename,$type) = $self->_setinput($input);
153 0 0       0 if (!$infilename) {
154 0         0 $self->throw("Bad input data or less than 2 sequences in $input !");
155             }
156              
157 0         0 my $param_string = $self->_setparams();
158              
159             # run tcoffee
160 0         0 return $self->_run('align', [$infilename,$type], $param_string);
161             }
162              
163             #################################################
164              
165              
166             sub profile_align {
167 0     0 1 0 my $self = shift;
168 0         0 my $input1 = shift;
169 0         0 my $input2 = shift;
170              
171 0         0 my ($temp,$infilename1,$infilename2,$type1,$type2,$input,$seq);
172              
173 0         0 $self->io->_io_cleanup();
174             # Create input file pointers
175 0         0 ($infilename1,$type1) = $self->_setinput($input1);
176 0         0 ($infilename2,$type2) = $self->_setinput($input2);
177 0 0       0 unless ($type1) {
178 0         0 $self->throw("Unknown type for first argument");
179             }
180 0 0       0 unless ($type2) {
181 0         0 $self->throw("Unknown type for second argument")
182             }
183              
184 0 0 0     0 if (!$infilename1 || !$infilename2) {
185 0         0 $self->throw("Bad input data: $input1 or $input2 !");
186             }
187              
188 0         0 my $param_string = $self->_setparams();
189             # run tcoffee
190 0         0 my $aln = $self->_run('profile-aln',
191             [$infilename1,$type1],
192             [$infilename2,$type2],
193             $param_string)
194             ;
195             }
196             #################################################
197              
198              
199             sub _run {
200 0     0   0 my ($infilename, $infile1,$infile2) = ('','','');
201 0         0 my $self = shift;
202 0         0 my $command = shift;
203 0         0 my $instring;
204 0 0       0 if ($command =~ /align/) {
205 0         0 my $infile = shift ;
206 0         0 my $type;
207 0         0 ($infilename,$type) = @$infile;
208 0         0 $instring = '-in='.join(',',($infilename, 'X'.$self->matrix,
209             $self->methods));
210             }
211 0 0       0 if ($command =~ /profile/) {
212 0         0 my $in1 = shift ;
213 0         0 my $in2 = shift ;
214 0         0 my ($type1,$type2);
215 0         0 ($infile1,$type1) = @$in1;
216 0         0 ($infile2,$type2) = @$in2;
217             # in later versions (tested on 5.72 and 7.54) the API for profile
218             # alignment changed. This attempts to do the right thing for older
219             # versions but corrects for newer ones
220 0 0 0     0 if ($self->version && $self->version < 5) {
221             # this breaks severely on newer TCoffee (>= v5)
222 0 0 0     0 unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i) ) {
223             $instring = '-in='.join(',',
224             ($type2.$infile2),
225             'X'.$self->matrix,
226 0         0 (map {'M'.$_} $self->methods)
  0         0  
227             );
228 0         0 $instring .= ' -profile='.$infile1;
229             } else {
230             $instring = '-in='.join(',',(
231             $type1.$infile1,
232             $type2.$infile2,
233 0         0 (map {'M'.$_} $self->methods)
  0         0  
234             )
235             );
236             }
237             } else {
238 0 0       0 if ($type2 eq 'S') {
239             # second infile is a sequence, not an alignment
240 0         0 $instring .= ' -profile='.join(',',$infile1);
241 0         0 $instring .= ' -seq='.join(',',$infile2);
242             } else {
243 0         0 $instring .= ' -profile='.join(',',$infile1,$infile2);
244             }
245 0 0 0     0 $instring .= ' -matrix='.$self->matrix unless (($self->matrix =~ /none/i) || ($self->matrix =~ /null/i)) ;
246 0 0       0 $instring .= ' -method='.join(',',$self->methods) if ($self->methods) ;
247             }
248             }
249 0         0 my $param_string = shift;
250             # my ($paramfh,$parameterFile) = $self->io->tempfile;
251             # print $paramfh ( "$instring\n-output=gcg$param_string") ;
252             # close($paramfh);
253             # my $commandstring = "t_coffee -output=gcg -parameters $parameterFile" ; ##MJL
254              
255 0         0 my $commandstring = $self->executable." $instring $param_string";
256              
257             #$self->debug( "tcoffee command = $commandstring \n");
258              
259 0         0 my $status = system($commandstring);
260 0         0 my $outfile = $self->outfile();
261              
262 0 0 0     0 if( !-e $outfile || -z $outfile ) {
263 0         0 $self->warn( "TCoffee call crashed: $? [command $commandstring]\n");
264 0         0 return undef;
265             }
266              
267             # retrieve alignment (Note: MSF format for AlignIO = GCG format of
268             # tcoffee)
269              
270 0         0 my $in = Bio::AlignIO->new('-file' => $outfile, '-format' =>
271             $self->output);
272 0         0 my $aln = $in->next_aln();
273              
274             # Replace file suffix with dnd to find name of dendrogram file(s) to delete
275 0 0       0 if( ! $self->keepdnd ) {
276 0         0 foreach my $f ( $infilename, $infile1, $infile2 ) {
277 0 0 0     0 next if( !defined $f || $f eq '');
278 0         0 $f =~ s/\.[^\.]*$// ;
279             # because TCoffee writes these files to the CWD
280 0 0       0 if( $Bio::Root::IO::PATHSEP ) {
281 0         0 my @line = split(/$Bio::Root::IO::PATHSEP/, $f);
282 0         0 $f = pop @line;
283             } else {
284 0         0 (undef, undef, $f) = File::Spec->splitpath($f);
285             }
286 0 0       0 unlink $f .'.dnd' if( $f ne '' );
287             }
288             }
289 0         0 return $aln;
290             }
291              
292              
293              
294             sub _setinput {
295 0     0   0 my ($self,$input) = @_;
296 0         0 my ($infilename, $seq, $temp, $tfh);
297             # If $input is not a reference it better be the name of a
298             # file with the sequence/ alignment data...
299 0         0 my $type = '';
300 0 0       0 if (! ref $input) {
    0          
    0          
    0          
301             # check that file exists or throw
302 0         0 $infilename = $input;
303 0 0       0 unless (-e $input) {return 0;}
  0         0  
304             # let's peek and guess
305 0 0       0 open(my $IN,$infilename) || $self->throw("Cannot open $infilename");
306 0         0 my $header = <$IN>;
307 0 0 0     0 if( $header =~ /^\s+\d+\s+\d+/ ||
      0        
308             $header =~ /Pileup/i ||
309             $header =~ /clustal/i ) { # phylip
310 0         0 $type = 'A';
311             }
312              
313             # On some systems, having filenames with / in them (ie. a file in a
314             # directory) causes t-coffee to completely fail. It warns on all systems.
315             # The -no_warning option solves this, but there is still some strange
316             # bug when doing certain profile-related things. This is magically solved
317             # by copying the profile file to a temp file in the current directory, so
318             # it its filename supplied to t-coffee contains no /
319             # (It's messy here - I just do this to /all/ input files to most easily
320             # catch all variants of providing a profile - it may only be the last
321             # form (isa("Bio::PrimarySeqI")) that causes a problem?)
322              
323 0         0 my (undef, undef, $adjustedfilename) = File::Spec->splitpath($infilename);
324 0 0       0 if ($adjustedfilename ne $infilename) {
325 0         0 my ($fh, $tempfile) = $self->io->tempfile(-dir => cwd());
326 0         0 seek($IN, 0, 0);
327 0         0 while (<$IN>) {
328 0         0 print $fh $_;
329             }
330 0         0 close($fh);
331 0         0 (undef, undef, $tempfile) = File::Spec->splitpath($tempfile);
332 0         0 $infilename = $tempfile;
333 0         0 $type = 'S';
334             }
335              
336 0         0 close($IN);
337 0         0 return ($infilename,$type);
338             } elsif (ref($input) =~ /ARRAY/i ) {
339             # $input may be an array of BioSeq objects...
340             # Open temporary file for both reading & writing of array
341 0         0 ($tfh,$infilename) = $self->io->tempfile(-dir => cwd());
342 0         0 (undef, undef, $infilename) = File::Spec->splitpath($infilename);
343 0 0       0 if( ! ref($input->[0]) ) {
    0          
    0          
344 0         0 $self->warn("passed an array ref which did not contain objects to _setinput");
345 0         0 return undef;
346             } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
347 0         0 $temp = Bio::SeqIO->new('-fh' => $tfh,
348             '-format' => 'fasta');
349 0         0 my $ct = 1;
350 0         0 foreach $seq (@$input) {
351 0 0 0     0 return 0 unless ( ref($seq) &&
352             $seq->isa("Bio::PrimarySeqI") );
353 0 0 0     0 if( ! defined $seq->display_id ||
354             $seq->display_id =~ /^\s+$/) {
355 0         0 $seq->display_id( "Seq".$ct++);
356             }
357 0         0 $temp->write_seq($seq);
358             }
359 0         0 $temp->close();
360 0         0 undef $temp;
361 0         0 close($tfh);
362 0         0 $tfh = undef;
363 0         0 $type = 'S';
364             } elsif( $input->[0]->isa('Bio::Align::AlignI' ) ) {
365 0         0 $temp = Bio::AlignIO->new('-fh' => $tfh,
366             '-format' => $self->aformat);
367 0         0 foreach my $aln (@$input) {
368 0 0 0     0 next unless ( ref($aln) &&
369             $aln->isa("Bio::Align::AlignI") );
370 0         0 $temp->write_aln($aln);
371             }
372 0         0 $temp->close();
373 0         0 undef $temp;
374 0         0 close($tfh);
375 0         0 $tfh = undef;
376 0         0 $type = 'A';
377             } else {
378 0         0 $self->warn( "got an array ref with 1st entry ".
379             $input->[0].
380             " and don't know what to do with it\n");
381             }
382 0         0 return ($infilename,$type);
383             # $input may be a SimpleAlign object.
384             } elsif ( $input->isa("Bio::Align::AlignI") ) {
385             # Open temporary file for both reading & writing of SimpleAlign object
386 0         0 ($tfh, $infilename) = $self->io->tempfile(-dir => cwd());
387 0         0 (undef, undef, $infilename) = File::Spec->splitpath($infilename);
388 0         0 $temp = Bio::AlignIO->new(-fh=>$tfh,
389             '-format' => 'clustalw');
390 0         0 $temp->write_aln($input);
391 0         0 close($tfh);
392 0         0 undef $tfh;
393 0         0 return ($infilename,'A');
394             }
395              
396             # or $input may be a single BioSeq object (to be added to
397             # a previous alignment)
398             elsif ( $input->isa("Bio::PrimarySeqI")) {
399             # Open temporary file for both reading & writing of BioSeq object
400 0         0 ($tfh,$infilename) = $self->io->tempfile(-dir => cwd());
401 0         0 (undef, undef, $infilename) = File::Spec->splitpath($infilename);
402 0         0 $temp = Bio::SeqIO->new(-fh=> $tfh, '-format' =>'Fasta');
403 0         0 $temp->write_seq($input);
404 0         0 $temp->close();
405 0         0 close($tfh);
406 0         0 undef $tfh;
407 0         0 return ($infilename,'S');
408             } else {
409 0         0 $self->warn("Got $input and don't know what to do with it\n");
410             }
411 0         0 return 0;
412             }
413              
414              
415              
416             sub _setparams {
417 0     0   0 my ($self) = @_;
418 0         0 my ($attr, $value,$param_string);
419 0         0 $param_string = '';
420 0         0 my $laststr;
421 0         0 for $attr ( @TCOFFEE_PARAMS ) {
422 0         0 $value = $self->$attr();
423 0 0       0 next unless (defined $value);
424 0         0 my $attr_key = lc $attr;
425 0 0       0 if( $attr_key =~ /matrix/ ) {
426 0         0 $self->{'_in'} = [ "X".lc($value) ];
427             } else {
428 0         0 $attr_key = ' -'.$attr_key;
429 0         0 $param_string .= $attr_key .'='.$value;
430             }
431             }
432 0         0 for $attr ( @TCOFFEE_SWITCHES) {
433 0         0 $value = $self->$attr();
434 0 0       0 next unless ($value);
435 0         0 my $attr_key = lc $attr; #put switches in format expected by tcoffee
436 0         0 $attr_key = ' -'.$attr_key;
437 0         0 $param_string .= $attr_key ;
438             }
439              
440             # Set default output file if no explicit output file selected
441 0 0       0 unless ($self->outfile ) {
442 0         0 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
443 0         0 close($tfh);
444 0         0 undef $tfh;
445 0         0 $self->outfile($outfile);
446 0         0 $param_string .= " -outfile=$outfile" ;
447             }
448              
449 0 0 0     0 if ($self->quiet() || $self->verbose < 0) { $param_string .= ' -quiet';}
  0         0  
450              
451             # -no_warning is required on some systems with certain versions or failure
452             # is guaranteed
453 0 0 0     0 if ($self->version >= 4 && $self->version < 4.7) {
454 0         0 $param_string .= ' -no_warning';
455             }
456              
457 0         0 return $param_string;
458             }
459              
460              
461             sub aformat{
462 0     0 1 0 my $self = shift;
463 0 0       0 $self->{'_aformat'} = shift if @_;
464 0         0 return $self->{'_aformat'};
465             }
466              
467              
468              
469             sub methods{
470 2     2 1 9 my ($self) = shift;
471              
472 2 50       6 @{$self->{'_methods'}} = @{shift || []} if @_;
  1 100       4  
  1         4  
473 2 100       3 return @{$self->{'_methods'} || []};
  2         13  
474             }
475              
476              
477              
478              
479              
480              
481              
482              
483              
484              
485             1;
486              
487             __END__