File Coverage

blib/lib/Bio/Tools/Run/Alignment/Pal2Nal.pm
Criterion Covered Total %
statement 18 72 25.0
branch 0 20 0.0
condition 0 8 0.0
subroutine 7 10 70.0
pod 4 4 100.0
total 29 114 25.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Alignment::Pal2Nal
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Alignment::Pal2Nal - Wrapper for Pal2Nal
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::Alignment::Pal2Nal;
21              
22             # Make a Pal2Nal factory
23             $factory = Bio::Tools::Run::Alignment::Pal2Nal->new();
24              
25             # Run Pal2Nal with a protein alignment file and a multi-fasta nucleotide
26             # file
27             my $aln = $factory->run($protein_alignfilename, $nucleotide_filename);
28              
29             # or with Bioperl objects
30             $aln = $factory->run($protein_bio_simplalign, [$nucleotide_bio_seq1,
31             $nucleotide_bio_seq2]);
32              
33             # combinations of files/ objects are possible
34              
35             # $aln isa Bio::SimpleAlign of the nucleotide sequences aligned according to
36             # the protein alignment
37              
38             =head1 DESCRIPTION
39              
40             This is a wrapper for running the Pal2Nal perl script by Mikita Suyama. You
41             can get details here: http://coot.embl.de/pal2nal/. Pal2Nal is used for aligning
42             a set of nucleotide sequences based on an alignment of their translations.
43              
44             You can try supplying normal pal2nal command-line arguments to new(), eg.
45             new() or calling arg-named methods (excluding the initial hyphen, eg.
46             $factory->(1) to set the - arg).
47              
48              
49             You will need to enable this Pal2Nal wrapper to find the pal2nal.pl script.
50             This can be done in (at least) three ways:
51              
52             1. Make sure the script is in your path.
53             2. Define an environmental variable PAL2NALDIR which is a
54             directory which contains the script:
55             In bash:
56              
57             export PAL2NALDIR=/home/username/pal2nal/
58              
59             In csh/tcsh:
60              
61             setenv PAL2NALDIR /home/username/pal2nal
62              
63             3. Include a definition of an environmental variable PAL2NALDIR in
64             every script that will use this Pal2Nal wrapper module, e.g.:
65              
66             BEGIN { $ENV{PAL2NALDIR} = '/home/username/pal2nal/' }
67             use Bio::Tools::Run::Alignment::Pal2Nal;
68              
69             =head1 FEEDBACK
70              
71             =head2 Mailing Lists
72              
73             User feedback is an integral part of the evolution of this and other
74             Bioperl modules. Send your comments and suggestions preferably to
75             the Bioperl mailing list. Your participation is much appreciated.
76              
77             bioperl-l@bioperl.org - General discussion
78             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79              
80             =head2 Support
81              
82             Please direct usage questions or support issues to the mailing list:
83              
84             I
85              
86             rather than to the module maintainer directly. Many experienced and
87             reponsive experts will be able look at the problem and quickly
88             address it. Please include a thorough description of the problem
89             with code and data examples if at all possible.
90              
91             =head2 Reporting Bugs
92              
93             Report bugs to the Bioperl bug tracking system to help us keep track
94             of the bugs and their resolution. Bug reports can be submitted via
95             the web:
96              
97             http://redmine.open-bio.org/projects/bioperl/
98              
99             =head1 AUTHOR - Sendu Bala
100              
101             Email bix@sendu.me.uk
102              
103             =head1 APPENDIX
104              
105             The rest of the documentation details each of the object methods.
106             Internal methods are usually preceded with a _
107              
108             =cut
109              
110             package Bio::Tools::Run::Alignment::Pal2Nal;
111 1     1   104799 use strict;
  1         1  
  1         23  
112              
113 1     1   432 use Bio::AlignIO;
  1         86601  
  1         27  
114 1     1   472 use Bio::SeqIO;
  1         6228  
  1         31  
115              
116 1     1   5 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
  1         2  
  1         412  
117              
118             our $PROGRAM_NAME = 'pal2nal.pl';
119             our $PROGRAM_DIR = $ENV{'PAL2NALDIR'};
120              
121             # methods for the pal2nal args we support
122             our @PARAMS = qw(codontable);
123             our @SWITCHES = qw(blockonly nogap nomismatch);
124              
125             # just to be explicit, args we don't support (yet) or we handle ourselves
126             our @UNSUPPORTED = qw(output html h nostderr);
127              
128             =head2 program_name
129              
130             Title : program_name
131             Usage : $factory>program_name()
132             Function: holds the program name
133             Returns : string
134             Args : None
135              
136             =cut
137              
138             sub program_name {
139 7     7 1 26 return $PROGRAM_NAME;
140             }
141              
142             =head2 program_dir
143              
144             Title : program_dir
145             Usage : $factory->program_dir(@params)
146             Function: returns the program directory, obtained from ENV variable.
147             Returns : string
148             Args : None
149              
150             =cut
151              
152             sub program_dir {
153 4     4 1 426 return $PROGRAM_DIR;
154             }
155              
156             =head2 new
157              
158             Title : new
159             Usage : $factory = Bio::Tools::Run::Alignment::Pal2Nal->new()
160             Function: creates a new Pal2Nal factory.
161             Returns : Bio::Tools::Run::Alignment::Pal2Nal
162             Args : Most options understood by pal2nal.pl can be supplied as key =>
163             value pairs.
164              
165             These options can NOT be used with this wrapper:
166             -output
167             -html
168             -h
169             -nostderr
170              
171             =cut
172              
173             sub new {
174 1     1 1 110 my ($class, @args) = @_;
175 1         15 my $self = $class->SUPER::new(@args);
176            
177 1         21 $self->_set_from_args(\@args, -methods => [@PARAMS, @SWITCHES, 'quiet'],
178             -create => 1);
179            
180 1         483 return $self;
181             }
182              
183             =head2 run
184              
185             Title : run
186             Usage : $result = $factory->run($protein_align_file, $multi_fasta_nucleotide);
187             -or-
188             $result = $factory->run($prot_align_object, [$bioseq_object1, ...]);
189             Function: Runs pal2nal on a protein alignment and set of nucleotide sequences.
190             Returns : Bio::SimpleAlign;
191             Args : The first argument represents a protein alignment, the second
192             argument a set of nucleotide sequences.
193             The alignment can be provided as an alignment file readable by
194             Bio::AlignIO, or a Bio::Align::AlignI compliant object (eg. a
195             Bio::SimpleAlign).
196             The nucleotide sequences can be provided as a single filename of a
197             fasta file containing multiple nucleotide sequences, or an array ref
198             of filenames, each file containing one sequence. Alternatively, an
199             array ref of Bio::PrimarySeqI compliant objects can be supplied.
200            
201             In all cases, the protein alignment sequence names must correspond to
202             the names of the supplied nucleotide sequences.
203              
204             =cut
205              
206             sub run {
207 0     0 1   my ($self, $aln, $nucs) = @_;
208            
209 0 0 0       ($aln && $nucs) || $self->throw("alignment and nucleotides must be supplied");
210 0           $aln = $self->_alignment($aln);
211            
212             # gaps must be -, not .
213 0           my $fixed_aln = Bio::SimpleAlign->new();
214 0           foreach my $seq ($aln->each_seq) {
215 0           my $str = $seq->seq;
216 0           $str =~ s/\./-/g;
217 0           $fixed_aln->add_seq(Bio::LocatableSeq->new(-id => $seq->id, -seq => $str));
218             }
219 0           $self->_alignment($fixed_aln);
220            
221 0           my $nucs_file;
222 0 0         if (-e $nucs) {
    0          
223 0           $nucs_file = $nucs;
224             }
225             elsif (ref($nucs) eq 'ARRAY') {
226 0 0         (my $tempfh, $nucs_file) = $self->io->tempfile('-dir' => $self->tempdir(), UNLINK => ($self->save_tempfiles ? 0 : 1));
227 0           close($tempfh);
228 0           my $sout = Bio::SeqIO->new(-file => ">".$nucs_file, -format => 'fasta');
229            
230 0           foreach my $nuc (@{$nucs}) {
  0            
231 0 0 0       if (-e $nuc) {
    0          
232 0           my $sin = Bio::SeqIO->new(-file => $nuc);
233 0           while (my $nuc_seq = $sin->next_seq) {
234 0           $sout->write_seq($nuc_seq);
235             }
236             }
237             elsif (ref($nuc) && $nuc->isa('Bio::PrimarySeqI')) {
238 0           $sout->write_seq($nuc);
239             }
240             else {
241 0           $self->throw("Don't understand nucleotide argument '$nuc'");
242             }
243             }
244             }
245             else {
246 0           $self->throw("Don't understand nucleotide argument '$nucs'");
247             }
248            
249 0           return $self->_run($nucs_file);
250             }
251              
252             sub _run {
253 0     0     my ($self, $nucs_file) = @_;
254            
255 0   0       my $exe = $self->executable || return;
256            
257 0           my $aln_file = $self->_write_alignment;
258            
259 0 0         my ($rfh, $result_file) = $self->io->tempfile('-dir' => $self->tempdir(), UNLINK => ($self->save_tempfiles ? 0 : 1));
260 0 0         my ($efh, $error_file) = $self->io->tempfile('-dir' => $self->tempdir(), UNLINK => ($self->save_tempfiles ? 0 : 1));
261 0           close($rfh);
262 0           undef $rfh;
263 0           close($efh);
264 0           undef $efh;
265 0           my $command = $exe.$self->_setparams($aln_file, $nucs_file, $result_file, $error_file);
266 0           $self->debug("pal2nal command = $command\n");
267            
268 0 0         system($command) && $self->throw("pal2nal call ($command) failed: $! | $?");
269 0           open(my $errfh, '<', $error_file);
270 0           my $errors;
271 0           while (<$errfh>) {
272 0           $errors .= $_;
273             }
274 0           close($errfh);
275 0 0         $self->throw("pal2nal call ($command) had errors:\n$errors") if $errors;
276            
277 0           my $ain = Bio::AlignIO->new(-file => $result_file, -format => 'fasta');
278 0           my $aln = $ain->next_aln;
279 0           $ain->close;
280            
281 0           return $aln;
282             }
283              
284             =head2 _setparams
285              
286             Title : _setparams
287             Usage : Internal function, not to be called directly
288             Function: Creates a string of params to be used in the command string
289             Returns : string of params
290             Args : alignment and tree file names
291              
292             =cut
293              
294             sub _setparams {
295 0     0     my ($self, $aln_file, $nucs_file, $result_file, $error_file) = @_;
296            
297 0           my $param_string = ' '.$aln_file;
298 0           $param_string .= ' '.$nucs_file;
299 0           $param_string .= $self->SUPER::_setparams(-params => \@PARAMS,
300             -switches => \@SWITCHES,
301             -dash => 1);
302 0           $param_string .= ' -output fasta';
303            
304 0           $param_string .= " > $result_file 2> $error_file";
305            
306 0           return $param_string;
307             }
308              
309             1;