File Coverage

Bio/AlignIO/clustalw.pm
Criterion Covered Total %
statement 104 117 88.8
branch 38 54 70.3
condition 8 18 44.4
subroutine 8 8 100.0
pod 4 4 100.0
total 162 201 80.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::clustalw
3             #
4             # based on the Bio::SeqIO modules
5             # by Ewan Birney
6             # and Lincoln Stein
7             # and the Bio::SimpleAlign module of Ewan Birney
8             #
9             # Copyright Peter Schattner
10             #
11             # You may distribute this module under the same terms as perl itself
12             # History
13             # September 5, 2000
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::AlignIO::clustalw - clustalw sequence input/output stream
19              
20             =head1 SYNOPSIS
21              
22             Do not use this module directly. Use it via the Bio::AlignIO class.
23              
24             =head1 DESCRIPTION
25              
26             This object can transform Bio::Align::AlignI objects to and from clustalw
27             files.
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to one
35             of the Bioperl mailing lists. Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Support
41              
42             Please direct usage questions or support issues to the mailing list:
43              
44             I
45              
46             rather than to the module maintainer directly. Many experienced and
47             reponsive experts will be able look at the problem and quickly
48             address it. Please include a thorough description of the problem
49             with code and data examples if at all possible.
50              
51             =head2 Reporting Bugs
52              
53             Report bugs to the Bioperl bug tracking system to help us keep track
54             the bugs and their resolution. Bug reports can be submitted via the
55             web:
56              
57             https://github.com/bioperl/bioperl-live/issues
58              
59             =head1 AUTHORS - Peter Schattner
60              
61             Email: schattner@alum.mit.edu
62              
63              
64             =head1 APPENDIX
65              
66             The rest of the documentation details each of the object
67             methods. Internal methods are usually preceded with a _
68              
69             =cut
70              
71             # Let the code begin...
72              
73             package Bio::AlignIO::clustalw;
74 10     10   490 use vars qw($LINELENGTH $CLUSTALPRINTVERSION);
  10         11  
  10         510  
75 10     10   36 use strict;
  10         447  
  10         346  
76              
77              
78             $LINELENGTH = 60;
79             $CLUSTALPRINTVERSION = '1.81';
80 10     10   33 use base qw(Bio::AlignIO);
  10         12  
  10         9348  
81              
82             =head2 new
83              
84             Title : new
85             Usage : $alignio = Bio::AlignIO->new(-format => 'clustalw',
86             -file => 'filename');
87             Function: returns a new Bio::AlignIO object to handle clustalw files
88             Returns : Bio::AlignIO::clustalw object
89             Args : -verbose => verbosity setting (-1, 0, 1, 2)
90             -file => name of file to read in or to write, with ">"
91             -fh => alternative to -file param - provide a filehandle
92             to read from or write to
93             -format => alignment format to process or produce
94             -percentages => display a percentage of identity
95             in each line of the alignment (clustalw only)
96             -linelength=> alignment output line length (default 60)
97              
98             =cut
99              
100             sub _initialize {
101 46     46   80 my ( $self, @args ) = @_;
102 46         130 $self->SUPER::_initialize(@args);
103 46         143 my ( $percentages, $ll ) =
104             $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
105 46 50       122 defined $percentages && $self->percentages($percentages);
106 46   33     180 $self->line_length( $ll || $LINELENGTH );
107             }
108              
109             =head2 next_aln
110              
111             Title : next_aln
112             Usage : $aln = $stream->next_aln()
113             Function: returns the next alignment in the stream
114             Returns : Bio::Align::AlignI object
115             Args : NONE
116              
117             See L for details
118              
119             =cut
120              
121             sub next_aln {
122 29     29 1 105 my ($self) = @_;
123 29         33 my $first_line;
124              
125 29         103 while ( $first_line = $self->_readline ) {
126 29 50       109 last if $first_line !~ /^$/;
127             }
128 29         97 $self->_pushback($first_line);
129 29 50 33     54 if ( defined( $first_line = $self->_readline )
130             && $first_line !~ /CLUSTAL/ )
131             {
132 0         0 $self->throw(
133             "trying to parse a file which does not start with a CLUSTAL header"
134             );
135             }
136 29         37 my %alignments;
137 29         95 my $aln = Bio::SimpleAlign->new(
138             -source => 'clustalw',
139             -verbose => $self->verbose
140             );
141 29         39 my $order = 0;
142 29         26 my %order;
143 29         70 $self->{_lastline} = '';
144 29         36 my ($first_block, $seen_block) = (0,0);
145 29         68 while ( defined( $_ = $self->_readline ) ) {
146 2273 100 100     5538 next if (/^\s+$/ && !$first_block);
147 2215 100       2925 if (/^\s$/) { # line contains no description
148 217         171 $seen_block = 1;
149 217         322 next;
150             }
151 1998         1346 $first_block = 1;
152             # break the loop if we come to the end of the current alignment
153             # and push back the CLUSTAL header
154 1998 50       2543 if (/CLUSTAL/) {
155 0         0 $self->_pushback($_);
156 0         0 last;
157             }
158              
159 1998         1714 my ( $seqname, $aln_line ) = ( '', '' );
160 1998 100       6313 if (/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
    100          
161              
162             # clustal 1.4 format
163 48         97 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
164              
165             # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
166             }
167             elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
168 1715         2271 ( $seqname, $aln_line ) = ( $1, $2 );
169 1715 100       2700 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
170 1         2 $self->{_lastline} = $_;
171 1         2 next;
172             }
173             }
174             else {
175 235         252 $self->{_lastline} = $_;
176 235         420 next;
177             }
178              
179 1762 100       2131 if ( !$seen_block ) {
180 211 50       282 if (exists $order{$seqname}) {
181 0         0 $self->warn("Duplicate sequence : $seqname\n".
182             "Can't guarantee alignment quality");
183             }
184             else {
185 211         301 $order{$seqname} = $order++;
186             }
187             }
188              
189 1762         4048 $alignments{$seqname} .= $aln_line;
190             }
191              
192 29         37 my ( $sname, $start, $end );
193 29         160 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
  442         384  
194 211 100       405 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
195 6         18 ( $sname, $start, $end ) = ( $1, $2, $3 );
196             }
197             else {
198 205         260 ( $sname, $start ) = ( $name, 1 );
199 205         276 my $str = $alignments{$name};
200 205         7240 $str =~ s/[^A-Za-z]//g;
201 205         226 $end = length($str);
202             }
203             my $seq = Bio::LocatableSeq->new
204             (
205 211         559 '-seq' => $alignments{$name},
206             '-display_id' => $sname,
207             '-start' => $start,
208             '-end' => $end,
209             '-alphabet' => $self->alphabet,
210             );
211 211         470 $aln->add_seq($seq);
212             }
213              
214             # not sure if this should be a default option - or we can pass in
215             # an option to do this in the future? --jason stajich
216             # $aln->map_chars('\.','-');
217            
218             # no sequences added, so just return
219 29 50       96 return $aln if $aln->num_sequences;
220 0         0 return;
221             }
222              
223             =head2 write_aln
224              
225             Title : write_aln
226             Usage : $stream->write_aln(@aln)
227             Function: writes the clustalw-format object (.aln) into the stream
228             Returns : 1 for success and 0 for error
229             Args : Bio::Align::AlignI object
230              
231             =cut
232              
233             sub write_aln {
234 2     2 1 9 my ( $self, @aln ) = @_;
235 2         2 my ( $count, $length, $seq, @seq, $tempcount, $line_len );
236 2   33     5 $line_len = $self->line_length || $LINELENGTH;
237 2         6 foreach my $aln (@aln) {
238 2 50 33     16 if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
239 0         0 $self->warn(
240             "Must provide a Bio::Align::AlignI object when calling write_aln"
241             );
242 0         0 next;
243             }
244 2         11 my $matchline = $aln->match_line;
245 2 50       22 if ( $self->force_displayname_flat ) {
246 0         0 $aln->set_displayname_flat(1);
247             }
248             $self->_print(
249 2 50       30 sprintf( "CLUSTAL W (%s) multiple sequence alignment\n\n\n",
250             $CLUSTALPRINTVERSION )
251             ) or return;
252 2         7 $length = $aln->length();
253 2         4 $count = $tempcount = 0;
254 2         6 @seq = $aln->each_seq();
255 2         4 my $max = 22;
256 2         5 foreach $seq (@seq) {
257 12 50       23 $max = length( $aln->displayname( $seq->get_nse() ) )
258             if ( length( $aln->displayname( $seq->get_nse() ) ) > $max );
259             }
260              
261 2         7 while ( $count < $length ) {
262 16         18 my ( $linesubstr, $first ) = ( '', 1 );
263 16         15 foreach $seq (@seq) {
264              
265             #
266             # Following lines are to suppress warnings
267             # if some sequences in the alignment are much longer than others.
268              
269 96         69 my ($substring);
270 96         111 my $seqchars = $seq->seq();
271             SWITCH: {
272 96 100       63 if ( length($seqchars) >= ( $count + $line_len ) ) {
  96 50       138  
273 84         77 $substring = substr( $seqchars, $count, $line_len );
274 84 100       98 if ($first) {
275 14         14 $linesubstr =
276             substr( $matchline, $count, $line_len );
277 14         14 $first = 0;
278             }
279 84         69 last SWITCH;
280             }
281             elsif ( length($seqchars) >= $count ) {
282 12         12 $substring = substr( $seqchars, $count );
283 12 100       18 if ($first) {
284 2         6 $linesubstr = substr( $matchline, $count );
285 2         3 $first = 0;
286             }
287 12         11 last SWITCH;
288             }
289 0         0 $substring = "";
290             }
291             $self->_print(
292 96 50       141 sprintf(
293             "%-" . $max . "s %s\n",
294             $aln->displayname( $seq->get_nse() ), $substring
295             )
296             ) or return;
297             }
298              
299 16         12 my $percentages = '';
300 16 50       26 if ( $self->percentages ) {
301 0         0 my ($strcpy) = ($linesubstr);
302 0         0 my $count = ( $strcpy =~ tr/\*// );
303 0         0 $percentages =
304             sprintf( "\t%d%%", 100 * ( $count / length($linesubstr) ) );
305             }
306             $self->_print(
307 16         38 sprintf(
308             "%-" . $max . "s %s%s\n",
309             '', $linesubstr, $percentages
310             )
311             );
312 16 50       22 $self->_print( sprintf("\n\n") ) or return;
313 16         26 $count += $line_len;
314             }
315             }
316 2 50 33     7 $self->flush if $self->_flush_on_write && defined $self->_fh;
317 2         7 return 1;
318             }
319              
320             =head2 percentages
321              
322             Title : percentages
323             Usage : $obj->percentages($newval)
324             Function: Set the percentages flag - whether or not to show percentages in
325             each output line
326             Returns : value of percentages
327             Args : newvalue (optional)
328              
329              
330             =cut
331              
332             sub percentages {
333 16     16 1 13 my ( $self, $value ) = @_;
334 16 50       23 if ( defined $value ) {
335 0         0 $self->{'_percentages'} = $value;
336             }
337 16         26 return $self->{'_percentages'};
338             }
339              
340             =head2 line_length
341              
342             Title : line_length
343             Usage : $obj->line_length($newval)
344             Function: Set the alignment output line length
345             Returns : value of line_length
346             Args : newvalue (optional)
347              
348              
349             =cut
350              
351             sub line_length {
352 48     48 1 57 my ( $self, $value ) = @_;
353 48 100       87 if ( defined $value ) {
354 46         78 $self->{'_line_length'} = $value;
355             }
356 48         83 return $self->{'_line_length'};
357             }
358              
359             1;