File Coverage

Bio/AlignIO/clustalw.pm
Criterion Covered Total %
statement 104 117 88.8
branch 38 54 70.3
condition 8 18 44.4
subroutine 8 8 100.0
pod 4 4 100.0
total 162 201 80.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::clustalw
3             #
4             # based on the Bio::SeqIO modules
5             # by Ewan Birney
6             # and Lincoln Stein
7             # and the Bio::SimpleAlign module of Ewan Birney
8             #
9             # Copyright Peter Schattner
10             #
11             # You may distribute this module under the same terms as perl itself
12             # History
13             # September 5, 2000
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::AlignIO::clustalw - clustalw sequence input/output stream
19              
20             =head1 SYNOPSIS
21              
22             Do not use this module directly. Use it via the Bio::AlignIO class.
23              
24             =head1 DESCRIPTION
25              
26             This object can transform Bio::Align::AlignI objects to and from clustalw
27             files.
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to one
35             of the Bioperl mailing lists. Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Support
41              
42             Please direct usage questions or support issues to the mailing list:
43              
44             I
45              
46             rather than to the module maintainer directly. Many experienced and
47             reponsive experts will be able look at the problem and quickly
48             address it. Please include a thorough description of the problem
49             with code and data examples if at all possible.
50              
51             =head2 Reporting Bugs
52              
53             Report bugs to the Bioperl bug tracking system to help us keep track
54             the bugs and their resolution. Bug reports can be submitted via the
55             web:
56              
57             https://github.com/bioperl/bioperl-live/issues
58              
59             =head1 AUTHORS - Peter Schattner
60              
61             Email: schattner@alum.mit.edu
62              
63              
64             =head1 APPENDIX
65              
66             The rest of the documentation details each of the object
67             methods. Internal methods are usually preceded with a _
68              
69             =cut
70              
71             # Let the code begin...
72              
73             package Bio::AlignIO::clustalw;
74 10     10   419 use vars qw($LINELENGTH $CLUSTALPRINTVERSION);
  10         12  
  10         483  
75 10     10   35 use strict;
  10         489  
  10         352  
76              
77              
78             $LINELENGTH = 60;
79             $CLUSTALPRINTVERSION = '1.81';
80 10     10   34 use base qw(Bio::AlignIO);
  10         10  
  10         9314  
81              
82             =head2 new
83              
84             Title : new
85             Usage : $alignio = Bio::AlignIO->new(-format => 'clustalw',
86             -file => 'filename');
87             Function: returns a new Bio::AlignIO object to handle clustalw files
88             Returns : Bio::AlignIO::clustalw object
89             Args : -verbose => verbosity setting (-1, 0, 1, 2)
90             -file => name of file to read in or to write, with ">"
91             -fh => alternative to -file param - provide a filehandle
92             to read from or write to
93             -format => alignment format to process or produce
94             -percentages => display a percentage of identity
95             in each line of the alignment (clustalw only)
96             -linelength=> alignment output line length (default 60)
97              
98             =cut
99              
100             sub _initialize {
101 46     46   93 my ( $self, @args ) = @_;
102 46         168 $self->SUPER::_initialize(@args);
103 46         158 my ( $percentages, $ll ) =
104             $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
105 46 50       127 defined $percentages && $self->percentages($percentages);
106 46   33     255 $self->line_length( $ll || $LINELENGTH );
107             }
108              
109             =head2 next_aln
110              
111             Title : next_aln
112             Usage : $aln = $stream->next_aln()
113             Function: returns the next alignment in the stream
114             Returns : Bio::Align::AlignI object
115             Args : NONE
116              
117             See L for details
118              
119             =cut
120              
121             sub next_aln {
122 29     29 1 55 my ($self) = @_;
123 29         37 my $first_line;
124              
125 29         395 while ( $first_line = $self->_readline ) {
126 29 50       126 last if $first_line !~ /^$/;
127             }
128 29         109 $self->_pushback($first_line);
129 29 50 33     50 if ( defined( $first_line = $self->_readline )
130             && $first_line !~ /CLUSTAL/ )
131             {
132 0         0 $self->throw(
133             "trying to parse a file which does not start with a CLUSTAL header"
134             );
135             }
136 29         41 my %alignments;
137 29         94 my $aln = Bio::SimpleAlign->new(
138             -source => 'clustalw',
139             -verbose => $self->verbose
140             );
141 29         38 my $order = 0;
142 29         21 my %order;
143 29         100 $self->{_lastline} = '';
144 29         47 my ($first_block, $seen_block) = (0,0);
145 29         108 while ( defined( $_ = $self->_readline ) ) {
146 2273 100 100     5234 next if (/^\s+$/ && !$first_block);
147 2215 100       2868 if (/^\s$/) { # line contains no description
148 217         168 $seen_block = 1;
149 217         321 next;
150             }
151 1998         1313 $first_block = 1;
152             # break the loop if we come to the end of the current alignment
153             # and push back the CLUSTAL header
154 1998 50       2479 if (/CLUSTAL/) {
155 0         0 $self->_pushback($_);
156 0         0 last;
157             }
158              
159 1998         1597 my ( $seqname, $aln_line ) = ( '', '' );
160 1998 100       6077 if (/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
    100          
161              
162             # clustal 1.4 format
163 48         97 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
164              
165             # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
166             }
167             elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
168 1715         2122 ( $seqname, $aln_line ) = ( $1, $2 );
169 1715 100       2546 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
170 1         2 $self->{_lastline} = $_;
171 1         2 next;
172             }
173             }
174             else {
175 235         234 $self->{_lastline} = $_;
176 235         398 next;
177             }
178              
179 1762 100       2026 if ( !$seen_block ) {
180 211 50       244 if (exists $order{$seqname}) {
181 0         0 $self->warn("Duplicate sequence : $seqname\n".
182             "Can't guarantee alignment quality");
183             }
184             else {
185 211         313 $order{$seqname} = $order++;
186             }
187             }
188              
189 1762         3760 $alignments{$seqname} .= $aln_line;
190             }
191              
192 29         54 my ( $sname, $start, $end );
193 29         196 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
  413         391  
194 211 100       434 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
195 6         16 ( $sname, $start, $end ) = ( $1, $2, $3 );
196             }
197             else {
198 205         280 ( $sname, $start ) = ( $name, 1 );
199 205         289 my $str = $alignments{$name};
200 205         7472 $str =~ s/[^A-Za-z]//g;
201 205         328 $end = length($str);
202             }
203             my $seq = Bio::LocatableSeq->new
204             (
205 211         570 '-seq' => $alignments{$name},
206             '-display_id' => $sname,
207             '-start' => $start,
208             '-end' => $end,
209             '-alphabet' => $self->alphabet,
210             );
211 211         515 $aln->add_seq($seq);
212             }
213              
214             # not sure if this should be a default option - or we can pass in
215             # an option to do this in the future? --jason stajich
216             # $aln->map_chars('\.','-');
217            
218             # no sequences added, so just return
219 29 50       124 return $aln if $aln->num_sequences;
220 0         0 return;
221             }
222              
223             =head2 write_aln
224              
225             Title : write_aln
226             Usage : $stream->write_aln(@aln)
227             Function: writes the clustalw-format object (.aln) into the stream
228             Returns : 1 for success and 0 for error
229             Args : Bio::Align::AlignI object
230              
231             =cut
232              
233             sub write_aln {
234 2     2 1 6 my ( $self, @aln ) = @_;
235 2         4 my ( $count, $length, $seq, @seq, $tempcount, $line_len );
236 2   33     5 $line_len = $self->line_length || $LINELENGTH;
237 2         4 foreach my $aln (@aln) {
238 2 50 33     28 if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
239 0         0 $self->warn(
240             "Must provide a Bio::Align::AlignI object when calling write_aln"
241             );
242 0         0 next;
243             }
244 2         8 my $matchline = $aln->match_line;
245 2 50       31 if ( $self->force_displayname_flat ) {
246 0         0 $aln->set_displayname_flat(1);
247             }
248             $self->_print(
249 2 50       41 sprintf( "CLUSTAL W (%s) multiple sequence alignment\n\n\n",
250             $CLUSTALPRINTVERSION )
251             ) or return;
252 2         9 $length = $aln->length();
253 2         4 $count = $tempcount = 0;
254 2         9 @seq = $aln->each_seq();
255 2         4 my $max = 22;
256 2         4 foreach $seq (@seq) {
257 12 50       24 $max = length( $aln->displayname( $seq->get_nse() ) )
258             if ( length( $aln->displayname( $seq->get_nse() ) ) > $max );
259             }
260              
261 2         9 while ( $count < $length ) {
262 16         19 my ( $linesubstr, $first ) = ( '', 1 );
263 16         18 foreach $seq (@seq) {
264              
265             #
266             # Following lines are to suppress warnings
267             # if some sequences in the alignment are much longer than others.
268              
269 96         53 my ($substring);
270 96         134 my $seqchars = $seq->seq();
271             SWITCH: {
272 96 100       68 if ( length($seqchars) >= ( $count + $line_len ) ) {
  96 50       124  
273 84         84 $substring = substr( $seqchars, $count, $line_len );
274 84 100       95 if ($first) {
275 14         15 $linesubstr =
276             substr( $matchline, $count, $line_len );
277 14         13 $first = 0;
278             }
279 84         73 last SWITCH;
280             }
281             elsif ( length($seqchars) >= $count ) {
282 12         13 $substring = substr( $seqchars, $count );
283 12 100       17 if ($first) {
284 2         8 $linesubstr = substr( $matchline, $count );
285 2         3 $first = 0;
286             }
287 12         11 last SWITCH;
288             }
289 0         0 $substring = "";
290             }
291             $self->_print(
292 96 50       163 sprintf(
293             "%-" . $max . "s %s\n",
294             $aln->displayname( $seq->get_nse() ), $substring
295             )
296             ) or return;
297             }
298              
299 16         15 my $percentages = '';
300 16 50       23 if ( $self->percentages ) {
301 0         0 my ($strcpy) = ($linesubstr);
302 0         0 my $count = ( $strcpy =~ tr/\*// );
303 0         0 $percentages =
304             sprintf( "\t%d%%", 100 * ( $count / length($linesubstr) ) );
305             }
306             $self->_print(
307 16         41 sprintf(
308             "%-" . $max . "s %s%s\n",
309             '', $linesubstr, $percentages
310             )
311             );
312 16 50       25 $self->_print( sprintf("\n\n") ) or return;
313 16         28 $count += $line_len;
314             }
315             }
316 2 50 33     9 $self->flush if $self->_flush_on_write && defined $self->_fh;
317 2         10 return 1;
318             }
319              
320             =head2 percentages
321              
322             Title : percentages
323             Usage : $obj->percentages($newval)
324             Function: Set the percentages flag - whether or not to show percentages in
325             each output line
326             Returns : value of percentages
327             Args : newvalue (optional)
328              
329              
330             =cut
331              
332             sub percentages {
333 16     16 1 16 my ( $self, $value ) = @_;
334 16 50       21 if ( defined $value ) {
335 0         0 $self->{'_percentages'} = $value;
336             }
337 16         32 return $self->{'_percentages'};
338             }
339              
340             =head2 line_length
341              
342             Title : line_length
343             Usage : $obj->line_length($newval)
344             Function: Set the alignment output line length
345             Returns : value of line_length
346             Args : newvalue (optional)
347              
348              
349             =cut
350              
351             sub line_length {
352 48     48 1 71 my ( $self, $value ) = @_;
353 48 100       107 if ( defined $value ) {
354 46         87 $self->{'_line_length'} = $value;
355             }
356 48         96 return $self->{'_line_length'};
357             }
358              
359             1;