File Coverage

Bio/AlignIO/clustalw.pm
Criterion Covered Total %
statement 104 117 88.8
branch 38 54 70.3
condition 8 18 44.4
subroutine 8 8 100.0
pod 4 4 100.0
total 162 201 80.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::clustalw
3             #
4             # based on the Bio::SeqIO modules
5             # by Ewan Birney
6             # and Lincoln Stein
7             # and the Bio::SimpleAlign module of Ewan Birney
8             #
9             # Copyright Peter Schattner
10             #
11             # You may distribute this module under the same terms as perl itself
12             # History
13             # September 5, 2000
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::AlignIO::clustalw - clustalw sequence input/output stream
19              
20             =head1 SYNOPSIS
21              
22             Do not use this module directly. Use it via the Bio::AlignIO class.
23              
24             =head1 DESCRIPTION
25              
26             This object can transform Bio::Align::AlignI objects to and from clustalw
27             files.
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to one
35             of the Bioperl mailing lists. Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Support
41              
42             Please direct usage questions or support issues to the mailing list:
43              
44             I
45              
46             rather than to the module maintainer directly. Many experienced and
47             reponsive experts will be able look at the problem and quickly
48             address it. Please include a thorough description of the problem
49             with code and data examples if at all possible.
50              
51             =head2 Reporting Bugs
52              
53             Report bugs to the Bioperl bug tracking system to help us keep track
54             the bugs and their resolution. Bug reports can be submitted via the
55             web:
56              
57             https://github.com/bioperl/bioperl-live/issues
58              
59             =head1 AUTHORS - Peter Schattner
60              
61             Email: schattner@alum.mit.edu
62              
63              
64             =head1 APPENDIX
65              
66             The rest of the documentation details each of the object
67             methods. Internal methods are usually preceded with a _
68              
69             =cut
70              
71             # Let the code begin...
72              
73             package Bio::AlignIO::clustalw;
74 10     10   657 use vars qw($LINELENGTH $CLUSTALPRINTVERSION);
  10         14  
  10         558  
75 10     10   59 use strict;
  10         15  
  10         344  
76              
77              
78             $LINELENGTH = 60;
79             $CLUSTALPRINTVERSION = '1.81';
80 10     10   44 use base qw(Bio::AlignIO);
  10         13  
  10         10441  
81              
82             =head2 new
83              
84             Title : new
85             Usage : $alignio = Bio::AlignIO->new(-format => 'clustalw',
86             -file => 'filename');
87             Function: returns a new Bio::AlignIO object to handle clustalw files
88             Returns : Bio::AlignIO::clustalw object
89             Args : -verbose => verbosity setting (-1, 0, 1, 2)
90             -file => name of file to read in or to write, with ">"
91             -fh => alternative to -file param - provide a filehandle
92             to read from or write to
93             -format => alignment format to process or produce
94             -percentages => display a percentage of identity
95             in each line of the alignment (clustalw only)
96             -linelength=> alignment output line length (default 60)
97              
98             =cut
99              
100             sub _initialize {
101 46     46   106 my ( $self, @args ) = @_;
102 46         171 $self->SUPER::_initialize(@args);
103 46         162 my ( $percentages, $ll ) =
104             $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
105 46 50       163 defined $percentages && $self->percentages($percentages);
106 46   33     217 $self->line_length( $ll || $LINELENGTH );
107             }
108              
109             =head2 next_aln
110              
111             Title : next_aln
112             Usage : $aln = $stream->next_aln()
113             Function: returns the next alignment in the stream
114             Returns : Bio::Align::AlignI object
115             Args : NONE
116              
117             See L for details
118              
119             =cut
120              
121             sub next_aln {
122 29     29 1 402 my ($self) = @_;
123 29         35 my $first_line;
124              
125 29         112 while ( $first_line = $self->_readline ) {
126 29 50       114 last if $first_line !~ /^$/;
127             }
128 29         116 $self->_pushback($first_line);
129 29 50 33     57 if ( defined( $first_line = $self->_readline )
130             && $first_line !~ /CLUSTAL/ )
131             {
132 0         0 $self->throw(
133             "trying to parse a file which does not start with a CLUSTAL header"
134             );
135             }
136 29         58 my %alignments;
137 29         80 my $aln = Bio::SimpleAlign->new(
138             -source => 'clustalw',
139             -verbose => $self->verbose
140             );
141 29         50 my $order = 0;
142 29         36 my %order;
143 29         100 $self->{_lastline} = '';
144 29         56 my ($first_block, $seen_block) = (0,0);
145 29         75 while ( defined( $_ = $self->_readline ) ) {
146 2273 100 100     5232 next if (/^\s+$/ && !$first_block);
147 2215 100       3039 if (/^\s$/) { # line contains no description
148 217         220 $seen_block = 1;
149 217         327 next;
150             }
151 1998         1698 $first_block = 1;
152             # break the loop if we come to the end of the current alignment
153             # and push back the CLUSTAL header
154 1998 50       2686 if (/CLUSTAL/) {
155 0         0 $self->_pushback($_);
156 0         0 last;
157             }
158              
159 1998         2313 my ( $seqname, $aln_line ) = ( '', '' );
160 1998 100       6296 if (/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
    100          
161              
162             # clustal 1.4 format
163 48         154 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
164              
165             # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
166             }
167             elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
168 1715         3028 ( $seqname, $aln_line ) = ( $1, $2 );
169 1715 100       2656 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
170 1         2 $self->{_lastline} = $_;
171 1         2 next;
172             }
173             }
174             else {
175 235         316 $self->{_lastline} = $_;
176 235         399 next;
177             }
178              
179 1762 100       2130 if ( !$seen_block ) {
180 211 50       340 if (exists $order{$seqname}) {
181 0         0 $self->warn("Duplicate sequence : $seqname\n".
182             "Can't guarantee alignment quality");
183             }
184             else {
185 211         332 $order{$seqname} = $order++;
186             }
187             }
188              
189 1762         4135 $alignments{$seqname} .= $aln_line;
190             }
191              
192 29         57 my ( $sname, $start, $end );
193 29         189 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
  419         467  
194 211 100       462 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
195 6         20 ( $sname, $start, $end ) = ( $1, $2, $3 );
196             }
197             else {
198 205         315 ( $sname, $start ) = ( $name, 1 );
199 205         322 my $str = $alignments{$name};
200 205         7094 $str =~ s/[^A-Za-z]//g;
201 205         318 $end = length($str);
202             }
203             my $seq = Bio::LocatableSeq->new
204             (
205 211         574 '-seq' => $alignments{$name},
206             '-display_id' => $sname,
207             '-start' => $start,
208             '-end' => $end,
209             '-alphabet' => $self->alphabet,
210             );
211 211         509 $aln->add_seq($seq);
212             }
213              
214             # not sure if this should be a default option - or we can pass in
215             # an option to do this in the future? --jason stajich
216             # $aln->map_chars('\.','-');
217            
218             # no sequences added, so just return
219 29 50       115 return $aln if $aln->num_sequences;
220 0         0 return;
221             }
222              
223             =head2 write_aln
224              
225             Title : write_aln
226             Usage : $stream->write_aln(@aln)
227             Function: writes the clustalw-format object (.aln) into the stream
228             Returns : 1 for success and 0 for error
229             Args : Bio::Align::AlignI object
230              
231             =cut
232              
233             sub write_aln {
234 2     2 1 12 my ( $self, @aln ) = @_;
235 2         5 my ( $count, $length, $seq, @seq, $tempcount, $line_len );
236 2   33     5 $line_len = $self->line_length || $LINELENGTH;
237 2         5 foreach my $aln (@aln) {
238 2 50 33     16 if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
239 0         0 $self->warn(
240             "Must provide a Bio::Align::AlignI object when calling write_aln"
241             );
242 0         0 next;
243             }
244 2         10 my $matchline = $aln->match_line;
245 2 50       42 if ( $self->force_displayname_flat ) {
246 0         0 $aln->set_displayname_flat(1);
247             }
248             $self->_print(
249 2 50       74 sprintf( "CLUSTAL W (%s) multiple sequence alignment\n\n\n",
250             $CLUSTALPRINTVERSION )
251             ) or return;
252 2         9 $length = $aln->length();
253 2         5 $count = $tempcount = 0;
254 2         7 @seq = $aln->each_seq();
255 2         5 my $max = 22;
256 2         6 foreach $seq (@seq) {
257 12 50       24 $max = length( $aln->displayname( $seq->get_nse() ) )
258             if ( length( $aln->displayname( $seq->get_nse() ) ) > $max );
259             }
260              
261 2         10 while ( $count < $length ) {
262 16         23 my ( $linesubstr, $first ) = ( '', 1 );
263 16         20 foreach $seq (@seq) {
264              
265             #
266             # Following lines are to suppress warnings
267             # if some sequences in the alignment are much longer than others.
268              
269 96         96 my ($substring);
270 96         150 my $seqchars = $seq->seq();
271             SWITCH: {
272 96 100       101 if ( length($seqchars) >= ( $count + $line_len ) ) {
  96 50       140  
273 84         113 $substring = substr( $seqchars, $count, $line_len );
274 84 100       110 if ($first) {
275 14         18 $linesubstr =
276             substr( $matchline, $count, $line_len );
277 14         15 $first = 0;
278             }
279 84         87 last SWITCH;
280             }
281             elsif ( length($seqchars) >= $count ) {
282 12         17 $substring = substr( $seqchars, $count );
283 12 100       19 if ($first) {
284 2         4 $linesubstr = substr( $matchline, $count );
285 2         5 $first = 0;
286             }
287 12         15 last SWITCH;
288             }
289 0         0 $substring = "";
290             }
291             $self->_print(
292 96 50       194 sprintf(
293             "%-" . $max . "s %s\n",
294             $aln->displayname( $seq->get_nse() ), $substring
295             )
296             ) or return;
297             }
298              
299 16         22 my $percentages = '';
300 16 50       32 if ( $self->percentages ) {
301 0         0 my ($strcpy) = ($linesubstr);
302 0         0 my $count = ( $strcpy =~ tr/\*// );
303 0         0 $percentages =
304             sprintf( "\t%d%%", 100 * ( $count / length($linesubstr) ) );
305             }
306             $self->_print(
307 16         66 sprintf(
308             "%-" . $max . "s %s%s\n",
309             '', $linesubstr, $percentages
310             )
311             );
312 16 50       32 $self->_print( sprintf("\n\n") ) or return;
313 16         34 $count += $line_len;
314             }
315             }
316 2 50 33     12 $self->flush if $self->_flush_on_write && defined $self->_fh;
317 2         10 return 1;
318             }
319              
320             =head2 percentages
321              
322             Title : percentages
323             Usage : $obj->percentages($newval)
324             Function: Set the percentages flag - whether or not to show percentages in
325             each output line
326             Returns : value of percentages
327             Args : newvalue (optional)
328              
329              
330             =cut
331              
332             sub percentages {
333 16     16 1 24 my ( $self, $value ) = @_;
334 16 50       29 if ( defined $value ) {
335 0         0 $self->{'_percentages'} = $value;
336             }
337 16         34 return $self->{'_percentages'};
338             }
339              
340             =head2 line_length
341              
342             Title : line_length
343             Usage : $obj->line_length($newval)
344             Function: Set the alignment output line length
345             Returns : value of line_length
346             Args : newvalue (optional)
347              
348              
349             =cut
350              
351             sub line_length {
352 48     48 1 93 my ( $self, $value ) = @_;
353 48 100       102 if ( defined $value ) {
354 46         83 $self->{'_line_length'} = $value;
355             }
356 48         96 return $self->{'_line_length'};
357             }
358              
359             1;