File Coverage

Bio/AlignIO/mega.pm
Criterion Covered Total %
statement 85 90 94.4
branch 20 26 76.9
condition 3 9 33.3
subroutine 8 8 100.0
pod 2 2 100.0
total 118 135 87.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::mega
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::AlignIO::mega - Parse and Create MEGA format data files
17              
18             =head1 SYNOPSIS
19              
20             use Bio::AlignIO;
21             my $alignio = Bio::AlignIO->new(-format => 'mega',
22             -file => 't/data/hemoglobinA.meg');
23              
24             while( my $aln = $alignio->next_aln ) {
25             # process each alignment or convert to another format like NEXUS
26             }
27              
28             =head1 DESCRIPTION
29              
30             This object handles reading and writing data streams in the MEGA
31             format (Kumar and Nei).
32              
33              
34             =head1 FEEDBACK
35              
36             =head2 Mailing Lists
37              
38             User feedback is an integral part of the evolution of this and other
39             Bioperl modules. Send your comments and suggestions preferably to
40             the Bioperl mailing list. Your participation is much appreciated.
41              
42             bioperl-l@bioperl.org - General discussion
43             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44              
45             =head2 Support
46              
47             Please direct usage questions or support issues to the mailing list:
48              
49             I
50              
51             rather than to the module maintainer directly. Many experienced and
52             reponsive experts will be able look at the problem and quickly
53             address it. Please include a thorough description of the problem
54             with code and data examples if at all possible.
55              
56             =head2 Reporting Bugs
57              
58             Report bugs to the Bioperl bug tracking system to help us keep track
59             of the bugs and their resolution. Bug reports can be submitted the
60             web:
61              
62             https://github.com/bioperl/bioperl-live/issues
63              
64             =head1 AUTHOR - Jason Stajich
65              
66             Email jason-at-bioperl.org
67              
68             =head1 APPENDIX
69              
70             The rest of the documentation details each of the object methods.
71             Internal methods are usually preceded with a _
72              
73             =cut
74              
75              
76             # Let the code begin...
77              
78              
79             package Bio::AlignIO::mega;
80 2     2   385 use vars qw($MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
  2         2  
  2         113  
81 2     2   6 use strict;
  2         2  
  2         37  
82              
83 2     2   478 use Bio::SimpleAlign;
  2         2  
  2         66  
84 2     2   11 use Bio::LocatableSeq;
  2         4  
  2         123  
85              
86             # symbols are changed due to MEGA's use of '.' for redundant sequences
87              
88             BEGIN {
89 2     2   3 $MEGANAMELEN = 10;
90 2         5 $LINELEN = 60;
91 2         3 $BLOCKLEN = 10;
92 2         3 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard);
  8         57  
93             }
94 2     2   10 use base qw(Bio::AlignIO);
  2         3  
  2         1154  
95              
96              
97             =head2 next_aln
98              
99             Title : next_aln
100             Usage : $aln = $stream->next_aln()
101             Function: returns the next alignment in the stream.
102             Supports the following MEGA format features:
103             - The file has to start with '#mega'
104             - Reads in the name of the alignment from a comment
105             (anything after '!TITLE: ') .
106             - Reads in the format parameters datatype
107              
108             Returns : L object - returns 0 on end of file
109             or on error
110             Args : NONE
111              
112              
113             =cut
114              
115             sub next_aln{
116 2     2 1 6 my ($self) = @_;
117 2         1 my $entry;
118 2         2 my ($alphabet,%seqs);
119 2         5 local $Bio::LocatableSeq::OTHER_SYMBOLS = '\*\?\.';
120 2         3 local $Bio::LocatableSeq::GAP_SYMBOLS = '\-';
121 2         14 my $aln = Bio::SimpleAlign->new(-source => 'mega');
122              
123 2   33     12 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {}
124              
125 2 50       9 $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
126             unless $entry =~ /^#mega/i;
127              
128 2         4 while( defined($entry = $self->_readline() ) ) {
129 24         22 local($_) = $entry;
130 24 100       87 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)}
  2 100       9  
    100          
131             elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) {
132 2         10 my (@fields) = split(/\s+/,$1);
133 2         4 foreach my $f ( @fields ) {
134 6         12 my ($name,$value) = split(/\=/,$f);
135 6 100       15 if( $name eq 'datatype' ) {
    100          
    50          
136 2         3 $alphabet = $value;
137             } elsif( $name eq 'identical' ) {
138 2         7 $aln->match_char($value);
139             } elsif( $name eq 'indel' ) {
140 2         8 $aln->gap_char($value);
141             }
142             }
143             } elsif( /^\#/ ) {
144 2         3 last;
145             }
146             }
147 2         3 my @order;
148 2         5 while( defined($entry) ) {
149 42 100       91 if( $entry !~ /^\s+$/ ) {
150             # this is to skip the leading '#'
151 36         39 my $seqname = substr($entry,1,$MEGANAMELEN-1);
152 36         79 $seqname =~ s/(\S+)\s+$/$1/g;
153 36         42 my $line = substr($entry,$MEGANAMELEN);
154 36         103 $line =~ s/\s+//g;
155 36 100       54 if( ! defined $seqs{$seqname} ) {push @order, $seqname; }
  12         12  
156 36         53 $seqs{$seqname} .= $line;
157             }
158 42         59 $entry = $self->_readline();
159             }
160              
161 2         5 foreach my $seqname ( @order ) {
162 12         13 my $s = $seqs{$seqname};
163 12         65 $s =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
164 12         13 my $end = length($s);
165             my $seq = Bio::LocatableSeq->new('-alphabet' => $alphabet,
166             '-display_id' => $seqname,
167 12         36 '-seq' => $seqs{$seqname},
168             '-start' => 1,
169             '-end' => $end);
170              
171 12         25 $aln->add_seq($seq);
172             }
173 2         8 $aln->unmatch;
174 2 50       6 return $aln if $aln->num_sequences;
175 0         0 return;
176             }
177              
178             =head2 write_aln
179              
180             Title : write_aln
181             Usage : $stream->write_aln(@aln)
182             Function: writes the $aln object into the stream in MEGA format
183             Returns : 1 for success and 0 for error
184             Args : L object
185              
186             =cut
187              
188             sub write_aln{
189 1     1 1 6 my ($self,@aln) = @_;
190 1         2 my $count = 0;
191 1         1 my $wrapped = 0;
192 1         2 my $maxname;
193              
194 1         2 foreach my $aln ( @aln ) {
195 1 50 33     13 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
196 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
197 0         0 return 0;
198             } elsif( ! $aln->is_flush($self->verbose) ) {
199 0         0 $self->warn("All Sequences in the alignment must be the same length");
200 0         0 return 0;
201             }
202 1         5 $aln->match();
203 1         3 my $len = $aln->length();
204 1         3 my $format = sprintf('datatype=%s identical=%s indel=%s;',
205             $aln->get_seq_by_pos(1)->alphabet(),
206             $aln->match_char, $aln->gap_char);
207              
208 1         5 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n",
209             $aln->id, $format));
210              
211 1         3 my ($count, $blockcount,$length) = ( 0,0,$aln->length());
212 1         5 $aln->set_displayname_flat();
213 1         2 while( $count < $length ) {
214 3         5 foreach my $seq ( $aln->each_seq ) {
215 18         21 my $seqchars = $seq->seq();
216 18         15 $blockcount = 0;
217 18         19 my $substring = substr($seqchars, $count, $LINELEN);
218 18         8 my @blocks;
219 18         26 while( $blockcount < length($substring) ) {
220 90         89 push @blocks, substr($substring, $blockcount,$BLOCKLEN);
221 90         105 $blockcount += $BLOCKLEN;
222             }
223 18         34 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n",
224             substr($aln->displayname($seq->get_nse()),
225             0,$MEGANAMELEN-2),
226             join(' ', @blocks)));
227             }
228 3         4 $self->_print("\n");
229 3         5 $count += $LINELEN;
230             }
231             }
232 1 50 33     3 $self->flush if $self->_flush_on_write && defined $self->_fh;
233 1         3 return 1;
234             }
235              
236              
237             1;