File Coverage

Bio/AlignIO/mega.pm
Criterion Covered Total %
statement 85 90 94.4
branch 20 26 76.9
condition 3 9 33.3
subroutine 8 8 100.0
pod 2 2 100.0
total 118 135 87.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::mega
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::AlignIO::mega - Parse and Create MEGA format data files
17              
18             =head1 SYNOPSIS
19              
20             use Bio::AlignIO;
21             my $alignio = Bio::AlignIO->new(-format => 'mega',
22             -file => 't/data/hemoglobinA.meg');
23              
24             while( my $aln = $alignio->next_aln ) {
25             # process each alignment or convert to another format like NEXUS
26             }
27              
28             =head1 DESCRIPTION
29              
30             This object handles reading and writing data streams in the MEGA
31             format (Kumar and Nei).
32              
33              
34             =head1 FEEDBACK
35              
36             =head2 Mailing Lists
37              
38             User feedback is an integral part of the evolution of this and other
39             Bioperl modules. Send your comments and suggestions preferably to
40             the Bioperl mailing list. Your participation is much appreciated.
41              
42             bioperl-l@bioperl.org - General discussion
43             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44              
45             =head2 Support
46              
47             Please direct usage questions or support issues to the mailing list:
48              
49             I
50              
51             rather than to the module maintainer directly. Many experienced and
52             reponsive experts will be able look at the problem and quickly
53             address it. Please include a thorough description of the problem
54             with code and data examples if at all possible.
55              
56             =head2 Reporting Bugs
57              
58             Report bugs to the Bioperl bug tracking system to help us keep track
59             of the bugs and their resolution. Bug reports can be submitted the
60             web:
61              
62             https://github.com/bioperl/bioperl-live/issues
63              
64             =head1 AUTHOR - Jason Stajich
65              
66             Email jason-at-bioperl.org
67              
68             =head1 APPENDIX
69              
70             The rest of the documentation details each of the object methods.
71             Internal methods are usually preceded with a _
72              
73             =cut
74              
75              
76             # Let the code begin...
77              
78              
79             package Bio::AlignIO::mega;
80 2     2   471 use vars qw($MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
  2         1  
  2         106  
81 2     2   7 use strict;
  2         1  
  2         31  
82              
83 2     2   495 use Bio::SimpleAlign;
  2         4  
  2         50  
84 2     2   9 use Bio::LocatableSeq;
  2         2  
  2         87  
85              
86             # symbols are changed due to MEGA's use of '.' for redundant sequences
87              
88             BEGIN {
89 2     2   2 $MEGANAMELEN = 10;
90 2         3 $LINELEN = 60;
91 2         3 $BLOCKLEN = 10;
92 2         3 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard);
  8         44  
93             }
94 2     2   11 use base qw(Bio::AlignIO);
  2         2  
  2         1064  
95              
96              
97             =head2 next_aln
98              
99             Title : next_aln
100             Usage : $aln = $stream->next_aln()
101             Function: returns the next alignment in the stream.
102             Supports the following MEGA format features:
103             - The file has to start with '#mega'
104             - Reads in the name of the alignment from a comment
105             (anything after '!TITLE: ') .
106             - Reads in the format parameters datatype
107              
108             Returns : L object - returns 0 on end of file
109             or on error
110             Args : NONE
111              
112              
113             =cut
114              
115             sub next_aln{
116 2     2 1 7 my ($self) = @_;
117 2         3 my $entry;
118 2         2 my ($alphabet,%seqs);
119 2         3 local $Bio::LocatableSeq::OTHER_SYMBOLS = '\*\?\.';
120 2         4 local $Bio::LocatableSeq::GAP_SYMBOLS = '\-';
121 2         15 my $aln = Bio::SimpleAlign->new(-source => 'mega');
122              
123 2   33     12 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {}
124              
125 2 50       10 $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
126             unless $entry =~ /^#mega/i;
127              
128 2         7 while( defined($entry = $self->_readline() ) ) {
129 24         26 local($_) = $entry;
130 24 100       92 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)}
  2 100       9  
    100          
131             elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) {
132 2         9 my (@fields) = split(/\s+/,$1);
133 2         5 foreach my $f ( @fields ) {
134 6         11 my ($name,$value) = split(/\=/,$f);
135 6 100       15 if( $name eq 'datatype' ) {
    100          
    50          
136 2         4 $alphabet = $value;
137             } elsif( $name eq 'identical' ) {
138 2         7 $aln->match_char($value);
139             } elsif( $name eq 'indel' ) {
140 2         6 $aln->gap_char($value);
141             }
142             }
143             } elsif( /^\#/ ) {
144 2         4 last;
145             }
146             }
147 2         2 my @order;
148 2         5 while( defined($entry) ) {
149 42 100       86 if( $entry !~ /^\s+$/ ) {
150             # this is to skip the leading '#'
151 36         38 my $seqname = substr($entry,1,$MEGANAMELEN-1);
152 36         77 $seqname =~ s/(\S+)\s+$/$1/g;
153 36         36 my $line = substr($entry,$MEGANAMELEN);
154 36         104 $line =~ s/\s+//g;
155 36 100       61 if( ! defined $seqs{$seqname} ) {push @order, $seqname; }
  12         14  
156 36         45 $seqs{$seqname} .= $line;
157             }
158 42         50 $entry = $self->_readline();
159             }
160              
161 2         3 foreach my $seqname ( @order ) {
162 12         14 my $s = $seqs{$seqname};
163 12         66 $s =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
164 12         12 my $end = length($s);
165             my $seq = Bio::LocatableSeq->new('-alphabet' => $alphabet,
166             '-display_id' => $seqname,
167 12         34 '-seq' => $seqs{$seqname},
168             '-start' => 1,
169             '-end' => $end);
170              
171 12         26 $aln->add_seq($seq);
172             }
173 2         7 $aln->unmatch;
174 2 50       6 return $aln if $aln->num_sequences;
175 0         0 return;
176             }
177              
178             =head2 write_aln
179              
180             Title : write_aln
181             Usage : $stream->write_aln(@aln)
182             Function: writes the $aln object into the stream in MEGA format
183             Returns : 1 for success and 0 for error
184             Args : L object
185              
186             =cut
187              
188             sub write_aln{
189 1     1 1 6 my ($self,@aln) = @_;
190 1         2 my $count = 0;
191 1         1 my $wrapped = 0;
192 1         2 my $maxname;
193              
194 1         2 foreach my $aln ( @aln ) {
195 1 50 33     10 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
196 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
197 0         0 return 0;
198             } elsif( ! $aln->is_flush($self->verbose) ) {
199 0         0 $self->warn("All Sequences in the alignment must be the same length");
200 0         0 return 0;
201             }
202 1         4 $aln->match();
203 1         4 my $len = $aln->length();
204 1         3 my $format = sprintf('datatype=%s identical=%s indel=%s;',
205             $aln->get_seq_by_pos(1)->alphabet(),
206             $aln->match_char, $aln->gap_char);
207              
208 1         4 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n",
209             $aln->id, $format));
210              
211 1         4 my ($count, $blockcount,$length) = ( 0,0,$aln->length());
212 1         5 $aln->set_displayname_flat();
213 1         3 while( $count < $length ) {
214 3         5 foreach my $seq ( $aln->each_seq ) {
215 18         23 my $seqchars = $seq->seq();
216 18         12 $blockcount = 0;
217 18         17 my $substring = substr($seqchars, $count, $LINELEN);
218 18         13 my @blocks;
219 18         26 while( $blockcount < length($substring) ) {
220 90         73 push @blocks, substr($substring, $blockcount,$BLOCKLEN);
221 90         95 $blockcount += $BLOCKLEN;
222             }
223 18         36 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n",
224             substr($aln->displayname($seq->get_nse()),
225             0,$MEGANAMELEN-2),
226             join(' ', @blocks)));
227             }
228 3         7 $self->_print("\n");
229 3         5 $count += $LINELEN;
230             }
231             }
232 1 50 33     4 $self->flush if $self->_flush_on_write && defined $self->_fh;
233 1         3 return 1;
234             }
235              
236              
237             1;