File Coverage

Bio/AlignIO/msf.pm
Criterion Covered Total %
statement 88 98 89.8
branch 20 28 71.4
condition 5 9 55.5
subroutine 8 8 100.0
pod 2 2 100.0
total 123 145 84.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::msf
3             # based on the Bio::SeqIO::msf module
4             # by Ewan Birney
5             # and Lincoln Stein
6             #
7             # and the SimpleAlign.pm module of Ewan Birney
8             #
9             # Copyright Peter Schattner
10             #
11             # You may distribute this module under the same terms as perl itself
12             # _history
13             # September 5, 2000
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::AlignIO::msf - msf sequence input/output stream
19              
20             =head1 SYNOPSIS
21              
22             Do not use this module directly. Use it via the L class.
23              
24             =head1 DESCRIPTION
25              
26             This object can transform L objects to and from msf
27             flat file databases.
28              
29             =head1 FEEDBACK
30              
31             =head2 Support
32              
33             Please direct usage questions or support issues to the mailing list:
34              
35             I
36              
37             rather than to the module maintainer directly. Many experienced and
38             reponsive experts will be able look at the problem and quickly
39             address it. Please include a thorough description of the problem
40             with code and data examples if at all possible.
41              
42             =head2 Reporting Bugs
43              
44             Report bugs to the Bioperl bug tracking system to help us keep track
45             the bugs and their resolution. Bug reports can be submitted via the
46             web:
47              
48             https://github.com/bioperl/bioperl-live/issues
49              
50             =head1 AUTHORS - Peter Schattner
51              
52             Email: schattner@alum.mit.edu
53              
54              
55             =head1 APPENDIX
56              
57             The rest of the documentation details each of the object
58             methods. Internal methods are usually preceded with a _
59              
60             =cut
61              
62             # Let the code begin...
63              
64             package Bio::AlignIO::msf;
65 3     3   411 use vars qw(%valid_type);
  3         3  
  3         105  
66 3     3   8 use strict;
  3         4  
  3         47  
67              
68 3     3   557 use Bio::SeqIO::gcg; # for GCG_checksum()
  3         4  
  3         72  
69 3     3   485 use Bio::SimpleAlign;
  3         3  
  3         61  
70              
71 3     3   11 use base qw(Bio::AlignIO);
  3         3  
  3         418  
72              
73             BEGIN {
74 3     3   1891 %valid_type = qw( dna N rna N protein P );
75             }
76              
77             =head2 next_aln
78              
79             Title : next_aln
80             Usage : $aln = $stream->next_aln()
81             Function: returns the next alignment in the stream. Tries to read *all* MSF
82             It reads all non whitespace characters in the alignment
83             area. For MSFs with weird gaps (eg ~~~) map them by using
84             $aln->map_chars('~','-')
85             Returns : Bio::Align::AlignI object
86             Args : NONE
87              
88             =cut
89              
90             sub next_aln {
91 3     3 1 9 my $self = shift;
92 3         4 my $entry;
93 3         3 my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
94              
95 3         23 my $aln = Bio::SimpleAlign->new(-source => 'gcg' );
96              
97 3         21 while( $entry = $self->_readline) {
98 63 100       90 $entry =~ m{//} && last; # move to alignment section
99 60 100       148 $entry =~ /Name:\s+(\S+)/ && do { $name = $1;
  48         56  
100 48         59 $hash{$name} = ""; # blank line
101 48         84 push(@names,$name); # we need it ordered!
102             };
103             # otherwise - skip
104             }
105              
106             # alignment section
107              
108 3         7 while( $entry = $self->_readline) {
109 276 50       451 next if ( $entry =~ /^\s+(\d+)/ ) ;
110 276 100       577 $entry =~ /^\s*(\S+)\s+(.*)$/ && do {
111 240         228 $name = $1;
112 240         219 $str = $2;
113 240 50       338 if( ! exists $hash{$name} ) {
114 0         0 $self->throw("$name exists as an alignment line but not in the header. Not confident of what is going on!");
115             }
116 240         692 $str =~ s/\s//g;
117 240         217 $str =~ s/~/-/g;
118 240         491 $hash{$name} .= $str;
119             };
120             }
121              
122 3 50       10 return if @names < 1;
123              
124             # now got this as a name - sequence hash. Let's make some sequences!
125              
126 3         7 for $name ( @names ) {
127 48 50       192 if( $name =~ m{(\S+)/(\d+)-(\d+)} ) {
128 48         80 $seqname = $1;
129 48         43 $start = $2;
130 48         40 $end = $3;
131             } else {
132 0         0 $seqname = $name;
133 0         0 $start = 1;
134 0         0 $str = $hash{$name};
135 0         0 $str =~ s/[^0-9A-Za-z$Bio::LocatableSeq::OTHER_SYMBOLS]//g;
136              
137 0         0 $end = length($str);
138             }
139              
140 48         128 $seq = Bio::LocatableSeq->new('-seq' => $hash{$name},
141             '-display_id' => $seqname,
142             '-start' => $start,
143             '-end' => $end,
144             '-alphabet' => $self->alphabet,
145             );
146 48         108 $aln->add_seq($seq);
147              
148             # If $end <= 0, we have either reached the end of
149             # file in <> or we have encountered some other error
150              
151             }
152 3 50       10 return $aln if $aln->num_sequences;
153 0         0 return;
154             }
155              
156              
157             =head2 write_aln
158              
159             Title : write_aln
160             Usage : $stream->write_aln(@aln)
161             Function: writes the $aln object into the stream in MSF format
162             Sequence type of the alignment is determined by the first sequence.
163             Returns : 1 for success and 0 for error
164             Args : Bio::Align::AlignI object
165              
166              
167             =cut
168              
169             sub write_aln {
170 2     2 1 9 my ($self,@aln) = @_;
171 2         3 my $msftag;
172             my $type;
173 2         4 my $count = 0;
174 2         3 my $maxname;
175 2         3 my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index);
176 2         4 foreach my $aln (@aln) {
177 2 50 33     14 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
178 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
179 0         0 next;
180             }
181 2         220 $date = localtime(time);
182 2         4 $msftag = "MSF";
183 2         12 $type = $valid_type{$aln->get_seq_by_pos(1)->alphabet};
184 2         8 $maxname = $aln->maxdisplayname_length();
185 2         7 $length = $aln->length();
186 2         6 $name = $aln->id();
187 2 50       7 if( !defined $name ) {
188 0         0 $name = "Align";
189             }
190              
191 2         6 $self->_print (sprintf("\n%s MSF: %d Type: %s %s Check: 00 ..\n\n",
192             $name, $aln->num_sequences, $type, $date));
193              
194 2 100       9 my $seqCountFormat = "%".($maxname > 20 ? $maxname + 2: 22)."s%-27d%27d\n";
195 2 100       10 my $seqNameFormat = "%-".($maxname > 20 ? $maxname : 20)."s ";
196            
197 2         6 foreach $seq ( $aln->each_seq() ) {
198 22         38 $name = $aln->displayname($seq->get_nse());
199 22         23 $miss = $maxname - length ($name);
200 22         17 $miss += 2;
201 22         25 $pad = " " x $miss;
202              
203 22         29 $self->_print (sprintf(" Name: %s%sLen: %d Check: %d Weight: 1.00\n",$name,$pad,length $seq->seq(), Bio::SeqIO::gcg->GCG_checksum($seq)));
204              
205 22         38 $hash{$name} = $seq->seq();
206 22         32 push(@arr,$name);
207             }
208             # ok - heavy handed, but there you go.
209             #
210 2         6 $self->_print ("\n//\n\n\n");
211              
212 2         6 while( $count < $length ) {
213             # there is another block to go!
214 14         35 $self->_print (sprintf($seqCountFormat,' ',$count+1,$count+50));
215 14         14 foreach $name ( @arr ) {
216 134         209 $self->_print (sprintf($seqNameFormat,$name));
217              
218 134         78 $tempcount = $count;
219 134         95 $index = 0;
220 134   100     323 while( ($tempcount + 10 < $length) && ($index < 5) ) {
221              
222 636         1372 $self->_print (sprintf("%s ",substr($hash{$name},
223             $tempcount,10)));
224              
225 636         508 $tempcount += 10;
226 636         1562 $index++;
227             } #
228             # ok, could be the very last guy ;)
229             #
230 134 100       151 if( $index < 5) {
231             # space to print!
232             #
233 22         48 $self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
234 22         21 $tempcount += 10;
235             }
236 134         147 $self->_print ("\n");
237             }
238 14         17 $self->_print ("\n\n");
239 14         18 $count = $tempcount;
240             }
241             }
242 2 50 33     6 $self->flush if $self->_flush_on_write && defined $self->_fh;
243 2         11 return 1;
244             }
245              
246             1;