File Coverage

Bio/AlignIO/msf.pm
Criterion Covered Total %
statement 88 98 89.8
branch 20 28 71.4
condition 5 9 55.5
subroutine 8 8 100.0
pod 2 2 100.0
total 123 145 84.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::msf
3             # based on the Bio::SeqIO::msf module
4             # by Ewan Birney
5             # and Lincoln Stein
6             #
7             # and the SimpleAlign.pm module of Ewan Birney
8             #
9             # Copyright Peter Schattner
10             #
11             # You may distribute this module under the same terms as perl itself
12             # _history
13             # September 5, 2000
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::AlignIO::msf - msf sequence input/output stream
19              
20             =head1 SYNOPSIS
21              
22             Do not use this module directly. Use it via the L class.
23              
24             =head1 DESCRIPTION
25              
26             This object can transform L objects to and from msf
27             flat file databases.
28              
29             =head1 FEEDBACK
30              
31             =head2 Support
32              
33             Please direct usage questions or support issues to the mailing list:
34              
35             I
36              
37             rather than to the module maintainer directly. Many experienced and
38             reponsive experts will be able look at the problem and quickly
39             address it. Please include a thorough description of the problem
40             with code and data examples if at all possible.
41              
42             =head2 Reporting Bugs
43              
44             Report bugs to the Bioperl bug tracking system to help us keep track
45             the bugs and their resolution. Bug reports can be submitted via the
46             web:
47              
48             https://github.com/bioperl/bioperl-live/issues
49              
50             =head1 AUTHORS - Peter Schattner
51              
52             Email: schattner@alum.mit.edu
53              
54              
55             =head1 APPENDIX
56              
57             The rest of the documentation details each of the object
58             methods. Internal methods are usually preceded with a _
59              
60             =cut
61              
62             # Let the code begin...
63              
64             package Bio::AlignIO::msf;
65 3     3   400 use vars qw(%valid_type);
  3         5  
  3         132  
66 3     3   12 use strict;
  3         4  
  3         70  
67              
68 3     3   600 use Bio::SeqIO::gcg; # for GCG_checksum()
  3         6  
  3         96  
69 3     3   501 use Bio::SimpleAlign;
  3         5  
  3         92  
70              
71 3     3   18 use base qw(Bio::AlignIO);
  3         6  
  3         677  
72              
73             BEGIN {
74 3     3   1852 %valid_type = qw( dna N rna N protein P );
75             }
76              
77             =head2 next_aln
78              
79             Title : next_aln
80             Usage : $aln = $stream->next_aln()
81             Function: returns the next alignment in the stream. Tries to read *all* MSF
82             It reads all non whitespace characters in the alignment
83             area. For MSFs with weird gaps (eg ~~~) map them by using
84             $aln->map_chars('~','-')
85             Returns : Bio::Align::AlignI object
86             Args : NONE
87              
88             =cut
89              
90             sub next_aln {
91 3     3 1 10 my $self = shift;
92 3         4 my $entry;
93 3         5 my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
94              
95 3         25 my $aln = Bio::SimpleAlign->new(-source => 'gcg' );
96              
97 3         23 while( $entry = $self->_readline) {
98 63 100       94 $entry =~ m{//} && last; # move to alignment section
99 60 100       144 $entry =~ /Name:\s+(\S+)/ && do { $name = $1;
  48         54  
100 48         63 $hash{$name} = ""; # blank line
101 48         83 push(@names,$name); # we need it ordered!
102             };
103             # otherwise - skip
104             }
105              
106             # alignment section
107              
108 3         8 while( $entry = $self->_readline) {
109 276 50       478 next if ( $entry =~ /^\s+(\d+)/ ) ;
110 276 100       636 $entry =~ /^\s*(\S+)\s+(.*)$/ && do {
111 240         255 $name = $1;
112 240         176 $str = $2;
113 240 50       329 if( ! exists $hash{$name} ) {
114 0         0 $self->throw("$name exists as an alignment line but not in the header. Not confident of what is going on!");
115             }
116 240         725 $str =~ s/\s//g;
117 240         194 $str =~ s/~/-/g;
118 240         492 $hash{$name} .= $str;
119             };
120             }
121              
122 3 50       14 return if @names < 1;
123              
124             # now got this as a name - sequence hash. Let's make some sequences!
125              
126 3         8 for $name ( @names ) {
127 48 50       216 if( $name =~ m{(\S+)/(\d+)-(\d+)} ) {
128 48         83 $seqname = $1;
129 48         46 $start = $2;
130 48         51 $end = $3;
131             } else {
132 0         0 $seqname = $name;
133 0         0 $start = 1;
134 0         0 $str = $hash{$name};
135 0         0 $str =~ s/[^0-9A-Za-z$Bio::LocatableSeq::OTHER_SYMBOLS]//g;
136              
137 0         0 $end = length($str);
138             }
139              
140 48         126 $seq = Bio::LocatableSeq->new('-seq' => $hash{$name},
141             '-display_id' => $seqname,
142             '-start' => $start,
143             '-end' => $end,
144             '-alphabet' => $self->alphabet,
145             );
146 48         104 $aln->add_seq($seq);
147              
148             # If $end <= 0, we have either reached the end of
149             # file in <> or we have encountered some other error
150              
151             }
152 3 50       15 return $aln if $aln->num_sequences;
153 0         0 return;
154             }
155              
156              
157             =head2 write_aln
158              
159             Title : write_aln
160             Usage : $stream->write_aln(@aln)
161             Function: writes the $aln object into the stream in MSF format
162             Sequence type of the alignment is determined by the first sequence.
163             Returns : 1 for success and 0 for error
164             Args : Bio::Align::AlignI object
165              
166              
167             =cut
168              
169             sub write_aln {
170 2     2 1 9 my ($self,@aln) = @_;
171 2         5 my $msftag;
172             my $type;
173 2         3 my $count = 0;
174 2         4 my $maxname;
175 2         4 my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index);
176 2         4 foreach my $aln (@aln) {
177 2 50 33     17 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
178 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
179 0         0 next;
180             }
181 2         330 $date = localtime(time);
182 2         5 $msftag = "MSF";
183 2         12 $type = $valid_type{$aln->get_seq_by_pos(1)->alphabet};
184 2         9 $maxname = $aln->maxdisplayname_length();
185 2         10 $length = $aln->length();
186 2         30 $name = $aln->id();
187 2 50       8 if( !defined $name ) {
188 0         0 $name = "Align";
189             }
190              
191 2         8 $self->_print (sprintf("\n%s MSF: %d Type: %s %s Check: 00 ..\n\n",
192             $name, $aln->num_sequences, $type, $date));
193              
194 2 100       12 my $seqCountFormat = "%".($maxname > 20 ? $maxname + 2: 22)."s%-27d%27d\n";
195 2 100       8 my $seqNameFormat = "%-".($maxname > 20 ? $maxname : 20)."s ";
196            
197 2         8 foreach $seq ( $aln->each_seq() ) {
198 22         40 $name = $aln->displayname($seq->get_nse());
199 22         29 $miss = $maxname - length ($name);
200 22         18 $miss += 2;
201 22         22 $pad = " " x $miss;
202              
203 22         43 $self->_print (sprintf(" Name: %s%sLen: %d Check: %d Weight: 1.00\n",$name,$pad,length $seq->seq(), Bio::SeqIO::gcg->GCG_checksum($seq)));
204              
205 22         38 $hash{$name} = $seq->seq();
206 22         42 push(@arr,$name);
207             }
208             # ok - heavy handed, but there you go.
209             #
210 2         11 $self->_print ("\n//\n\n\n");
211              
212 2         8 while( $count < $length ) {
213             # there is another block to go!
214 14         44 $self->_print (sprintf($seqCountFormat,' ',$count+1,$count+50));
215 14         16 foreach $name ( @arr ) {
216 134         231 $self->_print (sprintf($seqNameFormat,$name));
217              
218 134         83 $tempcount = $count;
219 134         85 $index = 0;
220 134   100     351 while( ($tempcount + 10 < $length) && ($index < 5) ) {
221              
222 636         1401 $self->_print (sprintf("%s ",substr($hash{$name},
223             $tempcount,10)));
224              
225 636         574 $tempcount += 10;
226 636         1575 $index++;
227             } #
228             # ok, could be the very last guy ;)
229             #
230 134 100       152 if( $index < 5) {
231             # space to print!
232             #
233 22         57 $self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
234 22         21 $tempcount += 10;
235             }
236 134         146 $self->_print ("\n");
237             }
238 14         20 $self->_print ("\n\n");
239 14         24 $count = $tempcount;
240             }
241             }
242 2 50 33     14 $self->flush if $self->_flush_on_write && defined $self->_fh;
243 2         23 return 1;
244             }
245              
246             1;