File Coverage

Bio/AlignIO/fasta.pm
Criterion Covered Total %
statement 80 85 94.1
branch 21 30 70.0
condition 8 20 40.0
subroutine 7 7 100.0
pod 3 3 100.0
total 119 145 82.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::fasta
3             #
4             # Copyright Peter Schattner
5             #
6             # You may distribute this module under the same terms as perl itself
7             # POD documentation - main docs before the code
8              
9             =head1 NAME
10              
11             Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
12              
13             =head1 SYNOPSIS
14              
15             Do not use this module directly. Use it via the L
16             class.
17              
18             =head1 DESCRIPTION
19              
20             This object can transform L objects to and from
21             fasta flat files. This is for the fasta alignment format, not
22             for the FastA sequence analysis program. To process the alignments from
23             FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
24              
25             =head1 FEEDBACK
26              
27             =head2 Support
28              
29             Please direct usage questions or support issues to the mailing list:
30              
31             I
32              
33             rather than to the module maintainer directly. Many experienced and
34             reponsive experts will be able look at the problem and quickly
35             address it. Please include a thorough description of the problem
36             with code and data examples if at all possible.
37              
38             =head2 Reporting Bugs
39              
40             Report bugs to the Bioperl bug tracking system to help us keep track
41             the bugs and their resolution. Bug reports can be submitted via the
42             web:
43              
44             https://github.com/bioperl/bioperl-live/issues
45              
46             =head1 AUTHORS
47              
48             Peter Schattner
49              
50             =head1 APPENDIX
51              
52             The rest of the documentation details each of the object
53             methods. Internal methods are usually preceded with a _
54              
55             =cut
56              
57             # Let the code begin...
58              
59             package Bio::AlignIO::fasta;
60 6     6   427 use strict;
  6         42  
  6         211  
61              
62 6     6   31 use base qw(Bio::AlignIO);
  6         9  
  6         718  
63             our $WIDTH = 60;
64 6     6   34 use Bio::LocatableSeq;
  6         10  
  6         3828  
65              
66             =head2 next_aln
67              
68             Title : next_aln
69             Usage : $aln = $stream->next_aln
70             Function: returns the next alignment in the stream.
71             Returns : Bio::Align::AlignI object - returns 0 on end of file
72             or on error
73             Args : -width => optional argument to specify the width sequence
74             will be written (60 chars by default)
75              
76             See L
77              
78             =cut
79              
80             sub next_aln {
81 10     10 1 45 my $self = shift;
82 10         37 my ($width) = $self->_rearrange( [qw(WIDTH)], @_ );
83 10   33     70 $self->width( $width || $WIDTH );
84              
85 10         18 my ($start, $end, $name, $seqname, $seq, $seqchar,
86             $entry, $tempname, $tempdesc, %align, $desc, $maxlen
87             );
88 10         69 my $aln = Bio::SimpleAlign->new();
89              
90 10         87 while ( defined( $entry = $self->_readline ) ) {
91 540         592 chomp $entry;
92 540 100       982 if ( $entry =~ s/^>\s*(\S+)\s*// ) {
93 86         198 $tempname = $1;
94 86         99 chomp($entry);
95 86         89 $tempdesc = $entry;
96 86 100       129 if ( defined $name ) {
97 76         264 $seqchar =~ s/\s//g;
98 76         147 $seqname = $name;
99 76         81 $start = 1;
100 76         172 $end = $self->_get_len($seqchar);
101 76         204 $seq = Bio::LocatableSeq->new(
102             -seq => $seqchar,
103             -display_id => $seqname,
104             -description => $desc,
105             -start => $start,
106             -end => $end,
107             -alphabet => $self->alphabet,
108             );
109 76         270 $aln->add_seq($seq);
110 76         256 $self->debug("Reading $seqname\n");
111             }
112 86         112 $desc = $tempdesc;
113 86         108 $name = $tempname;
114 86         92 $desc = $entry;
115 86         102 $seqchar = "";
116 86         225 next;
117             }
118              
119             # removed redundant symbol validation
120             # this is already done in Bio::PrimarySeq
121 454         907 $seqchar .= $entry;
122             }
123              
124             # Next two lines are to silence warnings that
125             # otherwise occur at EOF when using <$fh>
126 10 50       48 $name = "" if ( !defined $name );
127 10 50       25 $seqchar = "" if ( !defined $seqchar );
128 10         49 $seqchar =~ s/\s//g;
129              
130             # Put away last name and sequence
131 10 100       51 if ( $name =~ /(\S+\/(\d+)-(\d+))$/ ) {
132 5         14 $seqname = $1;
133 5         14 $start = $2;
134 5         13 $end = $3;
135             }
136             else {
137 5         7 $seqname = $name;
138 5         8 $start = 1;
139 5         12 $end = $self->_get_len($seqchar);
140             }
141              
142             # This logic now also reads empty lines at the
143             # end of the file. Skip this is seqchar and seqname is null
144 10 50 33     34 unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
145 10         39 $seq = Bio::LocatableSeq->new(
146             -seq => $seqchar,
147             -display_id => $seqname,
148             -description => $desc,
149             -start => $start,
150             -end => $end,
151             -alphabet => $self->alphabet,
152             );
153 10         42 $aln->add_seq($seq);
154 10         34 $self->debug("Reading $seqname\n");
155             }
156 10         42 my $alnlen = $aln->length;
157 10         27 foreach my $seq ( $aln->each_seq ) {
158 86 100       124 if ( $seq->length < $alnlen ) {
159 39         63 my ($diff) = ( $alnlen - $seq->length );
160 39         63 $seq->seq( $seq->seq() . "-" x $diff );
161             }
162             }
163              
164             # no sequences means empty alignment (possible EOF)
165 10 50       47 return $aln if $aln->num_sequences;
166 0         0 return;
167             }
168              
169              
170             =head2 write_aln
171              
172             Title : write_aln
173             Usage : $stream->write_aln(@aln)
174             Function: writes the $aln object into the stream in fasta format
175             Returns : 1 for success and 0 for error
176             Args : L object
177              
178             See L
179              
180             =cut
181              
182             sub write_aln {
183 2     2 1 9 my ($self,@aln) = @_;
184 2         6 my $width = $self->width;
185 2         6 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
186              
187 2         5 foreach my $aln (@aln) {
188 2 50 33     13 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
189 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
190 0         0 next;
191             }
192 2 50       14 if( $self->force_displayname_flat ) {
193 0         0 $aln->set_displayname_flat(1);
194             }
195 2         7 foreach $rseq ( $aln->each_seq() ) {
196 17         34 $name = $aln->displayname($rseq->get_nse());
197 17         37 $seq = $rseq->seq();
198 17   100     33 $desc = $rseq->description || '';
199 17 100       33 $desc = ' '.$desc if $desc;
200 17 50       111 $self->_print (">$name$desc\n") or return;
201 17         22 $count = 0;
202 17         20 $length = length($seq);
203 17 50 33     44 if(defined $seq && $length > 0) {
204 17         199 $seq =~ s/(.{1,$width})/$1\n/g;
205             } else {
206 0         0 $seq = "\n";
207             }
208 17         37 $self->_print($seq);
209             }
210             }
211 2 50 33     6 $self->flush if $self->_flush_on_write && defined $self->_fh;
212 2         8 return 1;
213             }
214              
215             =head2 _get_len
216              
217             Title : _get_len
218             Usage :
219             Function: determine number of alphabetic chars
220             Returns : integer
221             Args : sequence string
222              
223             =cut
224              
225             sub _get_len {
226 81     81   143 my ($self,$seq) = @_;
227 81         126 my $chars = $Bio::LocatableSeq::GAP_SYMBOLS.$Bio::LocatableSeq::FRAMESHIFT_SYMBOLS;
228 81         840 $seq =~ s{[$chars]+}{}gi;
229 81         160 return CORE::length($seq);
230             }
231              
232             =head2 width
233              
234             Title : width
235             Usage : $obj->width($newwidth)
236             $width = $obj->width;
237             Function: Get/set width of alignment
238             Returns : integer value of width
239             Args : on set, new value (a scalar or undef, optional)
240              
241              
242             =cut
243              
244             sub width{
245 12     12 1 21 my $self = shift;
246              
247 12 100       46 return $self->{'_width'} = shift if @_;
248 2   33     9 return $self->{'_width'} || $WIDTH;
249             }
250              
251             1;