File Coverage

Bio/AlignIO/fasta.pm
Criterion Covered Total %
statement 80 85 94.1
branch 21 30 70.0
condition 8 20 40.0
subroutine 7 7 100.0
pod 3 3 100.0
total 119 145 82.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::fasta
3             #
4             # Copyright Peter Schattner
5             #
6             # You may distribute this module under the same terms as perl itself
7             # POD documentation - main docs before the code
8              
9             =head1 NAME
10              
11             Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
12              
13             =head1 SYNOPSIS
14              
15             Do not use this module directly. Use it via the L
16             class.
17              
18             =head1 DESCRIPTION
19              
20             This object can transform L objects to and from
21             fasta flat files. This is for the fasta alignment format, not
22             for the FastA sequence analysis program. To process the alignments from
23             FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
24              
25             =head1 FEEDBACK
26              
27             =head2 Support
28              
29             Please direct usage questions or support issues to the mailing list:
30              
31             I
32              
33             rather than to the module maintainer directly. Many experienced and
34             reponsive experts will be able look at the problem and quickly
35             address it. Please include a thorough description of the problem
36             with code and data examples if at all possible.
37              
38             =head2 Reporting Bugs
39              
40             Report bugs to the Bioperl bug tracking system to help us keep track
41             the bugs and their resolution. Bug reports can be submitted via the
42             web:
43              
44             https://github.com/bioperl/bioperl-live/issues
45              
46             =head1 AUTHORS
47              
48             Peter Schattner
49              
50             =head1 APPENDIX
51              
52             The rest of the documentation details each of the object
53             methods. Internal methods are usually preceded with a _
54              
55             =cut
56              
57             # Let the code begin...
58              
59             package Bio::AlignIO::fasta;
60 6     6   426 use strict;
  6         9  
  6         171  
61              
62 6     6   19 use base qw(Bio::AlignIO);
  6         8  
  6         714  
63             our $WIDTH = 60;
64 6     6   25 use Bio::LocatableSeq;
  6         8  
  6         3672  
65              
66             =head2 next_aln
67              
68             Title : next_aln
69             Usage : $aln = $stream->next_aln
70             Function: returns the next alignment in the stream.
71             Returns : Bio::Align::AlignI object - returns 0 on end of file
72             or on error
73             Args : -width => optional argument to specify the width sequence
74             will be written (60 chars by default)
75              
76             See L
77              
78             =cut
79              
80             sub next_aln {
81 10     10 1 30 my $self = shift;
82 10         39 my ($width) = $self->_rearrange( [qw(WIDTH)], @_ );
83 10   33     53 $self->width( $width || $WIDTH );
84              
85 10         11 my ($start, $end, $name, $seqname, $seq, $seqchar,
86             $entry, $tempname, $tempdesc, %align, $desc, $maxlen
87             );
88 10         55 my $aln = Bio::SimpleAlign->new();
89              
90 10         50 while ( defined( $entry = $self->_readline ) ) {
91 540         429 chomp $entry;
92 540 100       878 if ( $entry =~ s/^>\s*(\S+)\s*// ) {
93 86         152 $tempname = $1;
94 86         67 chomp($entry);
95 86         60 $tempdesc = $entry;
96 86 100       125 if ( defined $name ) {
97 76         403 $seqchar =~ s/\s//g;
98 76         75 $seqname = $name;
99 76         67 $start = 1;
100 76         124 $end = $self->_get_len($seqchar);
101 76         248 $seq = Bio::LocatableSeq->new(
102             -seq => $seqchar,
103             -display_id => $seqname,
104             -description => $desc,
105             -start => $start,
106             -end => $end,
107             -alphabet => $self->alphabet,
108             );
109 76         204 $aln->add_seq($seq);
110 76         195 $self->debug("Reading $seqname\n");
111             }
112 86         91 $desc = $tempdesc;
113 86         75 $name = $tempname;
114 86         72 $desc = $entry;
115 86         69 $seqchar = "";
116 86         205 next;
117             }
118              
119             # removed redundant symbol validation
120             # this is already done in Bio::PrimarySeq
121 454         770 $seqchar .= $entry;
122             }
123              
124             # Next two lines are to silence warnings that
125             # otherwise occur at EOF when using <$fh>
126 10 50       35 $name = "" if ( !defined $name );
127 10 50       25 $seqchar = "" if ( !defined $seqchar );
128 10         41 $seqchar =~ s/\s//g;
129              
130             # Put away last name and sequence
131 10 100       48 if ( $name =~ /(\S+\/(\d+)-(\d+))$/ ) {
132 5         12 $seqname = $1;
133 5         9 $start = $2;
134 5         9 $end = $3;
135             }
136             else {
137 5         8 $seqname = $name;
138 5         8 $start = 1;
139 5         13 $end = $self->_get_len($seqchar);
140             }
141              
142             # This logic now also reads empty lines at the
143             # end of the file. Skip this is seqchar and seqname is null
144 10 50 33     43 unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
145 10         45 $seq = Bio::LocatableSeq->new(
146             -seq => $seqchar,
147             -display_id => $seqname,
148             -description => $desc,
149             -start => $start,
150             -end => $end,
151             -alphabet => $self->alphabet,
152             );
153 10         39 $aln->add_seq($seq);
154 10         36 $self->debug("Reading $seqname\n");
155             }
156 10         38 my $alnlen = $aln->length;
157 10         27 foreach my $seq ( $aln->each_seq ) {
158 86 100       105 if ( $seq->length < $alnlen ) {
159 39         52 my ($diff) = ( $alnlen - $seq->length );
160 39         53 $seq->seq( $seq->seq() . "-" x $diff );
161             }
162             }
163              
164             # no sequences means empty alignment (possible EOF)
165 10 50       38 return $aln if $aln->num_sequences;
166 0         0 return;
167             }
168              
169              
170             =head2 write_aln
171              
172             Title : write_aln
173             Usage : $stream->write_aln(@aln)
174             Function: writes the $aln object into the stream in fasta format
175             Returns : 1 for success and 0 for error
176             Args : L object
177              
178             See L
179              
180             =cut
181              
182             sub write_aln {
183 2     2 1 9 my ($self,@aln) = @_;
184 2         7 my $width = $self->width;
185 2         3 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
186              
187 2         4 foreach my $aln (@aln) {
188 2 50 33     14 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
189 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
190 0         0 next;
191             }
192 2 50       11 if( $self->force_displayname_flat ) {
193 0         0 $aln->set_displayname_flat(1);
194             }
195 2         6 foreach $rseq ( $aln->each_seq() ) {
196 17         34 $name = $aln->displayname($rseq->get_nse());
197 17         24 $seq = $rseq->seq();
198 17   100     26 $desc = $rseq->description || '';
199 17 100       29 $desc = ' '.$desc if $desc;
200 17 50       49 $self->_print (">$name$desc\n") or return;
201 17         14 $count = 0;
202 17         10 $length = length($seq);
203 17 50 33     49 if(defined $seq && $length > 0) {
204 17         217 $seq =~ s/(.{1,$width})/$1\n/g;
205             } else {
206 0         0 $seq = "\n";
207             }
208 17         31 $self->_print($seq);
209             }
210             }
211 2 50 33     6 $self->flush if $self->_flush_on_write && defined $self->_fh;
212 2         7 return 1;
213             }
214              
215             =head2 _get_len
216              
217             Title : _get_len
218             Usage :
219             Function: determine number of alphabetic chars
220             Returns : integer
221             Args : sequence string
222              
223             =cut
224              
225             sub _get_len {
226 81     81   92 my ($self,$seq) = @_;
227 81         99 my $chars = $Bio::LocatableSeq::GAP_SYMBOLS.$Bio::LocatableSeq::FRAMESHIFT_SYMBOLS;
228 81         763 $seq =~ s{[$chars]+}{}gi;
229 81         126 return CORE::length($seq);
230             }
231              
232             =head2 width
233              
234             Title : width
235             Usage : $obj->width($newwidth)
236             $width = $obj->width;
237             Function: Get/set width of alignment
238             Returns : integer value of width
239             Args : on set, new value (a scalar or undef, optional)
240              
241              
242             =cut
243              
244             sub width{
245 12     12 1 15 my $self = shift;
246              
247 12 100       40 return $self->{'_width'} = shift if @_;
248 2   33     10 return $self->{'_width'} || $WIDTH;
249             }
250              
251             1;