File Coverage

blib/lib/BioX/Seq/Stream/FASTA.pm
Criterion Covered Total %
statement 54 56 96.4
branch 15 18 83.3
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 76 81 93.8


line stmt bran cond sub pod time code
1             package BioX::Seq::Stream::FASTA;
2              
3 1     1   6 use strict;
  1         1  
  1         34  
4 1     1   5 use warnings;
  1         1  
  1         34  
5              
6 1     1   413 use parent qw/BioX::Seq::Stream/;
  1         259  
  1         5  
7              
8             sub _check_type {
9              
10 15     15   38 my ($class,$self) = @_;
11 15         82 return substr($self->{buffer},0,1) eq '>';
12              
13             }
14              
15             sub _init {
16              
17 10     10   16 my ($self) = @_;
18              
19             # First two bytes should not contain line ending chars
20             die "Missing ID in initial header (check file format)\n"
21 10 50       38 if ($self->{buffer} =~ /[\r\n]/);
22 10         13 my $fh = $self->{fh};
23 10         45 $self->{buffer} .= <$fh>;
24              
25             # detect line endings for text files based on first line
26             # (other solutions, such as using the :crlf layer or s///
27             # instead of chomp may be marginally more robust but slow
28             # things down too much)
29 10 100       133 if ($self->{buffer} =~ /([\r\n]{1,2})$/) {
30 9         42 $self->{rec_sep} = $1;
31             }
32             else {
33 1         19 die "Failed to detect line endings\n";
34             }
35 9         53 local $/ = $self->{rec_sep};
36              
37             # Parse initial header line
38 9         23 chomp $self->{buffer};
39 9 50       57 if ($self->{buffer} =~ /^>(\S+)\s*(.+)?$/) {
40 9         23 $self->{next_id} = $1;
41 9         49 $self->{next_desc} = $2;
42 9         17 $self->{buffer} = undef;
43             }
44             else {
45 0         0 die "Failed to parse initial FASTA header (check file format)\n";
46             }
47              
48 9         27 return;
49              
50             }
51              
52             sub next_seq {
53            
54 114     114 1 1797 my ($self) = @_;
55              
56 114         141 my $fh = $self->{fh};
57 114         137 my $id = $self->{next_id};
58 114         138 my $desc = $self->{next_desc};
59 114         140 my $seq = '';
60              
61 114         262 local $/ = $self->{rec_sep};
62            
63 114         327 my $line = <$fh>;
64              
65 114         251 while ($line) {
66              
67 692         822 chomp $line;
68              
69             # match next record header
70 692 100       1121 if ($line =~ /^>(\S+)\s*(.+)?$/) {
71              
72 108         220 $self->{next_id} = $1;
73 108         161 $self->{next_desc} = $2;
74              
75             # if not in fast mode, double-check sequence for correctness
76 108 100       173 if (! $self->{fast}) {
77             # allow any alpha character or characters possibly representing
78             # gaps or stop codons
79 59 100       293 if ($seq =~ /[^A-Za-z\-\.\*]/) {
80 1         6 die "Previous sequence record invalid\n";
81             }
82             }
83              
84 107         281 my $obj = BioX::Seq->new($seq, $id, $desc);
85 107         139 $obj->{_input_format} = 'fasta';
86 107         305 return $obj;
87              
88             }
89             else {
90 584         674 $seq .= $line;
91             }
92              
93 584         1016 $line = <$fh>;
94              
95             }
96              
97             # should only reach here on last read
98 6 100       26 if (defined $self->{next_id}) {
99 4         10 delete $self->{next_id};
100 4         6 delete $self->{next_desc};
101              
102             # if not in fast mode, double-check sequence for correctness
103 4 100       9 if (! $self->{fast}) {
104             # allow any alpha character or characters possibly representing
105             # gaps or stop codons
106 3 50       13 if ($seq =~ /[^A-Za-z\-\.\*]/) {
107 0         0 die "Previous sequence record invalid\n";
108             }
109             }
110              
111 4         14 my $obj = BioX::Seq->new($seq, $id, $desc);
112 4         7 $obj->{_input_format} = 'fasta';
113 4         15 return $obj;
114             }
115 2         13 return undef;
116              
117             }
118              
119             1;
120              
121             __END__