File Coverage

blib/lib/BioX/Seq/Stream/FASTA.pm
Criterion Covered Total %
statement 54 56 96.4
branch 15 18 83.3
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 76 81 93.8


line stmt bran cond sub pod time code
1             package BioX::Seq::Stream::FASTA;
2              
3 1     1   6 use strict;
  1         2  
  1         41  
4 1     1   6 use warnings;
  1         2  
  1         30  
5              
6 1     1   591 use parent qw/BioX::Seq::Stream/;
  1         336  
  1         5  
7              
8             sub _check_type {
9              
10 15     15   49 my ($class,$self) = @_;
11 15         114 return substr($self->{buffer},0,1) eq '>';
12              
13             }
14              
15             sub _init {
16              
17 10     10   26 my ($self) = @_;
18              
19             # First two bytes should not contain line ending chars
20             die "Missing ID in initial header (check file format)\n"
21 10 50       46 if ($self->{buffer} =~ /[\r\n]/);
22 10         19 my $fh = $self->{fh};
23 10         58 $self->{buffer} .= <$fh>;
24              
25             # detect line endings for text files based on first line
26             # (other solutions, such as using the :crlf layer or s///
27             # instead of chomp may be marginally more robust but slow
28             # things down too much)
29 10 100       177 if ($self->{buffer} =~ /([\r\n]{1,2})$/) {
30 9         58 $self->{rec_sep} = $1;
31             }
32             else {
33 1         22 die "Failed to detect line endings\n";
34             }
35 9         87 local $/ = $self->{rec_sep};
36              
37             # Parse initial header line
38 9         27 chomp $self->{buffer};
39 9 50       52 if ($self->{buffer} =~ /^>(\S+)\s*(.+)?$/) {
40 9         25 $self->{next_id} = $1;
41 9         44 $self->{next_desc} = $2;
42 9         22 $self->{buffer} = undef;
43             }
44             else {
45 0         0 die "Failed to parse initial FASTA header (check file format)\n";
46             }
47              
48 9         45 return;
49              
50             }
51              
52             sub next_seq {
53            
54 114     114 1 2590 my ($self) = @_;
55              
56 114         195 my $fh = $self->{fh};
57 114         219 my $id = $self->{next_id};
58 114         274 my $desc = $self->{next_desc};
59 114         217 my $seq = '';
60              
61 114         448 local $/ = $self->{rec_sep};
62            
63 114         507 my $line = <$fh>;
64              
65 114         389 while ($line) {
66              
67 692         1285 chomp $line;
68              
69             # match next record header
70 692 100       1666 if ($line =~ /^>(\S+)\s*(.+)?$/) {
71              
72 108         306 $self->{next_id} = $1;
73 108         253 $self->{next_desc} = $2;
74              
75             # if not in fast mode, double-check sequence for correctness
76 108 100       270 if (! $self->{fast}) {
77             # allow any alpha character or characters possibly representing
78             # gaps or stop codons
79 59 100       565 if ($seq =~ /[^A-Za-z\-\.\*]/) {
80 1         7 die "Previous sequence record invalid\n";
81             }
82             }
83              
84 107         406 my $obj = BioX::Seq->new($seq, $id, $desc);
85 107         215 $obj->{_input_format} = 'fasta';
86 107         474 return $obj;
87              
88             }
89             else {
90 584         1014 $seq .= $line;
91             }
92              
93 584         1540 $line = <$fh>;
94              
95             }
96              
97             # should only reach here on last read
98 6 100       38 if (defined $self->{next_id}) {
99 4         13 delete $self->{next_id};
100 4         19 delete $self->{next_desc};
101              
102             # if not in fast mode, double-check sequence for correctness
103 4 100       13 if (! $self->{fast}) {
104             # allow any alpha character or characters possibly representing
105             # gaps or stop codons
106 3 50       21 if ($seq =~ /[^A-Za-z\-\.\*]/) {
107 0         0 die "Previous sequence record invalid\n";
108             }
109             }
110              
111 4         19 my $obj = BioX::Seq->new($seq, $id, $desc);
112 4         13 $obj->{_input_format} = 'fasta';
113 4         22 return $obj;
114             }
115 2         24 return undef;
116              
117             }
118              
119             1;
120              
121             __END__