File Coverage

blib/lib/BioX/Seq/Stream/FASTQ.pm
Criterion Covered Total %
statement 44 44 100.0
branch 9 12 75.0
condition 6 10 60.0
subroutine 6 6 100.0
pod 1 1 100.0
total 66 73 90.4


line stmt bran cond sub pod time code
1             package BioX::Seq::Stream::FASTQ;
2              
3 1     1   7 use strict;
  1         2  
  1         28  
4 1     1   5 use warnings;
  1         2  
  1         27  
5              
6 1     1   5 use parent qw/BioX::Seq::Stream/;
  1         2  
  1         4  
7              
8             sub _check_type {
9              
10 15     15   42 my ($class,$self) = @_;
11 15         78 return substr($self->{buffer},0,1) eq '@';
12              
13             }
14              
15             sub _init {
16              
17 3     3   7 my ($self) = @_;
18              
19             # First two bytes should not contain line ending chars
20             die "Missing ID in initial header (check file format)\n"
21 3 50       24 if ($self->{buffer} =~ /[\r\n]/);
22 3         8 my $fh = $self->{fh};
23 3         47 $self->{buffer} .= <$fh>;
24              
25             # detect line endings for text files based on first line
26             # (other solutions, such as using the :crlf layer or s///
27             # instead of chomp may be marginally more robust but slow
28             # things down too much)
29 3 100       184 if ($self->{buffer} =~ /([\r\n]{1,2})$/) {
30 2         13 $self->{rec_sep} = $1;
31             }
32             else {
33 1         25 die "Failed to detect line endings\n";
34             }
35              
36 2         15 return;
37            
38             }
39              
40             sub next_seq {
41            
42 4     4 1 1306 my ($self) = @_;
43 4         9 my $fh = $self->{fh};
44              
45 4         27 local $/ = $self->{rec_sep};
46              
47 4   66     29 my $line = $self->{buffer} // <$fh>;
48 4 50       16 return undef if (! defined $line);
49 4         14 chomp $line;
50              
51 4         36 my ($id, $desc) = ($line =~ /^\@(\S+)\s*(.+)?$/);
52 4 50       15 die "Bad FASTQ ID line\n" if (! defined $id);
53              
54             # seq and qual can be multiline (although rare)
55             # qual is tricky since it can contain '@' but we compare to the
56             # sequence length to know when to stop parsing (must be equal lengths)
57 4   50     18 my $seq = <$fh> // die "Bad or missing FASTQ sequence";
58 4         106 chomp $seq;
59              
60             SEQ:
61 4         14 while (my $line = <$fh>) {
62 5         97 chomp $line;
63 5 100       23 last SEQ if ($line =~ /^\+/);
64 1         4 $seq .= $line;
65             }
66              
67 4         9 my $seq_len = length $seq;
68              
69 4   50     12 QUAL:
70             my $qual = <$fh> // die "Bad or missing FASTQ format";
71 4         90 chomp $qual;
72              
73 4   66     26 while ( (length($qual) < $seq_len) && defined (my $line = <$fh>) ) {
74 2         4 chomp $line;
75 2         7 $qual .= $line;
76             }
77 4 100       23 die "Bad FASTQ quality length" if ($seq_len != length($qual));
78              
79 3         6 $self->{buffer} = undef;
80              
81 3         33 my $obj = BioX::Seq->new($seq, $id, $desc, $qual);
82 3         7 $obj->{_input_format} = 'fastq';
83 3         27 return $obj;
84              
85             }
86              
87             1;
88              
89             __END__