File Coverage

blib/lib/Compress/BGZF/Writer.pm
Criterion Covered Total %
statement 83 106 78.3
branch 11 24 45.8
condition n/a
subroutine 18 21 85.7
pod 6 6 100.0
total 118 157 75.1


line stmt bran cond sub pod time code
1             package Compress::BGZF::Writer;
2              
3 2     2   773 use strict;
  2         2  
  2         75  
4 2     2   7 use warnings;
  2         2  
  2         37  
5              
6 2     2   6 use Carp;
  2         2  
  2         82  
7 2     2   6 use Compress::Zlib;
  2         2  
  2         367  
8 2     2   7 use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/;
  2         2  
  2         192  
9              
10 2     2   9 use constant HEAD_BYTES => 18;
  2         2  
  2         112  
11 2     2   7 use constant FOOT_BYTES => 8;
  2         2  
  2         99  
12 2     2   6 use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1;
  2         2  
  2         84  
13 2     2   6 use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200';
  2         2  
  2         1442  
14              
15             ## no critic
16             # allow for filehandle tie'ing
17 3     3   6 sub TIEHANDLE { Compress::BGZF::Writer::new(@_) }
18 2015     2015   4166 sub PRINT { Compress::BGZF::Writer::_queue(@_) }
19 3     3   13 sub CLOSE { Compress::BGZF::Writer::finalize(@_) }
20             ## use critic
21              
22             sub new_filehandle {
23              
24             #-------------------------------------------------------------------------
25             # ARG 0 : (optional) output filename
26             #-------------------------------------------------------------------------
27             # RET 0 : filehandle GLOB
28             #-------------------------------------------------------------------------
29              
30 3     3 1 1507 my ($class, $fn_out) = @_;
31              
32 3         273 open my $fh, '<', undef;
33 3 50       16 tie *$fh, $class, $fn_out or croak "failed to tie filehandle";
34 3         4 return $fh;
35              
36             }
37              
38             sub new {
39              
40             #-------------------------------------------------------------------------
41             # ARG 0 : (optional) output filename
42             #-------------------------------------------------------------------------
43             # RET 0 : Compress::BGZF::Writer object
44             #-------------------------------------------------------------------------
45              
46 3     3 1 5 my ($class, $fn_out) = @_;
47 3         5 my $self = bless {}, $class;
48              
49             # initialize
50 3 50       8 if (defined $fn_out) {
51 3 50       96 open $self->{fh}, '>', $fn_out
52             or croak "Error openingn file for writing";
53             }
54             else {
55 0         0 $self->{fh} = \*STDOUT;
56             }
57 3         7 binmode $self->{fh};
58              
59 3         11 $self->{c_level} = Z_DEFAULT_COMPRESSION;
60 3         10 $self->{buffer} = ''; # contents waiting to be compressed/written
61              
62             # these variables are tracked to allow for virtual offset calculation
63 3         5 $self->{block_offset} = 0; # offset of current block in compressed data
64 3         3 $self->{buffer_offset} = 0; # offset of current pos in uncompressed block
65              
66             # these variables are tracked to allow for index creation
67 3         3 $self->{u_offset} = 0; #uncompressed file offset
68 3         5 $self->{idx} = [];
69              
70 3         10 return $self;
71              
72             }
73              
74             sub set_level {
75              
76             #-------------------------------------------------------------------------
77             # ARG 0 : compression level desired
78             #-------------------------------------------------------------------------
79             # no returns
80             #-------------------------------------------------------------------------
81              
82 0     0 1 0 my ($self, $level) = @_;
83              
84 0 0       0 croak "Invalid compression level (allowed 0-9)"
85             if ($level !~ /^\d$/);
86 0         0 $self->{c_level} = $level;
87              
88 0         0 return;
89              
90             }
91              
92             sub add_data {
93              
94             # a wrapper around the queue() function that returns the virtual offset
95             # of the chunk added
96              
97             #-------------------------------------------------------------------------
98             # ARG 0 : data chunk to queue for compression
99             #-------------------------------------------------------------------------
100             # RET 1 : virtual offset of data written
101             #-------------------------------------------------------------------------
102              
103 0     0 1 0 my ($self, $content) = @_;
104              
105 0         0 my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset};
106 0         0 $self->_queue( $content );
107              
108 0         0 return $vo;
109              
110             }
111              
112             sub _queue {
113              
114             #-------------------------------------------------------------------------
115             # ARG 0 : data chunk to queue for compression
116             #-------------------------------------------------------------------------
117             # no returns
118             #-------------------------------------------------------------------------
119              
120 2015     2015   1456 my ($self, $content) = @_;
121              
122 2015         1598 $self->{buffer} .= $content;
123              
124             # compress/write in 64k chunks
125 2015         2391 while (length($self->{buffer}) >= FLUSH_SIZE) {
126              
127 5         79 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
128 5         9 my $unwritten = $self->_write_block($chunk);
129             $self->{buffer} = $unwritten . $self->{buffer}
130 5 100       31 if ( length($unwritten) );
131              
132             }
133 2015         1333 $self->{buffer_offset} = length $self->{buffer};
134              
135 2015         2796 return;
136            
137             }
138              
139             sub _write_block {
140              
141             #-------------------------------------------------------------------------
142             # ARG 0 : independent data block to compress
143             #-------------------------------------------------------------------------
144             # RET 0 : remaining data that wasn't written
145             #-------------------------------------------------------------------------
146              
147 8     8   10 my ($self, $chunk) = @_;
148              
149 8         8 my $chunk_len = length($chunk);
150              
151             # payload is compressed with DEFLATE
152             rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level})
153 8 50       27 or croak "deflate failed: $RawDeflateError\n";
154              
155             # very rarely, a DEFLATEd string may be larger than input. This can result
156             # in a block size > 2**16, which violates the BGZF specification and
157             # causes corruption of the BC field. Fix those edge cases here (somewhat
158             # slow but shouldn't happen often) and send the rest back to the buffer
159 8         22271 my $trimmed = '';
160 8         17 while (length($payload) > FLUSH_SIZE) {
161 4         9 my $trim_len = int( $chunk_len * 0.05 );
162 4         52 $trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed;
163             rawdeflate(\$chunk, \$payload, -Level => $self->{c_level})
164 4 50       15 or croak "deflate failed: $RawDeflateError\n";
165 4         10367 $chunk_len = length($chunk);
166             }
167              
168 8         10 my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES;
169              
170 8 50       13 croak "Internal error: block size > 65536" if ($block_size > 2**16);
171              
172             # payload is wrapped with appropriate headers and footers
173 8 50       7 print { $self->{fh} } pack(
  8         1985  
174             "a*va*VV",
175             BGZF_HEADER,
176             $block_size - 1,
177             $payload,
178             crc32($chunk),
179             $chunk_len,
180             ) or croak "Error writing compressed block";
181              
182             # increment the current offsets
183 8         14 $self->{block_offset} += $block_size;
184 8         10 $self->{u_offset} += $chunk_len;
185 8         5 push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}];
  8         23  
186              
187 8         16 return $trimmed;
188              
189             }
190              
191             sub finalize {
192              
193             #-------------------------------------------------------------------------
194             # no arguments
195             #-------------------------------------------------------------------------
196             # no returns
197             #-------------------------------------------------------------------------
198              
199 6     6 1 6 my ($self) = @_;
200              
201 6         17 while (length($self->{buffer}) > 0) {
202              
203             croak "file closed but buffer not empty"
204 3 50       8 if ( ! defined fileno($self->{fh}) );
205              
206 3         17 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
207 3         5 my $unwritten = $self->_write_block($chunk);
208             $self->{buffer} = $unwritten . $self->{buffer}
209 3 50       11 if ( length($unwritten) );
210              
211             }
212 6         160 close $self->{fh};
213              
214 6         13 return;
215              
216             }
217              
218             sub write_index {
219              
220             #-------------------------------------------------------------------------
221             # ARG 0 : index output filename
222             #-------------------------------------------------------------------------
223             # No returns
224             #-------------------------------------------------------------------------
225              
226 0     0 1 0 my ($self, $fn_out) = @_;
227              
228 0         0 $self->finalize(); # always clear remaining buffer to fully populate index
229 0 0       0 croak "missing index output filename" if (! defined $fn_out);
230 0         0 open my $fh_out, '>:raw', $fn_out;
231              
232 0         0 my @offsets = @{ $self->{idx} };
  0         0  
233 0         0 pop @offsets; # last offset is EOF
234 0         0 print {$fh_out} pack('Q<', scalar(@offsets));
  0         0  
235 0         0 print {$fh_out} pack('Q
  0         0  
  0         0  
236              
237 0         0 close $fh_out;
238 0         0 return;
239              
240             }
241              
242             sub DESTROY {
243              
244 3     3   554 my ($self) = @_;
245              
246             # make sure we call finalize in case the caller forgot
247 3         6 $self->finalize();
248              
249 3         100 return;
250              
251             }
252              
253             1;
254              
255              
256             __END__