File Coverage

blib/lib/Compress/BGZF/Writer.pm
Criterion Covered Total %
statement 83 106 78.3
branch 11 24 45.8
condition n/a
subroutine 18 21 85.7
pod 6 6 100.0
total 118 157 75.1


line stmt bran cond sub pod time code
1             package Compress::BGZF::Writer;
2              
3 2     2   801 use strict;
  2         2  
  2         44  
4 2     2   6 use warnings;
  2         2  
  2         36  
5              
6 2     2   6 use Carp;
  2         2  
  2         80  
7 2     2   7 use Compress::Zlib;
  2         1  
  2         415  
8 2     2   9 use IO::Compress::RawDeflate qw/rawdeflate $RawDeflateError/;
  2         2  
  2         216  
9              
10 2     2   6 use constant HEAD_BYTES => 18;
  2         2  
  2         91  
11 2     2   6 use constant FOOT_BYTES => 8;
  2         2  
  2         92  
12 2     2   7 use constant FLUSH_SIZE => 2**16 - HEAD_BYTES - FOOT_BYTES - 1;
  2         1  
  2         91  
13 2     2   7 use constant BGZF_HEADER => pack "H*", '1f8b08040000000000ff060042430200';
  2         2  
  2         1431  
14              
15             ## no critic
16             # allow for filehandle tie'ing
17 3     3   8 sub TIEHANDLE { Compress::BGZF::Writer::new(@_) }
18 2015     2015   4643 sub PRINT { Compress::BGZF::Writer::_queue(@_) }
19 3     3   15 sub CLOSE { Compress::BGZF::Writer::finalize(@_) }
20             ## use critic
21              
22             sub new_filehandle {
23              
24             #-------------------------------------------------------------------------
25             # ARG 0 : (optional) output filename
26             #-------------------------------------------------------------------------
27             # RET 0 : filehandle GLOB
28             #-------------------------------------------------------------------------
29              
30 3     3 1 1516 my ($class, $fn_out) = @_;
31              
32 3         322 open my $fh, '<', undef;
33 3 50       19 tie *$fh, $class, $fn_out or croak "failed to tie filehandle";
34 3         8 return $fh;
35              
36             }
37              
38             sub new {
39              
40             #-------------------------------------------------------------------------
41             # ARG 0 : (optional) output filename
42             #-------------------------------------------------------------------------
43             # RET 0 : Compress::BGZF::Writer object
44             #-------------------------------------------------------------------------
45              
46 3     3 1 5 my ($class, $fn_out) = @_;
47 3         7 my $self = bless {}, $class;
48              
49             # initialize
50 3 50       9 if (defined $fn_out) {
51 3 50       148 open $self->{fh}, '>', $fn_out
52             or croak "Error openingn file for writing";
53             }
54             else {
55 0         0 $self->{fh} = \*STDOUT;
56             }
57 3         10 binmode $self->{fh};
58              
59 3         13 $self->{c_level} = Z_DEFAULT_COMPRESSION;
60 3         14 $self->{buffer} = ''; # contents waiting to be compressed/written
61              
62             # these variables are tracked to allow for virtual offset calculation
63 3         4 $self->{block_offset} = 0; # offset of current block in compressed data
64 3         4 $self->{buffer_offset} = 0; # offset of current pos in uncompressed block
65              
66             # these variables are tracked to allow for index creation
67 3         4 $self->{u_offset} = 0; #uncompressed file offset
68 3         6 $self->{idx} = [];
69              
70 3         14 return $self;
71              
72             }
73              
74             sub set_level {
75              
76             #-------------------------------------------------------------------------
77             # ARG 0 : compression level desired
78             #-------------------------------------------------------------------------
79             # no returns
80             #-------------------------------------------------------------------------
81              
82 0     0 1 0 my ($self, $level) = @_;
83              
84 0 0       0 croak "Invalid compression level (allowed 0-9)"
85             if ($level !~ /^\d$/);
86 0         0 $self->{c_level} = $level;
87              
88 0         0 return;
89              
90             }
91              
92             sub add_data {
93              
94             # a wrapper around the queue() function that returns the virtual offset
95             # of the chunk added
96              
97             #-------------------------------------------------------------------------
98             # ARG 0 : data chunk to queue for compression
99             #-------------------------------------------------------------------------
100             # RET 1 : virtual offset of data written
101             #-------------------------------------------------------------------------
102              
103 0     0 1 0 my ($self, $content) = @_;
104              
105 0         0 my $vo = ($self->{block_offset} << 16) | $self->{buffer_offset};
106 0         0 $self->_queue( $content );
107              
108 0         0 return $vo;
109              
110             }
111              
112             sub _queue {
113              
114             #-------------------------------------------------------------------------
115             # ARG 0 : data chunk to queue for compression
116             #-------------------------------------------------------------------------
117             # no returns
118             #-------------------------------------------------------------------------
119              
120 2015     2015   1557 my ($self, $content) = @_;
121              
122 2015         1710 $self->{buffer} .= $content;
123              
124             # compress/write in 64k chunks
125 2015         2846 while (length($self->{buffer}) >= FLUSH_SIZE) {
126              
127 5         88 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
128 5         9 my $unwritten = $self->_write_block($chunk);
129             $self->{buffer} = $unwritten . $self->{buffer}
130 5 100       34 if ( length($unwritten) );
131              
132             }
133 2015         1387 $self->{buffer_offset} = length $self->{buffer};
134              
135 2015         3107 return;
136            
137             }
138              
139             sub _write_block {
140              
141             #-------------------------------------------------------------------------
142             # ARG 0 : independent data block to compress
143             #-------------------------------------------------------------------------
144             # RET 0 : remaining data that wasn't written
145             #-------------------------------------------------------------------------
146              
147 8     8   8 my ($self, $chunk) = @_;
148              
149 8         10 my $chunk_len = length($chunk);
150              
151             # payload is compressed with DEFLATE
152             rawdeflate(\$chunk, \my $payload, -Level => $self->{c_level})
153 8 50       31 or croak "deflate failed: $RawDeflateError\n";
154              
155             # very rarely, a DEFLATEd string may be larger than input. This can result
156             # in a block size > 2**16, which violates the BGZF specification and
157             # causes corruption of the BC field. Fix those edge cases here (somewhat
158             # slow but shouldn't happen often) and send the rest back to the buffer
159 8         24500 my $trimmed = '';
160 8         19 while (length($payload) > FLUSH_SIZE) {
161 4         7 my $trim_len = int( $chunk_len * 0.05 );
162 4         55 $trimmed = substr($chunk, -$trim_len, $trim_len, '') . $trimmed;
163             rawdeflate(\$chunk, \$payload, -Level => $self->{c_level})
164 4 50       17 or croak "deflate failed: $RawDeflateError\n";
165 4         10822 $chunk_len = length($chunk);
166             }
167              
168 8         11 my $block_size = length($payload) + HEAD_BYTES + FOOT_BYTES;
169              
170 8 50       16 croak "Internal error: block size > 65536" if ($block_size > 2**16);
171              
172             # payload is wrapped with appropriate headers and footers
173 8 50       8 print { $self->{fh} } pack(
  8         2055  
174             "a*va*VV",
175             BGZF_HEADER,
176             $block_size - 1,
177             $payload,
178             crc32($chunk),
179             $chunk_len,
180             ) or croak "Error writing compressed block";
181              
182             # increment the current offsets
183 8         17 $self->{block_offset} += $block_size;
184 8         8 $self->{u_offset} += $chunk_len;
185 8         8 push @{ $self->{idx} }, [$self->{block_offset}, $self->{u_offset}];
  8         26  
186              
187 8         17 return $trimmed;
188              
189             }
190              
191             sub finalize {
192              
193             #-------------------------------------------------------------------------
194             # no arguments
195             #-------------------------------------------------------------------------
196             # no returns
197             #-------------------------------------------------------------------------
198              
199 6     6 1 6 my ($self) = @_;
200              
201 6         19 while (length($self->{buffer}) > 0) {
202              
203             croak "file closed but buffer not empty"
204 3 50       9 if ( ! defined fileno($self->{fh}) );
205              
206 3         16 my $chunk = substr $self->{buffer}, 0, FLUSH_SIZE, '';
207 3         6 my $unwritten = $self->_write_block($chunk);
208             $self->{buffer} = $unwritten . $self->{buffer}
209 3 50       11 if ( length($unwritten) );
210              
211             }
212 6         151 close $self->{fh};
213              
214 6         17 return;
215              
216             }
217              
218             sub write_index {
219              
220             #-------------------------------------------------------------------------
221             # ARG 0 : index output filename
222             #-------------------------------------------------------------------------
223             # No returns
224             #-------------------------------------------------------------------------
225              
226 0     0 1 0 my ($self, $fn_out) = @_;
227              
228 0         0 $self->finalize(); # always clear remaining buffer to fully populate index
229 0 0       0 croak "missing index output filename" if (! defined $fn_out);
230 0         0 open my $fh_out, '>:raw', $fn_out;
231              
232 0         0 my @offsets = @{ $self->{idx} };
  0         0  
233 0         0 pop @offsets; # last offset is EOF
234 0         0 print {$fh_out} pack('Q<', scalar(@offsets));
  0         0  
235 0         0 print {$fh_out} pack('Q
  0         0  
  0         0  
236              
237 0         0 close $fh_out;
238 0         0 return;
239              
240             }
241              
242             sub DESTROY {
243              
244 3     3   682 my ($self) = @_;
245              
246             # make sure we call finalize in case the caller forgot
247 3         9 $self->finalize();
248              
249 3         109 return;
250              
251             }
252              
253             1;
254              
255              
256             __END__