File Coverage

blib/lib/Compress/LZW/Progressive.pm
Criterion Covered Total %
statement 127 203 62.5
branch 41 108 37.9
condition 4 22 18.1
subroutine 8 11 72.7
pod 5 6 83.3
total 185 350 52.8


line stmt bran cond sub pod time code
1             package Compress::LZW::Progressive;
2              
3 2     2   57997 use strict;
  2         5  
  2         80  
4 2     2   12 use warnings;
  2         4  
  2         66  
5 2     2   1113 use Compress::LZW::Progressive::Dict;
  2         5  
  2         61  
6 2     2   11 use bytes;
  2         3  
  2         10  
7              
8             our $VERSION = '0.102';
9              
10             my @empty_dict;
11             @empty_dict[0..255] = map { chr } 0..255;
12              
13             sub new {
14 2     2 1 32 my ($class, %args) = @_;
15              
16 2   50     10 $args{bits} ||= 16;
17              
18 2         10 my $code_counter = (2 ** $args{bits}) - 1;
19 2         7 $args{code_end_segment} = $code_counter--;
20 2         5 $args{code_add_start} = $code_counter--;
21 2         5 $args{code_add_end} = $code_counter--;
22 2         5 $args{code_delete_start} = $code_counter--;
23 2         3 $args{code_delete_end} = $code_counter--;
24 2         6 $args{code_delete_count} = $code_counter--;
25 2         7 $args{code_max} = $code_counter--;
26              
27 2         4 $args{compress_resets} = 0;
28 2         5 $args{decompress_resets} = 0;
29              
30 2         4 $args{compress_deleted_least_used_codes} = 0;
31              
32 2         7 my $self = bless \%args, $class;
33 2         9 $self->reset();
34              
35 2         11 return $self;
36             }
37              
38             sub reset {
39 6     6 1 2336 my ($self, $which) = @_;
40 6   50     38 $which ||= '';
41              
42 6 50 33     71 if (! $which || $which eq 'compress') {
43 6         41 $self->{cdict} = Compress::LZW::Progressive::Dict->new();
44 6         7912 $self->{compress_resets}++;
45             }
46              
47 6 50 33     37 if (! $which || $which eq 'decompress') {
48 6         21 $self->{ddict_reuse_codes} = [];
49 6         980 $self->{ddict} = [ @empty_dict ];
50 6         656 $self->{ddict_usage} = Compress::LZW::Progressive::Dict->new();
51 6         1234 $self->{dnext} = 256;
52 6         16 $self->{decompress_resets}++;
53             }
54              
55 6         16 $self->{code_frequency} = {};
56 6         729 $self->{stats} = {};
57             }
58              
59             sub compress {
60 86     86 1 47717 my ($self, $str) = @_;
61              
62 86         177 my $dict = $self->{cdict};
63 86         177 my $debug = $self->{debug};
64              
65 86         148 my @out = ();
66              
67 86         8124 my @char = split //, $str;
68 86         1986 while (int @char > 0) {
69 8556 0       15245 print "Matching '".join('', @char[0..($#char > 20 ? 20 : $#char)])."'\n" if $debug;
    50          
70              
71             # Find the code that matches the most of the upcoming chars
72 8556         24156 my $code = $dict->code_matching_array(\@char);
73 8556 50       18361 die "Caouldn't find code to match '".join('', @char)."'" if ! defined $code;
74              
75 8556         26009 my $phrase = $dict->phrase($code);
76 8556 50       18844 die "Found code that has no phrase ($code)" if ! length $phrase;
77              
78 8556         21293 $dict->increment_code_usage_count($code);
79 8556 50       15788 print " + $code for '$phrase'" if $debug;
80 8556         14213 push @out, $code;
81              
82             # Remove the phrase found from the start of the @char
83 8556         17176 splice @char, 0, length($phrase);
84 8556 100       17728 if (! defined $char[0]) {
85 86 50       183 print "\n" if $debug;
86 86         178 last;
87             }
88              
89             # If I'm running out of code space...
90 8470 100       25241 if ($dict->codes_used + 1 == $self->{code_max}) {
91             # First, try getting some old, unused codes, and asking the client to delete that many (1/4th of custom codes)
92 3         10 my $delete_max_old_codes = int(($dict->codes_used - 256) * .25);
93 3 50       13 if (my @delete = $dict->least_used_codes($delete_max_old_codes)) {
94 3 50       12 print "Asking reusal of ".int(@delete)." codes ".join(', ', @delete)."\n" if $debug;
95 3 50       67 die "Couldn't delete codes" unless $dict->delete_codes(@delete);
96              
97             # Push to out codestream
98 3         42 push @out, $self->{code_delete_count};
99 3         11 push @out, int @delete;
100              
101 3         82 $self->{compress_deleted_least_used_codes}++;
102              
103             # ...and continue with next code creation
104             }
105             # Otherwise (probably won't get here), do a full dict reset and skip to next char (can't create new)
106             else {
107 0 0       0 print " + reset code '".$self->{code_max}."'\n" if $debug;
108 0         0 push @out, $self->{code_max};
109              
110 0         0 $self->{compress_resets}++;
111 0         0 $dict = Compress::LZW::Progressive::Dict->new();
112 0 0       0 print "\n" if $debug;
113 0         0 next;
114             }
115             }
116              
117 8470         17333 my $new_phrase = $phrase . $char[0];
118 8470         22842 my $new_code = $dict->add($new_phrase);
119 8470 50       19733 print ", creating $new_code => '$new_phrase'\n" if $debug;
120 8470         25892 $dict->increment_code_usage_count($new_code);
121             }
122 86 50       181 print "End of \@char; putting end segment code\n" if $debug;
123 86         215 push @out, $self->{code_end_segment};
124              
125 86         259 $self->{stats}{last_compress_in_bytes} = length $str;
126 86         270 $self->{stats}{last_compress_out_bytes} = int(@out) * 2;
127 86         302 $self->{stats}{compress_in_bytes} += $self->{stats}{last_compress_in_bytes};
128 86         208 $self->{stats}{compress_out_bytes} += $self->{stats}{last_compress_out_bytes};
129            
130 86         176 $self->{cdict} = $dict;
131 86         1888 return pack 'S*', @out;
132             }
133              
134             sub decompress {
135 45     45 1 309 my ($self, $str) = @_;
136            
137 45         185 my $dict = $self->{ddict};
138 45         84 my $dict_usage = $self->{ddict_usage};
139 45         120 my $reuse = $self->{ddict_reuse_codes};
140 45         89 my $debug = $self->{debug};
141 45         101 my $next = $self->{dnext};
142              
143 45         1727 my @code = unpack 'S*', $str;
144            
145 45         296 my $last_code;
146 45         93 my $return = '';
147 45         185 while (defined (my $code = shift @code)) {
148 8645 100       22632 if ($code >= $self->{code_max}) {
149 89 50       383 print "Code $code\n" if $debug;
150             # Resetting dictionary to scratch
151 89 50       379 if ($code == $self->{code_max}) {
    100          
    50          
    50          
152 0 0       0 print "Resetting decompress as have reached the max code '$self->{code_max}'\n" if $debug;
153              
154 0         0 $self->{decompress_resets}++;
155 0         0 $next = 256;
156 0         0 $dict = [ @empty_dict ];
157 0         0 $last_code = undef;
158             }
159             # End of segment; don't allow last code to affect new codes
160             elsif ($code == $self->{code_end_segment}) {
161 86 50       252 print "Reached seg code '$self->{code_end_segment}'\n" if $debug;
162 86         111 $last_code = undef;
163             }
164             # Process a list of codes to delete
165             elsif ($code == $self->{code_delete_start}) {
166 0         0 while (defined (my $delete_code = shift @code)) {
167 0 0       0 last if $delete_code == $self->{code_delete_end};
168 0         0 $dict_usage->{codes_used}[$delete_code] = undef;
169 0         0 $dict->[$delete_code] = undef;
170 0         0 push @$reuse, $delete_code;
171             }
172             }
173             # Received a request to delete a number of unused codes; find that many least used codes and delete them
174             elsif ($code == $self->{code_delete_count}) {
175 3         7 my $delete_count = shift @code;
176              
177 3         16 my @delete = $dict_usage->least_used_codes($delete_count);
178 3 50       147 if (int(@delete) != $delete_count) {
179 0         0 die "Tried to find $delete_count unused codes, but found ".int(@delete)." instead; (".join(', ', @delete).")\n";
180             }
181 3 50       14 print "Reusing ".int(@delete)." (asked $delete_count) codes ".join(', ', @delete)."\n" if $debug;
182 3         11 foreach my $delete_code (@delete) {
183 2874 50       5320 if (! $dict->[$delete_code]) {
184 0         0 die "Attempting to delete non-defined code $delete_code";
185             }
186 2874         3819 $dict_usage->{codes_used}[$delete_code] = undef;
187 2874         3404 $dict->[$delete_code] = undef;
188 2874         3559 push @$reuse, $delete_code;
189             }
190             }
191 89         312 next;
192             }
193              
194 8556         8329 my $next_code;
195 8556 100       13967 if (defined $dict->[$code]) {
196 8553         12037 $return .= $dict->[$code];
197              
198 8553 50       14446 print " + '".$dict->[$code]."' from $code" if $debug;
199 8553 100       19374 if (defined $last_code) {
200 8467 100       17210 $next_code = @$reuse ? shift @$reuse : $next++;
201 8467         18371 $dict->[$next_code] = $dict->[$last_code] . substr($dict->[$code], 0, 1);
202 8467 50       18277 print " and adding '".$dict->[$next_code]."' to dict on code $next_code" if $debug;
203             }
204 8553 50       16323 print "\n" if $debug;
205             }
206             # This is the edge case where repeating phrase won't be defined (see wikipedia.org on LZW)
207             else {
208 3 50       7 $next_code = @$reuse ? shift @$reuse : $next++;
209 3         6 my $dp = $dict->[$last_code];
210 3         14 $return .= $dict->[$code] = $dp . substr($dp, 0, 1);
211 3 50       6 print " + '".$dict->[$code]."' from $code\n" if $debug;
212             }
213 8556 100       28807 $dict_usage->increment_code_usage_count($next_code) if defined $next_code;
214 8556         19882 $dict_usage->increment_code_usage_count($code);
215 8556         27370 $last_code = $code;
216             }
217              
218 45         180 $self->{stats}{last_decompress_in_bytes} = length $str;
219 45         111 $self->{stats}{last_decompress_out_bytes} = length $return;
220 45         99 $self->{stats}{decompress_in_bytes} += $self->{stats}{last_decompress_in_bytes};
221 45         107 $self->{stats}{decompress_out_bytes} += $self->{stats}{last_decompress_out_bytes};
222              
223 45         119 $self->{dnext} = $next;
224 45         70 $self->{ddict} = $dict;
225 45         218 return $return;
226             }
227              
228             sub stats {
229 0     0 1   my ($self, $type, $phrases) = @_;
230              
231 0           my $devel_size;
232 0           eval {
233 0           require Devel::Size;
234 0           $devel_size = 1;
235             };
236 0 0         if ($@) {
237 0           print STDERR "Devel::Size not installed so stats() will exclude data size\n";
238             }
239              
240 0           my @return;
241              
242 0           push @return, sprintf "Bits %d", $self->{bits};
243 0 0 0       if (! $type || $type eq 'compress') {
244 0           push @return, sprintf "Compress efficiency: %3.1f%% (%3.1f%% last) with %d/%d codes used",
245             100 * (1 - ($self->{stats}{compress_out_bytes} / $self->{stats}{compress_in_bytes})),
246             100 * (1 - ($self->{stats}{last_compress_out_bytes} / $self->{stats}{last_compress_in_bytes})),
247             $self->{cdict}->codes_used,
248             $self->{code_max},
249             ;
250 0 0         push @return, sprintf "cdict: %.2f Kb",
251             Devel::Size::total_size($self->{cdict}) / 1024
252             if $devel_size;
253              
254 0 0         if ($phrases) {
255             # Collect stats on phrase lengths
256 0           my $smallest = 100;
257 0           my $largest = 0;
258 0           my $total = 0;
259 0           my $avg_count = 0;
260 0           foreach my $code (256..$#{ $self->{cdict}{array} }) {
  0            
261 0           my $phrase = $self->{cdict}->phrase($code);
262 0 0         next unless defined $phrase;
263 0           my $length = length $phrase;
264 0 0         $smallest = $length if $length < $smallest;
265 0           $total += $length;
266 0           $avg_count++;
267 0 0         $largest = $length if $length > $largest;
268             }
269 0           my $average = $total / $avg_count;
270              
271 0           push @return, sprintf "phrase lengths, sm: %d, avg: %d, lg: %d, total: %d",
272             $smallest, $average, $largest, $total;
273             }
274             }
275 0 0 0       if (! $type || $type eq 'decompress') {
276 0           push @return, sprintf "ddict: %.2f Kb [%d/%d codes used]",
277             (Devel::Size::total_size($self->{ddict}) +
278             Devel::Size::total_size($self->{ddict_reuse_codes}) +
279             Devel::Size::total_size($self->{ddict_usage})) / 1024,
280 0 0         $self->{dnext} - int @{ $self->{ddict_reuse_codes} },
281             $self->{code_max}
282             if $devel_size;
283             }
284              
285 0           return join("; ", @return);
286             }
287              
288             sub dict_dump {
289 0     0 0   my $self = shift;
290 0           my $return = " Index | Compress | Decompress \n";
291              
292 0           my $comp = $self->{cdict}{array}; #[ sort { $self->{cdict}{$a} <=> $self->{cdict}{$b} } keys %{ $self->{cdict} } ];
293 0           my $decomp = $self->{ddict};
294 0 0         my $count = $#{ $comp } > $#{ $decomp } ? $#{ $comp } : $#{ $decomp };
  0            
  0            
  0            
  0            
295            
296 0           my @char_map = qw/nul soh stx etx eot enq ack bel bs ht lf vt ff cr so si dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us/;
297 0           $char_map[127] = 'del';
298             my $show_invis = sub {
299 0     0     my $return = '';
300 0           foreach my $char (split(//, shift)) {
301 0           my $num = ord $char;
302 0 0 0       if (($num < 32 || $num == 127 || $num > 128) && $num != 10) {
      0        
303 0 0         if (defined $char_map[$num]) {
304 0           $return .= "^$char_map[$num]"."[$num]";
305             } else {
306 0           $return .= "[$num]";
307             }
308             } else {
309 0           $return .= $char;
310             }
311             }
312 0           return $return;
313 0           };
314              
315 0           for my $i (0..$count) {
316 0 0         my $c = defined $comp->[$i] ? $comp->[$i] : '[undef]';
317 0 0         my $d = defined $decomp->[$i] ? $decomp->[$i] : '[undef]';
318 0 0         next if $c eq $d;
319            
320 0 0         printf " %-6d | %6s %s %6s\n", $i, $show_invis->($c), ($c eq $d ? '=' : '!'), $show_invis->($d);
321             }
322             }
323              
324             1;
325              
326              
327             __END__