File Coverage

blib/lib/Data/BitStream/Code/Comma.pm
Criterion Covered Total %
statement 64 68 94.1
branch 20 32 62.5
condition 5 9 55.5
subroutine 7 7 100.0
pod 2 2 100.0
total 98 118 83.0


line stmt bran cond sub pod time code
1             package Data::BitStream::Code::Comma;
2 28     28   37900 use strict;
  28         64  
  28         1453  
3 28     28   192 use warnings;
  28         179  
  28         1766  
4             BEGIN {
5 28     28   92 $Data::BitStream::Code::Comma::AUTHORITY = 'cpan:DANAJ';
6 28         2599 $Data::BitStream::Code::Comma::VERSION = '0.08';
7             }
8              
9             our $CODEINFO = { package => __PACKAGE__,
10             name => 'Comma',
11             universal => 1,
12             params => 1,
13             encodesub => sub {shift->put_comma(@_)},
14             decodesub => sub {shift->get_comma(@_)}, };
15              
16 28     28   269 use Moo::Role;
  28         78  
  28         708  
17             requires qw(read write);
18              
19             sub put_comma {
20 1632     1632 1 26704 my $self = shift;
21 1632 50       5480 $self->error_stream_mode('write') unless $self->writing;
22 1632         2636 my $bits = shift;
23 1632 50 33     9248 $self->error_code('param', 'bits must be in range 1-16') unless $bits >= 1 && $bits <= 16;
24              
25 1632 50       9722 return $self->put_unary(@_) if $bits == 1;
26 1632         2934 my $comma = ~(~0 << $bits); # 1 x $bits is the terminator
27 1632         3077 my $base = 2**$bits - 1; # The base of the digits we're writing
28              
29 1632         3106 foreach my $val (@_) {
30 4686 100 100     22862 $self->error_code('zeroval') unless defined $val and $val >= 0;
31              
32 4682 100       12668 if ($val == 0) { $self->write( $bits, $comma ); next; } # c
  150         535  
  150         339  
33              
34 4532         7116 my $v = $val;
35 4532         8287 my @stack = ($comma);
36 4532         12000 while ($v > 0) {
37 17345         23980 push @stack, $v % $base;
38 17345         38018 $v = int($v / $base);
39             }
40             # Write the stack. Simple way:
41             # $self->write($bits, pop @stack) while @stack;
42 4532         6029 my $cword = 0;
43 4532         5013 my $cbits = 0;
44 4532         10854 while (@stack) {
45 21877         34839 $cword = ($cword << $bits) | pop @stack;
46 21877         54174 $cbits += $bits;
47 21877 50       60768 if (($cbits + $bits) > 32) {
48 0         0 $self->write($cbits, $cword);
49 0         0 $cword = 0;
50 0         0 $cbits = 0;
51             }
52             }
53 4532 50       20201 $self->write($cbits, $cword) if $cbits;
54             }
55 1628         7568 1;
56             }
57              
58             sub get_comma {
59 1673     1673 1 25616 my $self = shift;
60 1673 50       7757 $self->error_stream_mode('read') if $self->writing;
61 1673         2401 my $bits = shift;
62 1673 50 33     9987 $self->error_code('param', 'bits must be in range 1-16') unless $bits >= 1 && $bits <= 16;
63              
64 1673 50       4109 return $self->get_unary(@_) if $bits == 1;
65 1673         2809 my $comma = ~(~0 << $bits); # 1 x $bits is the terminator
66 1673         9905 my $base = 2**$bits - 1; # The base of the digits we're writing
67              
68 1673         2138 my $count = shift;
69 1673 100       3859 if (!defined $count) { $count = 1; }
  1631 50       2478  
    0          
70 42         73 elsif ($count < 0) { $count = ~0; } # Get everything
71 0         0 elsif ($count == 0) { return; }
72              
73 1673         2126 my @vals;
74 1673         5636 $self->code_pos_start('Comma');
75 1673         61608 while ($count-- > 0) {
76 4769         15668 $self->code_pos_set;
77 4769         624026 my $tval = $self->read($bits);
78 4769 100       13495 last unless defined $tval;
79              
80 4725         6089 my $val = 0;
81 4725         11073 while ($tval != $comma) {
82 18053         23970 $val = $base * $val + $tval;
83 18053         52454 $tval = $self->read($bits);
84 18050 50       61112 $self->error_off_stream unless defined $tval;
85             }
86 4722         15276 push @vals, $val;
87             }
88 1670         7118 $self->code_pos_end;
89 1670 100       60457 wantarray ? @vals : $vals[-1];
90             }
91              
92 28     28   29101 no Moo::Role;
  28         85  
  28         192  
93             1;
94              
95             # ABSTRACT: A Role implementing Comma codes
96              
97             =pod
98              
99             =head1 NAME
100              
101             Data::BitStream::Code::Comma - A Role implementing Comma codes
102              
103             =head1 VERSION
104              
105             version 0.08
106              
107             =head1 DESCRIPTION
108              
109             A role written for L that provides get and set methods for
110             Comma codes. The role applies to a stream object.
111              
112             Comma codes are described in many sources. The codes are written in C-bit
113             chunks, where a chunk consisting of all 1 bits indicates the end of the code.
114             The number to be encoded is stored in base C<2^k-1>. The case of 1-bit comma
115             codes degenerates into unary codes. The most common comma code in current use
116             is the ternary comma code which uses 2-bit chunks and stores the number in
117             base 3 (hence why it is called ternary comma). Example for ternary comma:
118              
119             value code binary bits
120             0 c 11 2
121             1 1c 0111 4
122             2 2c 1011 4
123             3 10c 010011 6
124             4 11c 010111 6
125             .. 8 22c 101011 6
126             9 100c 01000011 8
127             .. 64 2101c 1001000111 10
128             .. 10000 111201101c 01010110000101000111 20
129              
130             Comma codes using larger chunks compact larger numbers better, but the
131             terminator also grows. This means smaller values take more bits to encode,
132             and all codes have many wasted bits after the information.
133              
134             Also note that skipping the leading C<0>s for all codes results in a large
135             waste of space. For instance, the codes C<0xc>, C<0xxc>, C<0xxxc>, etc. are
136             all not used, even though they are uniquely decodable. Note that Fenwick's
137             table 6 (p6) shows C<0c> being used, but no other leading zero. This is not
138             the case in Sayood's table 3.19 (p71) where no entry has a leading zero.
139              
140             These codes are a special case of the block-based taboo codes (Pigeon 2001).
141             The taboo codes fully utilize all the bits.
142              
143             =head1 METHODS
144              
145             =head2 Provided Object Methods
146              
147             =over 4
148              
149             =item B< put_comma($bits, $value) >
150              
151             =item B< put_comma($bits, @values) >
152              
153             Insert one or more values as Comma codes using C<$bits> bits. Returns 1.
154              
155             =item B< get_comma($bits) >
156              
157             =item B< get_comma($bits, $count) >
158              
159             Decode one or more Comma codes from the stream. If count is omitted,
160             one value will be read. If count is negative, values will be read until
161             the end of the stream is reached. In scalar context it returns the last
162             code read; in array context it returns an array of all codes read.
163              
164             =back
165              
166             =head2 Parameters
167              
168             The parameter C must be an integer between 1 and 16. This indicates
169             the number of bits used per chunk.
170              
171             If C is 1, then unary coding is used.
172              
173             Ternary comma coding is the special case of comma coding with C.
174              
175             Byte coding is the special case of comma coding with C.
176              
177             =head2 Required Methods
178              
179             =over 4
180              
181             =item B< read >
182              
183             =item B< write >
184              
185             These methods are required for the role.
186              
187             =back
188              
189             =head1 SEE ALSO
190              
191             =over 4
192              
193             =item Peter Fenwick, "Punctured Elias Codes for variable-length coding of the integers", Technical Report 137, Department of Computer Science, University of Auckland, December 1996.
194              
195             =item Peter Fenwick, "Ziv-Lempel encoding with multi-bit flags", Proc. Data Compression Conference (IEEE DCC), Snowbird, Utah, pp 138-147, March 1993.
196              
197             =item Khalid Sayood (editor), "Lossless Compression Handbook", 2003.
198              
199             =back
200              
201             =head1 AUTHORS
202              
203             Dana Jacobsen
204              
205             =head1 COPYRIGHT
206              
207             Copyright 2012 by Dana Jacobsen
208              
209             This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
210              
211             =cut