File Coverage

blib/lib/Number/Nary.pm
Criterion Covered Total %
statement 97 98 98.9
branch 31 34 91.1
condition 3 3 100.0
subroutine 20 20 100.0
pod 3 3 100.0
total 154 158 97.4


line stmt bran cond sub pod time code
1             package Number::Nary 1.100313;
2             # ABSTRACT: encode and decode numbers as n-ary strings
3              
4 5     5   291376 use strict;
  5         90  
  5         132  
5 5     5   21 use warnings;
  5         8  
  5         153  
6              
7 5     5   33 use Carp qw(croak);
  5         8  
  5         265  
8 5     5   28 use Scalar::Util 0.90 qw(reftype);
  5         150  
  5         247  
9 5     5   2443 use List::MoreUtils 0.09 qw(uniq);
  5         59172  
  5         34  
10 5     5   6393 use UDCode ();
  5         2312  
  5         209  
11              
12 5         60 use Sub::Exporter -setup => {
13             exports => [ qw(n_codec n_encode n_decode) ],
14             groups => {
15             default => [ qw(n_codec) ],
16             codec_pair => \&_generate_codec_pair,
17             }
18 5     5   2741 };
  5         57008  
19              
20             sub _generate_codec_pair {
21 2     2   295 my (undef, undef, $arg, undef) = @_;
22              
23 2         6 my $local_arg = {%$arg};
24 2         4 my $digits = delete $local_arg->{digits};
25              
26 2         5 my %pair;
27 2         4 @pair{qw(encode decode)} = n_codec($digits, $local_arg);
28 2         5 return \%pair;
29             }
30              
31             #pod =head1 SYNOPSIS
32             #pod
33             #pod This module lets you convert numbers into strings that encode the number using
34             #pod the digit set of your choice. For example, you could get routines to convert
35             #pod to and from hex like so:
36             #pod
37             #pod my ($enc_hex, $dec_hex) = n_codec('0123456789ABCDEF');
38             #pod
39             #pod my $hex = $enc_hex->(255); # sets $hex to FF
40             #pod my $num = $dec_hex->('A0'); # sets $num to 160
41             #pod
42             #pod This would be slow and stupid, since Perl already provides the means to easily
43             #pod and quickly convert between decimal and hex representations of numbers.
44             #pod Number::Nary's utility comes from the fact that it can encode into bases
45             #pod composed of arbitrary digit sets.
46             #pod
47             #pod my ($enc, $dec) = n_codec('0123'); # base 4 (for working with nybbles?)
48             #pod
49             #pod # base64
50             #pod my ($enc, $dec) = n_codec(
51             #pod join('', 'A' .. 'Z', 'a' .. 'z', 0 .. 9, '+', '/', '=')
52             #pod );
53             #pod
54             #pod =func n_codec
55             #pod
56             #pod my ($encode_sub, $decode_sub) = n_codec($digit_string, \%arg);
57             #pod
58             #pod This routine returns a reference to a subroutine which will encode numbers into
59             #pod the given set of digits and a reference which will do the reverse operation.
60             #pod
61             #pod The digits may be given as a string or an arrayref. This routine will croak if
62             #pod the set of digits contains repeated digits, or if there could be ambiguity
63             #pod in decoding a string of the given digits. (Number::Nary is overly aggressive
64             #pod about weeding out possibly ambiguous digit sets, for the sake of the author's
65             #pod sanity.)
66             #pod
67             #pod The encode sub will croak if it is given input other than a non-negative
68             #pod integer.
69             #pod
70             #pod The decode sub will croak if given a string that contains characters not in the
71             #pod digit string, or, for fixed-string digit sets, if the lenth of the string to
72             #pod decode is not a multiple of the length of the component digits.
73             #pod
74             #pod Valid arguments to be passed in the second parameter are:
75             #pod
76             #pod predecode - if given, this coderef will be used to preprocess strings
77             #pod passed to the decoder
78             #pod
79             #pod postencode - if given, this coderef will be used to postprocess strings
80             #pod produced by the encoder
81             #pod
82             #pod =cut
83              
84             sub _split_len_iterator {
85 10     10   20 my ($length) = @_;
86              
87             return sub {
88 20     20   34 my ($string, $callback) = @_;
89              
90 20         56 my $places = length($string) / $length;
91              
92 20 100       130 croak "string length is not a multiple of digit length"
93             unless $places == int $places;
94              
95 19         41 for my $position (1 .. $places) {
96 71         115 my $digit = substr $string, (-$length * $position), $length;
97 71         97 $callback->($digit, $position);
98             }
99             }
100 10         57 }
101              
102             sub _split_digit_iterator {
103 2     2   5 my ($digits) = @_;
104              
105             sub {
106 1     1   3 my ($string, $callback) = @_;
107 1         2 my @digits;
108 1         3 ITER: while (length $string) {
109 4         6 for (@$digits) {
110 14 100       28 if (index($string, $_) == 0) {
111 4         10 push @digits, substr($string, 0, length $_, '');
112 4         10 next ITER;
113             }
114             }
115 0         0 croak "could not decompose string '$string'";
116             }
117              
118 1         3 for (1 .. @digits) {
119 4         8 $callback->($digits[-$_], $_);
120             }
121             }
122 2         22 }
123              
124             sub _set_iterator {
125 14     14   26 my ($digits, $length_ref) = @_;
126              
127 14 50       33 croak "digit set is empty" unless @$digits;
128             croak "digit set contains zero-length digit"
129 5 50   5   3800 if do { no warnings 'uninitialized'; grep { ! length $_ } @$digits };
  5         18  
  5         3373  
  14         20  
  14         25  
  160         244  
130 14 100       228 croak "digit set contains repeated digits" if @$digits != uniq @$digits;
131              
132 13         48 my @lengths = uniq map { length } @$digits;
  154         226  
133              
134 13 100       58 return _split_len_iterator($lengths[0]) if @lengths == 1;
135              
136 3 100       12 croak "digit set may be ambiguous" if ! UDCode::is_udcode(@$digits);
137              
138 2         265 return _split_digit_iterator($digits);
139             }
140              
141             sub n_codec {
142 14     14 1 1744 my ($digit_set, $arg) = @_;
143              
144 14         45 my @digits;
145              
146 14 100       42 if (ref $digit_set) {
147 6 50       27 croak "digit set must be a string or arrayref"
148             unless reftype $digit_set eq 'ARRAY';
149 6         21 @digits = @$digit_set;
150             } else {
151 8         34 @digits = split //, $digit_set;
152             }
153              
154 14         38 my $iterator = _set_iterator(\@digits);
155              
156             my $encode_sub = sub {
157 22     22   3639 my ($value) = @_;
158              
159 22 100 100     1040 croak "value isn't an non-negative integer"
160             if not defined $value
161             or $value !~ /\A\d+\z/;
162              
163 14         27 my $string = '';
164              
165 14 100       30 if (@digits == 1) {
166 2         5 $string = $digits[0] x $value;
167             } else {
168 12         18 while (1) {
169 32         46 my $digit = $value % @digits;
170 32         56 $value = int($value / @digits);
171 32         56 $string = "$digits[$digit]$string";
172 32 100       69 last unless $value;
173             }
174             }
175              
176 14 100       38 $string = $arg->{postencode}->($string) if $arg->{postencode};
177 14         61 return $string;
178 12         46 };
179              
180 12         21 my %digit_value = do { my $i = 0; map { $_ => $i++ } @digits; };
  12         15  
  12         22  
  151         257  
181              
182             my $decode_sub = sub {
183 22     22   4884 my ($string) = @_;
184 22 100       53 return unless defined $string;
185              
186 21 100       53 $string = $arg->{predecode}->($string) if $arg->{predecode};
187              
188 21         46 my $value = 0;
189              
190             $iterator->($string, sub {
191 75     26   103 my ($digit, $position) = @_;
192             croak "string to decode contains invalid digits"
193 75 100       270 unless exists $digit_value{$digit};
194              
195             # Stupid hack, but I'm just cramming unary support in here at the moment.
196             # It can be polished up later, if needed. -- rjbs, 2009-11-22
197             $value += @digits == 1
198             ? 1
199 73 100       179 : ($digit_value{$digit} * @digits ** ($position++ - 1));
200 21         106 });
201              
202 18         122 return $value;
203 12         46 };
204              
205 12         48 return ($encode_sub, $decode_sub);
206             }
207              
208             #pod =func n_encode
209             #pod
210             #pod my $string = n_encode($value, $digit_string);
211             #pod
212             #pod This encodes the given value into a string using the given digit string. It is
213             #pod written in terms of C, above, so it's not efficient at all for
214             #pod multiple uses in one process.
215             #pod
216             #pod =func n_decode
217             #pod
218             #pod my $number = n_decode($string, $digit_string);
219             #pod
220             #pod This is the decoding equivalent to C, above.
221             #pod
222             #pod =cut
223              
224             # If you really can't stand using n_codec, you could memoize these.
225 1     1 1 277 sub n_encode { (n_codec($_[1]))[0]->($_[0]) }
226 3     3 1 747 sub n_decode { (n_codec($_[1]))[1]->($_[0]) }
227              
228             #pod =head1 EXPORTS
229             #pod
230             #pod C is exported by default. C and C are exported.
231             #pod
232             #pod Pairs of routines to encode and decode may be imported by using the
233             #pod C group as follows:
234             #pod
235             #pod use Number::Nary -codec_pair => { digits => '01234567', -suffix => '8' };
236             #pod
237             #pod my $encoded = encode8($number);
238             #pod my $decoded = decode8($encoded);
239             #pod
240             #pod For more information on this kind of exporting, see L.
241             #pod
242             #pod =head1 SECRET ORIGINS
243             #pod
244             #pod I originally used this system to produce unique worksheet names in Excel. I
245             #pod had a large report generating system that used Win32::OLE, and to keep track of
246             #pod what was where I'd Storable-digest the options used to produce each worksheet
247             #pod and then n-ary encode them into the set of characters that were valid in
248             #pod worksheet names. Working out that set of characters was by far the hardest
249             #pod part.
250             #pod
251             #pod =head1 ACKNOWLEDGEMENTS
252             #pod
253             #pod Thanks, Jesse Vincent. When I remarked, on IRC, that this would be trivial to
254             #pod do, he said, "Great. Would you mind doing it?" (Well, more or less.) It was
255             #pod a fun little distraction.
256             #pod
257             #pod Mark Jason Dominus and Michael Peters offered some useful advice on how to weed
258             #pod out ambiguous digit sets, enabling me to allow digit sets made up of
259             #pod varying-length digits.
260             #pod
261             #pod =head1 SEE ALSO
262             #pod
263             #pod L is in the same problem space wth Number::Nary. It provides
264             #pod only an OO interface and does not reliably handle multicharacter digits or
265             #pod recognize ambiguous digit sets.
266             #pod
267             #pod =cut
268              
269             1; # my ($encode_sub, $decode_sub) = n_codec('8675309'); # jennynary
270              
271             __END__