File Coverage

blib/lib/Convert/Base85.pm
Criterion Covered Total %
statement 55 61 90.1
branch 0 2 0.0
condition 0 3 0.0
subroutine 8 9 88.8
pod 3 3 100.0
total 66 78 84.6


line stmt bran cond sub pod time code
1             package Convert::Base85;
2              
3 3     3   134256 use 5.016001;
  3         36  
4 3     3   16 use warnings;
  3         5  
  3         88  
5 3     3   14 use strict;
  3         6  
  3         58  
6              
7 3     3   15 use Carp;
  3         22  
  3         217  
8 3         373 use Math::Int128 qw(
9             uint128
10             uint128_to_number
11             uint128_add
12             uint128_and
13             uint128_divmod
14             uint128_left
15             uint128_mul
16 3     3   1545 uint128_right);
  3         20610  
17              
18             #
19             # Three '#' for encoding information, four '#' for decoding.
20             #
21             #use Smart::Comments q(#####);
22              
23             our $VERSION = '1.02';
24              
25 3     3   28 use Exporter qw(import);
  3         6  
  3         2702  
26              
27             our %EXPORT_TAGS;
28             our @EXPORT_OK = (qw(base85_check base85_encode base85_decode));
29              
30             #
31             # Add an :all tag.
32             #
33             $EXPORT_TAGS{all} = [@EXPORT_OK];
34              
35             =head1 NAME
36              
37             Convert::Base85 - Encoding and decoding to and from Base 85 strings
38              
39             =head1 SYNOPSIS
40              
41             use Convert::Base85;
42            
43             my $encoded = Convert::Base85::encode($data);
44             my $decoded = Convert::Base85::decode($encoded);
45              
46             or
47              
48             use Convert::Base85 qw(base85_encode base85_decode);
49            
50             my $encoded = base85_encode($data);
51             my $decoded = base85_decode($encoded);
52              
53             =head1 DESCRIPTION
54              
55             This module implements a I conversion for encoding binary
56             data as text. This is done by interpreting each group of sixteen bytes
57             as a 128-bit integer, which is then converted to a twenty-digit base 85
58             representation using the alphanumeric characters 0-9, A-Z, and a-z, in
59             addition to the punctuation characters !, #, $, %, &, (, ), *, +, -, ;, <, =, >,
60             ?, @, ^, _, `, {, |, }, and ~, in that order.
61              
62             This creates a string that is five fourths (1.25) larger than the original
63             data, making it more efficient than L's 3-to-4 ratio (1.3333).
64              
65             As noted above, the conversion makes use of 128-bit arithmatic, which most
66             computers can't handle natively, which is why the module L
67             needs to be installed as well.
68              
69             =cut
70              
71             #
72             # character value
73             # 0..9: 0..9
74             # A..Z: 10..35
75             # a..z: 36..61
76             # punc: 62..84
77             #
78             # Take a number from 0 to 84, and turn it into a character.
79             #
80             my @b85_encode = ('0' .. '9', 'A' .. 'Z', 'a' .. 'z',
81             '!', '#', '$', '%', '&', '(', ')', '*', '+',
82             '-', ';', '<', '=', '>', '?', '@', '^', '_',
83             '`', '{', '|', '}', '~');
84              
85             #
86             # Take the ord() of a character, and return the number (from 0 to 84)
87             # for it. Unknown characters return -1.
88             #
89             my @b85_decode = (
90             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
92             -1, 62, -1, 63, 64, 65, 66, -1, 67, 68, 69, 70, -1, 71, -1, -1,
93             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, 72, 73, 74, 75, 76,
94             77, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
95             25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, 78, 79,
96             80, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
97             51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 81, 82, 83, 84, -1);
98              
99             =head1 FUNCTIONS
100              
101             =head3 base85_check
102              
103             Examine a string for characters that fall outside the Base 85 character set.
104              
105             Returns the first character position that fails the test, or -1 if no characters fail.
106              
107             if (my $d = base85_check($base85str) >= 0)
108             {
109             carp "Incorrect character at position $d; cannot decode input string";
110             return undef;
111             }
112              
113             =cut
114              
115             sub base85_check
116             {
117 0     0 1 0 my($str) = @_;
118 0         0 my(@chars) = split(//, $str);
119              
120             #
121             ### Check validity of: $str
122             ### Which becomes array: @chars
123             #
124 0         0 for my $j (0 .. $#chars)
125             {
126 0         0 my $o = ord($chars[$j]);
127 0 0 0     0 return $j if ($o > 0x7f or $b85_decode[$o] == -1);
128             }
129 0         0 return -1;
130             }
131              
132             =head3 base85_encode
133              
134             =head3 Convert::Base85::encode
135              
136             Converts input data to Base85 test.
137              
138             This function may be exported as C into the caller's
139             namespace.
140              
141             my $datalen = length($data);
142             my $encoded = base85_encode($data);
143              
144             Or, if you want to have managable lines, read 48 bytes at a time and
145             write 60-character lines (remembering that C takes 16 bytes
146             at a time and encodes to 20 bytes). Remember to save the original length
147             in case the data had to be padded out to a multiple of 16.
148              
149             =cut
150              
151             sub encode
152             {
153 2     2 1 410 my($plain) = @_;
154 2         4 my @mlist;
155 2         8 my $rem = uint128();
156              
157             #
158             # Extra zero bytes to bring the length up to a multiple of sixteen.
159             #
160 2         6 my $extra = -length($plain) % 16;
161 2         6 $plain .= "\0" x $extra;
162              
163 2         12 for my $str16 (unpack '(a16)*', $plain)
164             {
165 3         10 my @tmplist = (0) x 20;
166 3         9 my $total16 = uint128(0);
167 3         9 my @plain = unpack('C*', $str16);
168              
169             #
170             ### @plain: join(", ", @plain)
171             #
172 3         7 for my $p (@plain)
173             {
174 48         94 uint128_left($total16, $total16, 8);
175 48         132 uint128_add($total16, $total16, uint128($p));
176             }
177              
178             #
179             ##### total16: "$total16"
180             #
181 3         8 for my $j (reverse 0 .. 19)
182             {
183 60         137 uint128_divmod($total16, $rem, $total16, uint128(85));
184 60         110 $tmplist[$j] = uint128_to_number($rem);
185             }
186 3         25 push @mlist, @tmplist;
187             }
188              
189 2         5 return join "", map{$b85_encode[$_]} @mlist;
  60         106  
190             }
191              
192             *base85_encode = \&encode;
193              
194              
195             =head3 base85_decode
196              
197             =head3 Convert::Base85::decode
198              
199             Converts the Base85-encoded string back to bytes. Any spaces, linebreaks, or
200             other whitespace are stripped from the string before decoding.
201              
202             This function may be exported as C into the caller's namespace.
203              
204             If your original data wasn't an even multiple of sixteen in length, the
205             decoded data may have some padding with null bytes ('\0'), which can be removed.
206              
207             #
208             # Decode the string and compare its length with the length of the original data.
209             #
210             my $decoded = base85_decode($data);
211             my $padding = length($decoded) - $datalen;
212             chop $decoded while ($padding-- > 0);
213              
214             =cut
215              
216             sub decode
217             {
218 2     2 1 777 my($encoded) = @_;
219              
220 2         7 $encoded =~ tr[ \t\r\n\f][]d;
221              
222 2         4 my $extra = -length($encoded) % 20;
223              
224 2         4 my @mlist;
225 2         7 my $imul = uint128(85);
226 2         5 my $rem = uint128();
227              
228 2         8 for my $str20 (unpack '(a20)*', $encoded)
229             {
230 3         8 my $total20 = uint128(0);
231 3         10 my @tmplist = (q(0)) x 16;
232              
233 3         9 my @coded = unpack('C*', $str20);
234              
235             #
236             #### $str20: $str20
237             #### @coded: join(", ", @coded)
238             #
239 3         5 for my $c (@coded)
240             {
241 60         101 my $iadd = uint128($b85_decode[$c]);
242 60         111 uint128_mul($total20, $total20, $imul);
243 60         106 uint128_add($total20, $total20, $iadd);
244             }
245              
246             #
247             ##### total20: "$total20"
248             #
249 3         7 for my $j (reverse 0 .. 15)
250             {
251 48         119 uint128_divmod($total20, $rem, $total20, uint128(256));
252 48         81 $tmplist[$j] = uint128_to_number($rem);
253             }
254              
255             #
256             ##### @tmplist: join(", ", @tmplist)
257             #
258 3         25 push @mlist, @tmplist;
259             }
260              
261 2         6 return join "", map{chr($_)} @mlist;
  48         85  
262             }
263              
264             *base85_decode = \&decode;
265              
266             =head1 SEE ALSO
267              
268             =head2 The Base85 Character Set
269              
270             The Base85 character set is described by Robert Elz in his RFC1924 of
271             April 1st 1996,
272             L<"A Compact Representation of IPv6 Addresses"|https://tools.ietf.org/html/rfc1924>
273             which are made up from the 94 printable ASCII characters, minus
274             quote marks, comma, slash and backslash, and the brackets.
275              
276             Despite it being an
277             L,
278             the reasoning for the choice of characters for the set was solid.
279              
280             The character set is:
281              
282             '0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&',
283             '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_',
284             '`', '|', and '~'.
285              
286             and allows for the possibility of using the string in a MIME container.
287              
288             =head2 Ascii85
289              
290             Base85 is similar in concept to L,
291             a format developed for the btoa program, and later adopted with changes by Adobe
292             for Postscript's ASCII85Encode filter. There are, of course, modules on CPAN
293             that provide this format.
294              
295             =over 3
296              
297             =item
298              
299             L
300              
301             =item
302              
303             L
304              
305             =back
306              
307             =head2 Base64
308              
309             L encoding is an eight-bit to six-bit
310             encoding scheme that, depending on the characters used for encoding, has been used
311             for uuencode and MIME transfer, among many other formats. There are, of course,
312             modules on CPAN that provide this format.
313              
314             =over 3
315              
316             =item
317              
318             L
319              
320             =item
321              
322             L
323              
324             =back
325              
326             =head1 AUTHOR
327              
328             John M. Gamble C<< >>
329              
330             =head1 BUGS
331              
332             Please report any bugs or feature requests to C,
333             or through the web interface at L.
334             I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
335              
336             =head1 SUPPORT
337              
338             This module is on Github at L.
339              
340             You can also look for information on L.
341              
342             =head1 LICENSE AND COPYRIGHT
343              
344             Copyright (c) 2019 John M. Gamble.
345              
346             This program is free software; you can redistribute it and/or modify it
347             under the terms of either: the GNU General Public License as published
348             by the Free Software Foundation; or the Artistic License.
349              
350             See L for more information.
351              
352              
353             1;
354              
355             __END__