File Coverage

blib/lib/Data/UUID/NCName.pm
Criterion Covered Total %
statement 106 125 84.8
branch 18 38 47.3
condition 15 27 55.5
subroutine 24 29 82.7
pod 8 8 100.0
total 171 227 75.3


line stmt bran cond sub pod time code
1             package Data::UUID::NCName;
2              
3 2     2   63073 use 5.012;
  2         6  
4 2     2   10 use strict;
  2         3  
  2         39  
5 2     2   8 use warnings FATAL => 'all';
  2         2  
  2         64  
6 2     2   8 use feature 'state';
  2         2  
  2         225  
7 2     2   18 use base 'Exporter::Tiny';
  2         3  
  2         919  
8 2     2   8706 use overload;
  2         841  
  2         11  
9              
10 2     2   878 use MIME::Base32 ();
  2         2088  
  2         38  
11 2     2   806 use MIME::Base64 ();
  2         1732  
  2         47  
12 2     2   2396 use Math::BigInt ();
  2         58660  
  2         86  
13 2     2   1189 use Encode::Base58::BigInt ();
  2         46324  
  2         53  
14 2     2   15 use Carp ();
  2         3  
  2         38  
15              
16 2     2   1200 use Type::Params qw(compile multisig);
  2         195730  
  2         26  
17 2     2   918 use Types::Standard qw(slurpy Maybe Any Item Str Int Dict Object Optional);
  2         15  
  2         18  
18 2     2   6054 use Type::Library -base, -declare => qw(Stringable AnyUUID Format Radix Ver);
  2         4  
  2         10  
19 2     2   2842 use Type::Utils -all;
  2         15076  
  2         21  
20              
21             sub _to_string {
22 0     0   0 my $x = shift;
23 0 0 0     0 overload::Method($x, '""') || $x->can('to_string') || $x->can('as_string');
24             }
25              
26             my %B58_FWD = do {
27             my @flickr = split //,
28             q{123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ};
29             my @btc = split //,
30             q{123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz};
31             map { $flickr[$_] => $btc[$_] } (0..$#flickr);
32             };
33             my %B58_REV = reverse %B58_FWD;
34              
35              
36             sub _to_base58 {
37             # if we turn this to a bigint we need to remove the leading nulls first
38 2000     2000   3062 my $str = shift;
39 2000         6904 $str =~ s/\A(\0*)//;
40 2000   50     6502 my $null = $1 // '';
41             # warn unpack 'H*', $str;
42 2000         9054 my $big = Math::BigInt->from_bytes($str);
43 2000         1169235 my $b58 = Encode::Base58::BigInt::encode_base58($big);
44 2000         12291002 ('1' x length($null)) . join '', map { $B58_FWD{$_} } (split //, $b58);
  41738         81578  
45             }
46              
47             sub _from_base58 {
48             # again we have to remove any leading zero values (1 in base58) or
49             # they will be clobbered when turned into a bigint
50 2000     2000   9443 my ($null, $b58) = ($_[0] =~ /\A(1*)(.*?)\z/);
51 2000         11025 $b58 = join '', map { $B58_REV{$_} } (split //, $b58);
  41738         59490  
52 2000         9919 my $big = Math::BigInt->new(Encode::Base58::BigInt::decode_base58($b58));
53 2000         14610915 ("\0" x length($null)) . $big->to_bytes;
54             }
55              
56             declare Stringable, as Object, where \&_to_string;
57              
58             declare AnyUUID, as Str|Stringable, where {
59 2     2   6633 use bytes;
  2         28  
  2         10  
60             my $x = ref $_ ? _to_string($_)->($_) : $_;
61              
62             return 1 if length $x == 16;
63              
64             if (my ($hex) = ($x =~ /^\s*(?i:urn:uuid:)?([0-9A-Fa-f-]{32,})\s*$/sm)) {
65             $hex =~ s/-//g;
66             return 1 if length $hex == 32;
67             }
68              
69             if (my ($b64) = ($x =~ m!^\s*([0-9A-Za-z+/_-]=*)\s*$!sm)) {
70             $b64 =~ tr!-_!+/!;
71             return 1 if 16 == length(MIME::Base64::decode($b64));
72             }
73              
74             return;
75             };
76              
77             enum Format, [qw(str hex b64 bin)];
78             enum Radix, [32, 58, 64];
79             enum Ver, [0, 1]; # there may be more versions later on
80              
81             =encoding utf8
82              
83             =head1 NAME
84              
85             Data::UUID::NCName - Make valid NCName tokens which are also UUIDs
86              
87             =head1 VERSION
88              
89             Version 0.07
90              
91             =cut
92              
93             our $VERSION = '0.07';
94              
95             =head1 SYNOPSIS
96              
97             use Data::UUID::NCName qw(:all);
98              
99             my $uuid = '1ff916f3-6ed7-443a-bef5-f4c85f18cd10';
100             my $ncn = to_ncname($uuid, version => 1);
101             my $ncn32 = to_ncname($uuid, version => 1, radix => 32);
102              
103             # $ncn is now "EH_kW827XQ6719MhfGM0QL".
104             # $ncn32 is "ed74rn43o25b255puzbprrtiql" and case-insensitive.
105              
106             # from Test::More, this will output 'ok':
107             is(from_ncname($ncn, version => 1),
108             $uuid, 'Decoding result matches original');
109              
110             =head1 DESCRIPTION
111              
112             The purpose of this module is to devise an alternative representation
113             of the L which conforms to
114             the constraints of various other identifiers such as NCName, and create an
115             L mapping between
116             them.
117              
118             =head1 FORMAT DEPRECATION NOTICE
119              
120             After careful consideration, I have decided to change the UUID-NCName
121             format in a minor yet incompatible way. In particular, I have moved
122             the quartet containing the
123             L|https://tools.ietf.org/html/rfc4122#section-4.1.1> to the
124             very end of the identifier, whereas it previously was mixed into the
125             middle somewhere.
126              
127             This can be considered an application of L
128             Law|https://en.wikipedia.org/wiki/Postel%27s_law>, based on the
129             assumption that these identifiers will be generated through other
130             methods, and potentially naïvely. Like the C field, the
131             C field has a limited acceptable range of values. If, for
132             example, one were to attempt to generate a conforming identifier by
133             simply generating a random Base32 or Base64 string, it will be
134             difficult to ensure that the C field will indeed conform when
135             the identifier is converted to a standard UUID. By moving the
136             C field out to the end of the identifier, everything between
137             the C and C bookends can be generated randomly
138             without any further consideration, like so:
139              
140             our @B64_ALPHA = ('A'..'Z', 'a'..'z', 0..9, qw(- _));
141              
142             sub make_cheapo_b64_uuid_ncname () {
143             my @vals = map { int rand 64 } (1..20); # generate content
144             push @vals, 8 + int rand 4; # last digit is special
145             'E' . join '', map { $B64_ALPHA[$_] } @vals; # 'E' for UUID V4
146             }
147              
148             # voilà:
149             my $cheap = make_cheapo_b64_uuid_ncname;
150             # EPrakcT1o2arqWSOuIMGSK or something
151              
152             # as expected, we can decode it (version 1, naturally)
153             my $uu = Data::UUID::NCName::from_ncname($cheap, version => 1);
154             # 3eb6a471-3d68-4d9a-aaea-5923ae20c192 - UUID is valid
155              
156             Furthermore, since the default behaviour is to align the bits of the
157             last byte to the size of the encoding symbol, and since the C
158             bits are masked, a compliant RFC4122 UUID will I end with
159             C, C, C, or C, in I Base32 (case-insensitive) and
160             Base64 variants.
161              
162             Since I have already released this module prior to this format change,
163             I have added a C parameter to both L and
164             L. The version currently defaults to C<1>, the new one,
165             but will issue a warning if not explicitly set. Later I will
166             finally remove the warning with C<1> as the default. This should
167             ensure that any code written during the transition produces the
168             correct results.
169              
170             =over 4
171              
172             Unless you have to support identifiers generated from version 0.04 or
173             older, B 1>>.
174              
175             =back
176              
177             =head1 RATIONALE & METHOD
178              
179             The UUID is a generic identifier which is large enough to be globally
180             unique. This makes it useful as a canonical name for data objects in
181             distributed systems, especially those that cross administrative
182             jurisdictions, such as the World-Wide Web. The
183             L,
184             however, of the UUID, precludes it from being used in many places
185             where it would be useful to do so.
186              
187             In particular, there are grammars for many types of identifiers which
188             must not begin with a digit. Others are case-insensitive, or
189             prohibited from containing hyphens (present in both the standard
190             notation and Base64URL), or indeed anything outside of
191             C<^[A-Za-z_][0-9A-Za-z_]*$>.
192              
193             The hexadecimal notation of the UUID has a 5/8 chance of beginning
194             with a digit, Base64 has a 5/32 chance, and Base32 has a 3/16
195             chance. As such, the identifier must be modified in such a way as to
196             guarantee beginning with an alphabetic letter (or underscore C<_>, but some
197             grammars even prohibit that, so we omit it as well).
198              
199             While it is conceivable to simply add a padding character, there are a
200             few considerations which make it more appealing to derive the initial
201             character from the content of the UUID itself:
202              
203             =over 4
204              
205             =item *
206              
207             UUIDs are large (128-bit) identifiers as it is, and it is undesirable
208             to add meaningless syntax to them if we can avoid doing so.
209              
210             =item *
211              
212             128 bits is an inconvenient number for aligning to both Base32 (130)
213             and Base64 (132), though 120 divides cleanly into 5, 6 and 8.
214              
215             =item *
216              
217             The 13th quartet, or higher four bits of the
218             C of the UUID is constant, as it indicates
219             the UUID's version. If we encode this value using the scheme common to
220             both Base64 and Base32, we get values between C and C

, with the

221             valid subset between C and C.
222              
223             =back
224              
225             B extract the UUID's version quartet, shift all subsequent
226             data 4 bits to the left, zero-pad to the octet, encode with either
227             I or I, truncate, and finally prepend the encoded
228             version character. VoilE, one token-safe UUID.
229              
230             =head1 APPLICATIONS
231              
232             =over 4
233              
234             =item XML IDs
235              
236             The C production appears to have been constricted, inadvertently
237             or otherwise, from L in both
238             the XML 1.0 and 1.1 specifications, to
239             L by L
240             Part 2|http://www.w3.org/TR/xmlschema-2/#ID>. This removes the colon
241             character C<:> from the grammar. The net effect is that
242              
243            
244              
245             while being a I ID I valid under DTD validation, is
246             I valid per XML Schema Part 2 or anything that uses it
247             (e.g. Relax NG).
248              
249             =item RDF blank node identifiers
250              
251             Blank node identifiers in RDF are intended for serialization, to act
252             as a handle so that multiple RDF statements can refer to the same
253             blank node. The L
254             specifies|http://www.w3.org/TR/rdf-concepts/#section-URI-Vocabulary>
255             that the validity constraints of blank node identifiers be delegated
256             to the concrete syntax specifications. The L
257             specification|http://www.w3.org/TR/rdf-syntax-grammar/#rdf-id>
258             lists the blank node identifier as NCName. However, according to
259             L, this is a
260             valid blank node identifier:
261              
262             _:42df00ec-30a2-431f-be9e-e3a612b325db
263              
264             despite L
265             version|http://www.w3.org/TeamSubmission/turtle/#nodeID> listing a
266             production equivalent to the more conservative NCName. NTriples
267             syntax is L
268             constrained|http://www.w3.org/TR/rdf-testcases/#ntriples>, given as
269             C<^[A-Za-z][0-9A-Za-z]*$>.
270              
271             =item Generated symbols
272              
273             =over 4
274              
275             There are only two hard things in computer science: cache
276             invalidation and naming things [and off-by-one errors].
277              
278             -- Phil Karlton [extension of unknown origin]
279              
280             =back
281              
282             Suppose you wanted to create a L
283             programming|http://en.wikipedia.org/wiki/Literate_programming> system
284             (I do). One of your (my) stipulations is that the symbols get defined
285             in the I, rather than the I. However, you (I) still want
286             to be able to validate the code's syntax, and potentially even run the
287             code, without having to commit to naming anything. You are (I am) also
288             interested in creating a global map of classes, datatypes and code
289             fragments, which can be operated on and tested in isolation, ported to
290             other languages, or transplanted into the more conventional packages
291             of programs, libraries and frameworks. The Base32 UUID NCName
292             representation should be adequate for placeholder symbols in just
293             about any programming language, save for those which do not permit
294             identifiers as long as 26 characters (which are extremely scarce).
295              
296             =back
297              
298             =head1 EXPORT
299              
300             No subroutines are exported by default. Be sure to include at least
301             one of the following in your C statement:
302              
303             =over 4
304              
305             =item :all
306              
307             Import all functions.
308              
309             =item :decode
310              
311             Import decode-only functions.
312              
313             =item :encode
314              
315             Import encode-only functions.
316              
317             =item :32
318              
319             Import base32-only functions.
320              
321             =item :58
322              
323             Import base58-only functions.
324              
325             =item :64
326              
327             Import base64-only functions.
328              
329             =back
330              
331             =cut
332              
333             # exporter stuff
334              
335             our %EXPORT_TAGS = (
336             all => [qw(to_ncname from_ncname
337             to_ncname_32 from_ncname_32
338             to_ncname_58 from_ncname_58
339             to_ncname_64 from_ncname_64)],
340             decode => [qw(from_ncname from_ncname_32 from_ncname_58 from_ncname_64)],
341             encode => [qw(to_ncname to_ncname_32 to_ncname_58 to_ncname_64)],
342             32 => [qw(to_ncname_32 from_ncname_32)],
343             58 => [qw(to_ncname_58 from_ncname_58)],
344             64 => [qw(to_ncname_64 from_ncname_64)],
345             );
346              
347             # export nothing by default
348             our @EXPORT = ();
349             our @EXPORT_OK = @{$EXPORT_TAGS{all}};
350              
351             # uuid format string, so meta.
352             my $UUF = sprintf('%s-%s-%s-%s-%s', '%02x' x 4, ('%02x' x 2) x 3, '%02x' x 6);
353             # yo dawg i herd u liek format strings so we put a format string in yo
354             # format string
355              
356             # dispatch tables for encoding/decoding
357              
358             my %ENCODE = (
359             32 => sub {
360             my @in = unpack 'C*', shift;
361             my $align = shift;
362             $in[-1] >>= 1 if $align;
363             my $out = MIME::Base32::encode_rfc3548(pack 'C*', @in);
364              
365             # we want lower case because IT IS RUDE TO SHOUT
366             lc substr($out, 0, 25);
367             },
368             58 => sub {
369             my @in = unpack 'C*', shift;
370             my $variant = pop(@in) >> 4;
371             # warn unpack 'H*', pack 'C*', @in;
372             my $out = _to_base58(pack 'C*', @in);
373             # warn $out;
374             $out . ('_' x (21 - length $out)) . _encode_version($variant);
375             },
376             64 => sub {
377             my @in = unpack 'C*', shift;
378             my $align = shift;
379             $in[-1] >>= 2 if $align;
380              
381             my $out = MIME::Base64::encode(pack 'C*', @in);
382             # note that the rfc4648 sequence ends in +/ or -_
383             $out =~ tr!+/!-_!;
384              
385             substr($out, 0, 21);
386             },
387             );
388              
389             my %DECODE = (
390             32 => sub {
391             my ($in, $align) = @_;
392              
393             $in = uc substr($in, 0, 25) . '0';
394              
395             my @out = unpack 'C*', MIME::Base32::decode_rfc3548($in);
396             $out[-1] <<= 1 if $align;
397              
398             pack 'C*', @out;
399             },
400             58 => sub {
401             my ($in, $align) = @_;
402             my ($b58, $variant) = ($in =~ /^(.*)(.)$/);
403             $variant = _decode_version($variant) << 4;
404             $b58 =~ tr/_//d;
405             _from_base58($b58) . chr($variant);
406             },
407             64 => sub {
408             my ($in, $align) = @_;
409              
410             $in = substr($in, 0, 21) . 'A==';
411             # note that the rfc4648 sequence ends in +/ or -_
412             $in =~ tr!-_!+/!;
413              
414             #warn unpack 'H*', MIME::Base64::decode($in);
415              
416             my @out = unpack 'C*', MIME::Base64::decode($in);
417              
418             $out[-1] <<= 2 if $align;
419              
420             pack 'C*', @out;
421             },
422             );
423              
424             my @TRANSFORM = (
425             # old version, prior to format change
426             [
427             # _bin_uuid_to_pair
428             sub {
429             my $data = shift;
430             my @list = unpack 'N4', $data;
431              
432             # take the 4 bits the version
433             my $ver = ($list[1] & 0x0000f000) >> 12;
434             # patch the hole with by shifting everything left by 4 bits
435             $list[1] = ($list[1] & 0xffff0000) |
436             (($list[1] & 0x00000fff) << 4) | ($list[2] >> 28);
437             $list[2] = ($list[2] & 0x0fffffff) << 4 | ($list[3] >> 28);
438             $list[3] <<= 4; # note variant bits are not removed
439              
440             return $ver, pack 'N4', @list;
441             },
442             # _pair_to_bin_uuid
443             sub {
444             my ($ver, $data) = @_;
445              
446             $ver &= 0xf;
447              
448             my @list = unpack 'N4', $data;
449              
450             $list[3] >>= 4;
451             $list[3] |= (($list[2] & 0xf) << 28);
452             $list[2] >>= 4;
453             $list[2] |= (($list[1] & 0xf) << 28);
454             $list[1] = ($list[1] & 0xffff0000) | ($ver << 12) |
455             (($list[1] >> 4) & 0xfff);
456              
457             #warn unpack 'H*', pack 'N4', @list;
458              
459             pack 'N4', @list;
460             },
461             ],
462             # new version
463             [
464             # _bin_uuid_to_pair
465             sub {
466             my $data = shift;
467             my @list = unpack 'N4', $data;
468              
469             my $ver = ($list[1] & 0x0000f000) >> 12; # version
470             my $var = ($list[2] & 0xf0000000) >> 24; # variant
471             $list[1] = ($list[1] & 0xffff0000) |
472             (($list[1] & 0x00000fff) << 4) |
473             (($list[2] & 0x0fffffff) >> 24);
474             $list[2] = ($list[2] & 0x00ffffff) << 8 | ($list[3] >> 24);
475             $list[3] = ($list[3] << 8) | $var;
476              
477             return $ver, pack 'N4', @list;
478             },
479             # _pair_to_bin_uuid
480             sub {
481             my ($ver, $data) = @_;
482              
483             $ver &= 0xf;
484              
485             my @list = unpack 'N4', $data;
486              
487             my $var = ($list[3] & 0xf0) << 24;
488              
489             $list[3] >>= 8;
490             $list[3] |= (($list[2] & 0xff) << 24);
491             $list[2] >>= 8;
492             $list[2] |= (($list[1] & 0xf) << 24) | $var;
493             $list[1] = ($list[1] & 0xffff0000) | ($ver << 12) |
494             (($list[1] >> 4) & 0xfff);
495              
496             #warn unpack 'H*', pack 'N4', @list;
497              
498             pack 'N4', @list;
499             },
500             ],
501             );
502              
503             sub _encode_version {
504 8006     8006   12784 my $ver = $_[0] & 15;
505 8006 100 100     25072 my $off = ($_[1] // 64) == 32 ? 97 : 65;
506             # A (0) starts at 65. this should never be higher than F (version
507             # 5) for a valid UUID, but even an invalid one will never be
508             # higher than P (15).
509              
510             # XXX boo-hoo, this will break in EBCDIC.
511 8006         39051 chr($ver + $off);
512             }
513              
514             sub _decode_version {
515             # modulo makes sure this always returns between 0 and 15
516 8001     8001   17322 return((ord(uc $_[0]) - 65) % 16);
517             }
518              
519             =head1 SUBROUTINES
520              
521             =head2 to_ncname $UUID [, $RADIX ] [, %PARAMS ]
522              
523             Turn C<$UUID> into an NCName. The UUID can be in the canonical
524             (hyphenated) hexadecimal form, non-hyphenated hexadecimal, Base64
525             (regular and base64url), or binary. The function returns a legal
526             NCName equivalent to the UUID, in either Base32, Base58, or Base64
527             (url), given a specified C<$RADIX> of 32, 58, or 64. If the radix is
528             omitted, Base64 is assumed.
529              
530             The following keyword parameters are also accepted, and override the
531             positional parameters where applicable:
532              
533             =over 4
534              
535             =item radix 32|58|64
536              
537             Either 32 or 64 to explicitly specify Base32, Base58, or Base64
538             output. Defaults to 64.
539              
540             =item version 0|1
541              
542             Version 0 will generate the original version of NCName identifiers,
543             prior to the changes noted above. Version 1 is the new version, which
544             is I backwards-compatible. The default, for a transitional
545             period, is to generate version 0, but complain about it. Set the
546             version explicitly (to 1, or to 0 if you need backwards compatibility)
547             to eliminate the warning messages.
548              
549             =item align $FALSY|$TRUTHY
550              
551             Align the last 4 bits to the Base32/Base64 symbol size. You almost
552             certainly want this, so the default is I. (Does not apply to
553             Base58.)
554              
555             =back
556              
557             =cut
558              
559             sub to_ncname {
560 6006     6006 1 3024426 state $dict = slurpy Dict[
561             radix => Optional[Radix],
562             version => Optional[Ver],
563             align => Optional[Item],
564             slurpy Any];
565 6006         22554 state $check = multisig(
566             [AnyUUID, Optional[Radix], $dict],
567             [AnyUUID, $dict]);
568              
569 6006         83738 my ($uuid, $radix, $p) = $check->(@_);
570              
571             # optional radix moved to named parameter
572 6006 100       205338 if (ref $radix) {
573 4         5 $p = $radix;
574 4         7 undef $radix;
575             }
576              
577 6006   50     12549 $radix //= $p->{radix} || 64;
      66        
578 6006   50     22927 $p->{align} = !!($p->{align} // 1);
579              
580 6006 50       12760 unless (defined $p->{version}) {
581 0         0 Carp::carp('Set an explicit `version` to eliminate this warning.' .
582             ' See Data::UUID::NCName docs.');
583 0         0 $p->{version} = 1;
584             }
585              
586             # type checking has ensured this is Stringable so get the string
587 6006 50       10974 $uuid = _to_string($uuid)->($uuid) if ref $uuid;
588              
589             # get the uuid into a binary string
590 6006         7441 my $bin;
591 6006 50       10081 if (length $uuid == 16) {
592             # this is already a binary string
593 0         0 $bin = $uuid;
594             }
595             else {
596             # get rid of any whitespace
597 6006         13116 $uuid =~ s/\s+//g;
598              
599             # handle hexadecimal
600 6006 50       22672 if ($uuid =~ /^(?i:urn:uuid:)?[0-9A-Fa-f-]{32,}$/) {
    0          
601 6006         10612 $uuid =~ s/^urn:uuid://i;
602 6006         19024 $uuid =~ s/-//g;
603             #warn $uuid;
604 6006         24850 $bin = pack 'H*', $uuid;
605             }
606             # handle base64
607             elsif ($uuid =~ m!^[0-9A-Za-z=+/_-]$!) {
608             # canonicalize first
609 0         0 $uuid =~ tr!-_!+/!;
610 0         0 $bin = MIME::Base64::decode($uuid);
611             }
612             else {
613 0         0 Carp::croak("Couldn't figure out what to do with putative UUID.");
614             }
615             }
616              
617             # extract the version
618 6006         18255 my ($version, $content) = $TRANSFORM[$p->{version}][0]->($bin);
619              
620             # wah-lah.
621             _encode_version($version, $radix) .
622 6006         14873 $ENCODE{$radix}->($content, $p->{align});
623             }
624              
625             =head2 from_ncname $NCNAME [, $FORMAT [, $RADIX] ] [, %PARAMS ]
626              
627             Turn an appropriate C<$NCNAME> back into a UUID, where I,
628             unless overridden by C<$RADIX>, is defined beginning with one initial
629             alphabetic letter (A to Z, case-insensitive) followed by either:
630              
631             =over 4
632              
633             =item B<25> Base32 characters, or
634              
635             =item B<21> Base64URL characters.
636              
637             =back
638              
639             The function will return C immediately if it cannot match
640             either of these patterns. Input past the 21-character mark (for
641             Base64) or 25-character mark (for Base32) is ignored.
642              
643             This function returns a UUID of type C<$FORMAT>, which if left
644             undefined, must be one of the following:
645              
646             =over 4
647              
648             =item str
649              
650             The canonical UUID format, like so:
651             C<33fcc995-5d10-477e-a9b4-c9cc405bbf04>. This is the default.
652              
653             =item hex
654              
655             The same thing, minus the hyphens.
656              
657             =item b64
658              
659             Base64.
660              
661             =item bin
662              
663             A binary string.
664              
665             =back
666              
667             This function also takes the new keyword-style parameters:
668              
669             =over 4
670              
671             =item format
672              
673             As above.
674              
675             =item radix
676              
677             As above.
678              
679             =item version
680              
681             Sets the identifier version. Defaults to version 0 with a warning. See
682             the note about setting an explicit C parameter in L.
683              
684             =item align
685              
686             Assume the last few bits are aligned to the symbol, as in L.
687              
688             =back
689              
690             =cut
691              
692             my %FORMAT = (
693             str => sub {
694             sprintf $UUF, unpack 'C*', shift;
695             },
696             hex => sub {
697             unpack 'H*', shift;
698             },
699             b64 => sub {
700             my $x = MIME::Base64::encode(shift);
701             $x =~ s/=+$//;
702             $x;
703             },
704             bin => sub {
705             shift;
706             },
707             );
708              
709             sub from_ncname {
710 6001     6001 1 3013200 state $dict = slurpy Dict[
711             format => Optional[Format],
712             radix => Optional[Radix],
713             version => Optional[Ver],
714             align => Optional[Item],
715             slurpy Any];
716 6001         16772 state $check = multisig(
717             [Str, $dict],
718             [Str, Maybe[Format], Optional[Radix], $dict],
719             [Str, Maybe[Format], $dict],
720             );
721              
722 6001         32551 my ($ncname, $format, $radix, $p) = $check->(@_);
723              
724             # handle vagaries of legacy positional parameters
725 6001 50       266905 if (ref $format) {
    0          
726 6001         8809 $p = $format;
727 6001         8456 undef $format;
728             }
729             elsif (ref $radix) {
730 0         0 $p = $radix;
731 0         0 undef $radix;
732             }
733              
734             # unconditional override by key-value radix and format parameters
735 6001 100       15987 $radix = $p->{radix} if defined $p->{radix};
736              
737             # reuse this variable because it doesn't get used for anything else
738 6001   33     31323 $format = $FORMAT{$p->{format} || $format || 'str'};
      50        
739              
740             # coerce align parameter to boolish
741 6001   50     21136 $p->{align} = !!($p->{align} // 1);
742              
743             # enforce explicit presence of version
744 6001 50       12254 unless (defined $p->{version}) {
745 0         0 Carp::carp('Set an explicit `version` to eliminate this warning.' .
746             ' See Data::UUID::NCName docs.');
747 0         0 $p->{version} = 1;
748             }
749              
750             # obviously this must be defined
751 6001 50       10704 return unless defined $ncname;
752              
753             # no whitespace
754 6001         44808 $ncname =~ s/^\s*(.*?)\s*$/$1/sm;
755              
756             # note that the rfc4648 sequence ends in +/ or -_
757 6001 50       29159 my ($version, $content) = ($ncname =~ /^([A-Za-z])([0-9A-Za-z_-]{21,})$/)
758             or return;
759              
760 6001 100       13248 if ($radix) {
761 6000 50 100     22868 Carp::croak("Radix must be either 32, 58, or 64, not $radix")
      66        
762             unless $radix == 32 || $radix == 58 || $radix == 64;
763             }
764             else {
765             # detect what to do based on input
766 1         3 my $len = length $ncname;
767              
768 1 50       4 if ($len == 22) {
    0          
    0          
769 1         3 $radix = 64;
770             }
771             elsif ($len == 23) {
772 0         0 $radix = 58;
773             }
774             elsif ($len == 26) {
775 0         0 $radix = 32;
776             }
777             else {
778             # the regex above should ensure this is never reached.
779 0         0 Carp::croak
780             ("Not sure what to do with an identifier of length $len.");
781             }
782             }
783              
784             # get this stuff back to canonical form
785 6001         13011 $version = _decode_version($version);
786             # warn $version;
787 6001         17972 $content = $DECODE{$radix}->($content, $p->{align});
788             # warn unpack 'H*', $content;
789              
790             # reassemble the pair
791 6001         850041 my $bin = $TRANSFORM[$p->{version}][1]->($version, $content);
792              
793             # *now* format.
794 6001         13324 $format->($bin);
795             }
796              
797             =head2 to_ncname_64 $UUID [, %PARAMS ]
798              
799             Shorthand for Base64 NCNames.
800              
801             =cut
802              
803             sub to_ncname_64 {
804 3     3 1 3474 to_ncname(@_, radix => 64);
805             }
806              
807             =head2 from_ncname_64 $NCNAME [, $FORMAT | %PARAMS ]
808              
809             Ditto.
810              
811             =cut
812              
813             sub from_ncname_64 {
814 0     0 1 0 from_ncname(@_, radix => 64);
815             }
816              
817             =head2 to_ncname_58 $UUID [, %PARAMS ]
818              
819             Shorthand for Base58 NCNames.
820              
821             =cut
822              
823             sub to_ncname_58 {
824 0     0 1 0 to_ncname(shift, 58, @_);
825             }
826              
827             =head2 from_ncname_58 $NCNAME [, $FORMAT | %PARAMS ]
828              
829             Ditto.
830              
831             =cut
832              
833             sub from_ncname_58 {
834 0     0 1 0 from_ncname(@_, radix => 58);
835             }
836              
837             =head2 to_ncname_32 $UUID [, %PARAMS ]
838              
839             Shorthand for Base32 NCNames.
840              
841             =cut
842              
843             sub to_ncname_32 {
844 2     2 1 13 to_ncname(shift, 32, @_);
845             }
846              
847             =head2 from_ncname_32 $NCNAME [, $FORMAT | %PARAMS ]
848              
849             Ditto.
850              
851             =cut
852              
853             sub from_ncname_32 {
854 0     0 1   from_ncname(@_, radix => 32);
855             }
856              
857             =head1 AUTHOR
858              
859             Dorian Taylor, C<< >>
860              
861             =head1 BUGS
862              
863             Please report bugs/issues/etc L
864             GitHub|https://github.com/doriantaylor/p5-data-uuid-ncname/issues>.
865              
866             =over 4
867              
868             =item * MetaCPAN
869              
870             L
871              
872             =item * GitHub repository (bugs also go here)
873              
874             L
875              
876             =item * AnnoCPAN: Annotated CPAN documentation
877              
878             L
879              
880             =item * CPAN Ratings
881              
882             L
883              
884             =back
885              
886             =head1 SEE ALSO
887              
888             =over 4
889              
890             =item
891              
892             L
893              
894             =item
895              
896             L
897              
898             =item
899              
900             L
901              
902             =item
903              
904             L
905              
906             =item
907              
908             L
909              
910             =item
911              
912             L
913             (NCName)
914              
915             =item
916              
917             L
918             Datatypes|http://www.w3.org/TR/xmlschema11-2/#ID> (ID)
919              
920             =item
921              
922             L
923             (Revised)|http://www.w3.org/TR/rdf-syntax-grammar/#rdf-id>
924              
925             =item
926              
927             L
928              
929             =back
930              
931             This module lives under the C namespace for the purpose of
932             namespace hygiene. The main module I depend on
933             L, howevever the script L I depend on
934             L to generate UUIDs.
935              
936             =head1 LICENSE AND COPYRIGHT
937              
938             Copyright 2012-2018 Dorian Taylor.
939              
940             Licensed under the Apache License, Version 2.0 (the "License"); you
941             may not use this file except in compliance with the License. You may
942             obtain a copy of the License at
943             L .
944              
945             Unless required by applicable law or agreed to in writing, software
946             distributed under the License is distributed on an "AS IS" BASIS,
947             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
948             implied. See the License for the specific language governing
949             permissions and limitations under the License.
950              
951             =cut
952              
953             1; # End of Data::UUID::NCName