File Coverage

blib/lib/Data/UUID/NCName.pm

Criterion	Covered	Total	%
statement	106	125	84.8
branch	18	38	47.3
condition	15	27	55.5
subroutine	24	29	82.7
pod	8	8	100.0
total	171	227	75.3

line	stmt	bran	cond	sub	pod	time	code
1							package Data::UUID::NCName;
2
3	2			2		63073	use 5.012;
	2					6
4	2			2		10	use strict;
	2					3
	2					39
5	2			2		8	use warnings FATAL => 'all';
	2					2
	2					64
6	2			2		8	use feature 'state';
	2					2
	2					225
7	2			2		18	use base 'Exporter::Tiny';
	2					3
	2					919
8	2			2		8706	use overload;
	2					841
	2					11
9
10	2			2		878	use MIME::Base32 ();
	2					2088
	2					38
11	2			2		806	use MIME::Base64 ();
	2					1732
	2					47
12	2			2		2396	use Math::BigInt ();
	2					58660
	2					86
13	2			2		1189	use Encode::Base58::BigInt ();
	2					46324
	2					53
14	2			2		15	use Carp ();
	2					3
	2					38
15
16	2			2		1200	use Type::Params qw(compile multisig);
	2					195730
	2					26
17	2			2		918	use Types::Standard qw(slurpy Maybe Any Item Str Int Dict Object Optional);
	2					15
	2					18
18	2			2		6054	use Type::Library -base, -declare => qw(Stringable AnyUUID Format Radix Ver);
	2					4
	2					10
19	2			2		2842	use Type::Utils -all;
	2					15076
	2					21
20
21							sub _to_string {
22	0			0		0	my $x = shift;
23	0	0	0			0	overload::Method($x, '""') \|\| $x->can('to_string') \|\| $x->can('as_string');
24							}
25
26							my %B58_FWD = do {
27							my @flickr = split //,
28							q{123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ};
29							my @btc = split //,
30							q{123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz};
31							map { $flickr[$_] => $btc[$_] } (0..$#flickr);
32							};
33							my %B58_REV = reverse %B58_FWD;
34
35
36							sub _to_base58 {
37							# if we turn this to a bigint we need to remove the leading nulls first
38	2000			2000		3062	my $str = shift;
39	2000					6904	$str =~ s/\A(\0*)//;
40	2000		50			6502	my $null = $1 // '';
41							# warn unpack 'H*', $str;
42	2000					9054	my $big = Math::BigInt->from_bytes($str);
43	2000					1169235	my $b58 = Encode::Base58::BigInt::encode_base58($big);
44	2000					12291002	('1' x length($null)) . join '', map { $B58_FWD{$_} } (split //, $b58);
	41738					81578
45							}
46
47							sub _from_base58 {
48							# again we have to remove any leading zero values (1 in base58) or
49							# they will be clobbered when turned into a bigint
50	2000			2000		9443	my ($null, $b58) = ($_[0] =~ /\A(1)(.?)\z/);
51	2000					11025	$b58 = join '', map { $B58_REV{$_} } (split //, $b58);
	41738					59490
52	2000					9919	my $big = Math::BigInt->new(Encode::Base58::BigInt::decode_base58($b58));
53	2000					14610915	("\0" x length($null)) . $big->to_bytes;
54							}
55
56							declare Stringable, as Object, where \&_to_string;
57
58							declare AnyUUID, as Str\|Stringable, where {
59	2			2		6633	use bytes;
	2					28
	2					10
60							my $x = ref $_ ? _to_string($_)->($_) : $_;
61
62							return 1 if length $x == 16;
63
64							if (my ($hex) = ($x =~ /^\s(?i:urn:uuid:)?([0-9A-Fa-f-]{32,})\s$/sm)) {
65							$hex =~ s/-//g;
66							return 1 if length $hex == 32;
67							}
68
69							if (my ($b64) = ($x =~ m!^\s([0-9A-Za-z+/_-]=)\s*$!sm)) {
70							$b64 =~ tr!-_!+/!;
71							return 1 if 16 == length(MIME::Base64::decode($b64));
72							}
73
74							return;
75							};
76
77							enum Format, [qw(str hex b64 bin)];
78							enum Radix, [32, 58, 64];
79							enum Ver, [0, 1]; # there may be more versions later on
80
81							=encoding utf8
82
83							=head1 NAME
84
85							Data::UUID::NCName - Make valid NCName tokens which are also UUIDs
86
87							=head1 VERSION
88
89							Version 0.07
90
91							=cut
92
93							our $VERSION = '0.07';
94
95							=head1 SYNOPSIS
96
97							use Data::UUID::NCName qw(:all);
98
99							my $uuid = '1ff916f3-6ed7-443a-bef5-f4c85f18cd10';
100							my $ncn = to_ncname($uuid, version => 1);
101							my $ncn32 = to_ncname($uuid, version => 1, radix => 32);
102
103							# $ncn is now "EH_kW827XQ6719MhfGM0QL".
104							# $ncn32 is "ed74rn43o25b255puzbprrtiql" and case-insensitive.
105
106							# from Test::More, this will output 'ok':
107							is(from_ncname($ncn, version => 1),
108							$uuid, 'Decoding result matches original');
109
110							=head1 DESCRIPTION
111
112							The purpose of this module is to devise an alternative representation
113							of the L which conforms to
114							the constraints of various other identifiers such as NCName, and create an
115							L mapping between
116							them.
117
118							=head1 FORMAT DEPRECATION NOTICE
119
120							After careful consideration, I have decided to change the UUID-NCName
121							format in a minor yet incompatible way. In particular, I have moved
122							the quartet containing the
123							L\|https://tools.ietf.org/html/rfc4122#section-4.1.1> to the
124							very end of the identifier, whereas it previously was mixed into the
125							middle somewhere.
126
127							This can be considered an application of L
128							Law\|https://en.wikipedia.org/wiki/Postel%27s_law>, based on the
129							assumption that these identifiers will be generated through other
130							methods, and potentially naïvely. Like the C field, the
131							C field has a limited acceptable range of values. If, for
132							example, one were to attempt to generate a conforming identifier by
133							simply generating a random Base32 or Base64 string, it will be
134							difficult to ensure that the C field will indeed conform when
135							the identifier is converted to a standard UUID. By moving the
136							C field out to the end of the identifier, everything between
137							the C and C bookends can be generated randomly
138							without any further consideration, like so:
139
140							our @B64_ALPHA = ('A'..'Z', 'a'..'z', 0..9, qw(- _));
141
142							sub make_cheapo_b64_uuid_ncname () {
143							my @vals = map { int rand 64 } (1..20); # generate content
144							push @vals, 8 + int rand 4; # last digit is special
145							'E' . join '', map { $B64_ALPHA[$_] } @vals; # 'E' for UUID V4
146							}
147
148							# voilà:
149							my $cheap = make_cheapo_b64_uuid_ncname;
150							# EPrakcT1o2arqWSOuIMGSK or something
151
152							# as expected, we can decode it (version 1, naturally)
153							my $uu = Data::UUID::NCName::from_ncname($cheap, version => 1);
154							# 3eb6a471-3d68-4d9a-aaea-5923ae20c192 - UUID is valid
155
156							Furthermore, since the default behaviour is to align the bits of the
157							last byte to the size of the encoding symbol, and since the C
158							bits are masked, a compliant RFC4122 UUID will I end with
159							C, C, C, or C, in I Base32 (case-insensitive) and
160							Base64 variants.
161
162							Since I have already released this module prior to this format change,
163							I have added a C parameter to both L and
164							L. The version currently defaults to C<1>, the new one,
165							but will issue a warning if not explicitly set. Later I will
166							finally remove the warning with C<1> as the default. This should
167							ensure that any code written during the transition produces the
168							correct results.
169
170							=over 4
171
172							Unless you have to support identifiers generated from version 0.04 or
173							older, B 1>>.
174
175							=back
176
177							=head1 RATIONALE & METHOD
178
179							The UUID is a generic identifier which is large enough to be globally
180							unique. This makes it useful as a canonical name for data objects in
181							distributed systems, especially those that cross administrative
182							jurisdictions, such as the World-Wide Web. The
183							L,
184							however, of the UUID, precludes it from being used in many places
185							where it would be useful to do so.
186
187							In particular, there are grammars for many types of identifiers which
188							must not begin with a digit. Others are case-insensitive, or
189							prohibited from containing hyphens (present in both the standard
190							notation and Base64URL), or indeed anything outside of
191							C<^[A-Za-z_][0-9A-Za-z_]*$>.
192
193							The hexadecimal notation of the UUID has a 5/8 chance of beginning
194							with a digit, Base64 has a 5/32 chance, and Base32 has a 3/16
195							chance. As such, the identifier must be modified in such a way as to
196							guarantee beginning with an alphabetic letter (or underscore C<_>, but some
197							grammars even prohibit that, so we omit it as well).
198
199							While it is conceivable to simply add a padding character, there are a
200							few considerations which make it more appealing to derive the initial
201							character from the content of the UUID itself:
202
203							=over 4
204
205							=item *
206
207							UUIDs are large (128-bit) identifiers as it is, and it is undesirable
208							to add meaningless syntax to them if we can avoid doing so.
209
210							=item *
211
212							128 bits is an inconvenient number for aligning to both Base32 (130)
213							and Base64 (132), though 120 divides cleanly into 5, 6 and 8.
214
215							=item *
216
217							The 13th quartet, or higher four bits of the
218							C of the UUID is constant, as it indicates
219							the UUID's version. If we encode this value using the scheme common to
220							both Base64 and Base32, we get values between C and C , with the
221							valid subset between C and C.
222
223							=back
224
225							B extract the UUID's version quartet, shift all subsequent
226							data 4 bits to the left, zero-pad to the octet, encode with either
227							I or I, truncate, and finally prepend the encoded
228							version character. VoilE, one token-safe UUID.
229
230							=head1 APPLICATIONS
231
232							=over 4
233
234							=item XML IDs
235
236							The C production appears to have been constricted, inadvertently
237							or otherwise, from L in both
238							the XML 1.0 and 1.1 specifications, to
239							L by L
240							Part 2\|http://www.w3.org/TR/xmlschema-2/#ID>. This removes the colon
241							character C<:> from the grammar. The net effect is that
242
243
244
245							while being a I ID I valid under DTD validation, is
246							I valid per XML Schema Part 2 or anything that uses it
247							(e.g. Relax NG).
248
249							=item RDF blank node identifiers
250
251							Blank node identifiers in RDF are intended for serialization, to act
252							as a handle so that multiple RDF statements can refer to the same
253							blank node. The L
254							specifies\|http://www.w3.org/TR/rdf-concepts/#section-URI-Vocabulary>
255							that the validity constraints of blank node identifiers be delegated
256							to the concrete syntax specifications. The L
257							specification\|http://www.w3.org/TR/rdf-syntax-grammar/#rdf-id>
258							lists the blank node identifier as NCName. However, according to
259							L, this is a
260							valid blank node identifier:
261
262							_:42df00ec-30a2-431f-be9e-e3a612b325db
263
264							despite L
265							version\|http://www.w3.org/TeamSubmission/turtle/#nodeID> listing a
266							production equivalent to the more conservative NCName. NTriples
267							syntax is L
268							constrained\|http://www.w3.org/TR/rdf-testcases/#ntriples>, given as
269							C<^[A-Za-z][0-9A-Za-z]*$>.
270
271							=item Generated symbols
272
273							=over 4
274
275							There are only two hard things in computer science: cache
276							invalidation and naming things [and off-by-one errors].
277
278							-- Phil Karlton [extension of unknown origin]
279
280							=back
281
282							Suppose you wanted to create a L
283							programming\|http://en.wikipedia.org/wiki/Literate_programming> system
284							(I do). One of your (my) stipulations is that the symbols get defined
285							in the I, rather than the I`. However, you (I) still want`
286							to be able to validate the code's syntax, and potentially even run the
287							code, without having to commit to naming anything. You are (I am) also
288							interested in creating a global map of classes, datatypes and code
289							fragments, which can be operated on and tested in isolation, ported to
290							other languages, or transplanted into the more conventional packages
291							of programs, libraries and frameworks. The Base32 UUID NCName
292							representation should be adequate for placeholder symbols in just
293							about any programming language, save for those which do not permit
294							identifiers as long as 26 characters (which are extremely scarce).
295
296							=back
297
298							=head1 EXPORT
299
300							No subroutines are exported by default. Be sure to include at least
301							one of the following in your C statement:
302
303							=over 4
304
305							=item :all
306
307							Import all functions.
308
309							=item :decode
310
311							Import decode-only functions.
312
313							=item :encode
314
315							Import encode-only functions.
316
317							=item :32
318
319							Import base32-only functions.
320
321							=item :58
322
323							Import base58-only functions.
324
325							=item :64
326
327							Import base64-only functions.
328
329							=back
330
331							=cut
332
333							# exporter stuff
334
335							our %EXPORT_TAGS = (
336							all => [qw(to_ncname from_ncname
337							to_ncname_32 from_ncname_32
338							to_ncname_58 from_ncname_58
339							to_ncname_64 from_ncname_64)],
340							decode => [qw(from_ncname from_ncname_32 from_ncname_58 from_ncname_64)],
341							encode => [qw(to_ncname to_ncname_32 to_ncname_58 to_ncname_64)],
342							32 => [qw(to_ncname_32 from_ncname_32)],
343							58 => [qw(to_ncname_58 from_ncname_58)],
344							64 => [qw(to_ncname_64 from_ncname_64)],
345							);
346
347							# export nothing by default
348							our @EXPORT = ();
349							our @EXPORT_OK = @{$EXPORT_TAGS{all}};
350
351							# uuid format string, so meta.
352							my $UUF = sprintf('%s-%s-%s-%s-%s', '%02x' x 4, ('%02x' x 2) x 3, '%02x' x 6);
353							# yo dawg i herd u liek format strings so we put a format string in yo
354							# format string
355
356							# dispatch tables for encoding/decoding
357
358							my %ENCODE = (
359							32 => sub {
360							my @in = unpack 'C*', shift;
361							my $align = shift;
362							$in[-1] >>= 1 if $align;
363							my $out = MIME::Base32::encode_rfc3548(pack 'C*', @in);
364
365							# we want lower case because IT IS RUDE TO SHOUT
366							lc substr($out, 0, 25);
367							},
368							58 => sub {
369							my @in = unpack 'C*', shift;
370							my $variant = pop(@in) >> 4;
371							# warn unpack 'H', pack 'C', @in;
372							my $out = _to_base58(pack 'C*', @in);
373							# warn $out;
374							$out . ('_' x (21 - length $out)) . _encode_version($variant);
375							},
376							64 => sub {
377							my @in = unpack 'C*', shift;
378							my $align = shift;
379							$in[-1] >>= 2 if $align;
380
381							my $out = MIME::Base64::encode(pack 'C*', @in);
382							# note that the rfc4648 sequence ends in +/ or -_
383							$out =~ tr!+/!-_!;
384
385							substr($out, 0, 21);
386							},
387							);
388
389							my %DECODE = (
390							32 => sub {
391							my ($in, $align) = @_;
392
393							$in = uc substr($in, 0, 25) . '0';
394
395							my @out = unpack 'C*', MIME::Base32::decode_rfc3548($in);
396							$out[-1] <<= 1 if $align;
397
398							pack 'C*', @out;
399							},
400							58 => sub {
401							my ($in, $align) = @_;
402							my ($b58, $variant) = ($in =~ /^(.*)(.)$/);
403							$variant = _decode_version($variant) << 4;
404							$b58 =~ tr/_//d;
405							_from_base58($b58) . chr($variant);
406							},
407							64 => sub {
408							my ($in, $align) = @_;
409
410							$in = substr($in, 0, 21) . 'A==';
411							# note that the rfc4648 sequence ends in +/ or -_
412							$in =~ tr!-_!+/!;
413
414							#warn unpack 'H*', MIME::Base64::decode($in);
415
416							my @out = unpack 'C*', MIME::Base64::decode($in);
417
418							$out[-1] <<= 2 if $align;
419
420							pack 'C*', @out;
421							},
422							);
423
424							my @TRANSFORM = (
425							# old version, prior to format change
426							[
427							# _bin_uuid_to_pair
428							sub {
429							my $data = shift;
430							my @list = unpack 'N4', $data;
431
432							# take the 4 bits the version
433							my $ver = ($list[1] & 0x0000f000) >> 12;
434							# patch the hole with by shifting everything left by 4 bits
435							$list[1] = ($list[1] & 0xffff0000) \|
436							(($list[1] & 0x00000fff) << 4) \| ($list[2] >> 28);
437							$list[2] = ($list[2] & 0x0fffffff) << 4 \| ($list[3] >> 28);
438							$list[3] <<= 4; # note variant bits are not removed
439
440							return $ver, pack 'N4', @list;
441							},
442							# _pair_to_bin_uuid
443							sub {
444							my ($ver, $data) = @_;
445
446							$ver &= 0xf;
447
448							my @list = unpack 'N4', $data;
449
450							$list[3] >>= 4;
451							$list[3] \|= (($list[2] & 0xf) << 28);
452							$list[2] >>= 4;
453							$list[2] \|= (($list[1] & 0xf) << 28);
454							$list[1] = ($list[1] & 0xffff0000) \| ($ver << 12) \|
455							(($list[1] >> 4) & 0xfff);
456
457							#warn unpack 'H*', pack 'N4', @list;
458
459							pack 'N4', @list;
460							},
461							],
462							# new version
463							[
464							# _bin_uuid_to_pair
465							sub {
466							my $data = shift;
467							my @list = unpack 'N4', $data;
468
469							my $ver = ($list[1] & 0x0000f000) >> 12; # version
470							my $var = ($list[2] & 0xf0000000) >> 24; # variant
471							$list[1] = ($list[1] & 0xffff0000) \|
472							(($list[1] & 0x00000fff) << 4) \|
473							(($list[2] & 0x0fffffff) >> 24);
474							$list[2] = ($list[2] & 0x00ffffff) << 8 \| ($list[3] >> 24);
475							$list[3] = ($list[3] << 8) \| $var;
476
477							return $ver, pack 'N4', @list;
478							},
479							# _pair_to_bin_uuid
480							sub {
481							my ($ver, $data) = @_;
482
483							$ver &= 0xf;
484
485							my @list = unpack 'N4', $data;
486
487							my $var = ($list[3] & 0xf0) << 24;
488
489							$list[3] >>= 8;
490							$list[3] \|= (($list[2] & 0xff) << 24);
491							$list[2] >>= 8;
492							$list[2] \|= (($list[1] & 0xf) << 24) \| $var;
493							$list[1] = ($list[1] & 0xffff0000) \| ($ver << 12) \|
494							(($list[1] >> 4) & 0xfff);
495
496							#warn unpack 'H*', pack 'N4', @list;
497
498							pack 'N4', @list;
499							},
500							],
501							);
502
503							sub _encode_version {
504	8006			8006		12784	my $ver = $_[0] & 15;
505	8006	100	100			25072	my $off = ($_[1] // 64) == 32 ? 97 : 65;
506							# A (0) starts at 65. this should never be higher than F (version
507							# 5) for a valid UUID, but even an invalid one will never be
508							# higher than P (15).
509
510							# XXX boo-hoo, this will break in EBCDIC.
511	8006					39051	chr($ver + $off);
512							}
513
514							sub _decode_version {
515							# modulo makes sure this always returns between 0 and 15
516	8001			8001		17322	return((ord(uc $_[0]) - 65) % 16);
517							}
518
519							=head1 SUBROUTINES
520
521							=head2 to_ncname $UUID [, $RADIX ] [, %PARAMS ]
522
523							Turn C<$UUID> into an NCName. The UUID can be in the canonical
524							(hyphenated) hexadecimal form, non-hyphenated hexadecimal, Base64
525							(regular and base64url), or binary. The function returns a legal
526							NCName equivalent to the UUID, in either Base32, Base58, or Base64
527							(url), given a specified C<$RADIX> of 32, 58, or 64. If the radix is
528							omitted, Base64 is assumed.
529
530							The following keyword parameters are also accepted, and override the
531							positional parameters where applicable:
532
533							=over 4
534
535							=item radix 32\|58\|64
536
537							Either 32 or 64 to explicitly specify Base32, Base58, or Base64
538							output. Defaults to 64.
539
540							=item version 0\|1
541
542							Version 0 will generate the original version of NCName identifiers,
543							prior to the changes noted above. Version 1 is the new version, which
544							is I backwards-compatible. The default, for a transitional
545							period, is to generate version 0, but complain about it. Set the
546							version explicitly (to 1, or to 0 if you need backwards compatibility)
547							to eliminate the warning messages.
548
549							=item align $FALSY\|$TRUTHY
550
551							Align the last 4 bits to the Base32/Base64 symbol size. You almost
552							certainly want this, so the default is I. (Does not apply to
553							Base58.)
554
555							=back
556
557							=cut
558
559							sub to_ncname {
560	6006			6006	1	3024426	state $dict = slurpy Dict[
561							radix => Optional[Radix],
562							version => Optional[Ver],
563							align => Optional[Item],
564							slurpy Any];
565	6006					22554	state $check = multisig(
566							[AnyUUID, Optional[Radix], $dict],
567							[AnyUUID, $dict]);
568
569	6006					83738	my ($uuid, $radix, $p) = $check->(@_);
570
571							# optional radix moved to named parameter
572	6006	100				205338	if (ref $radix) {
573	4					5	$p = $radix;
574	4					7	undef $radix;
575							}
576
577	6006		50			12549	$radix //= $p->{radix} \|\| 64;
			66
578	6006		50			22927	$p->{align} = !!($p->{align} // 1);
579
580	6006	50				12760	unless (defined $p->{version}) {
581	0					0	Carp::carp('Set an explicit `version` to eliminate this warning.' .
582							' See Data::UUID::NCName docs.');
583	0					0	$p->{version} = 1;
584							}
585
586							# type checking has ensured this is Stringable so get the string
587	6006	50				10974	$uuid = _to_string($uuid)->($uuid) if ref $uuid;
588
589							# get the uuid into a binary string
590	6006					7441	my $bin;
591	6006	50				10081	if (length $uuid == 16) {
592							# this is already a binary string
593	0					0	$bin = $uuid;
594							}
595							else {
596							# get rid of any whitespace
597	6006					13116	$uuid =~ s/\s+//g;
598
599							# handle hexadecimal
600	6006	50				22672	if ($uuid =~ /^(?i:urn:uuid:)?[0-9A-Fa-f-]{32,}$/) {
		0
601	6006					10612	$uuid =~ s/^urn:uuid://i;
602	6006					19024	$uuid =~ s/-//g;
603							#warn $uuid;
604	6006					24850	$bin = pack 'H*', $uuid;
605							}
606							# handle base64
607							elsif ($uuid =~ m!^[0-9A-Za-z=+/_-]$!) {
608							# canonicalize first
609	0					0	$uuid =~ tr!-_!+/!;
610	0					0	$bin = MIME::Base64::decode($uuid);
611							}
612							else {
613	0					0	Carp::croak("Couldn't figure out what to do with putative UUID.");
614							}
615							}
616
617							# extract the version
618	6006					18255	my ($version, $content) = $TRANSFORM[$p->{version}][0]->($bin);
619
620							# wah-lah.
621							_encode_version($version, $radix) .
622	6006					14873	$ENCODE{$radix}->($content, $p->{align});
623							}
624
625							=head2 from_ncname $NCNAME [, $FORMAT [, $RADIX] ] [, %PARAMS ]
626
627							Turn an appropriate C<$NCNAME> back into a UUID, where I,
628							unless overridden by C<$RADIX>, is defined beginning with one initial
629							alphabetic letter (A to Z, case-insensitive) followed by either:
630
631							=over 4
632
633							=item B<25> Base32 characters, or
634
635							=item B<21> Base64URL characters.
636
637							=back
638
639							The function will return C immediately if it cannot match
640							either of these patterns. Input past the 21-character mark (for
641							Base64) or 25-character mark (for Base32) is ignored.
642
643							This function returns a UUID of type C<$FORMAT>, which if left
644							undefined, must be one of the following:
645
646							=over 4
647
648							=item str
649
650							The canonical UUID format, like so:
651							C<33fcc995-5d10-477e-a9b4-c9cc405bbf04>. This is the default.
652
653							=item hex
654
655							The same thing, minus the hyphens.
656
657							=item b64
658
659							Base64.
660
661							=item bin
662
663							A binary string.
664
665							=back
666
667							This function also takes the new keyword-style parameters:
668
669							=over 4
670
671							=item format
672
673							As above.
674
675							=item radix
676
677							As above.
678
679							=item version
680
681							Sets the identifier version. Defaults to version 0 with a warning. See
682							the note about setting an explicit C parameter in L.
683
684							=item align
685
686							Assume the last few bits are aligned to the symbol, as in L.
687
688							=back
689
690							=cut
691
692							my %FORMAT = (
693							str => sub {
694							sprintf $UUF, unpack 'C*', shift;
695							},
696							hex => sub {
697							unpack 'H*', shift;
698							},
699							b64 => sub {
700							my $x = MIME::Base64::encode(shift);
701							$x =~ s/=+$//;
702							$x;
703							},
704							bin => sub {
705							shift;
706							},
707							);
708
709							sub from_ncname {
710	6001			6001	1	3013200	state $dict = slurpy Dict[
711							format => Optional[Format],
712							radix => Optional[Radix],
713							version => Optional[Ver],
714							align => Optional[Item],
715							slurpy Any];
716	6001					16772	state $check = multisig(
717							[Str, $dict],
718							[Str, Maybe[Format], Optional[Radix], $dict],
719							[Str, Maybe[Format], $dict],
720							);
721
722	6001					32551	my ($ncname, $format, $radix, $p) = $check->(@_);
723
724							# handle vagaries of legacy positional parameters
725	6001	50				266905	if (ref $format) {
		0
726	6001					8809	$p = $format;
727	6001					8456	undef $format;
728							}
729							elsif (ref $radix) {
730	0					0	$p = $radix;
731	0					0	undef $radix;
732							}
733
734							# unconditional override by key-value radix and format parameters
735	6001	100				15987	$radix = $p->{radix} if defined $p->{radix};
736
737							# reuse this variable because it doesn't get used for anything else
738	6001		33			31323	$format = $FORMAT{$p->{format} \|\| $format \|\| 'str'};
			50
739
740							# coerce align parameter to boolish
741	6001		50			21136	$p->{align} = !!($p->{align} // 1);
742
743							# enforce explicit presence of version
744	6001	50				12254	unless (defined $p->{version}) {
745	0					0	Carp::carp('Set an explicit `version` to eliminate this warning.' .
746							' See Data::UUID::NCName docs.');
747	0					0	$p->{version} = 1;
748							}
749
750							# obviously this must be defined
751	6001	50				10704	return unless defined $ncname;
752
753							# no whitespace
754	6001					44808	$ncname =~ s/^\s(.?)\s*$/$1/sm;
755
756							# note that the rfc4648 sequence ends in +/ or -_
757	6001	50				29159	my ($version, $content) = ($ncname =~ /^([A-Za-z])([0-9A-Za-z_-]{21,})$/)
758							or return;
759
760	6001	100				13248	if ($radix) {
761	6000	50	100			22868	Carp::croak("Radix must be either 32, 58, or 64, not $radix")
			66
762							unless $radix == 32 \|\| $radix == 58 \|\| $radix == 64;
763							}
764							else {
765							# detect what to do based on input
766	1					3	my $len = length $ncname;
767
768	1	50				4	if ($len == 22) {
		0
		0
769	1					3	$radix = 64;
770							}
771							elsif ($len == 23) {
772	0					0	$radix = 58;
773							}
774							elsif ($len == 26) {
775	0					0	$radix = 32;
776							}
777							else {
778							# the regex above should ensure this is never reached.
779	0					0	Carp::croak
780							("Not sure what to do with an identifier of length $len.");
781							}
782							}
783
784							# get this stuff back to canonical form
785	6001					13011	$version = _decode_version($version);
786							# warn $version;
787	6001					17972	$content = $DECODE{$radix}->($content, $p->{align});
788							# warn unpack 'H*', $content;
789
790							# reassemble the pair
791	6001					850041	my $bin = $TRANSFORM[$p->{version}][1]->($version, $content);
792
793							# now format.
794	6001					13324	$format->($bin);
795							}
796
797							=head2 to_ncname_64 $UUID [, %PARAMS ]
798
799							Shorthand for Base64 NCNames.
800
801							=cut
802
803							sub to_ncname_64 {
804	3			3	1	3474	to_ncname(@_, radix => 64);
805							}
806
807							=head2 from_ncname_64 $NCNAME [, $FORMAT \| %PARAMS ]
808
809							Ditto.
810
811							=cut
812
813							sub from_ncname_64 {
814	0			0	1	0	from_ncname(@_, radix => 64);
815							}
816
817							=head2 to_ncname_58 $UUID [, %PARAMS ]
818
819							Shorthand for Base58 NCNames.
820
821							=cut
822
823							sub to_ncname_58 {
824	0			0	1	0	to_ncname(shift, 58, @_);
825							}
826
827							=head2 from_ncname_58 $NCNAME [, $FORMAT \| %PARAMS ]
828
829							Ditto.
830
831							=cut
832
833							sub from_ncname_58 {
834	0			0	1	0	from_ncname(@_, radix => 58);
835							}
836
837							=head2 to_ncname_32 $UUID [, %PARAMS ]
838
839							Shorthand for Base32 NCNames.
840
841							=cut
842
843							sub to_ncname_32 {
844	2			2	1	13	to_ncname(shift, 32, @_);
845							}
846
847							=head2 from_ncname_32 $NCNAME [, $FORMAT \| %PARAMS ]
848
849							Ditto.
850
851							=cut
852
853							sub from_ncname_32 {
854	0			0	1		from_ncname(@_, radix => 32);
855							}
856
857							=head1 AUTHOR
858
859							Dorian Taylor, C<< >>
860
861							=head1 BUGS
862
863							Please report bugs/issues/etc L
864							GitHub\|https://github.com/doriantaylor/p5-data-uuid-ncname/issues>.
865
866							=over 4
867
868							=item * MetaCPAN
869
870							L
871
872							=item * GitHub repository (bugs also go here)
873
874							L
875
876							=item * AnnoCPAN: Annotated CPAN documentation
877
878							L
879
880							=item * CPAN Ratings
881
882							L
883
884							=back
885
886							=head1 SEE ALSO
887
888							=over 4
889
890							=item
891
892							L
893
894							=item
895
896							L
897
898							=item
899
900							L
901
902							=item
903
904							L
905
906							=item
907
908							L
909
910							=item
911
912							L
913							(NCName)
914
915							=item
916
917							L
918							Datatypes\|http://www.w3.org/TR/xmlschema11-2/#ID> (ID)
919
920							=item
921
922							L
923							(Revised)\|http://www.w3.org/TR/rdf-syntax-grammar/#rdf-id>
924
925							=item
926
927							L
928
929							=back
930
931							This module lives under the C namespace for the purpose of
932							namespace hygiene. The main module I depend on
933							L, howevever the script L I depend on
934							L to generate UUIDs.
935
936							=head1 LICENSE AND COPYRIGHT
937
938							Copyright 2012-2018 Dorian Taylor.
939
940							Licensed under the Apache License, Version 2.0 (the "License"); you
941							may not use this file except in compliance with the License. You may
942							obtain a copy of the License at
943							L .
944
945							Unless required by applicable law or agreed to in writing, software
946							distributed under the License is distributed on an "AS IS" BASIS,
947							WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
948							implied. See the License for the specific language governing
949							permissions and limitations under the License.
950
951							=cut
952
953							1; # End of Data::UUID::NCName