File Coverage

blib/lib/Hash/Fold.pm
Criterion Covered Total %
statement 139 147 94.5
branch 45 56 80.3
condition 14 18 77.7
subroutine 22 22 100.0
pod 4 4 100.0
total 224 247 90.6


line stmt bran cond sub pod time code
1             package Hash::Fold;
2              
3 4     4   18857 use Moo;
  4         48663  
  4         18  
4 4     4   5533 use Scalar::Util qw(refaddr);
  4         8  
  4         412  
5 4     4   2134 use Types::Standard qw(Str CodeRef);
  4         301722  
  4         39  
6              
7             # TODO switch to Exporter::Tiny?
8             use Sub::Exporter -setup => {
9             exports => [
10 16         35 (map { $_ => \&_build_unary_export } qw(fold unfold flatten unflatten)),
11 4         12 (map { $_ => \&_build_nary_export } qw(merge)),
  4         33  
12             ],
13 4     4   6027 };
  4         39222  
14              
15             # XXX this declaration must be on a single line
16             # https://metacpan.org/pod/version#How-to-declare()-a-dotted-decimal-version
17 4     4   3655 use version 0.77; our $VERSION = version->declare('v1.0.0');
  4         7163  
  4         30  
18              
19             use constant {
20 4         2997 ARRAY => 1,
21             HASH => 2,
22             TYPE => 0,
23             VALUE => 1,
24 4     4   448 };
  4         9  
25              
26             has on_object => (
27             isa => CodeRef,
28             is => 'ro',
29             default => sub { sub { $_[1] } }, # return the value unchanged
30             );
31              
32             has on_cycle => (
33             isa => CodeRef,
34             is => 'ro',
35             default => sub { sub { } }, # do nothing
36             );
37              
38             has hash_delimiter => (
39             isa => Str,
40             is => 'ro',
41             default => '.',
42             );
43              
44             has array_delimiter => (
45             isa => Str,
46             is => 'ro',
47             default => '.',
48             );
49              
50             around BUILDARGS => sub {
51             my $original = shift;
52             my $class = shift;
53             my $args = $class->$original(@_);
54             my $delimiter = delete $args->{delimiter};
55              
56             if (defined $delimiter) {
57             $args->{array_delimiter} = $delimiter
58             unless (exists $args->{array_delimiter});
59              
60             $args->{hash_delimiter} = $delimiter
61             unless (exists $args->{hash_delimiter});
62             }
63              
64             return $args;
65             };
66              
67             sub fold {
68 66     66 1 146 my $self = shift;
69 66         117 my $hash = _check_hash(shift);
70 66         97 my $prefix = undef;
71 66         97 my $target = {};
72 66         106 my $seen = {};
73              
74 66         145 return $self->_merge($hash, $prefix, $target, $seen);
75             }
76              
77             sub _build_path {
78 19     19   34 my ($self, $steps) = @_;
79              
80             my @path = map {
81 19 50       36 $_->[VALUE],
  31 100       164  
82             ($_->[TYPE] == ARRAY ?
83             $self->array_delimiter :
84             ($_->[TYPE] == HASH ? $self->hash_delimiter : ''))
85             } @$steps;
86              
87             # last element is a delimiter; don't need that
88 19         35 pop @path;
89              
90 19         61 return join '', @path;
91             }
92              
93             sub unfold {
94 42     42 1 1240 my $self = shift;
95 42         74 my $hash = _check_hash(shift);
96 42         69 my $target = {};
97              
98             # sorting the keys should lead to better locality of reference,
99             # for what that's worth here
100             # XXX the sort order is connected with the ambiguity issue mentioned below
101 42         179 for my $key (sort keys %$hash) {
102 144         243 my $value = $hash->{$key};
103 144         245 my $steps = $self->_split($key);
104              
105 144         212 eval { $self->_set($target, $steps, $value) };
  144         282  
106              
107 144 100       337 if ($@) {
108 19         32 my $error = $@;
109 19         41 my $o_steps = $self->_split($key);
110 19         60 my $context_type = $o_steps->[@$o_steps - @$steps - 1][TYPE];
111 19 50       59 my ($article, $type) = $context_type == ARRAY ?
    100          
112             qw[an array] :
113             ($context_type == HASH ? qw[a hash] : (undef, undef));
114              
115             # want everything that was removed from $steps
116 19         39 splice(@$o_steps, -1, @$steps + 1);
117              
118 19         61 my $path = $self->_build_path($o_steps);
119 19 50       80 my $message = defined($type) ?
120             "Attempt to use non-${type} ($path) as ${article} ${type}" :
121             "unanticipated error: $error";
122              
123 19         1108 require Hash::Fold::Error;
124              
125 19         109 Hash::Fold::Error->throw({
126             message => $message,
127             path => $path,
128             type => $type
129             });
130             }
131             }
132              
133 23         147 return $target;
134             }
135              
136             BEGIN {
137 4     4   21 *flatten = \&fold;
138 4         5005 *unflatten = \&unfold;
139             }
140              
141             sub merge {
142 27     27 1 62 my $self = shift;
143 27         52 return $self->unfold({ map { %{ $self->fold(_check_hash($_)) } } @_ });
  51         67  
  51         94  
144             }
145              
146             sub is_object {
147 324     324 1 477 my ($self, $value) = @_;
148 324         461 my $ref = ref($value);
149 324   100     1226 return $ref && ($ref ne 'HASH') && ($ref ne 'ARRAY');
150             }
151              
152             sub _build_unary_export {
153 10     10   896 my ($class, $name, $base_options) = @_;
154              
155             return sub {
156 26     26   31663 my $arg = shift;
157 26 100       80 my $custom_options = @_ == 1 ? shift : { @_ };
158 26         705 my $folder = $class->new({ %$base_options, %$custom_options });
159 26         606 return $folder->$name($arg);
160             }
161 10         41 }
162              
163             sub _build_nary_export {
164 1     1   146 my ($class, $name, $base_options) = @_;
165              
166             return sub {
167 23     23   106420 my ($args, $custom_options);
168              
169 23 50 33     137 if (@_ && (ref($_[0]) eq 'ARRAY')) {
170 0         0 $args = shift;
171 0 0       0 $custom_options = @_ == 1 ? shift : { @_ };
172             } else {
173 23         55 $args = [@_];
174 23         39 $custom_options = {};
175             }
176              
177 23         584 my $folder = $class->new({ %$base_options, %$custom_options });
178              
179 23         454 return $folder->$name(@$args);
180             }
181 1         6 }
182              
183             sub _check_hash {
184 159     159   219 my $hash = shift;
185 159         239 my $ref = ref($hash);
186              
187 159 50       311 unless ($ref eq 'HASH') {
188 0         0 my $type;
189              
190 0 0       0 if (defined $hash) {
191 0 0       0 $type = length($ref) ? "'$ref'" : 'non-reference';
192             } else {
193 0         0 $type = 'undef';
194             }
195              
196 0         0 require Hash::Fold::Error;
197              
198 0         0 Hash::Fold::Error->throw({
199             message => "invalid argument: expected unblessed HASH reference, got: $type",
200             type => $type,
201             });
202             }
203              
204 159         270 return $hash;
205             }
206              
207             sub _join {
208 258     258   475 my ($self, $prefix, $delimiter, $key) = @_;
209 258 100       814 return defined($prefix) ? $prefix . $delimiter . $key : $key;
210             }
211              
212             =begin comment
213              
214             TODO: when the hash delimiter is the same as the array delimiter
215             (as it is by default), ambiguities can arise:
216              
217             {
218             foo => 'bar',
219             1 => 'aaagh!',
220             baz => 'quux',
221             }
222              
223             In many cases, these can be smartly resolved by looking at the context: if
224             at least one step is non-numeric, then the container must be a hashref:
225              
226             foo.bar.baz
227             foo.bar.0 <- must be a hash key
228             foo.bar.quux
229              
230             The ambiguity can either be resolved here/in unfold with a bit of static
231             analysis or resolved lazily/dynamically in _set (need to sort the keys so
232             that non-integers (if any) are unpacked before integers (if any)).
233              
234             Currently, the example above is unpacked correctly :-)
235              
236             =end comment
237              
238             =cut
239              
240             sub _split {
241 163     163   280 my ($self, $path) = @_;
242 163         285 my $hash_delimiter = $self->hash_delimiter;
243 163         243 my $array_delimiter = $self->array_delimiter;
244 163         249 my $hash_delimiter_pattern = quotemeta($hash_delimiter);
245 163         211 my $array_delimiter_pattern = quotemeta($array_delimiter);
246 163         235 my $same_delimiter = $array_delimiter eq $hash_delimiter;
247 163         187 my $split_pattern;
248              
249 163 100       286 if (length($hash_delimiter) >= length($array_delimiter)) {
250 159         742 $split_pattern = qr{((?:$hash_delimiter_pattern)|(?:$array_delimiter_pattern))};
251             } else {
252 4         38 $split_pattern = qr{((?:$array_delimiter_pattern)|(?:$hash_delimiter_pattern))};
253             }
254              
255 163         842 my @split = split $split_pattern, $path;
256 163         262 my @steps;
257              
258             # since we require the argument to fold (and unfold) to be a hashref,
259             # the top-level keys must always be hash keys (strings) rather than
260             # array indices (numbers)
261 163         354 push @steps, [HASH, shift @split];
262              
263 163         379 while (@split) {
264 213         323 my $delimiter = shift @split;
265 213         288 my $step = shift @split;
266              
267 213 100       334 if ($same_delimiter) {
268             # tie-breaker
269 197 100 100     631 if (($step eq '0') || ($step =~ /^[1-9]\d*$/)) { # no leading 0
270 90         237 push @steps, [ARRAY, $step];
271             } else {
272 107         297 push @steps, [HASH, $step];
273             }
274             } else {
275 16 100       27 if ($delimiter eq $array_delimiter) {
276 6         13 push @steps, [ARRAY, $step];
277             } else {
278 10         25 push @steps, [HASH, $step];
279             }
280             }
281             }
282              
283 163         493 return \@steps;
284             }
285              
286             sub _merge {
287 324     324   592 my ($self, $value, $target_key, $target, $_seen) = @_;
288              
289             # "localize" the $seen hash: we want to catch circular references (i.e.
290             # an unblessed hashref or arrayref which contains (at some depth) a
291             # reference to itself), but don't want to prevent repeated references
292             # i.e. { foo => $object, bar => $object } is OK. To achieve this, we need
293             # to "localize" the $seen hash i.e. do the equivalent of "local $seen".
294             # However, perl doesn't allow lexical variables to be localized, so we have
295             # to do it manually.
296              
297             # isolate from the caller's $seen hash and allow scoped additions
298 324         754 my $seen = { %$_seen };
299              
300 324 100       624 if ($self->is_object($value)) {
301 12         46 $value = $self->on_object->($self, $value);
302             }
303              
304 324         2334 my $ref = ref($value);
305 324         541 my $refaddr = refaddr($value);
306              
307 324 100 100     1035 if ($refaddr && $seen->{$refaddr}) { # seen HASH or ARRAY
    100          
    100          
308             # we've seen this unblessed hashref/arrayref before: possible actions
309             #
310             # 1) (do nothing and) treat it as a terminal
311             # 2) warn and treat it as a terminal
312             # 3) die (and treat it as a terminal :-)
313             #
314             # if the callback doesn't raise a fatal exception,
315             # treat the value as a terminal
316 5         17 $self->on_cycle->($self, $value); # might warn or die
317 5         1474 $target->{$target_key} = $value; # treat as a terminal
318             } elsif ($ref eq 'HASH') {
319 130         273 my $delimiter = $self->hash_delimiter;
320              
321 130         315 $seen->{$refaddr} = 1;
322              
323 130 100       213 if (%$value) {
324             # sorting the keys ensures a deterministic order,
325             # which (at the very least) is required for unsurprising
326             # tests
327 123         381 for my $hash_key (sort keys %$value) {
328 169         250 my $hash_value = $value->{$hash_key};
329 169         302 $self->_merge(
330             $hash_value,
331             $self->_join($target_key, $delimiter, $hash_key),
332             $target, $seen
333             );
334             }
335             } else {
336 7         15 $target->{$target_key} = {};
337             }
338             } elsif ($ref eq 'ARRAY') {
339 42         88 my $delimiter = $self->array_delimiter;
340              
341 42         75 $seen->{$refaddr} = 1;
342              
343 42 100       78 if (@$value) {
344 40         102 for my $index (0 .. $#$value) {
345 89         131 my $array_element = $value->[$index];
346              
347 89         153 $self->_merge(
348             $array_element,
349             $self->_join($target_key, $delimiter, $index),
350             $target, $seen
351             );
352             }
353             } else {
354 2         6 $target->{$target_key} = [];
355             }
356             } else { # terminal
357 147         303 $target->{$target_key} = $value;
358             }
359              
360 324         1065 return $target;
361             }
362              
363             # the action depends on the number of steps:
364             #
365             # 1: e.g. ['foo']:
366             #
367             # $context->{foo} = $value
368             #
369             # 2: e.g. ['foo', 42]:
370             #
371             # $context = $context->{foo} ||= []
372             # $context->[42] = $value
373             #
374             # 3 (or more): e.g. ['foo', 42, 'bar']:
375             #
376             # $context = $context->{foo} ||= []
377             # return $self->_set($context, [42, 'bar'], $value)
378             #
379             # Note that the 2 case can be implemented in the same way as the 3
380             # (or more) case.
381              
382             sub _set {
383 326     326   565 my ($self, $context, $steps, $value) = @_;
384 326         429 my $step = shift @$steps;
385              
386 326 100       514 if (@$steps) { # recursive case
387             # peek i.e. look-ahead to the step that will be processed in
388             # the tail call and make sure its container exists
389 182         255 my $next_step = $steps->[0];
390 182 100   67   509 my $next_step_container = sub { $next_step->[TYPE] == ARRAY ? [] : {} };
  67         307  
391              
392             $context = ($step->[TYPE] == ARRAY) ?
393             ($context->[ $step->[VALUE] ] ||= $next_step_container->()) : # array index
394 182 100 66     728 ($context->{ $step->[VALUE] } ||= $next_step_container->()); # hash key
      66        
395             } else { # base case
396 144 100       246 if ($step->[TYPE] == ARRAY) {
397 58         165 $context->[ $step->[VALUE] ] = $value; # array index
398             } else {
399 86         268 $context->{ $step->[VALUE] } = $value; # hash key
400             }
401             }
402              
403 307 100       799 return @$steps ? $self->_set($context, $steps, $value) : $value;
404             }
405              
406             1;
407              
408             =head1 NAME
409              
410             Hash::Fold - flatten and unflatten nested hashrefs
411              
412             =head1 SYNOPSIS
413              
414             use Hash::Fold qw(flatten unflatten);
415              
416             my $object = bless { foo => 'bar' };
417              
418             my $nested = {
419             foo => $object,
420             baz => {
421             a => 'b',
422             c => [ 'd', { e => 'f' }, 42 ],
423             },
424             };
425              
426             my $flattened = flatten($nested);
427              
428             is_deeply $flattened, {
429             'baz.a' => 'b',
430             'baz.c.0' => 'd',
431             'baz.c.1.e' => 'f',
432             'baz.c.2' => 42,
433             'foo' => $object,
434             };
435              
436             my $roundtrip = unflatten($flattened);
437              
438             is_deeply $roundtrip, $nested;
439              
440             =head1 DESCRIPTION
441              
442             This module provides functional and OO interfaces which can be used to flatten,
443             unflatten and merge nested hashrefs.
444              
445             Unless noted, the functions listed below are also available as methods. Options
446             provided to the Hash::Fold constructor can be supplied to the functions e.g.:
447              
448             use Hash::Fold;
449              
450             my $folder = Hash::Fold->new(delimiter => '/');
451              
452             $folder->fold($hash);
453              
454             is equivalent to:
455              
456             use Hash::Fold qw(fold);
457              
458             my $folded = fold($hash, delimiter => '/');
459              
460             Options (and constructor args) can be supplied as a list of key/value pairs or
461             a hashref, so the following are equivalent:
462              
463             my $folded = fold($hash, delimiter => '/' );
464             my $folded = fold($hash, { delimiter => '/' });
465              
466             In addition, Hash::Fold uses L, which allows functions to be
467             imported with options baked in e.g.:
468              
469             use Hash::Fold fold => { delimiter => '/' };
470              
471             my $folded = fold($hash);
472              
473             =head1 OPTIONS
474              
475             As described above, the following options can be supplied as constructor args,
476             import args, or per-function overrides. Under the hood, they are (L)
477             attributes which can be wrapped and overridden like any other attributes.
478              
479             =head2 array_delimiter
480              
481             B: Str, ro, default: "."
482              
483             The delimiter prefixed to array elements when flattening and unflattening.
484              
485             =head2 hash_delimiter
486              
487             B: Str, ro, default: "."
488              
489             The delimiter prefixed to hash elements when flattening and unflattening.
490              
491             =head2 delimiter
492              
493             B: Str
494              
495             This is effectively a write-only attribute which assigns the same string to
496             L<"array_delimiter"> and L<"hash_delimiter">. It can only be supplied as a
497             constructor arg or function option (which are equivalent) i.e. Hash::Fold
498             instances have no C method.
499              
500             =head2 on_cycle
501              
502             B: (Hash::Fold, Ref) -> None, ro
503              
504             A callback invoked whenever L<"fold"> encounters a circular reference i.e. a
505             reference which contains itself as a nested value.
506              
507             The callback is passed two arguments: the Hash::Fold instance and the value e.g.:
508              
509             sub on_cycle {
510             my ($folder, $value) = @_;
511             warn 'self-reference found: ', Dumper(value), $/;
512             }
513              
514             my $folder = Hash::Fold->new(on_cycle => \&on_cycle);
515              
516             Note that circular references are handled correctly i.e. they are treated as
517             terminals and not traversed. This callback merely provides a mechanism to
518             report them (e.g. by issuing a warning).
519              
520             The default callback does nothing.
521              
522             =head2 on_object
523              
524             B: (Hash::Fold, Ref) -> Any, ro
525              
526             A callback invoked whenever L<"fold"> encounters a value for which the
527             L<"is_object"> method returns true i.e. any reference that isn't an unblessed
528             arrayref or unblessed hashref. This callback can be used to modify
529             the value e.g. to return a traversable value (e.g. unblessed hashref)
530             in place of a terminal (e.g. blessed hashref).
531              
532             The callback is passed two arguments: the Hash::Fold instance and the object e.g.:
533              
534             use Scalar::Util qw(blessed);
535              
536             sub on_object {
537             my ($folder, $value) = @_;
538              
539             if (blessed($value) && $value->isa('HASH')) {
540             return { %$value }; # unbless
541             } else {
542             return $value;
543             }
544             }
545              
546             my $folder = Hash::Fold->new(on_object => \&on_object);
547              
548             The default callback returns its value unchanged.
549              
550             =head1 EXPORTS
551              
552             Nothing by default. The following functions can be imported.
553              
554             =head2 fold
555              
556             B: (HashRef [, Hash|HashRef ]) -> HashRef
557              
558             Takes a nested hashref and returns a single-level hashref with (by default)
559             dotted keys. The delimiter can be overridden via the L<"delimiter">,
560             L<"array_delimiter"> and L<"hash_delimiter"> options.
561              
562             Unblessed arrayrefs and unblessed hashrefs are traversed. All other values
563             (e.g. strings, regexps, objects etc.) are treated as terminals and passed
564             through verbatim, although this can be overridden by supplying a suitable
565             L<"on_object"> callback.
566              
567             =head2 flatten
568              
569             B: (HashRef [, Hash|HashRef ]) -> HashRef
570              
571             Provided as an alias for L<"fold">.
572              
573             =head2 unfold
574              
575             B: (HashRef [, Hash|HashRef ]) -> HashRef
576              
577             Takes a flattened hashref and returns the corresponding nested hashref.
578              
579             =head2 unflatten
580              
581             B: (HashRef [, Hash|HashRef ]) -> HashRef
582              
583             Provided as an alias for L<"unfold">.
584              
585             =head2 merge
586              
587             B: (HashRef [, HashRef... ]) -> HashRef
588              
589             B: (ArrayRef [, Hash|HashRef ]) -> HashRef
590              
591             Takes a list of hashrefs which are then flattened, merged into one (in the
592             order provided i.e. with precedence given to the rightmost arguments) and
593             unflattened i.e. shorthand for:
594              
595             unflatten { map { %{ flatten $_ } } @_ }
596              
597             To provide options to the C subroutine, pass the hashrefs in an
598             arrayref, and the options (as usual) as a list of key/value pairs or a hashref:
599              
600             merge([ $hash1, $hash2, ... ], delimiter => ... )
601             merge([ $hash1, $hash2, ... ], { delimiter => ... })
602              
603             =head1 METHODS
604              
605             =head2 is_object
606              
607             B: Any -> Bool
608              
609             This method is called from L<"fold"> to determine whether a value should be
610             passed to the L<"on_object"> callback.
611              
612             It is passed each value encountered while traversing a hashref and returns true
613             for all references (e.g. regexps, globs, objects etc.) apart from unblessed
614             arrayrefs and unblessed hashrefs, and false for all other
615             values (i.e. unblessed hashrefs, unblessed arrayrefs, and non-references).
616              
617             =head1 ERRORS
618              
619             Objects of the class L are thrown upon error. The object
620             stringifies to an error message with a strack trace.
621              
622             =over
623              
624             =item Invalid Argument
625              
626             If an input argument is not a hash, an error object will be thrown with the C
627             attribute set to the unexpected type of the argument.
628              
629             =item Incompatible path component
630              
631             If, during the course of an L/L or a L, incompatible
632             types are found along the same paths in the structures, an error
633             object will be thrown with the C attribute set to the path with
634             the incorrect type, and the C attribute set to the unexpected
635             type. For example, the following paths would be incompatible:
636              
637             foo.0 = 2
638             foo = 3
639              
640             - as C cannot be both a scalar and an array.
641              
642             =back
643              
644             =head1 VERSION
645              
646             1.0.0
647              
648             =head1 SEE ALSO
649              
650             =over
651              
652             =item * L
653              
654             =item * L
655              
656             =item * L
657              
658             =item * L
659              
660             =back
661              
662             =head1 AUTHOR
663              
664             =over
665              
666             =item * chocolateboy
667              
668             =item * djerius
669              
670             =back
671              
672             =head1 COPYRIGHT
673              
674             Copyright (c) 2014-2019 by chocolateboy.
675              
676             This is free software; you can redistribute it and/or modify it under the
677             terms of the Artistic License 2.0.
678              
679             =cut