File Coverage

blib/lib/Hash/Fold.pm
Criterion Covered Total %
statement 117 124 94.3
branch 36 44 81.8
condition 13 18 72.2
subroutine 20 20 100.0
pod 4 4 100.0
total 190 210 90.4


line stmt bran cond sub pod time code
1             package Hash::Fold;
2              
3 4     4   25638 use Carp qw(confess);
  4         10  
  4         279  
4 4     4   4257 use Moose;
  4         1928687  
  4         27  
5 4     4   27386 use Scalar::Util qw(refaddr);
  4         13  
  4         504  
6              
7             use Sub::Exporter -setup => {
8             exports => [
9 16         46 (map { $_ => \&_build_unary_export } qw(fold unfold flatten unflatten)),
10 4         13 (map { $_ => \&_build_nary_export } qw(merge)),
  4         51  
11             ],
12 4     4   22 };
  4         7  
13              
14             use constant {
15 4         2009 ARRAY => 1,
16             HASH => 2,
17             TYPE => 0,
18             VALUE => 1,
19 4     4   2031 };
  4         5  
20              
21             our $VERSION = '0.1.2';
22              
23             has on_object => (
24             isa => 'CodeRef',
25             is => 'ro',
26             default => sub { sub { $_[1] } }, # return the value unchanged
27             );
28              
29             has on_cycle => (
30             isa => 'CodeRef',
31             is => 'ro',
32             default => sub { sub { } }, # do nothing
33             );
34              
35             has hash_delimiter => (
36             isa => 'Str',
37             is => 'ro',
38             default => '.',
39             );
40              
41             has array_delimiter => (
42             isa => 'Str',
43             is => 'ro',
44             default => '.',
45             );
46              
47             around BUILDARGS => sub {
48             my $original = shift;
49             my $class = shift;
50             my $args = $class->$original(@_);
51             my $delimiter = delete $args->{delimiter};
52              
53             if (defined $delimiter) {
54             $args->{array_delimiter} = $delimiter
55             unless (exists $args->{array_delimiter});
56              
57             $args->{hash_delimiter} = $delimiter
58             unless (exists $args->{hash_delimiter});
59             }
60              
61             return $args;
62             };
63              
64             sub fold {
65 30     30 1 51 my $self = shift;
66 30         68 my $hash = _check_hash(shift);
67 30         44 my $prefix = undef;
68 30         52 my $target = {};
69 30         44 my $seen = {};
70              
71 30         84 return $self->_merge($hash, $prefix, $target, $seen);
72             }
73              
74             sub unfold {
75 24     24 1 1539 my $self = shift;
76 24         52 my $hash = _check_hash(shift);
77 24         42 my $target = {};
78              
79             # sorting the keys should lead to better locality of reference,
80             # for what that's worth here
81             # XXX the sort order is connected with the ambiguity issue mentioned below
82 24         105 for my $key (sort keys %$hash) {
83 96         154 my $value = $hash->{$key};
84 96         200 my $steps = $self->_split($key);
85 96         222 $self->_set($target, $steps, $value);
86             }
87              
88 23         569 return $target;
89             }
90              
91             BEGIN {
92 4     4   13 *flatten = \&fold;
93 4         5440 *unflatten = \&unfold;
94             }
95              
96             sub merge {
97 9     9 1 28 my $self = shift;
98 9         18 return $self->unfold({ map { %{ $self->fold(_check_hash($_)) } } @_ });
  15         18  
  15         34  
99             }
100              
101             sub is_object {
102 180     180 1 240 my ($self, $value) = @_;
103 180         282 my $ref = ref($value);
104 180   100     987 return $ref && ($ref ne 'HASH') && ($ref ne 'ARRAY');
105             }
106              
107             sub _build_unary_export {
108 10     10   715 my ($class, $name, $base_options) = @_;
109              
110             return sub {
111 26     26   35154 my $arg = shift;
112 26 100       74 my $custom_options = @_ == 1 ? shift : { @_ };
113 26         976 my $folder = $class->new({ %$base_options, %$custom_options });
114 26         107 return $folder->$name($arg);
115             }
116 10         62 }
117              
118             sub _build_nary_export {
119 1     1   156 my ($class, $name, $base_options) = @_;
120              
121             return sub {
122 5     5   20 my ($args, $custom_options);
123              
124 5 50 33     30 if (@_ && (ref($_[0]) eq 'ARRAY')) {
125 0         0 $args = shift;
126 0 0       0 $custom_options = @_ == 1 ? shift : { @_ };
127             } else {
128 5         13 $args = [ @_ ];
129 5         10 $custom_options = {};
130             }
131              
132 5         164 my $folder = $class->new({ %$base_options, %$custom_options });
133 5         23 return $folder->$name(@$args);
134             }
135 1         7 }
136              
137             sub _check_hash {
138 69     69   95 my $hash = shift;
139 69         105 my $ref = ref($hash);
140              
141 69 50       156 unless ($ref eq 'HASH') {
142 0         0 my $type;
143              
144 0 0       0 if (defined $hash) {
145 0 0       0 $type = length($ref) ? "'$ref'" : 'non-reference';
146             } else {
147 0         0 $type = 'undef';
148             }
149              
150 0         0 confess "invalid argument: expected unblessed HASH reference, got: $type";
151             }
152              
153 69         121 return $hash;
154             }
155              
156             sub _join {
157 150     150   281 my ($self, $prefix, $delimiter, $key) = @_;
158 150 100       637 return defined($prefix) ? $prefix . $delimiter . $key : $key;
159             }
160              
161             =begin comment
162              
163             TODO: when the hash delimiter is the same as the array delimiter
164             (as it is by default), ambiguities can arise:
165              
166             {
167             foo => 'bar',
168             1 => 'aaagh!',
169             baz => 'quux',
170             }
171              
172             In many cases, these can be smartly resolved by looking at the context: if
173             at least one step is non-numeric, then the container must be a hashref:
174              
175             foo.bar.baz
176             foo.bar.0 <- must be a hash key
177             foo.bar.quux
178              
179             The ambiguity can either be resolved here/in unfold with a bit of static
180             analysis or resolved lazily/dynamically in _set (need to sort the keys so
181             that non-integers (if any) are unpacked before integers (if any)).
182              
183             Currently, the example above is unpacked correctly :-)
184              
185             =end comment
186              
187             =cut
188              
189             sub _split {
190 96     96   144 my ($self, $path) = @_;
191 96         2951 my $hash_delimiter = $self->hash_delimiter;
192 96         2860 my $array_delimiter = $self->array_delimiter;
193 96         170 my $hash_delimiter_pattern = quotemeta($hash_delimiter);
194 96         119 my $array_delimiter_pattern = quotemeta($array_delimiter);
195 96         141 my $same_delimiter = $array_delimiter eq $hash_delimiter;
196 96 100       645 my $split_pattern = length($hash_delimiter) >= length($array_delimiter)
197             ? qr{((?:$hash_delimiter_pattern)|(?:$array_delimiter_pattern))}
198             : qr{((?:$array_delimiter_pattern)|(?:$hash_delimiter_pattern))};
199 96         481 my @split = split $split_pattern, $path;
200 96         118 my @steps;
201              
202             # since we require the argument to fold (and unfold) to be a hashref,
203             # the top-level keys must always be hash keys (strings) rather than
204             # array indices (numbers)
205 96         253 push @steps, [ HASH, shift @split ];
206              
207 96         260 while (@split) {
208 114         179 my $delimiter = shift @split;
209 114         162 my $step = shift @split;
210              
211 114 100       240 if ($same_delimiter) {
212             # tie-breaker
213 98 100 100     453 if (($step eq '0') || ($step =~ /^[1-9]\d*$/)) { # no leading 0
214 38         145 push @steps, [ ARRAY, $step ];
215             } else {
216 60         211 push @steps, [ HASH, $step ];
217             }
218             } else {
219 16 100       27 if ($delimiter eq $array_delimiter) {
220 6         18 push @steps, [ ARRAY, $step ];
221             } else {
222 10         32 push @steps, [ HASH, $step ];
223             }
224             }
225             }
226              
227 96         317 return \@steps;
228             }
229              
230             sub _merge {
231 180     180   313 my ($self, $value, $target_key, $target, $_seen) = @_;
232              
233             # "localize" the $seen hash: we want to catch circular references (i.e.
234             # an unblessed hashref or arrayref which contains (at some depth) a
235             # reference to itself), but don't want to prevent repeated references
236             # e.g. { foo => $object, bar => $object } is OK. To achieve this, we need
237             # to "localize" the $seen hash i.e. do the equivalent of "local $seen".
238             # However, perl doesn't allow lexical variables to be localized, so we have
239             # to do it manually.
240              
241             # isolate from the caller's $seen hash and allow scoped additions
242 180         508 my $seen = { %$_seen };
243              
244 180 100       414 if ($self->is_object($value)) {
245 12         359 $value = $self->on_object->($self, $value);
246             }
247              
248 180         2561 my $ref = ref($value);
249 180         376 my $refaddr = refaddr($value);
250              
251 180 100 66     773 if ($refaddr && $seen->{$refaddr}) { # seen HASH or ARRAY
    100          
    100          
252             # we've seen this unblessed hashref/arrayref before: possible actions
253             #
254             # 1) (do nothing and) treat it as a terminal
255             # 2) warn and treat it as a terminal
256             # 3) die (and treat it as a terminal :-)
257             #
258             # if the callback doesn't raise a fatal exception,
259             # treat the value as a terminal
260 5         154 $self->on_cycle->($self, $value); # might warn or die
261 5         1858 $target->{$target_key} = $value; # treat as a terminal
262             } elsif ($ref eq 'HASH') {
263 70         2203 my $delimiter = $self->hash_delimiter;
264              
265 70         175 $seen->{$refaddr} = 1;
266              
267 70 100       140 if (%$value) {
268             # sorting the keys ensures a deterministic order,
269             # which (at the very least) is required for unsurprising
270             # tests
271 63         234 for my $hash_key (sort keys %$value) {
272 109         182 my $hash_value = $value->{$hash_key};
273 109         247 $self->_merge(
274             $hash_value,
275             $self->_join($target_key, $delimiter, $hash_key),
276             $target, $seen
277             );
278             }
279             } else {
280 7         19 $target->{$target_key} = {};
281             }
282             } elsif ($ref eq 'ARRAY') {
283 18         601 my $delimiter = $self->array_delimiter;
284              
285 18         46 $seen->{$refaddr} = 1;
286              
287 18 100       41 if (@$value) {
288 16         45 for my $index (0 .. $#$value) {
289 41         61 my $array_element = $value->[$index];
290              
291 41         93 $self->_merge(
292             $array_element,
293             $self->_join($target_key, $delimiter, $index),
294             $target, $seen
295             );
296             }
297             } else {
298 2         6 $target->{$target_key} = [];
299             }
300             } else { # terminal
301 87         199 $target->{$target_key} = $value;
302             }
303              
304 180         1092 return $target;
305             }
306              
307             # the action depends on the number of steps:
308             #
309             # 1: e.g. [ 'foo' ]:
310             #
311             # $context->{foo} = $value
312             #
313             # 2: e.g. [ 'foo', 42 ]:
314             #
315             # $context = $context->{foo} ||= []
316             # $context->[42] = $value
317             #
318             # 3 (or more): e.g. [ 'foo', 42, 'bar' ]:
319             #
320             # $context = $context->{foo} ||= []
321             # return $self->_set($context, [ 42, 'bar' ], $value)
322             #
323             # Note that the 2 case can be implemented in the same way as the 3
324             # (or more) case.
325              
326             sub _set {
327 210     210   336 my ($self, $context, $steps, $value) = @_;
328 210         284 my $step = shift @$steps;
329              
330 210 100       407 if (@$steps) { # recursive case
331             # peek i.e. look-ahead to the step that will be processed in
332             # the tail call and make sure its container exists
333 114         152 my $next_step = $steps->[0];
334 114 100   49   313 my $next_step_container = sub { $next_step->[TYPE] == ARRAY ? [] : {} };
  49         275  
335              
336             $context = ($step->[TYPE] == ARRAY) ?
337             ($context->[ $step->[VALUE] ] ||= $next_step_container->()) : # array index
338 114 100 66     590 ($context->{ $step->[VALUE] } ||= $next_step_container->()); # hash key
      66        
339             } else { # base case
340 96 100       175 if ($step->[TYPE] == ARRAY) {
341 30         80 $context->[ $step->[VALUE] ] = $value; # array index
342             } else {
343 66         190 $context->{ $step->[VALUE] } = $value; # hash key
344             }
345             }
346              
347 209 100       800 return @$steps ? $self->_set($context, $steps, $value) : $value;
348             }
349              
350             __PACKAGE__->meta->make_immutable;
351              
352             1;
353              
354             =head1 NAME
355              
356             Hash::Fold - flatten and unflatten nested hashrefs
357              
358             =head1 SYNOPSIS
359              
360             use Hash::Fold qw(flatten unflatten);
361              
362             my $object = bless { foo => 'bar' };
363              
364             my $nested = {
365             foo => $object,
366             baz => {
367             a => 'b',
368             c => [ 'd', { e => 'f' }, 42 ],
369             },
370             };
371              
372             my $flattened = flatten($nested);
373              
374             is_deeply $flattened, {
375             'baz.a' => 'b',
376             'baz.c.0' => 'd',
377             'baz.c.1.e' => 'f',
378             'baz.c.2' => 42,
379             'foo' => $object,
380             };
381              
382             my $roundtrip = unflatten($flattened);
383              
384             is_deeply $roundtrip, $nested;
385              
386             =head1 DESCRIPTION
387              
388             This module provides functional and OO interfaces which can be used to flatten,
389             unflatten and merge nested hashrefs.
390              
391             Unless noted, the functions listed below are also available as methods. Options
392             provided to the Hash::Fold constructor can be supplied to the functions e.g.:
393              
394             use Hash::Fold;
395              
396             my $folder = Hash::Fold->new(delimiter => '/');
397              
398             $folder->fold($hash);
399              
400             is equivalent to:
401              
402             use Hash::Fold qw(fold);
403              
404             my $folded = fold($hash, delimiter => '/');
405              
406             Options (and constructor args) can be supplied as a list of key/value pairs or
407             a hashref, so the following are equivalent:
408              
409             my $folded = fold($hash, delimiter => '/' );
410             my $folded = fold($hash, { delimiter => '/' });
411              
412             In addition, Hash::Fold uses L<Sub::Exporter>, which allows functions to be
413             imported with options baked in e.g.:
414              
415             use Hash::Fold fold => { delimiter => '/' };
416              
417             my $folded = fold($hash);
418              
419             =head1 OPTIONS
420              
421             As described above, the following options can be supplied as constructor args,
422             import args, or per-function overrides. Under the hood, they are (L<Moose>)
423             attributes which can be wrapped and overridden like any other attributes.
424              
425             =head2 array_delimiter
426              
427             B<Type>: Str, ro, default: "."
428              
429             The delimiter prefixed to array elements when flattening and unflattening.
430              
431             =head2 hash_delimiter
432              
433             B<Type>: Str, ro, default: "."
434              
435             The delimiter prefixed to hash elements when flattening and unflattening.
436              
437             =head2 delimiter
438              
439             B<Type>: Str
440              
441             This is effectively a write-only attribute which assigns the same string to
442             L<"array_delimiter"> and L<"hash_delimiter">. It can only be supplied as a
443             constructor arg or function option (which are equivalent) i.e. Hash::Fold
444             instances have no C<delimiter> method.
445              
446             =head2 on_cycle
447              
448             B<Type>: (Hash::Fold, Ref) -> None, ro
449              
450             A callback invoked whenever L<"fold"> encounters a circular reference i.e. a
451             reference which contains itself as a nested value.
452              
453             The callback takes two arguments: the Hash::Fold instance and the value e.g.:
454              
455             sub on_cycle {
456             my ($folder, $value) = @_;
457             warn 'self-reference found: ', Dumper(value), $/;
458             }
459              
460             my $folder = Hash::Fold->new(on_cycle => \&on_cycle);
461              
462             Note that circular references are handled correctly i.e. they are treated as
463             terminals and not traversed. This callback merely provides a mechanism to
464             report them (e.g. by issuing a warning).
465              
466             The default callback does nothing.
467              
468             =head2 on_object
469              
470             B<Type>: (Hash::Fold, Ref) -> Any, ro
471              
472             A callback invoked whenever L<"fold"> encounters a value for which the
473             L<"is_object"> method returns true i.e. any reference that isn't an unblessed
474             arrayref or unblessed hashref. This callback can be used to modify
475             the value e.g. to return a traversable value (e.g. unblessed hashref)
476             in place of a terminal (e.g. blessed hashref).
477              
478             The callback takes two arguments: the Hash::Fold instance and the object e.g.:
479              
480             use Scalar::Util qw(blessed);
481              
482             sub on_object {
483             my ($folder, $value) = @_;
484              
485             if (blessed($value) && $value->isa('HASH')) {
486             return { %$value }; # unbless
487             } else {
488             return $value;
489             }
490             }
491              
492             my $folder = Hash::Fold->new(on_object => \&on_object);
493              
494             The default callback returns its value unchanged.
495              
496             =head1 EXPORTS
497              
498             Nothing by default. The following functions can be imported.
499              
500             =head2 fold
501              
502             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
503              
504             Takes a nested hashref and returns a single-level hashref with (by default)
505             dotted keys. The delimiter can be overridden via the L<"delimiter">,
506             L<"array_delimiter"> and L<"hash_delimiter"> options.
507              
508             Unblessed arrayrefs and unblessed hashrefs are traversed. All other values
509             (e.g. strings, regexps, objects &c.) are treated as terminals and passed
510             through verbatim, although this can be overridden by supplying a suitable
511             L<"on_object"> callback.
512              
513             =head2 flatten
514              
515             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
516              
517             Provided as an alias for L<"fold">.
518              
519             =head2 unfold
520              
521             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
522              
523             Takes a flattened hashref and returns the corresponding nested hashref.
524              
525             =head2 unflatten
526              
527             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
528              
529             Provided as an alias for L<"unfold">.
530              
531             =head2 merge
532              
533             B<Signature>: (HashRef [, HashRef... ]) -> HashRef
534              
535             B<Signature>: (ArrayRef[HashRef] [, Hash|HashRef ]) -> HashRef
536              
537             Takes a list of hashrefs which are then flattened, merged into one (in the
538             order provided i.e. with precedence given to the rightmost arguments) and
539             unflattened i.e. shorthand for:
540              
541             unflatten { map { %{ flatten $_ } } @_ }
542              
543             To provide options to the C<merge> subroutine, pass the hashrefs in an
544             arrayref, and the options (as usual) as a list of key/value pairs or a hashref:
545              
546             merge([ $hash1, $hash2, ... ], delimiter => ... )
547             merge([ $hash1, $hash2, ... ], { delimiter => ... })
548              
549             =head1 METHODS
550              
551             =head2 is_object
552              
553             B<Signature>: Any -> Bool
554              
555             This method is called from L<"fold"> to determine whether a value should be
556             passed to the L<"on_object"> callback.
557              
558             It is passed each value encountered while traversing a hashref and returns true
559             for all references (e.g. regexps, globs, objects &c.) apart from unblessed
560             arrayrefs and unblessed hashrefs, and false for all other
561             values (i.e. unblessed hashrefs, unblessed arrayrefs, and non-references).
562              
563             =head1 VERSION
564              
565             0.1.2
566              
567             =head1 SEE ALSO
568              
569             =over
570              
571             =item * L<CGI::Expand>
572              
573             =item * L<Hash::Flatten>
574              
575             =item * L<Hash::Merge>
576              
577             =item * L<Hash::Merge::Simple>
578              
579             =back
580              
581             =head1 AUTHOR
582              
583             chocolateboy <chocolate@cpan.org>
584              
585             =head1 COPYRIGHT
586              
587             Copyright (c) 2014-2015, chocolateboy.
588              
589             This module is free software. It may be used, redistributed and/or modified
590             under the same terms as Perl itself.
591              
592             =cut