File Coverage

blib/lib/Hash/Fold.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Hash::Fold;
2              
3 3     3   34511 use Carp qw(confess);
  3         7  
  3         250  
4 3     3   15908 use Moose;
  0            
  0            
5             use Scalar::Util qw(refaddr);
6              
7             use Sub::Exporter -setup => {
8             exports => [
9             (map { $_ => \&_build_unary_export } qw(fold unfold flatten unflatten)),
10             (map { $_ => \&_build_nary_export } qw(merge)),
11             ],
12             };
13              
14             use constant {
15             ARRAY => 1,
16             HASH => 2,
17             TYPE => 0,
18             VALUE => 1,
19             };
20              
21             our $VERSION = '0.1.1';
22              
23             has on_object => (
24             isa => 'CodeRef',
25             is => 'ro',
26             default => sub { sub { $_[1] } }, # return the value unchanged
27             );
28              
29             has on_cycle => (
30             isa => 'CodeRef',
31             is => 'ro',
32             default => sub { sub { } }, # do nothing
33             );
34              
35             has hash_delimiter => (
36             isa => 'Str',
37             is => 'ro',
38             default => '.',
39             );
40              
41             has array_delimiter => (
42             isa => 'Str',
43             is => 'ro',
44             default => '.',
45             );
46              
47             around BUILDARGS => sub {
48             my $original = shift;
49             my $class = shift;
50             my $args = $class->$original(@_);
51             my $delimiter = delete $args->{delimiter};
52              
53             if (defined $delimiter) {
54             $args->{array_delimiter} = $delimiter unless (exists $args->{array_delimiter});
55             $args->{hash_delimiter} = $delimiter unless (exists $args->{hash_delimiter});
56             }
57              
58             return $args;
59             };
60              
61             sub fold {
62             my $self = shift;
63             my $hash = _check_hash(shift);
64             my $prefix = undef;
65             my $target = {};
66             my $seen = {};
67              
68             return $self->_merge($hash, $prefix, $target, $seen);
69             }
70              
71             sub unfold {
72             my $self = shift;
73             my $hash = _check_hash(shift);
74             my $target = {};
75              
76             # sorting the keys should lead to better locality of reference,
77             # for what that's worth here
78             # XXX the sort order is connected with the ambiguity issue mentioned below
79             for my $key (sort keys %$hash) {
80             my $value = $hash->{$key};
81             my $steps = $self->_split($key);
82             $self->_set($target, $steps, $value);
83             }
84              
85             return $target;
86             }
87              
88             BEGIN {
89             *flatten = \&fold;
90             *unflatten = \&unfold;
91             }
92              
93             sub merge {
94             my $self = shift;
95             return $self->unfold({ map { %{ $self->fold(_check_hash($_)) } } @_ });
96             }
97              
98             sub is_object {
99             my ($self, $value) = @_;
100             my $ref = ref($value);
101             return $ref && ($ref ne 'HASH') && ($ref ne 'ARRAY');
102             }
103              
104             sub _build_unary_export {
105             my ($class, $name, $base_options) = @_;
106              
107             return sub {
108             my $arg = shift;
109             my $custom_options = @_ == 1 ? shift : { @_ };
110             my $folder = $class->new({ %$base_options, %$custom_options });
111             return $folder->$name($arg);
112             }
113             }
114              
115             sub _build_nary_export {
116             my ($class, $name, $base_options) = @_;
117              
118             return sub {
119             my ($args, $custom_options);
120              
121             if (@_ && (ref($_[0]) eq 'ARRAY')) {
122             $args = shift;
123             $custom_options = @_ == 1 ? shift : { @_ };
124             } else {
125             $args = [ @_ ];
126             $custom_options = {};
127             }
128              
129             my $folder = $class->new({ %$base_options, %$custom_options });
130             return $folder->$name(@$args);
131             }
132             }
133              
134             sub _check_hash {
135             my $hash = shift;
136             my $ref = ref($hash);
137              
138             unless ($ref eq 'HASH') {
139             my $type;
140              
141             if (defined $hash) {
142             $type = length($ref) ? "'$ref'" : 'non-reference';
143             } else {
144             $type = 'undef';
145             }
146              
147             confess "invalid argument: expected unblessed HASH reference, got: $type";
148             }
149              
150             return $hash;
151             }
152              
153             sub _join {
154             my ($self, $prefix, $delimiter, $key) = @_;
155             return defined($prefix) ? $prefix . $delimiter . $key : $key;
156             }
157              
158             =begin comment
159              
160             TODO: when the hash delimiter is the same as the array delimiter (as it is by default), ambiguities can arise:
161              
162             {
163             foo => 'bar',
164             1 => 'aaagh!',
165             baz => 'quux',
166             }
167              
168             In many cases, these can be smartly resolved by looking at the context: if at least one step
169             is non-numeric, then the container must be a hashref:
170              
171             foo.bar.baz
172             foo.bar.0 <- must be a hash key
173             foo.bar.quux
174              
175             The ambiguity can either be resolved here/in unfold with a bit of static analysis or resolved
176             lazily/dynamically in _set (need to sort the keys so that non-integers (if any) are unpacked
177             before integers (if any)).
178              
179             Currently, the example above is unpacked correctly :-)
180              
181             =end comment
182              
183             =cut
184              
185             sub _split {
186             my ($self, $path) = @_;
187             my $hash_delimiter = $self->hash_delimiter;
188             my $array_delimiter = $self->array_delimiter;
189             my $hash_delimiter_pattern = quotemeta($hash_delimiter);
190             my $array_delimiter_pattern = quotemeta($array_delimiter);
191             my $same_delimiter = $array_delimiter eq $hash_delimiter;
192             my @split = split qr{((?:$hash_delimiter_pattern)|(?:$array_delimiter_pattern))}, $path;
193             my @steps;
194              
195             # since we require the argument to fold (and unfold) to be a hashref,
196             # the top-level keys must always be hash keys (strings) rather than
197             # array indices (numbers)
198             push @steps, [ HASH, shift @split ];
199              
200             while (@split) {
201             my $delimiter = shift @split;
202             my $step = shift @split;
203              
204             if ($same_delimiter) {
205             # tie-breaker
206             if (($step eq '0') || ($step =~ /^[1-9]\d*$/)) { # no leading 0
207             push @steps, [ ARRAY, $step ];
208             } else {
209             push @steps, [ HASH, $step ];
210             }
211             } else {
212             if ($delimiter eq $array_delimiter) {
213             push @steps, [ ARRAY, $step ];
214             } else {
215             push @steps, [ HASH, $step ];
216             }
217             }
218             }
219              
220             return \@steps;
221             }
222              
223             sub _merge {
224             my ($self, $value, $target_key, $target, $_seen) = @_;
225              
226             # "localize" the $seen hash: we want to catch circular references (i.e.
227             # an unblessed hashref or arrayref which contains (at some depth) a reference to itself),
228             # but don't want to prevent repeated references e.g. { foo => $object, bar => $object }
229             # is OK. To achieve this, we need to "localize" the $seen hash i.e. do
230             # the equivalent of "local $seen". However, perl doesn't allow lexical variables
231             # to be localized, so we have to do it manually.
232             my $seen = { %$_seen }; # isolate from the caller's $seen hash and allow scoped additions
233              
234             if ($self->is_object($value)) {
235             $value = $self->on_object->($self, $value);
236             }
237              
238             my $ref = ref($value);
239             my $refaddr = refaddr($value);
240              
241             if ($refaddr && $seen->{$refaddr}) { # seen HASH or ARRAY
242             # we've seen this unblessed hashref/arrayref before: possible actions
243             #
244             # 1) (do nothing and) treat it as a terminal
245             # 2) warn and treat it as a terminal
246             # 3) die (and treat it as a terminal :-)
247             #
248             # if the callback doesn't raise a fatal exception,
249             # treat the value as a terminal
250             $self->on_cycle->($self, $value); # might warn or die
251             $target->{$target_key} = $value; # treat as a terminal
252             } elsif ($ref eq 'HASH') {
253             my $delimiter = $self->hash_delimiter;
254              
255             $seen->{$refaddr} = 1;
256              
257             if (%$value) {
258             # sorting the keys ensures a deterministic order,
259             # which (at the very least) is required for unsurprising
260             # tests
261             for my $hash_key (sort keys %$value) {
262             my $hash_value = $value->{$hash_key};
263             $self->_merge($hash_value, $self->_join($target_key, $delimiter, $hash_key), $target, $seen);
264             }
265             } else {
266             $target->{$target_key} = {};
267             }
268             } elsif ($ref eq 'ARRAY') {
269             my $delimiter = $self->array_delimiter;
270              
271             $seen->{$refaddr} = 1;
272              
273             if (@$value) {
274             for my $index (0 .. $#$value) {
275             my $array_element = $value->[$index];
276             $self->_merge($array_element, $self->_join($target_key, $delimiter, $index), $target, $seen);
277             }
278             } else {
279             $target->{$target_key} = [];
280             }
281             } else { # terminal
282             $target->{$target_key} = $value;
283             }
284              
285             return $target;
286             }
287              
288             # the action depends on the number of steps:
289             #
290             # 1: e.g. [ 'foo' ]:
291             #
292             # $context->{foo} = $value
293             #
294             # 2: e.g. [ 'foo', 42 ]:
295             #
296             # $context = $context->{foo} ||= []
297             # $context->[42] = $value
298             #
299             # 3 (or more): e.g. [ 'foo', 42, 'bar' ]:
300             #
301             # $context = $context->{foo} ||= []
302             # return $self->_set($context, [ 42, 'bar' ], $value)
303             #
304             # Note that the 2 case can be implemented in the same way as the 3 (or more) case.
305              
306             sub _set {
307             my ($self, $context, $steps, $value) = @_;
308             my $step = shift @$steps;
309              
310             if (@$steps) { # recursive case
311             # peek i.e. look-ahead to the step that will be processed in
312             # the tail call and make sure its container exists
313             my $next_step = $steps->[0];
314             my $next_step_container = sub { $next_step->[TYPE] == ARRAY ? [] : {} };
315              
316             $context = ($step->[TYPE] == ARRAY) ?
317             ($context->[ $step->[VALUE] ] ||= $next_step_container->()) : # array index
318             ($context->{ $step->[VALUE] } ||= $next_step_container->()); # hash key
319             } else { # base case
320             if ($step->[TYPE] == ARRAY) {
321             $context->[ $step->[VALUE] ] = $value; # array index
322             } else {
323             $context->{ $step->[VALUE] } = $value; # hash key
324             }
325             }
326              
327             return @$steps ? $self->_set($context, $steps, $value) : $value;
328             }
329              
330             __PACKAGE__->meta->make_immutable;
331              
332             1;
333              
334             =head1 NAME
335              
336             Hash::Fold - flatten and unflatten nested hashrefs
337              
338             =head1 SYNOPSIS
339              
340             use Hash::Fold qw(flatten unflatten);
341              
342             my $object = bless { foo => 'bar' };
343              
344             my $nested = {
345             foo => $object,
346             baz => {
347             a => 'b',
348             c => [ 'd', { e => 'f' }, 42 ],
349             },
350             };
351              
352             my $flattened = flatten($nested);
353              
354             is_deeply $flattened, {
355             'baz.a' => 'b',
356             'baz.c.0' => 'd',
357             'baz.c.1.e' => 'f',
358             'baz.c.2' => 42,
359             'foo' => $object,
360             };
361              
362             my $roundtrip = unflatten($flattened);
363              
364             is_deeply $roundtrip, $nested;
365              
366             =head1 DESCRIPTION
367              
368             This module provides functional and OO interfaces which can be used to flatten, unflatten
369             and merge nested hashrefs.
370              
371             Unless noted, the functions listed below are also available as methods. Options provided to the
372             Hash::Fold constructor can be supplied to the functions e.g.:
373              
374             use Hash::Fold;
375              
376             my $folder = Hash::Fold->new(delimiter => '/');
377              
378             $folder->fold($hash);
379              
380             is equivalent to:
381              
382             use Hash::Fold qw(fold);
383              
384             my $folded = fold($hash, delimiter => '/');
385              
386             Options (and constructor args) can be supplied as a list of key/value pairs or a hashref, so the
387             following are equivalent:
388              
389             my $folded = fold($hash, delimiter => '/' );
390             my $folded = fold($hash, { delimiter => '/' });
391              
392             In addition, Hash::Fold uses L<Sub::Exporter>, which allows functions to be imported with options
393             baked in e.g.:
394              
395             use Hash::Fold fold => { delimiter => '/' };
396              
397             my $folded = fold($hash);
398              
399             =head1 OPTIONS
400              
401             As described above, the following options can be supplied as constructor args, import args,
402             or per-function overrides. Under the hood, they are (L<Moose>) attributes which can be wrapped
403             and overridden like any other attributes.
404              
405             =head2 array_delimiter
406              
407             B<Type>: Str, ro, default: "."
408              
409             The delimiter prefixed to array elements when flattening and unflattening.
410              
411             =head2 hash_delimiter
412              
413             B<Type>: Str, ro, default: "."
414              
415             The delimiter prefixed to hash elements when flattening and unflattening.
416              
417             =head2 delimiter
418              
419             B<Type>: Str
420              
421             This is effectively a write-only attribute which assigns the same string to L<"array_delimiter"> and
422             L<"hash_delimiter">. It can only be supplied as a constructor arg or function option (which are
423             equivalent) i.e. Hash::Fold instances have no C<delimiter> method.
424              
425             =head2 on_cycle
426              
427             B<Type>: (Hash::Fold, Ref) -> None, ro
428              
429             A callback invoked whenever L<"fold"> encounters a circular reference i.e. a reference which contains
430             itself as a nested value.
431              
432             The callback takes two arguments: the Hash::Fold instance and the value e.g.:
433              
434             sub on_cycle {
435             my ($folder, $value) = @_;
436             warn 'self-reference found: ', Dumper(value), $/;
437             }
438              
439             my $folder = Hash::Fold->new(on_cycle => \&on_cycle);
440              
441             Note that circular references are handled correctly i.e. they are treated as terminals and not traversed.
442             This callback merely provides a mechanism to report them (e.g. by issuing a warning).
443              
444             The default callback does nothing.
445              
446             =head2 on_object
447              
448             B<Type>: (Hash::Fold, Ref) -> Any, ro
449              
450             A callback invoked whenever L<"fold"> encounters a value for which the L<"is_object"> method returns true
451             i.e. any reference that isn't an unblessed arrayref or unblessed hashref. This callback can be used to modify
452             the value e.g. to return a traversable value (e.g. unblessed hashref) in place of a terminal (e.g.
453             blessed hashref).
454              
455             The callback takes two arguments: the Hash::Fold instance and the object e.g.:
456              
457             use Scalar::Util qw(blessed);
458              
459             sub on_object {
460             my ($folder, $value) = @_;
461              
462             if (blessed($value) && $value->isa('HASH')) {
463             return { %$value }; # unbless
464             } else {
465             return $value;
466             }
467             }
468              
469             my $folder = Hash::Fold->new(on_object => \&on_object);
470              
471             The default callback returns its value unchanged.
472              
473             =head1 EXPORTS
474              
475             Nothing by default. The following functions can be imported.
476              
477             =head2 fold
478              
479             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
480              
481             Takes a nested hashref and returns a single-level hashref with (by default) dotted keys.
482             The delimiter can be overridden via the L<"delimiter">, L<"array_delimiter"> and
483             L<"hash_delimiter"> options.
484              
485             Unblessed arrayrefs and unblessed hashrefs are traversed. All other values (e.g. strings,
486             regexps, objects &c.) are treated as terminals and passed through verbatim,
487             although this can be overridden by supplying a suitable L<"on_object"> callback.
488              
489             =head2 flatten
490              
491             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
492              
493             Provided as an alias for L<"fold">.
494              
495             =head2 unfold
496              
497             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
498              
499             Takes a flattened hashref and returns the corresponding nested hashref.
500              
501             =head2 unflatten
502              
503             B<Signature>: (HashRef [, Hash|HashRef ]) -> HashRef
504              
505             Provided as an alias for L<"unfold">.
506              
507             =head2 merge
508              
509             B<Signature>: (HashRef [, HashRef... ]) -> HashRef
510              
511             B<Signature>: (ArrayRef[HashRef] [, Hash|HashRef ]) -> HashRef
512              
513             Takes a list of hashrefs which are then flattened, merged into one (in the order provided i.e.
514             with precedence given to the rightmost arguments) and unflattened i.e. shorthand for:
515              
516             unflatten { map { %{ flatten $_ } } @_ }
517              
518             To provide options to the C<merge> subroutine, pass the hashrefs in an arrayref, and the options
519             (as usual) as a list of key/value pairs or a hashref:
520              
521             merge([ $hash1, $hash2, ... ], delimiter => ... )
522             merge([ $hash1, $hash2, ... ], { delimiter => ... })
523              
524             =head1 METHODS
525              
526             =head2 is_object
527              
528             B<Signature>: Any -> Bool
529              
530             This method is called from L<"fold"> to determine whether a value should be passed to the L<"on_object">
531             callback.
532              
533             It is passed each value encountered while traversing a hashref and returns true for all references (e.g.
534             regexps, globs, objects &c.) apart from unblessed arrayrefs and unblessed hashrefs, and false for all other
535             values (i.e. unblessed hashrefs, unblessed arrayrefs, and non-references).
536              
537             =head1 VERSION
538              
539             0.1.1
540              
541             =head1 SEE ALSO
542              
543             =over
544              
545             =item * L<CGI::Expand>
546              
547             =item * L<Hash::Flatten>
548              
549             =item * L<Hash::Merge>
550              
551             =item * L<Hash::Merge::Simple>
552              
553             =back
554              
555             =head1 AUTHOR
556              
557             chocolateboy <chocolate@cpan.org>
558              
559             =head1 COPYRIGHT
560              
561             Copyright (c) 2014, chocolateboy.
562              
563             This module is free software. It may be used, redistributed and/or modified under the same terms
564             as Perl itself.
565              
566             =cut