File Coverage

blib/lib/Data/SplitSerializer.pm
Criterion Covered Total %
statement 85 86 98.8
branch 35 40 87.5
condition 6 12 50.0
subroutine 15 16 93.7
pod 4 5 80.0
total 145 159 91.1


line stmt bran cond sub pod time code
1             package Data::SplitSerializer;
2              
3             our $VERSION = '0.91'; # VERSION
4             # ABSTRACT: Modules that "split serialize" data structures
5              
6             #############################################################################
7             # Modules
8              
9 2     2   4239 use sanity;
  2         973681  
  2         77  
10 2     2   828515 use Moo;
  2         33479  
  2         14  
11 2     2   7223 use Types::Standard qw(Bool Str HashRef InstanceOf HasMethods);
  2         360701  
  2         35  
12              
13 2     2   3102 use Module::Runtime qw( use_module );
  2         5  
  2         20  
14 2     2   2358 use Hash::Merge;
  2         9168  
  2         192  
15 2     2   2670 use Try::Tiny;
  2         16899  
  2         234  
16 2     2   23 use Scalar::Util qw( blessed );
  2         7  
  2         148  
17              
18 2     2   2235 use namespace::clean;
  2         57712  
  2         20  
19 2     2   874 no warnings 'uninitialized';
  2         117  
  2         10488  
20              
21             #############################################################################
22             # Custom Hash::Merge behaviors
23              
24             my $default_behavior = 'LEFT_PRECEDENT_STRICT_ARRAY_INDEX';
25              
26             Hash::Merge::specify_behavior(
27             {
28             # NOTE: Undef is still considered 'SCALAR'.
29             SCALAR => {
30             SCALAR => sub { $_[1] },
31             ARRAY => sub {
32             return $_[1] unless defined $_[0];
33             die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'SCALAR', 'ARRAY', $_[0]);
34             },
35             HASH => sub {
36             return $_[1] unless defined $_[0];
37             die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'SCALAR', 'HASH', $_[0]);
38             },
39             },
40             ARRAY => {
41             SCALAR => sub {
42             return $_[0] unless defined $_[1];
43             die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'ARRAY', 'SCALAR', $_[1]);
44             },
45             ARRAY => sub {
46             # Handle arrays by index, not by combining
47             my ($l, $r) = @_;
48             $l->[$_] = $r->[$_] for (
49             grep { defined $r->[$_] }
50             (0 .. $#{$_[1]})
51             );
52             return $l;
53             },
54             HASH => sub { die sprintf('mismatched type (%s vs. %s) found during merge', 'ARRAY', 'HASH'); },
55             },
56             HASH => {
57             SCALAR => sub {
58             return $_[0] unless defined $_[1];
59             die sprintf('mismatched type (%s vs. %s) found during merge: $scalar = %s', 'HASH', 'SCALAR', $_[1]);
60             },
61             ARRAY => sub { die sprintf('mismatched type (%s vs. %s) found during merge', 'HASH', 'ARRAY'); },
62             HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) },
63             },
64             },
65             $default_behavior,
66             );
67              
68             #############################################################################
69             # Attributes
70              
71             has _merge_obj => (
72             is => 'rw',
73             isa => InstanceOf['Hash::Merge'],
74             default => sub { Hash::Merge->new($default_behavior); },
75             handles => { qw(
76             merge merge
77             specify_behavior specify_merge_behavior
78             set_behavior set_merge_behavior
79             ) },
80             );
81              
82             has path_style => (
83             is => 'ro',
84             isa => Str,
85             default => sub { 'DZIL' },
86             coerce => sub {
87             'Parse::Path::'.$_[0] unless ($_[0] =~ s/^\=//); # NOTE: kill two birds with one stone
88             },
89             );
90              
91             has path_options => (
92             is => 'ro',
93             isa => HashRef,
94             default => sub { {
95             auto_normalize => 1,
96             auto_cleanup => 1,
97             } },
98             );
99              
100             has remove_undefs => (
101             is => 'ro',
102             isa => Bool,
103             default => sub { 1 },
104             );
105              
106             #############################################################################
107             # Pre/post-BUILD
108              
109              
110             sub BUILD {
111 2     2 0 67 my $self = $_[0];
112              
113             # Load the path class
114 2         109 use_module $self->path_style;
115              
116 2         2065445 return $self;
117             }
118              
119             #############################################################################
120             # Methods
121              
122             ### FLATTENING ###
123              
124             sub serialize {
125 5     5 1 16508 my ($self, $ref) = @_;
126              
127 5         16 my $type = ref $ref;
128 5 50 33     149 die 'Reference must be an unblessed HASH or ARRAY!'
      33        
129             unless (defined $ref && !blessed $ref && $type =~ /HASH|ARRAY/);
130              
131 5         26 return $self->serialize_refpath('', $ref);
132             }
133              
134             sub serialize_refpath {
135 90     90 1 150 my ($self, $path, $ref) = @_;
136 90   50     311 $path //= '';
137              
138 90         888 my $prh = { $path => $ref }; # single row answer
139              
140 90 50       8147 return $prh if blessed $ref; # down that path leads madness...
141 90   100     245 my $type = ref $ref || return $prh; # that covers SCALARs...
142 58 100       311 return $prh unless $type =~ /HASH|ARRAY/; # ...and all other endpoints
143              
144             # Blessed is the path
145 55 100       179 unless (blessed $path) {
146 5         198 $path = $self->path_style->new(
147 5         25 %{ $self->path_options },
148             stash_obj => $self,
149             path => $path,
150             );
151             }
152              
153 55 50       1710 die sprintf("Too deep down the rabbit hole, stopped at '%s'", $path)
154             if ($path->step_count > 255);
155              
156 55         331 my $hash = {};
157 55 100       225 my @keys = $type eq 'HASH' ? (keys %$ref) : (0 .. $#$ref);
158 55         98 foreach my $key (@keys) {
159 85 100       204 my $val = $type eq 'HASH' ? $ref->{$key} : $ref->[$key];
160              
161             # Add on to $path
162 85         253 my $newpath = $path->clone;
163 85         19909 $newpath->push( $newpath->key2hash($key, $type) );
164              
165             # Recurse back to give us a full set of $path => $val pairs
166 85         29031 my $newhash = $self->serialize_refpath($newpath, $val);
167              
168             # Merge (shallowly)
169 85 100       199 $hash->{$_} = $newhash->{$_} for (grep { defined $newhash->{$_} or !$self->remove_undefs } keys %$newhash);
  94         1600  
170             }
171              
172 55         245 return $hash;
173             }
174              
175             ### EXPANSION ###
176              
177             sub deserialize {
178 10     10 1 17745 my ($self, $hash) = @_;
179              
180 10         18 my $root; # not sure if it's a hash or array yet
181 10         79 foreach my $path (sort keys %$hash) {
182 42   50     159 my $branch = $self->deserialize_pathval($path, $hash->{$path}) || return; # error already set
183              
184             # New root?
185 42 100       100 unless (defined $root) {
186 10         14 $root = $branch;
187 10         21 next;
188             }
189              
190             # Our merge behavior might die on us (or Hash::Merge itself)
191 32         38 my $err;
192 32     32   2034 try { $root = $self->merge($root, $branch); }
193 32     0   256 catch { $err = $_; };
  0         0  
194              
195             # Add path to error
196 32 50       1557 die sprintf("In path '%s', %s", $path, $err) if ($err);
197             }
198              
199 10         85 return $root;
200             }
201              
202             sub deserialize_pathval {
203 42     42 1 72 my ($self, $path, $val) = @_;
204              
205 42         51 my ($root, $leaf, $hash_steps);
206 42         1194 $path = $self->path_style->new(
207 42         104 %{ $self->path_options },
208             path => $path,
209             );
210              
211 42         60597 for my $i (0 .. $path->step_count - 1) {
212 157         509 my $hash_step = $path->_path->[$i];
213 157 100       387 my $next_step = ($i == $path->step_count - 1) ? undef : $path->_path->[$i+1];
214              
215             # Construct $root if we need to
216 157 100       1077 $root = $leaf = ( $hash_step->{type} eq 'HASH' ? {} : [] ) unless ($i);
    100          
217              
218             # Add in the key, construct the next ref, and move the leaf forward
219 157         275 my $type_str = substr($hash_step->{type}, 0, 1);
220 157 100       424 $type_str .= substr($next_step->{type}, 0, 1) if $next_step;
221              
222 157         209 my $key = $hash_step->{key};
223              
224             # (RIP for/when)
225 157 100       496 if ($type_str eq 'HH') { $leaf = $leaf->{$key} = {}; }
  65 100       233  
    100          
    100          
    100          
    50          
226 14         55 elsif ($type_str eq 'HA') { $leaf = $leaf->{$key} = []; }
227 16         52 elsif ($type_str eq 'AH') { $leaf = $leaf->[$key] = {}; }
228 20         65 elsif ($type_str eq 'AA') { $leaf = $leaf->[$key] = []; }
229 36         138 elsif ($type_str eq 'H') { $leaf->{$key} = $val; }
230 6         22 elsif ($type_str eq 'A') { $leaf->[$key] = $val; }
231             }
232              
233 42         768 return $root;
234             }
235              
236             42;
237              
238             __END__
239              
240             =pod
241              
242             =encoding utf-8
243              
244             =head1 NAME
245              
246             Data::SplitSerializer - Modules that "split serialize" data structures
247              
248             =head1 SYNOPSIS
249              
250             use Data::SplitSerializer;
251            
252             my $dss = Data::SplitSerializer->new( path_style => 'DZIL' );
253             my $serialized = {
254             'gophers[0].holes' => 3,
255             'gophers[0].food.type' => 'grubs',
256             'gophers[0].food.count' => 7,
257            
258             'gophers[1].holes' => 1,
259             'gophers[1].food.type' => 'fruit',
260             'gophers[1].food.count' => 5,
261             };
262             my $deserialized = $dss->deserialize($serialized);
263            
264             my $more_gophers = [];
265             $more_gophers->[2] = {
266             holes => 2,
267             food => {
268             type => 'earthworms',
269             count => 15,
270             },
271             };
272            
273             $deserialized = $dss->merge( $deserialized, $more_gophers );
274              
275             =head1 DESCRIPTION
276              
277             Split serialization is a unique form of serialization that only serializes part of the data structure (as a path on the left side) and
278             leaves the rest of the data, typically a scalar, untouched (as a value on the right side). Consider the gopher example above:
279              
280             my $deserialized = {
281             gophers => [
282             {
283             holes => 3,
284             food => {
285             type => 'grubs',
286             count => 7,
287             },
288             },
289             {
290             holes => 1,
291             food => {
292             type => 'fruit',
293             count => 5,
294             },
295             },
296             {
297             holes => 2,
298             food => {
299             type => 'earthworms',
300             count => 15,
301             },
302             }
303             ],
304             };
305              
306             A full serializer, like L<Data::Serializer> or L<Data::Dumper>, would turn the entire object into a string, much like the real code
307             above. Or into JSON, XML, BerkleyDB, etc. But, the end values would be lost in the stream. If you were given an object like this,
308             how would you be able to store the data in an easy-to-access form for a caching module like L<CHI>? It requires keyE<sol>value pairs. Same
309             goes for L<KiokuDB> or various other storageE<sol>ORM modules.
310              
311             Data::SplitSerializer uses split serialization to turn the data into a path like this:
312              
313             my $serialized = {
314             'gophers[0].holes' => 3,
315             'gophers[0].food.type' => 'grubs',
316             'gophers[0].food.count' => 7,
317            
318             'gophers[1].holes' => 1,
319             'gophers[1].food.type' => 'fruit',
320             'gophers[1].food.count' => 5,
321            
322             'gophers[2].holes' => 2,
323             'gophers[2].food.type' => 'earthworms',
324             'gophers[2].food.count' => 15,
325             };
326              
327             Now, you can stash the data into whatever storage engine you want... or use just use it as a simple hash.
328              
329             =for Pod::Coverage BUILD
330              
331             =head1 CONSTRUCTOR
332              
333             # Defaults shown
334             my $stash = Data::Stash->new(
335             path_style => 'DZIL',
336             path_options => {
337             auto_normalize => 1,
338             auto_cleanup => 1,
339             },
340             );
341              
342             Creates a new serializer object. Accepts the following arguments:
343              
344             =head2 path_style
345              
346             path_style => 'File::Unix'
347             path_style => '=MyApp::Parse::Path::Foobar'
348              
349             Class used to create new L<path objects|Parse::Path> for path parsing. With a C<<< = >>> prefix, it will use that as the full
350             class. Otherwise, the class will be intepreted as C<<< Parse::Path::$class >>>.
351              
352             Default is L<DZIL|Parse::Path::DZIL>.
353              
354             =head2 path_options
355              
356             path_options => {
357             auto_normalize => 1,
358             auto_cleanup => 1,
359             }
360              
361             Hash of options to pass to new path objects. Typically, the default set of options are recommended to ensure a more commutative
362             path.
363              
364             =head2 remove_undefs
365              
366             remove_undefs => 0
367              
368             Boolean to indicate whether to remove See L</Undefined values> for more information.
369              
370             Default is on.
371              
372             =head1 METHODS
373              
374             =head2 serialize
375              
376             my $serialized = $dss->serialize($deserialized);
377              
378             SerializesE<sol>flattens a ref. Returns a serialized hashref of pathE<sol>value pairs.
379              
380             =head2 serialize_refpath
381              
382             my $serialized = $dss->serialize_refpath($path_prefix, $deserialized);
383            
384             # serialize is basically this with some extra sanity checks
385             my $serialized = $dss->serialize_refpath('', $deserialized);
386              
387             The real workhorse for C<<< serialize_ref >>>. Recursively dives down the different pieces of the deserialized tree and eventually comes
388             back with the serialized hashref. The path prefix can be used for prepending all of the paths returned in the serialized hashref.
389              
390             =head2 deserialize
391              
392             my $deserialized = $dss->deserialize($serialized);
393              
394             DeserializesE<sol>expands a hash of pathE<sol>data pairs. Returns the expanded object, which is usually a hashref, but might be an arrayref.
395             For example:
396              
397             # Starts with an array
398             my $serialized = {
399             '[0].thingy' => 1,
400             '[1].thingy' => 2,
401             };
402             my $deserialized = $dss->deserialize($serialized);
403            
404             # Returns:
405             $deserialized = [
406             { thingy => 1 },
407             { thingy => 2 },
408             ];
409              
410             =head2 deserialize_pathval
411              
412             my $deserialized = $dss->deserialize_pathval($path, $value);
413              
414             DeserializesE<sol>expands a single pathE<sol>data pair. Returns the expanded object.
415              
416             =head2 merge
417              
418             my $newhash = $dss->merge($hash1, $hash2);
419              
420             Merges two hashes. This is a direct handle to C<<< merge >>> from an (internal) L<Hash::Merge> object, and is used by L</deserialize> to
421             combine individual expanded objects.
422              
423             =head2 set_merge_behavior
424              
425             Handle to C<<< set_behavior >>> from the (internal) L<Hash::Merge> object. B<Advanced usage only!>
426              
427             Data::SplitSerializer uses a special custom type called C<<< LEFT_PRECEDENT_STRICT_ARRAY_INDEX >>>, which properly handles array
428             indexes and dies on any non-array-or-hash refs.
429              
430             =head2 specify_merge_behavior
431              
432             Handle to C<<< specify_behavior >>> from the (internal) L<Hash::Merge> object. B<Advanced usage only!>
433              
434             =head1 CAVEATS
435              
436             =head2 Undefined values
437              
438             Flattening will remove pathE<sol>values if the value is undefined. This is to clean up unused array values that appeared as holes in a
439             sparse array. For example:
440              
441             # From one of the basic tests
442             my $round_trip = $dss->serialize( $dss->deserialize_pathval(
443             'a[0][1][1][1][1][2].too' => 'long'
444             ) );
445            
446             # Without undef removal, this returns:
447             $round_trip = {
448             'a[0][0]' => undef,
449             'a[0][1][0]' => undef,
450             'a[0][1][1][0]' => undef,
451             'a[0][1][1][1][0]' => undef,
452             'a[0][1][1][1][1][0]' => undef,
453             'a[0][1][1][1][1][1]' => undef,
454             'a[0][1][1][1][1][2].too' => 'long',
455             };
456              
457             You can disable this with the L</remove_undefs> switch.
458              
459             =head2 Refs in split serialization
460              
461             Split serialization works by looking for HASH or ARRAY refs and diving further into them, adding path prefixes as it goes down. If
462             it encounters some other ref (like a SCALAR), it will stop and consider that to be the value for that path. In terms of ref parsing,
463             this means two things:
464              
465             =over
466              
467             =item 1.
468              
469             Only HASH and ARRAYs can be examined deeper.
470              
471             =item 2.
472              
473             If you have a HASH or ARRAY as a "value", serialization cannot tell the difference and it will be included in the path.
474              
475             =back
476              
477             The former isn't that big of a problem, since deeper dives with other kinds of refs are either not possible or dangerous (like CODE).
478              
479             The latter could be a problem if you started with a hashref with a pathE<sol>data pair, expanded it, and tried to flatten it again. This
480             can be solved by protecting the hash with a REF. Consider this example:
481              
482             my $round_trip = $dss->serialize( $dss->deserialize_pathval(
483             'a[0]' => { your => 'hash' }
484             ) );
485            
486             # Returns:
487             $round_trip = {
488             'a[0].your' => 'hash',
489             };
490            
491             # Now protect the hash
492             my $round_trip = $dss->serialize( $dss->deserialize_pathval(
493             'a[0]' => \{ your => 'hash' }
494             ) );
495            
496             # Returns:
497             $round_trip = {
498             'a[0]' => \{ your => 'hash' }
499             };
500              
501             =head2 Sparse arrays and memory usage
502              
503             Since arrays within paths are based on indexes, there's a potential security issue with large indexes causing abnormal memory usage.
504             In Perl, these two arrays would have drastically different memory footprints:
505              
506             my @small;
507             $small[0] = 1;
508            
509             my @large;
510             $large[999999] = 1;
511              
512             This can be mitigated by making sure the Path style you use will limit the total digits for array indexes. L<Parse::Path> handles
513             this on all of its paths, but it's something to be aware of if you create your own path classes.
514              
515             =head1 TODO
516              
517             This module might split off into individual split serializers, but so far, this is the only one "out in the wild".
518              
519             =head1 SEE ALSO
520              
521             L<Parse::Path>
522              
523             =head1 ACKNOWLEDGEMENTS
524              
525             Kent Fredric for getting me started on the basic idea.
526              
527             =head1 AVAILABILITY
528              
529             The project homepage is L<https://github.com/SineSwiper/Data-SplitSerializer/wiki>.
530              
531             The latest version of this module is available from the Comprehensive Perl
532             Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
533             site near you, or see L<https://metacpan.org/module/Data::SplitSerializer/>.
534              
535             =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
536              
537             =head1 SUPPORT
538              
539             =head2 Internet Relay Chat
540              
541             You can get live help by using IRC ( Internet Relay Chat ). If you don't know what IRC is,
542             please read this excellent guide: L<http://en.wikipedia.org/wiki/Internet_Relay_Chat>. Please
543             be courteous and patient when talking to us, as we might be busy or sleeping! You can join
544             those networks/channels and get help:
545              
546             =over 4
547              
548             =item *
549              
550             irc.perl.org
551              
552             You can connect to the server at 'irc.perl.org' and talk to this person for help: SineSwiper.
553              
554             =back
555              
556             =head2 Bugs / Feature Requests
557              
558             Please report any bugs or feature requests via L<https://github.com/SineSwiper/Data-SplitSerializer/issues>.
559              
560             =head1 AUTHOR
561              
562             Brendan Byrd <BBYRD@CPAN.org>
563              
564             =head1 CONTRIBUTOR
565              
566             Brendan Byrd <bbyrd@cpan.org>
567              
568             =head1 COPYRIGHT AND LICENSE
569              
570             This software is Copyright (c) 2013 by Brendan Byrd.
571              
572             This is free software, licensed under:
573              
574             The Artistic License 2.0 (GPL Compatible)
575              
576             =cut