File Coverage

blib/lib/Data/Crumbr.pm
Criterion Covered Total %
statement 77 78 98.7
branch 21 24 87.5
condition 8 11 72.7
subroutine 13 13 100.0
pod 2 2 100.0
total 121 128 94.5


line stmt bran cond sub pod time code
1             package Data::Crumbr;
2             $Data::Crumbr::VERSION = '0.1.0';
3             # ABSTRACT: Render data structures for easy searching and parsing
4              
5             # Inlined Mo
6 7     7   148719 use Mo qw< default coerce >;
  7         3156  
  7         36  
7              
8 7     7   13694 use 5.018;
  7         24  
9 7     7   36 use strict;
  7         14  
  7         145  
10 7     7   31 use warnings;
  7         8  
  7         179  
11 7     7   56 use Carp;
  7         14  
  7         600  
12 7     7   4925 use English qw< -no_match_vars >;
  7         28081  
  7         39  
13 7     7   2835 use Exporter qw< import >;
  7         13  
  7         233  
14 7     7   34 use Scalar::Util qw< blessed >;
  7         9  
  7         6093  
15              
16             our @EXPORT = qw< crumbr >;
17             our @EXPORT_OK = @EXPORT;
18             our %EXPORT_TAGS = (all => [@EXPORT_OK]);
19              
20             has encoder => (
21             default => sub { __encoder() },
22             coerce => \&__encoder,
23             );
24              
25             sub __load_class {
26 9     9   20 my ($class) = @_;
27 9         55 (my $packname = "$class.pm") =~ s{::}{/}gmxs;
28 9         5121 require $packname;
29 9         71 return $class;
30             } ## end sub __load_class
31              
32             sub crumbr {
33 5 50 66 5 1 11223 my %args = (@_ && ref($_[0])) ? %{$_[0]} : @_;
  0         0  
34 5 100       25 if (defined(my $name = delete $args{profile})) {
35 4         16 my $class = __PACKAGE__ . "::Default::$name";
36 4         19 my $profile = __load_class($class)->profile();
37 4   100     44 my $encoder = delete($args{encoder}) // {};
38 4         46 %$encoder = (
39             %$profile,
40             %$encoder, # allow some overriding
41             class => '::Default', # but not on this one
42             );
43 4         26 %args = (encoder => $encoder);
44             } ## end if (defined(my $name =...))
45 5         58 my $wh = __PACKAGE__->new(%args);
46 5     5   85 return sub { $wh->encode(@_) };
  5         3007  
47             } ## end sub crumbr
48              
49             sub __encoder {
50 5     5   184 my ($e) = @_;
51 5 50       53 if (!blessed($e)) {
52 5         14 my ($class, @parameters) = $e;
53 5 100       25 if (ref($e) eq 'HASH') {
54 4         13 $class = delete $e->{class};
55 4         25 @parameters = %$e;
56             }
57 5 100       21 $class = '::Default' unless defined $class;
58 5 50       33 $class = __PACKAGE__ . $class
59             if substr($class, 0, 2) eq '::';
60 5         54 $e = __load_class($class)->new(@parameters);
61             } ## end if (!blessed($e))
62 5         446 return $e;
63             } ## end sub __encoder
64              
65             sub encode {
66 5     5 1 11 my ($self, $data) = @_;
67 5         26 my $encoder = $self->encoder();
68 5         52 $encoder->reset();
69              
70 5         31 my @stack = ({closers => ''}, {data => $data, type => ref($data)},);
71             ITERATION:
72 5         23 while (@stack > 1) { # frame #0 is dummy
73 195         270 my $frame = $stack[-1];
74 195 100       503 if ($frame->{type} eq 'ARRAY') {
    100          
75 45 100       69 if (!scalar(@{$frame->{data}})) {
  45         108  
76 5         19 $encoder->array_leaf(\@stack);
77             }
78             else {
79             my $iterator = $frame->{iterator} //=
80 40   66     122 $encoder->array_keys_iterator($frame->{data});
81 40 100       92 if (defined(my $key = $iterator->())) {
82 30         103 $frame->{encoded} = $encoder->array_key($key);
83             $frame->{closers} =
84 30         305 $encoder->array_close() . $stack[-2]{closers};
85 30         208 my $child_data = $frame->{data}[$key];
86 30         83 push @stack,
87             {
88             data => $child_data,
89             type => ref($child_data),
90             };
91 30         88 next ITERATION;
92             } ## end if (defined(my $key = ...))
93             } ## end else [ if (!scalar(@{$frame->...}))]
94             } ## end if ($frame->{type} eq ...)
95             elsif ($frame->{type} eq 'HASH') {
96 85 100       87 if (!scalar(keys %{$frame->{data}})) {
  85         216  
97 5         19 $encoder->hash_leaf(\@stack);
98             }
99             else {
100             my $iterator = $frame->{iterator} //=
101 80   66     239 $encoder->hash_keys_iterator($frame->{data});
102 80 100       188 if (defined(my $key = $iterator->())) {
103 65         205 $frame->{encoded} = $encoder->hash_key($key);
104             $frame->{closers} =
105 65         607 $encoder->hash_close() . $stack[-2]{closers};
106 65         450 my $child_data = $frame->{data}{$key};
107 65         217 push @stack,
108             {
109             data => $child_data,
110             type => ref($child_data),
111             };
112 65         199 next ITERATION;
113             } ## end if (defined(my $key = ...))
114             } ## end else [ if (!scalar(keys %{$frame...}))]
115             } ## end elsif ($frame->{type} eq ...)
116             else { # treat as leaf scalar
117 65         265 $encoder->scalar_leaf(\@stack);
118             }
119              
120             # only leaves or end-of-container arrive here
121 100         354 pop @stack;
122             } ## end ITERATION: while (@stack > 1)
123              
124 5         21 return $encoder->result();
125             } ## end sub encode
126              
127             1;
128              
129             __END__
130              
131             =pod
132              
133             =encoding utf-8
134              
135             =head1 NAME
136              
137             Data::Crumbr - Render data structures for easy searching and parsing
138              
139             =head1 VERSION
140              
141             version 0.1.0
142              
143             =head1 SYNOPSIS
144              
145             use Data::Crumbr; # imports `crumbr`
146              
147             # some data to work with
148             my $data = { here => { what => 'ever', hey => 'you' } };
149              
150             # crumbr provides an anonymous sub back. This has defaults
151             my $csub = crumbr();
152              
153             # use it to encode the data
154             my $encoded = $csub->($data);
155             # {"here"}{"hey"}:"you"
156             # {"here"}{"what"}:"ever"
157              
158             # URI profile simplifies things but loses something
159             $encoded = crumbr(profile => 'URI')->($data);
160             # here/hey "you"
161             # here/what "ever"
162              
163             # JSON profile produces valid JSON "slices"
164             $encoded = crumbr(profile => 'JSON')->($data);
165             # {"here":{"hey":"you"}}
166             # {"here":{"what":"ever"}}
167              
168             # Object Oriented Interface
169             my $crobj = Data::Crumbr->new();
170             $encoded = $crobj->encode($data); # same as default
171              
172             =head1 DESCRIPTION
173              
174             Data::Crumbr lets you render data structures in a way that can then be
175             easily searched and parsed in "slices". The basic idea is that data
176             shaped in this way will then be easily filtered in the shell for
177             extracting interesting parts.
178              
179             The input data structure is traversed as if it were a tree (so no
180             circular structures please!), and a I<record> is generated for each leaf
181             in the tree. Depending on the backend and the configurations, the full
182             path from the root to the parent of the leaf is represented as a
183             sequence of keys (which can be hash keys or array indexes) followed by
184             the value. This should make your life easier e.g. in the shell, so that
185             you can specify the full path to the data structure part you're
186             interested in with common Unix tools like C<grep> and/or C<sed>.
187              
188             =head2 Example
189              
190             Suppose you have the following data structure in Perl:
191              
192             my $data = {
193             one => '1',
194             two => 2,
195             three => 3.1,
196             four => '4.0',
197             true => \1,
198             false => \0,
199             array => [
200             qw< what ever >,
201             { inner => 'part', empty => [] }
202             ],
203             hash => {
204             'with ♜' => {},
205             ar => [ 1..3 ],
206             something => "funny \x{263A} ☻",
207             },
208             };
209              
210             If you encode this e.g. in JSON, it will be easy to parse with
211             the right program, but not from the shell, even if you pretty
212             print it:
213              
214             {
215             "hash" : {
216             "something" : "funny ☺ ☻",
217             "with ♜" : {},
218             "ar" : [
219             1,
220             2,
221             3
222             ]
223             },
224             "one" : "1",
225             "array" : [
226             "what",
227             "ever",
228             {
229             "inner" : "part",
230             "empty" : []
231             }
232             ],
233             "four" : "4.0",
234             "true" : true,
235             "two" : 2,
236             "three" : 3.1,
237             "false" : false
238             }
239              
240             How do you get the second item in the array C<ar> inside the
241             hash C<hash>? Would you do better with YAML instead?
242              
243             ---
244             array:
245             - what
246             - ever
247             - empty: []
248             inner: part
249             false: !!perl/ref
250             =: 0
251             four: 4.0
252             hash:
253             ar:
254             - 1
255             - 2
256             - 3
257             something: funny ☺ ☻
258             with ♜: {}
259             one: 1
260             three: 3.1
261             true: !!perl/ref
262             =: 1
263             two: 2
264              
265             Not really. Data::Crumbr lets you represent the data in a
266             more verbose but easily consumable way for the shell. Hence,
267             this:
268              
269             use Data::Crumbr;
270             print crumbr()->($data), "\n";
271              
272             will give you this:
273              
274             {"array"}[0]:"what"
275             {"array"}[1]:"ever"
276             {"array"}[2]{"empty"}:[]
277             {"array"}[2]{"inner"}:"part"
278             {"false"}:false
279             {"four"}:"4.0"
280             {"hash"}{"ar"}[0]:1
281             {"hash"}{"ar"}[1]:2
282             {"hash"}{"ar"}[2]:3
283             {"hash"}{"something"}:"funny \u263A \u263B"
284             {"hash"}{"with \u265C"}:{}
285             {"one"}:"1"
286             {"three"}:3.1
287             {"true"}:true
288             {"two"}:2
289              
290             Now it should be pretty easy for a shell program to get at the
291             data, e.g. with this C<sed> substitution:
292              
293             sed -ne 's/^{"hash"}{"ar"}\[1\]://p'
294              
295             =head2 Profiles
296              
297             If you don't like the default encoding, you can get a different
298             one by using a I<profile>. This is a set of configurations for
299             C<Data::Crumbr::Default>, which is a pretty generic class for
300             representing a wide class of possible record-oriented encodings.
301              
302             A C<Data::Crumbr::Default> encoder is defined in terms of the following
303             parameters:
304              
305             =over
306              
307             =item C<array_open>
308              
309             sequence to put when an array is opened
310              
311             =item C<array_close>
312              
313             sequence to put when an array is closed
314              
315             =item C<array_key_prefix>
316              
317             sequence to put before an array's index
318              
319             =item C<array_key_suffix>
320              
321             sequence to put after an array's index
322              
323             =item C<array_key_encoder>
324              
325             a reference to a function that encodes an array's index
326              
327             =item C<hash_open>
328              
329             sequence to put when a hash is opened
330              
331             =item C<hash_close>
332              
333             sequence to put when a hash is closed
334              
335             =item C<hash_key_prefix>
336              
337             sequence to put before a hash's key
338              
339             =item C<hash_key_suffix>
340              
341             sequence to put after a hash's key
342              
343             =item C<hash_key_encoder>
344              
345             a reference to a function that encodes a hash's key
346              
347             =item C<value_encoder>
348              
349             a reference to a function that encodes a leaf value
350              
351             =item C<keys_separator>
352              
353             sequence to separate the keys breadcrumb
354              
355             =item C<value_separator>
356              
357             sequence to separate the keys from the value
358              
359             =back
360              
361             By default, Data::Crumbr ships with the following profiles:
362              
363             =over
364              
365             =item B<< Default >>
366              
367             i.e. the profile you get by default, and what you saw in action in the
368             example above. It has the following settings:
369              
370             =over
371              
372             =item *
373              
374             no openers and closers:
375              
376             array_open => ''
377             array_close => ''
378             hash_open => ''
379             hash_close => ''
380              
381             =item *
382              
383             array keys are printed verbatim, surrounded by square brackets:
384              
385             array_key_prefix => '['
386             array_key_suffix => ']'
387             array_key_encoder => Data::Crumbr::Util::id_encoder
388              
389             =item *
390              
391             hash keys encoded as JSON strings, surrounded by curly brackets:
392              
393             hash_key_prefix => '{'
394             hash_key_suffix => '}'
395             hash_key_encoder => Data::Crumbr::Util::json_leaf_encoder
396              
397             =item *
398              
399             no separator between keys (because they already stand out very clearly),
400             but a colon to separate the sequence of keys from the value:
401              
402             keys_separator => ''
403             value_separator => ':'
404              
405             =item *
406              
407             leaf values encoded as JSON scalars:
408              
409             value_encoder => Data::Crumbr::Util::json_leaf_encoder
410              
411             =back
412              
413             This is quite verbose, but lets you specify very precisely what you are
414             looking for because the hash keys stand out clearly with respect to
415             array identifiers, i.e. there's no chance that you will mistake an array
416             index for a hash key (because they are embedded in different bracket
417             types).
418              
419             =item B<< JSON >>
420              
421             this profile always provides you compact JSON-compliant string
422             representations that contain only one single leaf value.
423              
424             It has the following characteristics:
425              
426             =over
427              
428             =item *
429              
430             openers and closers are what you would expect for JSON objects and
431             arrays:
432              
433             array_open => '['
434             array_close => ']'
435             hash_open => '{'
436             hash_close => '}'
437              
438             =item *
439              
440             there is only one non-empty suffix, i.e. the hash key suffix, so that
441             we can separate the hash key from the value with C<:> according to JSON:
442              
443             array_key_prefix => ''
444             array_key_suffix => ''
445             hash_key_prefix => ''
446             hash_key_suffix => ':'
447              
448             =item *
449              
450             array keys are not printed:
451              
452             array_key_encoder => sub { }
453              
454             =item *
455              
456             hash keys are JSON encoded:
457              
458             hash_key_encoder => Data::Crumbr::Util::json_encoder()
459              
460             =item *
461              
462             no separators are needed:
463              
464             keys_separator => ''
465             value_separator => ''
466              
467             =item *
468              
469             leaf values encoded as JSON scalars:
470              
471             value_encoder => Data::Crumbr::Util::json_leaf_encoder
472              
473             =back
474              
475             =item B<< URI >>
476              
477             this is the simplest of the profiles, and sacrifices the possibility to
478             distinguish between hash and array keys on the altar of simplicity.
479              
480             It has the following characteristics:
481              
482             =over
483              
484             =item *
485              
486             no openers, closers, prefixes or suffixes:
487              
488             array_open => ''
489             array_close => ''
490             array_key_prefix => ''
491             array_key_suffix => ''
492              
493             hash_open => ''
494             hash_close => ''
495             hash_key_prefix => ''
496             hash_key_suffix => ''
497              
498             =item *
499              
500             array keys are printed verbatim
501              
502             =item *
503              
504             hash keys are URI encoded
505              
506             hash_key_encoder => Data::Crumbr::Util::uri_encoder
507              
508             =item *
509              
510             keys are separated by a slash character C</> and values are separated by
511             a single space C< >:
512              
513             keys_separator => '/'
514             value_separator => ' '
515              
516             =item *
517              
518             leaf values encoded as JSON scalars:
519              
520             value_encoder => Data::Crumbr::Util::json_leaf_encoder
521              
522             =back
523              
524             =back
525              
526             =head1 INTERFACE
527              
528             There are two ways to use Data::Crumbr: a function C<crumbr>,
529             that is exported by default, and the object-oriented interface.
530              
531             =over
532              
533             =item B<< crumbr >>
534              
535             $subref = crumbr(%args); # OR
536             $subref = crumbr(\%args);
537              
538             get a I<crumbr> generator based on provided C<%args>.
539              
540             Returns a reference to a sub, which can then be called upon a data
541             structure in order to get the I<crumbed> version.
542              
543             The input arguments can be:
544              
545             =over
546              
547             =item C<< encoder >>
548              
549             details about the encoder, see L</Profiles> for the available key-value
550             pairs. In addition, you can also set the following:
551              
552             =over
553              
554             =item C<< output >>
555              
556             the output channel to use for sending encoded data. This can be:
557              
558             =over
559              
560             =item * I<filename>
561              
562             this will be opened in raw mode and used to send the output
563              
564             =item * I<filehandle>
565              
566             used directly
567              
568             =item * I<array reference>
569              
570             each output line will be pushed as a new element in the array
571              
572             =item * I<object reference>
573              
574             which is assumed to support the C<print()> method, that will be called
575             with each generated line
576              
577             =item * I<sub reference>
578              
579             which will be called with each generated line
580              
581             =back
582              
583             =back
584              
585             =item C<< profile >>
586              
587             the name of a profile to use as a base - see L</Profiles>. Settings in
588             the profile are always overridden by corresponding ones in the provided
589             encoder, if any.
590              
591             =back
592              
593             =item B<< encode >>
594              
595             $dc->encode($data_structure);
596              
597             generate the encoding for the provided C<$data_structure>. Output is
598             generated depending on how it is specified, see L</crumbr> above.
599              
600             =item B<< new >>
601              
602             my $dc = Data::Crumbr->new(encoder => \%args);
603              
604             create a new instance of C<Data::Crumbr>. Data provided for the
605             C<encoder> parameter (i.e. C<%args>) are those discussed in
606             L</Profiles>.
607              
608             The new instance can then be used to encode data using the L</encode>
609             method.
610              
611             =back
612              
613             =head1 AUTHOR
614              
615             Flavio Poletti <polettix@cpan.org>
616              
617             =head1 COPYRIGHT AND LICENSE
618              
619             Copyright (C) 2015 by Flavio Poletti <polettix@cpan.org>
620              
621             This module is free software. You can redistribute it and/or
622             modify it under the terms of the Artistic License 2.0.
623              
624             This program is distributed in the hope that it will be useful,
625             but without any warranty; without even the implied warranty of
626             merchantability or fitness for a particular purpose.
627              
628             =cut