File Coverage

blib/lib/Data/Crumbr.pm
Criterion Covered Total %
statement 75 76 98.6
branch 21 24 87.5
condition 8 11 72.7
subroutine 12 12 100.0
pod 2 2 100.0
total 118 125 94.4


line stmt bran cond sub pod time code
1             package Data::Crumbr;
2             $Data::Crumbr::VERSION = '0.1.1';
3             # ABSTRACT: Render data structures for easy searching and parsing
4              
5             # Inlined Mo
6 7     7   149797 use Mo qw< default coerce >;
  7         3416  
  7         38  
7              
8 7     7   14950 use strict;
  7         15  
  7         162  
9 7     7   35 use warnings;
  7         16  
  7         189  
10 7     7   35 use Carp;
  7         13  
  7         603  
11 7     7   23675 use English qw< -no_match_vars >;
  7         32072  
  7         40  
12 7     7   3070 use Exporter qw< import >;
  7         13  
  7         254  
13 7     7   34 use Scalar::Util qw< blessed >;
  7         13  
  7         6653  
14              
15             our @EXPORT = qw< crumbr >;
16             our @EXPORT_OK = @EXPORT;
17             our %EXPORT_TAGS = (all => [@EXPORT_OK]);
18              
19             has encoder => (
20             default => sub { __encoder() },
21             coerce => \&__encoder,
22             );
23              
24             sub __load_class {
25 9     9   20 my ($class) = @_;
26 9         64 (my $packname = "$class.pm") =~ s{::}{/}gmxs;
27 9         5771 require $packname;
28 9         139 return $class;
29             } ## end sub __load_class
30              
31             sub crumbr {
32 5 50 66 5 1 9965 my %args = (@_ && ref($_[0])) ? %{$_[0]} : @_;
  0         0  
33 5 100       32 if (defined(my $name = delete $args{profile})) {
34 4         18 my $class = __PACKAGE__ . "::Default::$name";
35 4         18 my $profile = __load_class($class)->profile();
36 4   100     41 my $encoder = delete($args{encoder}) // {};
37 4         52 %$encoder = (
38             %$profile,
39             %$encoder, # allow some overriding
40             class => '::Default', # but not on this one
41             );
42 4         26 %args = (encoder => $encoder);
43             } ## end if (defined(my $name =...))
44 5         59 my $wh = __PACKAGE__->new(%args);
45 5     5   97 return sub { $wh->encode(@_) };
  5         2175  
46             } ## end sub crumbr
47              
48             sub __encoder {
49 5     5   179 my ($e) = @_;
50 5 50       53 if (!blessed($e)) {
51 5         14 my ($class, @parameters) = $e;
52 5 100       26 if (ref($e) eq 'HASH') {
53 4         12 $class = delete $e->{class};
54 4         27 @parameters = %$e;
55             }
56 5 100       24 $class = '::Default' unless defined $class;
57 5 50       28 $class = __PACKAGE__ . $class
58             if substr($class, 0, 2) eq '::';
59 5         30 $e = __load_class($class)->new(@parameters);
60             } ## end if (!blessed($e))
61 5         452 return $e;
62             } ## end sub __encoder
63              
64             sub encode {
65 5     5 1 15 my ($self, $data) = @_;
66 5         21 my $encoder = $self->encoder();
67 5         65 $encoder->reset();
68              
69 5         35 my @stack = ({closers => ''}, {data => $data, type => ref($data)},);
70             ITERATION:
71 5         23 while (@stack > 1) { # frame #0 is dummy
72 195         283 my $frame = $stack[-1];
73 195 100       670 if ($frame->{type} eq 'ARRAY') {
    100          
74 45 100       53 if (!scalar(@{$frame->{data}})) {
  45         123  
75 5         20 $encoder->array_leaf(\@stack);
76             }
77             else {
78             my $iterator = $frame->{iterator} //=
79 40   66     131 $encoder->array_keys_iterator($frame->{data});
80 40 100       98 if (defined(my $key = $iterator->())) {
81 30         85 $frame->{encoded} = $encoder->array_key($key);
82             $frame->{closers} =
83 30         291 $encoder->array_close() . $stack[-2]{closers};
84 30         245 my $child_data = $frame->{data}[$key];
85 30         112 push @stack,
86             {
87             data => $child_data,
88             type => ref($child_data),
89             };
90 30         102 next ITERATION;
91             } ## end if (defined(my $key = ...))
92             } ## end else [ if (!scalar(@{$frame->...}))]
93             } ## end if ($frame->{type} eq ...)
94             elsif ($frame->{type} eq 'HASH') {
95 85 100       100 if (!scalar(keys %{$frame->{data}})) {
  85         239  
96 5         21 $encoder->hash_leaf(\@stack);
97             }
98             else {
99             my $iterator = $frame->{iterator} //=
100 80   66     239 $encoder->hash_keys_iterator($frame->{data});
101 80 100       210 if (defined(my $key = $iterator->())) {
102 65         168 $frame->{encoded} = $encoder->hash_key($key);
103             $frame->{closers} =
104 65         773 $encoder->hash_close() . $stack[-2]{closers};
105 65         489 my $child_data = $frame->{data}{$key};
106 65         195 push @stack,
107             {
108             data => $child_data,
109             type => ref($child_data),
110             };
111 65         229 next ITERATION;
112             } ## end if (defined(my $key = ...))
113             } ## end else [ if (!scalar(keys %{$frame...}))]
114             } ## end elsif ($frame->{type} eq ...)
115             else { # treat as leaf scalar
116 65         183 $encoder->scalar_leaf(\@stack);
117             }
118              
119             # only leaves or end-of-container arrive here
120 100         405 pop @stack;
121             } ## end ITERATION: while (@stack > 1)
122              
123 5         20 return $encoder->result();
124             } ## end sub encode
125              
126             1;
127              
128             __END__
129              
130             =pod
131              
132             =encoding utf-8
133              
134             =head1 NAME
135              
136             Data::Crumbr - Render data structures for easy searching and parsing
137              
138             =head1 VERSION
139              
140             version 0.1.1
141              
142             =head1 SYNOPSIS
143              
144             use Data::Crumbr; # imports `crumbr`
145              
146             # some data to work with
147             my $data = { here => { what => 'ever', hey => 'you' } };
148              
149             # crumbr provides an anonymous sub back. This has defaults
150             my $csub = crumbr();
151              
152             # use it to encode the data
153             my $encoded = $csub->($data);
154             # {"here"}{"hey"}:"you"
155             # {"here"}{"what"}:"ever"
156              
157             # URI profile simplifies things but loses something
158             $encoded = crumbr(profile => 'URI')->($data);
159             # here/hey "you"
160             # here/what "ever"
161              
162             # JSON profile produces valid JSON "slices"
163             $encoded = crumbr(profile => 'JSON')->($data);
164             # {"here":{"hey":"you"}}
165             # {"here":{"what":"ever"}}
166              
167             # Object Oriented Interface
168             my $crobj = Data::Crumbr->new();
169             $encoded = $crobj->encode($data); # same as default
170              
171             =head1 DESCRIPTION
172              
173             Data::Crumbr lets you render data structures in a way that can then be
174             easily searched and parsed in "slices". The basic idea is that data
175             shaped in this way will then be easily filtered in the shell for
176             extracting interesting parts.
177              
178             The input data structure is traversed as if it were a tree (so no
179             circular structures please!), and a I<record> is generated for each leaf
180             in the tree. Depending on the backend and the configurations, the full
181             path from the root to the parent of the leaf is represented as a
182             sequence of keys (which can be hash keys or array indexes) followed by
183             the value. This should make your life easier e.g. in the shell, so that
184             you can specify the full path to the data structure part you're
185             interested in with common Unix tools like C<grep> and/or C<sed>.
186              
187             =head2 Example
188              
189             Suppose you have the following data structure in Perl:
190              
191             my $data = {
192             one => '1',
193             two => 2,
194             three => 3.1,
195             four => '4.0',
196             true => \1,
197             false => \0,
198             array => [
199             qw< what ever >,
200             { inner => 'part', empty => [] }
201             ],
202             hash => {
203             'with ♜' => {},
204             ar => [ 1..3 ],
205             something => "funny \x{263A} ☻",
206             },
207             };
208              
209             If you encode this e.g. in JSON, it will be easy to parse with
210             the right program, but not from the shell, even if you pretty
211             print it:
212              
213             {
214             "hash" : {
215             "something" : "funny ☺ ☻",
216             "with ♜" : {},
217             "ar" : [
218             1,
219             2,
220             3
221             ]
222             },
223             "one" : "1",
224             "array" : [
225             "what",
226             "ever",
227             {
228             "inner" : "part",
229             "empty" : []
230             }
231             ],
232             "four" : "4.0",
233             "true" : true,
234             "two" : 2,
235             "three" : 3.1,
236             "false" : false
237             }
238              
239             How do you get the second item in the array C<ar> inside the
240             hash C<hash>? Would you do better with YAML instead?
241              
242             ---
243             array:
244             - what
245             - ever
246             - empty: []
247             inner: part
248             false: !!perl/ref
249             =: 0
250             four: 4.0
251             hash:
252             ar:
253             - 1
254             - 2
255             - 3
256             something: funny ☺ ☻
257             with ♜: {}
258             one: 1
259             three: 3.1
260             true: !!perl/ref
261             =: 1
262             two: 2
263              
264             Not really. Data::Crumbr lets you represent the data in a
265             more verbose but easily consumable way for the shell. Hence,
266             this:
267              
268             use Data::Crumbr;
269             print crumbr()->($data), "\n";
270              
271             will give you this:
272              
273             {"array"}[0]:"what"
274             {"array"}[1]:"ever"
275             {"array"}[2]{"empty"}:[]
276             {"array"}[2]{"inner"}:"part"
277             {"false"}:false
278             {"four"}:"4.0"
279             {"hash"}{"ar"}[0]:1
280             {"hash"}{"ar"}[1]:2
281             {"hash"}{"ar"}[2]:3
282             {"hash"}{"something"}:"funny \u263A \u263B"
283             {"hash"}{"with \u265C"}:{}
284             {"one"}:"1"
285             {"three"}:3.1
286             {"true"}:true
287             {"two"}:2
288              
289             Now it should be pretty easy for a shell program to get at the
290             data, e.g. with this C<sed> substitution:
291              
292             sed -ne 's/^{"hash"}{"ar"}\[2\]://p'
293              
294             =head2 Profiles
295              
296             If you don't like the default encoding, you can get a different
297             one by using a I<profile>. This is a set of configurations for
298             C<Data::Crumbr::Default>, which is a pretty generic class for
299             representing a wide class of possible record-oriented encodings.
300              
301             A C<Data::Crumbr::Default> encoder is defined in terms of the following
302             parameters:
303              
304             =over
305              
306             =item C<array_open>
307              
308             sequence to put when an array is opened
309              
310             =item C<array_close>
311              
312             sequence to put when an array is closed
313              
314             =item C<array_key_prefix>
315              
316             sequence to put before an array's index
317              
318             =item C<array_key_suffix>
319              
320             sequence to put after an array's index
321              
322             =item C<array_key_encoder>
323              
324             a reference to a function that encodes an array's index
325              
326             =item C<hash_open>
327              
328             sequence to put when a hash is opened
329              
330             =item C<hash_close>
331              
332             sequence to put when a hash is closed
333              
334             =item C<hash_key_prefix>
335              
336             sequence to put before a hash's key
337              
338             =item C<hash_key_suffix>
339              
340             sequence to put after a hash's key
341              
342             =item C<hash_key_encoder>
343              
344             a reference to a function that encodes a hash's key
345              
346             =item C<value_encoder>
347              
348             a reference to a function that encodes a leaf value
349              
350             =item C<keys_separator>
351              
352             sequence to separate the keys breadcrumb
353              
354             =item C<value_separator>
355              
356             sequence to separate the keys from the value
357              
358             =back
359              
360             By default, Data::Crumbr ships with the following profiles:
361              
362             =over
363              
364             =item B<< Default >>
365              
366             i.e. the profile you get by default, and what you saw in action in the
367             example above. It has the following settings:
368              
369             =over
370              
371             =item *
372              
373             no openers and closers:
374              
375             array_open => ''
376             array_close => ''
377             hash_open => ''
378             hash_close => ''
379              
380             =item *
381              
382             array keys are printed verbatim, surrounded by square brackets:
383              
384             array_key_prefix => '['
385             array_key_suffix => ']'
386             array_key_encoder => Data::Crumbr::Util::id_encoder
387              
388             =item *
389              
390             hash keys encoded as JSON strings, surrounded by curly brackets:
391              
392             hash_key_prefix => '{'
393             hash_key_suffix => '}'
394             hash_key_encoder => Data::Crumbr::Util::json_leaf_encoder
395              
396             =item *
397              
398             no separator between keys (because they already stand out very clearly),
399             but a colon to separate the sequence of keys from the value:
400              
401             keys_separator => ''
402             value_separator => ':'
403              
404             =item *
405              
406             leaf values encoded as JSON scalars:
407              
408             value_encoder => Data::Crumbr::Util::json_leaf_encoder
409              
410             =back
411              
412             This is quite verbose, but lets you specify very precisely what you are
413             looking for because the hash keys stand out clearly with respect to
414             array identifiers, i.e. there's no chance that you will mistake an array
415             index for a hash key (because they are embedded in different bracket
416             types).
417              
418             =item B<< JSON >>
419              
420             this profile always provides you compact JSON-compliant string
421             representations that contain only one single leaf value.
422              
423             It has the following characteristics:
424              
425             =over
426              
427             =item *
428              
429             openers and closers are what you would expect for JSON objects and
430             arrays:
431              
432             array_open => '['
433             array_close => ']'
434             hash_open => '{'
435             hash_close => '}'
436              
437             =item *
438              
439             there is only one non-empty suffix, i.e. the hash key suffix, so that
440             we can separate the hash key from the value with C<:> according to JSON:
441              
442             array_key_prefix => ''
443             array_key_suffix => ''
444             hash_key_prefix => ''
445             hash_key_suffix => ':'
446              
447             =item *
448              
449             array keys are not printed:
450              
451             array_key_encoder => sub { }
452              
453             =item *
454              
455             hash keys are JSON encoded:
456              
457             hash_key_encoder => Data::Crumbr::Util::json_encoder()
458              
459             =item *
460              
461             no separators are needed:
462              
463             keys_separator => ''
464             value_separator => ''
465              
466             =item *
467              
468             leaf values encoded as JSON scalars:
469              
470             value_encoder => Data::Crumbr::Util::json_leaf_encoder
471              
472             =back
473              
474             =item B<< URI >>
475              
476             this is the simplest of the profiles, and sacrifices the possibility to
477             distinguish between hash and array keys to the altar of simplicity.
478              
479             It has the following characteristics:
480              
481             =over
482              
483             =item *
484              
485             no openers, closers, prefixes or suffixes:
486              
487             array_open => ''
488             array_close => ''
489             array_key_prefix => ''
490             array_key_suffix => ''
491              
492             hash_open => ''
493             hash_close => ''
494             hash_key_prefix => ''
495             hash_key_suffix => ''
496              
497             =item *
498              
499             array keys are printed verbatim
500              
501             =item *
502              
503             hash keys are URI encoded
504              
505             hash_key_encoder => Data::Crumbr::Util::uri_encoder
506              
507             =item *
508              
509             keys are separated by a slash character C</> and values are separated by
510             a single space C< >:
511              
512             keys_separator => '/'
513             value_separator => ' '
514              
515             =item *
516              
517             leaf values encoded as JSON scalars:
518              
519             value_encoder => Data::Crumbr::Util::json_leaf_encoder
520              
521             =back
522              
523             =back
524              
525             =head1 INTERFACE
526              
527             There are two ways to use Data::Crumbr: a function C<crumbr>,
528             that is exported by default, and the object-oriented interface.
529              
530             =over
531              
532             =item B<< crumbr >>
533              
534             $subref = crumbr(%args); # OR
535             $subref = crumbr(\%args);
536              
537             get a I<crumbr> generator based on provided C<%args>.
538              
539             Returns a reference to a sub, which can then be called upon a data
540             structure in order to get the I<crumbed> version.
541              
542             The input arguments can be:
543              
544             =over
545              
546             =item C<< encoder >>
547              
548             details about the encoder, see L</Profiles> for the available key-value
549             pairs. In addition, you can also set the following:
550              
551             =over
552              
553             =item C<< output >>
554              
555             the output channel to use for sending encoded data. This can be:
556              
557             =over
558              
559             =item * I<filename>
560              
561             this will be opened in raw mode and used to send the output
562              
563             =item * I<filehandle>
564              
565             used directly
566              
567             =item * I<array reference>
568              
569             each output line will be pushed as a new element in the array
570              
571             =item * I<object reference>
572              
573             which is assumed to support the C<print()> method, that will be called
574             with each generated line
575              
576             =item * I<sub reference>
577              
578             which will be called with each generated line
579              
580             =back
581              
582             =back
583              
584             =item C<< profile >>
585              
586             the name of a profile to use as a base - see L</Profiles>. Settings in
587             the profile are always overridden by corresponding ones in the provided
588             encoder, if any.
589              
590             =back
591              
592             =item B<< encode >>
593              
594             $dc->encode($data_structure);
595              
596             generate the encoding for the provided C<$data_structure>. Output is
597             generated depending on how it is specified, see L</crumbr> above.
598              
599             =item B<< new >>
600              
601             my $dc = Data::Crumbr->new(encoder => \%args);
602              
603             create a new instance of C<Data::Crumbr>. Data provided for the
604             C<encoder> parameter (i.e. C<%args>) are those discussed in
605             L</Profiles>.
606              
607             The new instance can then be used to encode data using the C<encode>
608             method.
609              
610             =back
611              
612             =head1 AUTHOR
613              
614             Flavio Poletti <polettix@cpan.org>
615              
616             =head1 COPYRIGHT AND LICENSE
617              
618             Copyright (C) 2015 by Flavio Poletti <polettix@cpan.org>
619              
620             This module is free software. You can redistribute it and/or
621             modify it under the terms of the Artistic License 2.0.
622              
623             This program is distributed in the hope that it will be useful,
624             but without any warranty; without even the implied warranty of
625             merchantability or fitness for a particular purpose.
626              
627             =cut