File Coverage

blib/lib/Data/Selector.pm
Criterion Covered Total %
statement 172 172 100.0
branch 95 118 80.5
condition 39 50 78.0
subroutine 5 5 100.0
pod 2 2 100.0
total 313 347 90.2


line stmt bran cond sub pod time code
1             package Data::Selector;
2              
3 4     4   2007 use 5.10.1;
  4         20  
4 4     4   30 use strict;
  4         10  
  4         166  
5 4     4   30 use warnings FATAL => 'all';
  4         15  
  4         9619  
6              
7             =head1 NAME
8              
9             Data::Selector - data selection dsl parser and applicator
10              
11             =head1 VERSION
12              
13             1.02
14              
15             =cut
16              
17             our $VERSION = '1.02';
18              
19             =head1 SYNOPSIS
20              
21             my $data_tree = {
22             foo => {
23             bar => { baz1 => 1, baz22 => 2, baz32 => [ 'a', 'b', 'c', ], },
24             },
25             asdf => 'woohoo',
26             };
27             Data::Selector->apply_tree(
28             {
29             selector_tree => Data::Selector->parse_string(
30             {
31             named_selectors => { '$bla' => '[non-existent,asdf]', },
32             selector_string => '$bla,foo.bar.baz*2.1..-1',
33             # (same thing with all optional + chars added)
34             # named_selectors => { '$bla' => '[+non-existent,+asdf]', },
35             # selector_string => '$bla,+foo.+bar.+baz*2.+1..-1',
36             }
37             ),
38             data_tree => $data_tree,
39             }
40             );
41              
42             # $data_tree is now:
43             # {
44             # foo => { bar => { baz22 => 2, baz32 => [ 'b', 'c', ], }, },
45             # asdf => 'woohoo',
46             # }
47              
48             =head1 DESCRIPTION
49              
50             This module enables data selection via a terse dsl. The obvious use case is
51             data shaping though it could also be used to hint data requirements down the
52             stack.
53              
54             A selector string is transformed into a selector tree by parse_string(). Then
55             the apply_tree() method performs key (array subscripts and hash keys) inclusion,
56             and/or exclusion on a data tree using the selector tree. Note that arrays in
57             the data tree are trimmed of the slots that were removed.
58              
59             Note that parse_string() will throw some exceptions (in predicate form) but
60             there are probably many non-sensical selector strings that it won't throw on.
61             The apply_tree() method, on the other hand, does not throw any exceptions
62             because in the general case this is preferable. For example, some typical
63             "errors" might be missing (misspelled in the selector tree or non-existent in
64             the data tree) keys or indexing into an array with a string. Both cases may
65             legitimately happen when elements of a set are not the same shape. In the case
66             of an actual error the resulting data tree will likely reflect it.
67              
68             =head1 SELECTOR STRINGS
69              
70             Selector strings are a terse, robust way to express data selection. They are
71             sensitive to order of definition, are embeddable via square brackets, can be
72             constructed of lists of selector strings, and are therefore composable.
73              
74             A selector string consists of tokens separated by dot characters. Each dot
75             character denotes another level in the data tree. The selector strings may be a
76             single value or a list of values delimited by square brackets and separated by
77             commas.
78              
79             A leading hyphen character indicates exclusion.
80              
81             An optional leading plus character indicates inclusion. It is only required for
82             inclusion of values that start with a hyphen, like a negative array subscript,
83             or a plus character.
84              
85             Its important to note that positive array subscripts with a leading + character
86             are not supported. For instance, the selector string of "++2" will not
87             interpreted as "include array subscript 2". It could be used to include a hash
88             key of "+2" however. The same applies to "-+2". This inconsistency is the
89             result of a limitation in the implementation and may be changed in the future.
90              
91             Note that inclusion, in addition to specifying what is to be included, implies a
92             lower precedence exclusion of all other keys. In other words, if a particular
93             key is not specified for inclusion but there was an inclusion then it will be
94             excluded. For example, lets say the data tree is a hash with keys foo, bar, and
95             baz. A selector string of "foo" will include the foo key and exclude the bar
96             and baz keys. But a selector string of "foo,bar" will include the foo and bar
97             keys and exclude the baz key.
98              
99             Wildcarding is supported via the asterisk character.
100              
101             Negative array subscripts are supported but remember that they must be preceded
102             by a plus character to indicate inclusion (which must be urlencoded as %2B for
103             urls). For example, "-1" means "exclude key 1" where "+-1" means "include key
104             -1".
105              
106             Array subscript ranges are supported via the double dot sequence. These can be
107             tricky when used with negative array subscripts. For example, "-1..-1" means
108             exclude 1 to -1. But "+-2..-1" means include -2 to -1.
109              
110             Named selectors allow for pre-defined selectors to be interpolated into a
111             selector_string. They begin with a dollar character and otherwise can only
112             contain lower case alpha or underscore characters (a-z,_).
113              
114             =head2 EXAMPLES
115              
116             Lets say we have a date tree like so:
117              
118             $data_tree = {
119             count => 2,
120             items => [
121             {
122             body => 'b1',
123             links => [ 'l1', 'l2', 'l3', ],
124             rel_1_url => 'foo',
125             rel_1_id => 12,
126             rel_2_url => 'bar',
127             rel_2_id => 34,
128             },
129             {
130             body => 'b2',
131             links => [ 'l4', 'l5', ],
132             rel_1_url => 'up',
133             rel_1_id => 56,
134             rel_2_url => 'down',
135             rel_2_id => 78,
136             },
137             ],
138             total => 42,
139             }
140              
141             =over
142              
143             =item total only
144              
145             $selector_string = "total";
146              
147             $data_tree = {
148             total => 42,
149             }
150              
151             =item only rel urls in items
152              
153             $selector_string = "items.*.rel_*_url"
154              
155             $data_tree = {
156             items => [
157             {
158             rel_1_url => 'foo',
159             rel_2_url => 'bar',
160             },
161             {
162             rel_1_url => 'up',
163             rel_2_url => 'down',
164             },
165             ],
166             }
167              
168             =item count and last item with no body
169              
170             $selector_string = "count,items.+-1.-body"
171              
172             $data_tree = {
173             count => 2,
174             items => [
175             {
176             links => [ 'l4', 'l5', ],
177             rel_1_url => 'up',
178             rel_1_id => 56,
179             rel_2_url => 'down',
180             rel_2_id => 78,
181             },
182             ],
183             }
184              
185             =item last 2 links
186              
187             $selector_string = "items.*.links.+-2..-1"
188              
189             $data_tree = {
190             items => [
191             {
192             links => [ 'l2', 'l3', ],
193             },
194             {
195             links => [ 'l4', 'l5', ],
196             },
197             ],
198             }
199              
200             =back
201              
202             =head1 METHODS
203              
204             =cut
205              
206             =over
207              
208             =item parse_string
209              
210             Creates a selector tree from a selector string. A map of named selectors can
211             also be provided which will be interpolated into the selector string before it
212             is parsed.
213              
214             Required Args: selector_string
215             Optional Args: named_selectors
216              
217             =cut
218              
219             my $selector_string_pattern = qr/
220             (
221             [^\[\]\,]*+
222             (?:
223             \[
224             (?:
225             [^\[\]]++
226             |
227             (?1)
228             )*
229             \]
230             )?+
231             )
232             ,?+
233             /x;
234              
235             sub parse_string {
236 98     98 1 158493 my ( $class, $args, ) = @_;
237              
238             die "selector_string required\n"
239             unless defined $args->{selector_string}
240 98 50 33     776 && length $args->{selector_string};
241              
242 98 100       437 if ( index( $args->{selector_string}, '$', ) != -1 ) {
243             $args->{selector_string} =~
244 5         46 s/(?:(?<=^)|(?<=,))(\$[a-z_]*)(?:(,)(?!$)|$)/
245             defined $args->{named_selectors}->{$1}
246             && length $args->{named_selectors}->{$1}
247 6 100 33     87 ? $args->{named_selectors}->{$1} . ( $2 ? $2 : '' )
    50          
248             : die "contains invalid named selector\n";
249             /ego;
250             }
251              
252 98         236 my $selector_tree = {};
253 98         381 my @queue = ( [ $args->{selector_string}, $selector_tree, [], ], );
254              
255             die "must be a string that matches /[^.\[\],]/\n"
256             if length $args->{selector_string}
257 98 50 33     1014 && $args->{selector_string} !~ /[^.\[\],]/o;
258 98 50       455 die "must not contain ']['\n" if index( $queue[0]->[0], '][' ) != -1;
259 98 50       326 die "must not contain '[]'\n" if index( $queue[0]->[0], '[]' ) != -1;
260 98 50       317 die "must not contain '[,'\n" if index( $queue[0]->[0], '[,' ) != -1;
261 98 50       331 die "must not contain ',]'\n" if index( $queue[0]->[0], ',]' ) != -1;
262 98 50       319 die "must not contain '[.'\n" if index( $queue[0]->[0], '[.' ) != -1;
263 98 50       320 die "must not contain '.]'\n" if index( $queue[0]->[0], '.]' ) != -1;
264 98 50       361 die "must not begin with','\n" if substr( $queue[0]->[0], 0, 1 ) eq ',';
265 98 50       355 die "must not end with','\n" if substr( $queue[0]->[0], -1, ) eq ',';
266 98 50       307 die "must not begin with'.'\n" if substr( $queue[0]->[0], 0, 1 ) eq '.';
267 98 50       310 die "must not end with'.'\n" if substr( $queue[0]->[0], -1, ) eq '.';
268 98 50       431 die "must have balanced [] chars\n"
269             unless $queue[0]->[0] =~ tr/[/[/ == $queue[0]->[0] =~ tr/]/]/;
270             die "must not match /[^.,]\[/\n"
271 98 50       382 if $args->{selector_string} =~ /[^.,]\[/o;
272             die "must not match /\][^.,\]]/\n"
273 98 50       343 if $args->{selector_string} =~ /\][^.,\]]/o;
274              
275 98         194 my $order;
276 98         292 while (@queue) {
277 377         738 my $token = shift @queue;
278 377         3463 my @groups = $token->[0] =~ /$selector_string_pattern/go;
279 377         842 pop @groups;
280              
281 377         696 my ( $shift_a_suffix, $prev_is_suffix, );
282 377         734 for my $string (@groups) {
283 525         882 my $sub_tree = $token->[1];
284              
285 525         1158 my $is_suffix = substr( $string, 0, 1, ) eq '.';
286 525 100       1078 if ($is_suffix) {
287 30         43 push( @{ $queue[-1]->[2] }, substr( $string, 1, ), );
  30         75  
288 30         52 $string = '';
289             }
290             else {
291 495         938 my $opening_bracket_pos = index( $string, '[' );
292              
293 495         832 my $dot_in_prefix_pos = index( $string, '.' );
294 495 100 100     1561 $dot_in_prefix_pos = -1
295             if $opening_bracket_pos > -1
296             && $dot_in_prefix_pos > $opening_bracket_pos;
297              
298 495 100       1148 if ( $dot_in_prefix_pos > -1 ) {
299 188         376 my $is_range =
300             substr( $string, $dot_in_prefix_pos + 1, 1 ) eq '.';
301 188 100       456 if ($is_range) {
302 18         46 $dot_in_prefix_pos =
303             index( $string, '.', $dot_in_prefix_pos + 2, );
304 18 50 33     68 $dot_in_prefix_pos = -1
305             if $opening_bracket_pos > -1
306             && $dot_in_prefix_pos > $opening_bracket_pos;
307             }
308             }
309              
310 495         861 my $bare = '';
311 495 100       1154 if ( $dot_in_prefix_pos >= 1 ) {
    100          
312 174         403 $bare = substr( $string, 0, $dot_in_prefix_pos + 1, '' );
313 174         334 chop $bare;
314 174 100       461 $string =
315             substr( $string,
316             $opening_bracket_pos - $dot_in_prefix_pos, -1 )
317             if $dot_in_prefix_pos + 1 == $opening_bracket_pos;
318             }
319             elsif ( $opening_bracket_pos == 0 ) {
320 50         119 $string = substr( $string, $opening_bracket_pos + 1, -1 );
321             }
322             else {
323 271         429 $bare = $string;
324 271         447 $string = '';
325             }
326              
327 495 100       1147 if ( length $bare ) {
328 445         832 my $first_char = substr( $bare, 0, 1, );
329 445 100 100     2076 $bare = "+$bare"
330             if $first_char ne "+" && $first_char ne "-";
331 445 100       1191 my $bare_inverse =
332             ( $first_char eq "-" ? "+" : "-" ) . substr( $bare, 1, );
333 445         797 delete $sub_tree->{$bare_inverse};
334 445 100       884 if ( $sub_tree->{$bare} ) {
335 193         301 $sub_tree->{$bare}->{_order_} = ++$order;
336             }
337 252         861 else { $sub_tree->{$bare} = { _order_ => ++$order, }; }
338 445         867 $sub_tree = $sub_tree->{$bare};
339             }
340              
341 495 100 100     1896 if ( !length $string && !$prev_is_suffix && @{ $token->[2] } ) {
  270   100     957  
342 55         87 $string = $token->[2]->[0];
343 55         79 $shift_a_suffix++;
344             }
345             }
346              
347 525 100 66     2204 push(
    100          
348             @queue,
349             [
350             $string, $sub_tree,
351             ( !$is_suffix && !$prev_is_suffix ? $token->[2] : [] ),
352             ],
353             ) if length $string;
354              
355 525         1105 $prev_is_suffix = $is_suffix;
356             }
357 377 100       1417 shift @{ $token->[2] } if $shift_a_suffix;
  30         90  
358             }
359              
360 98         416 return $selector_tree;
361             }
362              
363             =item apply_tree
364              
365             Include or exclude parts of a data tree as specified by a selector tree. Note
366             that arrays that have elements excluded, or removed, will be trimmed.
367              
368             Required Args: selector_tree, data_tree
369              
370             =back
371              
372             =cut
373              
374             sub apply_tree {
375 67     67 1 4870 my ( $class, $args, ) = @_;
376              
377 67 50       259 die "selector_tree required" unless $args->{selector_tree};
378 67 50       226 die "data_tree required" unless $args->{data_tree};
379              
380 67         240 my @queue = ( [ $args->{selector_tree}, $args->{data_tree}, ], );
381 67         148 my %selector_trees_keys;
382 67         224 while (@queue) {
383 134         279 my ( $selector_tree, $data_tree, ) = @{ shift @queue };
  134         374  
384              
385             # Compile the selector tree keys and cache them sans any data tree
386             # dependencies. At this point each entry will contain:
387             #
388             # [ $selector_tree_key, $selector_tree_key_base, $pattern,
389             # $array_range, $original_selector_tree_key, $original_selector_tree ]
390             #
391             # Note that the array range and original selector tree key slots
392             # will always be undef at this point because they depend on the
393             # data tree. The original selector tree key is used to store a
394             # non-translated negative array subscript. During data tree based
395             # compilation below the two slots may be changed, if applicable.
396             #
397             # The ref to the original selector tree prevents its untimely
398             # destruction which may lead to its refaddr being recycled. That's
399             # undesirable since we use said refaddr as the key for this cache. I
400             # don't know how to reliably test for this so extra care is appropriate.
401             $selector_trees_keys{$selector_tree} ||= [
402             map {
403 153         416 my $selector_tree_key_base = substr( $_, 1, );
404             [
405             $_,
406             $selector_tree_key_base,
407             index( $selector_tree_key_base, '*', ) != -1
408 153 100       1001 ? do {
409 25         62 my $pattern = quotemeta $selector_tree_key_base;
410 25         105 $pattern =~ s/\\\*/.*/go;
411 25         145 $pattern;
412             }
413             : undef,
414             undef,
415             undef,
416             $selector_tree,
417             ];
418             }
419             sort {
420             $selector_tree->{$a}->{_order_}
421 22         99 <=> $selector_tree->{$b}->{_order_};
422 220         739 } grep { $_ ne '_order_'; }
423 134   50     902 keys %{$selector_tree}
  134         420  
424             ];
425              
426 134         393 my $data_tree_type = ref $data_tree;
427             my @data_tree_keys =
428             $data_tree_type eq 'HASH'
429 85         398 ? keys %{$data_tree}
430 134 100       361 : eval { 0 .. $#{$data_tree} };
  49         94  
  49         186  
431              
432             # Take a copy of the selector tree keys and do any data tree based
433             # compilation.
434 134         294 my @selector_tree_keys = @{ $selector_trees_keys{$selector_tree} };
  134         406  
435 134         259 my $has_includes;
436 134         344 for (@selector_tree_keys) {
437 153 100 100     790 $has_includes = 1
438             if !$has_includes && index( $_->[0], '+', ) == 0;
439 153 100 100     777 if ( index( $_->[0], '+-', ) == 0 || index( $_->[0], '--', ) == 0 )
440             {
441 20 100       152 if ( $_->[0] =~ /^(\+|-)(-\d+)$/o ) {
442 14         39 $_->[4] = $_->[0];
443             $_->[0] = $2 < 0 && $2 >= -@{$data_tree}
444 14 100 100     87 ? $_->[0] = $1 . ( @{$data_tree} + $2 )
  10         50  
445             : $1 . substr( $2, 1, );
446 14         48 $_->[1] = substr( $_->[0], 1, );
447             }
448             }
449              
450 153 100 100     687 if ( $data_tree_type eq 'ARRAY' && index( $_->[0], '..', ) != -1 ) {
451 14         100 my @array_range = $_->[0] =~ /^(?:\+|-)(-?\d+)\.\.(-?\d+)$/o;
452 14 100       35 map { $_ = @{$data_tree} + $_ if $_ < 0; } @array_range;
  28         92  
  14         53  
453 14         41 $_->[3] = \@array_range;
454             }
455             }
456              
457             # Match up data tree keys with selector tree keys.
458 134         309 my %matching_selector_keys_by_data_key;
459 134         519 my $data_tree_keys_string = join( "\n", @data_tree_keys, ) . "\n";
460 134         313 for (@selector_tree_keys) {
461 153 50       628 my $selector_tree_key_pattern =
    100          
    100          
462             defined $_->[3] ? join( '|', $_->[3]->[0] .. $_->[3]->[1], )
463             : defined $_->[2] ? $_->[2]
464             : defined $_->[1] ? quotemeta $_->[1]
465             : undef;
466 153         2790 my @matches =
467             $data_tree_keys_string =~ /($selector_tree_key_pattern)\n/g;
468 153         503 for my $data_tree_key (@matches) {
469             push(
470 212   66     349 @{ $matching_selector_keys_by_data_key{$data_tree_key} },
  212         1426  
471             $_->[4] // $_->[0],
472             );
473             }
474             }
475              
476             # Execute on matches. Exclusions are done immediately which includes
477             # marking arrays for later trimming. Inclusions result in new queue
478             # entries for any sub trees as well as matched inclusion and deferred
479             # exclusion bookkeeping.
480 134         300 my ( %arrays_to_be_trimmed, %deferred_excludes, %matched_includes, );
481 134         383 for my $data_tree_key ( keys %matching_selector_keys_by_data_key ) {
482             my $matching_selector_keys =
483 202         403 $matching_selector_keys_by_data_key{$data_tree_key};
484 202 100       548 if ( index( $matching_selector_keys->[-1], '-', ) == 0 ) {
485 48 100       128 if ( $data_tree_type eq 'HASH' ) {
486 33         94 delete $data_tree->{$data_tree_key};
487             }
488             else {
489             my $ok =
490 15         38 eval { $data_tree->[$data_tree_key] = '_to_be_trimmed_'; };
  15         67  
491 15 50       91 $arrays_to_be_trimmed{$data_tree} = $data_tree if $ok;
492             }
493             }
494             else {
495 154         482 $matched_includes{$data_tree}->{$data_tree_key}++;
496 154         346 delete $deferred_excludes{$data_tree}->{$data_tree_key};
497              
498             my $matched_includes_for_data_tree =
499 154         336 $matched_includes{$data_tree};
500             my @data_keys_to_be_deferred =
501 154         304 grep { !$matched_includes_for_data_tree->{$_}; }
  681         1605  
502             @data_tree_keys;
503 154         380 @{ $deferred_excludes{$data_tree} }{@data_keys_to_be_deferred}
  154         521  
504             = ($data_tree) x @data_keys_to_be_deferred;
505              
506             my $data_sub_tree =
507             $data_tree_type eq 'HASH'
508             ? $data_tree->{$data_tree_key}
509 154 100       464 : eval { $data_tree->[$data_tree_key] };
  50         167  
510              
511             # Using {%{}} to catch non-existance with fatal error. Can be
512             # taken out for space and/or speed savings at cost of "safety".
513             my $selector_sub_tree =
514 154         435 @{$matching_selector_keys} == 1
515 149         547 ? { %{ $selector_tree->{ $matching_selector_keys->[0] } } }
516             : {
517 10         18 map { %{ $selector_tree->{$_} }; }
  10         39  
518 154 100       256 @{$matching_selector_keys},
  5         11  
519             };
520              
521             push( @queue, [ $selector_sub_tree, $data_sub_tree, ] )
522 170         826 if ref $data_sub_tree && grep { $_ ne '_order_'; }
523 154 100 100     610 keys %{$selector_sub_tree};
  97         271  
524             }
525             }
526              
527             # Add deferred exclusions for all data keys if there were inclusions but
528             # none matched.
529 134 100 100     674 if ( $has_includes && !%matched_includes ) {
530             $deferred_excludes{$data_tree}->{$_} = $data_tree
531 9         78 for @data_tree_keys;
532             }
533              
534             # Execute deferred exclusions.
535 134         339 for my $data_tree_string ( keys %deferred_excludes ) {
536             my @data_tree_keys =
537 109         193 keys %{ $deferred_excludes{$data_tree_string} };
  109         415  
538 109 100       346 if (@data_tree_keys) {
539             my $data_tree =
540 92         238 $deferred_excludes{$data_tree_string}->{ $data_tree_keys[0] };
541 92         186 my $data_tree_type = ref $data_tree;
542 92 50       254 next unless $data_tree_type;
543              
544 92 100       236 if ( $data_tree_type eq 'HASH' ) {
545 66         130 delete @{$data_tree}{@data_tree_keys};
  66         373  
546             }
547             else {
548 26         57 my $ok = eval {
549 26         66 @{$data_tree}[@data_tree_keys] =
  26         139  
550             ('_to_be_trimmed_') x @data_tree_keys;
551             };
552 26 50       135 $arrays_to_be_trimmed{$data_tree} = $data_tree if $ok;
553             }
554             }
555             }
556              
557             # Trim arrays of slots that fell victim to exclusion.
558 134         737 for my $array ( values %arrays_to_be_trimmed ) {
559 37         310 @{$array} =
560 37 100       85 map { $_ eq '_to_be_trimmed_' ? () : $_; } @{$array};
  108         331  
  37         101  
561             }
562             }
563              
564 67         421 return;
565             }
566              
567             =head1 AUTHOR
568              
569             Justin DeVuyst, C
570              
571             =head1 COPYRIGHT AND LICENSE
572              
573             Copyright 2015 by Justin DeVuyst.
574              
575             This library is free software, you can redistribute it and/or modify it under
576             the same terms as Perl itself.
577              
578             =cut
579              
580             1;