File Coverage

blib/lib/Data/Selector.pm
Criterion Covered Total %
statement 171 171 100.0
branch 95 118 80.5
condition 39 50 78.0
subroutine 5 5 100.0
pod 2 2 100.0
total 312 346 90.1


line stmt bran cond sub pod time code
1             package Data::Selector;
2              
3 4     4   1815 use 5.10.1;
  4         10  
4 4     4   17 use strict;
  4         4  
  4         101  
5 4     4   15 use warnings FATAL => 'all';
  4         8  
  4         8372  
6              
7             =head1 NAME
8              
9             Data::Selector - data selection dsl parser and applicator
10              
11             =head1 VERSION
12              
13             1.01
14              
15             =cut
16              
17             our $VERSION = '1.01';
18              
19             =head1 SYNOPSIS
20              
21             my $data_tree = {
22             foo => {
23             bar => { baz1 => 1, baz22 => 2, baz32 => [ 'a', 'b', 'c', ], },
24             },
25             asdf => 'woohoo',
26             };
27             Data::Selector->apply_tree(
28             {
29             selector_tree => Data::Selector->parse_string(
30             {
31             named_selectors => { '$bla' => '[non-existent,asdf]', },
32             selector_string => '$bla,foo.bar.baz*2.1..-1',
33             # (same thing with all optional + chars added)
34             # named_selectors => { '$bla' => '[+non-existent,+asdf]', },
35             # selector_string => '$bla,+foo.+bar.+baz*2.+1..-1',
36             }
37             ),
38             data_tree => $data_tree,
39             }
40             );
41              
42             # $data_tree is now:
43             # {
44             # foo => { bar => { baz22 => 2, baz32 => [ 'b', 'c', ], }, },
45             # asdf => 'woohoo',
46             # }
47              
48             =head1 DESCRIPTION
49              
50             This module enables data selection via a terse dsl. The obvious use case is
51             data shaping though it could also be used to hint data requirements down the
52             stack.
53              
54             A selector string is transformed into a selector tree by parse_string(). Then
55             the apply_tree() method performs key (array subscripts and hash keys) inclusion,
56             and/or exclusion on a data tree using the selector tree. Note that arrays in
57             the data tree are trimmed of the slots that were removed.
58              
59             Note that parse_string() will throw some exceptions (in predicate form) but
60             there are probably many non-sensical selector strings that it won't throw on.
61             The apply_tree() method, on the other hand, does not throw any exceptions
62             because in the general case this is preferable. For example, some typical
63             "errors" might be missing (misspelled in the selector tree or non-existent in
64             the data tree) keys or indexing into an array with a string. Both cases may
65             legitimately happen when elements of a set are not the same shape. In the case
66             of an actual error the resulting data tree will likely reflect it.
67              
68             =head1 SELECTOR STRINGS
69              
70             Selector strings are a terse, robust way to express data selection. They are
71             sensitive to order of definition, are embeddable via square brackets, can be
72             constructed of lists of selector strings, and are therefore composable.
73              
74             A selector string consists of tokens separated by dot characters. Each dot
75             character denotes another level in the data tree. The selector strings may be a
76             single value or a list of values delimited by square brackets and separated by
77             commas.
78              
79             A leading hyphen character indicates exclusion.
80              
81             An optional leading plus character indicates inclusion. It is only required for
82             inclusion of values that start with a hyphen, like a negative array subscript,
83             or a plus character.
84              
85             Its important to note that positive array subscripts with a leading + character
86             are not supported. For instance, the selector string of "++2" will not
87             interpreted as "include array subscript 2". It could be used to include a hash
88             key of "+2" however. The same applies to "-+2". This inconsistency is the
89             result of a limitation in the implementation and may be changed in the future.
90              
91             Note that inclusion, in addition to specifying what is to be included, implies a
92             lower precedence exclusion of all other keys. In other words, if a particular
93             key is not specified for inclusion but there was an inclusion then it will be
94             excluded. For example, lets say the data tree is a hash with keys foo, bar, and
95             baz. A selector string of "foo" will include the foo key and exclude the bar
96             and baz keys. But a selector string of "foo,bar" will include the foo and bar
97             keys and exclude the baz key.
98              
99             Wildcarding is supported via the asterisk character.
100              
101             Negative array subscripts are supported but remember that they must be preceded
102             by a plus character to indicate inclusion (which must be urlencoded as %2B for
103             urls). For example, "-1" means "exclude key 1" where "+-1" means "include key
104             -1".
105              
106             Array subscript ranges are supported via the double dot sequence. These can be
107             tricky when used with negative array subscripts. For example, "-1..-1" means
108             exclude 1 to -1. But "+-2..-1" means include -2 to -1.
109              
110             Named selectors allow for pre-defined selectors to be interpolated into a
111             selector_string. They begin with a dollar character and otherwise can only
112             contain lower case alpha or underscore characters (a-z,_).
113              
114             =head2 EXAMPLES
115              
116             Lets say we have a date tree like so:
117              
118             $data_tree = {
119             count => 2,
120             items => [
121             {
122             body => 'b1',
123             links => [ 'l1', 'l2', 'l3', ],
124             rel_1_url => 'foo',
125             rel_1_id => 12,
126             rel_2_url => 'bar',
127             rel_2_id => 34,
128             },
129             {
130             body => 'b2',
131             links => [ 'l4', 'l5', ],
132             rel_1_url => 'up',
133             rel_1_id => 56,
134             rel_2_url => 'down',
135             rel_2_id => 78,
136             },
137             ],
138             total => 42,
139             }
140              
141             =over
142              
143             =item total only
144              
145             $selector_string = "total";
146              
147             $data_tree = {
148             total => 42,
149             }
150              
151             =item only rel urls in items
152              
153             $selector_string = "items.*.rel_*_url"
154              
155             $data_tree = {
156             items => [
157             {
158             rel_1_url => 'foo',
159             rel_2_url => 'bar',
160             },
161             {
162             rel_1_url => 'up',
163             rel_2_url => 'down',
164             },
165             ],
166             }
167              
168             =item count and last item with no body
169              
170             $selector_string = "count,items.+-1.-body"
171              
172             $data_tree = {
173             count => 2,
174             items => [
175             {
176             links => [ 'l4', 'l5', ],
177             rel_1_url => 'up',
178             rel_1_id => 56,
179             rel_2_url => 'down',
180             rel_2_id => 78,
181             },
182             ],
183             }
184              
185             =item last 2 links
186              
187             $selector_string = "items.*.links.+-2..-1"
188              
189             $data_tree = {
190             items => [
191             {
192             links => [ 'l2', 'l3', ],
193             },
194             {
195             links => [ 'l4', 'l5', ],
196             },
197             ],
198             }
199              
200             =back
201              
202             =head1 METHODS
203              
204             =cut
205              
206             =over
207              
208             =item parse_string
209              
210             Creates a selector tree from a selector string. A map of named selectors can
211             also be provided which will be interpolated into the selector string before it
212             is parsed.
213              
214             Required Args: selector_string
215             Optional Args: named_selectors
216              
217             =cut
218              
219             my $selector_string_pattern = qr/
220             (
221             [^\[\]\,]*+
222             (?:
223             \[
224             (?:
225             [^\[\]]++
226             |
227             (?1)
228             )*
229             \]
230             )?+
231             )
232             ,?+
233             /x;
234              
235             sub parse_string {
236 97     97 1 70881 my ( $class, $args, ) = @_;
237              
238             die "selector_string required\n"
239             unless defined $args->{selector_string}
240 97 50 33     448 && length $args->{selector_string};
241              
242 97 100       226 if ( index( $args->{selector_string}, '$', ) != -1 ) {
243             $args->{selector_string} =~
244 5         31 s/(?:(?<=^)|(?<=,))(\$[a-z_]*)(?:(,)(?!$)|$)/
245             defined $args->{named_selectors}->{$1}
246             && length $args->{named_selectors}->{$1}
247 6 100 33     53 ? $args->{named_selectors}->{$1} . ( $2 ? $2 : '' )
    50          
248             : die "contains invalid named selector\n";
249             /ego;
250             }
251              
252 97         92 my $selector_tree = {};
253 97         217 my @queue = ( [ $args->{selector_string}, $selector_tree, [], ], );
254              
255             die "must be a string that matches /[^.\[\],]/\n"
256             if length $args->{selector_string}
257 97 50 33     565 && $args->{selector_string} !~ /[^.\[\],]/o;
258 97 50       193 die "must not contain ']['\n" if index( $queue[0]->[0], '][' ) != -1;
259 97 50       144 die "must not contain '[]'\n" if index( $queue[0]->[0], '[]' ) != -1;
260 97 50       168 die "must not contain '[,'\n" if index( $queue[0]->[0], '[,' ) != -1;
261 97 50       146 die "must not contain ',]'\n" if index( $queue[0]->[0], ',]' ) != -1;
262 97 50       163 die "must not contain '[.'\n" if index( $queue[0]->[0], '[.' ) != -1;
263 97 50       144 die "must not contain '.]'\n" if index( $queue[0]->[0], '.]' ) != -1;
264 97 50       203 die "must not begin with','\n" if substr( $queue[0]->[0], 0, 1 ) eq ',';
265 97 50       180 die "must not end with','\n" if substr( $queue[0]->[0], -1, ) eq ',';
266 97 50       145 die "must not begin with'.'\n" if substr( $queue[0]->[0], 0, 1 ) eq '.';
267 97 50       145 die "must not end with'.'\n" if substr( $queue[0]->[0], -1, ) eq '.';
268 97 50       221 die "must have balanced [] chars\n"
269             unless $queue[0]->[0] =~ tr/[/[/ == $queue[0]->[0] =~ tr/]/]/;
270             die "must not match /[^.,]\[/\n"
271 97 50       199 if $args->{selector_string} =~ /[^.,]\[/o;
272             die "must not match /\][^.,\]]/\n"
273 97 50       167 if $args->{selector_string} =~ /\][^.,\]]/o;
274              
275 97         82 my $order;
276 97         149 while (@queue) {
277 375         310 my $token = shift @queue;
278 375         2479 my @groups = $token->[0] =~ /$selector_string_pattern/go;
279 375         329 pop @groups;
280              
281 375         274 my ( $shift_a_suffix, $prev_is_suffix, );
282 375         365 for my $string (@groups) {
283 523         416 my $sub_tree = $token->[1];
284              
285 523         526 my $is_suffix = substr( $string, 0, 1, ) eq '.';
286 523 100       534 if ($is_suffix) {
287 30         20 push( @{ $queue[-1]->[2] }, substr( $string, 1, ), );
  30         54  
288 30         27 $string = '';
289             }
290             else {
291 493         445 my $opening_bracket_pos = index( $string, '[' );
292              
293 493         400 my $dot_in_prefix_pos = index( $string, '.' );
294 493 100 100     1007 $dot_in_prefix_pos = -1
295             if $opening_bracket_pos > -1
296             && $dot_in_prefix_pos > $opening_bracket_pos;
297              
298 493 100       607 if ( $dot_in_prefix_pos > -1 ) {
299 187         209 my $is_range =
300             substr( $string, $dot_in_prefix_pos + 1, 1 ) eq '.';
301 187 100       269 if ($is_range) {
302 18         21 $dot_in_prefix_pos =
303             index( $string, '.', $dot_in_prefix_pos + 2, );
304 18 50 33     39 $dot_in_prefix_pos = -1
305             if $opening_bracket_pos > -1
306             && $dot_in_prefix_pos > $opening_bracket_pos;
307             }
308             }
309              
310 493         382 my $bare = '';
311 493 100       644 if ( $dot_in_prefix_pos >= 1 ) {
    100          
312 173         215 $bare = substr( $string, 0, $dot_in_prefix_pos + 1, '' );
313 173         136 chop $bare;
314 173 100       293 $string =
315             substr( $string,
316             $opening_bracket_pos - $dot_in_prefix_pos, -1 )
317             if $dot_in_prefix_pos + 1 == $opening_bracket_pos;
318             }
319             elsif ( $opening_bracket_pos == 0 ) {
320 50         56 $string = substr( $string, $opening_bracket_pos + 1, -1 );
321             }
322             else {
323 270         201 $bare = $string;
324 270         224 $string = '';
325             }
326              
327 493 100       601 if ( length $bare ) {
328 443         360 my $first_char = substr( $bare, 0, 1, );
329 443 100 100     1334 $bare = "+$bare"
330             if $first_char ne "+" && $first_char ne "-";
331 443 100       662 my $bare_inverse =
332             ( $first_char eq "-" ? "+" : "-" ) . substr( $bare, 1, );
333 443         344 delete $sub_tree->{$bare_inverse};
334 443 100       515 if ( $sub_tree->{$bare} ) {
335 193         182 $sub_tree->{$bare}->{_order_} = ++$order;
336             }
337 250         507 else { $sub_tree->{$bare} = { _order_ => ++$order, }; }
338 443         447 $sub_tree = $sub_tree->{$bare};
339             }
340              
341 493 100 100     1313 if ( !length $string && !$prev_is_suffix && @{ $token->[2] } ) {
  269   100     672  
342 55         63 $string = $token->[2]->[0];
343 55         67 $shift_a_suffix++;
344             }
345             }
346              
347 523 100 66     1423 push(
    100          
348             @queue,
349             [
350             $string, $sub_tree,
351             ( !$is_suffix && !$prev_is_suffix ? $token->[2] : [] ),
352             ],
353             ) if length $string;
354              
355 523         601 $prev_is_suffix = $is_suffix;
356             }
357 375 100       908 shift @{ $token->[2] } if $shift_a_suffix;
  30         68  
358             }
359              
360 97         212 return $selector_tree;
361             }
362              
363             =item apply_tree
364              
365             Include or exclude parts of a data tree as specified by a selector tree. Note
366             that arrays that have elements excluded, or removed, will be trimmed.
367              
368             Required Args: selector_tree, data_tree
369              
370             =back
371              
372             =cut
373              
374             sub apply_tree {
375 66     66 1 2573 my ( $class, $args, ) = @_;
376              
377 66 50       109 die "selector_tree required" unless $args->{selector_tree};
378 66 50       98 die "data_tree required" unless $args->{data_tree};
379              
380 66         112 my @queue = ( [ $args->{selector_tree}, $args->{data_tree}, ], );
381 66         56 my %selector_trees_keys;
382 66         101 while (@queue) {
383 130         106 my ( $selector_tree, $data_tree, ) = @{ shift @queue };
  130         166  
384              
385             # Compile the selector tree keys and cache them sans any data tree
386             # dependencies. At this point each entry will contain:
387             #
388             # [ $selector_tree_key, $selector_tree_key_base, $pattern,
389             # $array_range, $original_selector_tree_key, $original_selector_tree ]
390             #
391             # Note that the array range and original selector tree key slots
392             # will always be undef at this point because they depend on the
393             # data tree. The original selector tree key is used to store a
394             # non-translated negative array subscript. During data tree based
395             # compilation below the two slots may be changed, if applicable.
396             #
397             # The ref to the original selector tree prevents its untimely
398             # destruction which may lead to its refaddr being recycled. That's
399             # undesirable since we use said refaddr as the key for this cache. I
400             # don't know how to reliably test for this so extra care is appropriate.
401             $selector_trees_keys{$selector_tree} ||= [
402             map {
403 149         180 my $selector_tree_key_base = substr( $_, 1, );
404             [
405             $_,
406             $selector_tree_key_base,
407             index( $selector_tree_key_base, '*', ) != -1
408 149 100       559 ? do {
409 21         24 my $pattern = quotemeta $selector_tree_key_base;
410 21         54 $pattern =~ s/\\\*/.*/go;
411 21         77 $pattern;
412             }
413             : undef,
414             undef,
415             undef,
416             $selector_tree,
417             ];
418             }
419             sort {
420             $selector_tree->{$a}->{_order_}
421 21         43 <=> $selector_tree->{$b}->{_order_};
422 213         383 } grep { $_ ne '_order_'; }
423 130   50     361 keys %{$selector_tree}
  130         239  
424             ];
425              
426 130         169 my $data_tree_type = ref $data_tree;
427             my @data_tree_keys =
428             $data_tree_type eq 'HASH'
429 83         221 ? keys %{$data_tree}
430 130 100       188 : 0 .. $#{$data_tree};
  47         79  
431              
432             # Take a copy of the selector tree keys and do any data tree based
433             # compilation.
434 130         114 my @selector_tree_keys = @{ $selector_trees_keys{$selector_tree} };
  130         215  
435 130         103 my $has_includes;
436 130         163 for (@selector_tree_keys) {
437 149 100 100     500 $has_includes = 1
438             if !$has_includes && index( $_->[0], '+', ) == 0;
439 149 100 100     469 if ( index( $_->[0], '+-', ) == 0 || index( $_->[0], '--', ) == 0 )
440             {
441 20 100       97 if ( $_->[0] =~ /^(\+|-)(-\d+)$/o ) {
442 14         18 $_->[4] = $_->[0];
443             $_->[0] = $2 < 0 && $2 >= -@{$data_tree}
444 14 100 100     55 ? $_->[0] = $1 . ( @{$data_tree} + $2 )
  10         26  
445             : $1 . substr( $2, 1, );
446 14         24 $_->[1] = substr( $_->[0], 1, );
447             }
448             }
449              
450 149 100 100     413 if ( $data_tree_type eq 'ARRAY' && index( $_->[0], '..', ) != -1 ) {
451 14         73 my @array_range = $_->[0] =~ /^(?:\+|-)(-?\d+)\.\.(-?\d+)$/o;
452 14 100       16 map { $_ = @{$data_tree} + $_ if $_ < 0; } @array_range;
  28         65  
  14         37  
453 14         24 $_->[3] = \@array_range;
454             }
455             }
456              
457             # Match up data tree keys with selector tree keys.
458 130         93 my %matching_selector_keys_by_data_key;
459 130         287 my $data_tree_keys_string = join( "\n", @data_tree_keys, ) . "\n";
460 130         165 for (@selector_tree_keys) {
461 149 50       399 my $selector_tree_key_pattern =
    100          
    100          
462             defined $_->[3] ? join( '|', $_->[3]->[0] .. $_->[3]->[1], )
463             : defined $_->[2] ? $_->[2]
464             : defined $_->[1] ? quotemeta $_->[1]
465             : undef;
466 149         2103 my @matches =
467             $data_tree_keys_string =~ /($selector_tree_key_pattern)\n/g;
468 149         261 for my $data_tree_key (@matches) {
469             push(
470 193   66     140 @{ $matching_selector_keys_by_data_key{$data_tree_key} },
  193         727  
471             $_->[4] // $_->[0],
472             );
473             }
474             }
475              
476             # Execute on matches. Exclusions are done immediately which includes
477             # marking arrays for later trimming. Inclusions result in new queue
478             # entries for any sub trees as well as matched inclusion and deferred
479             # exclusion bookkeeping.
480 130         104 my ( %arrays_to_be_trimmed, %deferred_excludes, %matched_includes, );
481 130         182 for my $data_tree_key ( keys %matching_selector_keys_by_data_key ) {
482             my $matching_selector_keys =
483 183         170 $matching_selector_keys_by_data_key{$data_tree_key};
484 183 100       298 if ( index( $matching_selector_keys->[-1], '-', ) == 0 ) {
485 45 100       57 if ( $data_tree_type eq 'HASH' ) {
486 30         52 delete $data_tree->{$data_tree_key};
487             }
488             else {
489             my $ok =
490 15         17 eval { $data_tree->[$data_tree_key] = '_to_be_trimmed_'; };
  15         34  
491 15 50       52 $arrays_to_be_trimmed{$data_tree} = $data_tree if $ok;
492             }
493             }
494             else {
495 138         262 $matched_includes{$data_tree}->{$data_tree_key}++;
496 138         158 delete $deferred_excludes{$data_tree}->{$data_tree_key};
497              
498             my $matched_includes_for_data_tree =
499 138         141 $matched_includes{$data_tree};
500             my @data_keys_to_be_deferred =
501 138         140 grep { !$matched_includes_for_data_tree->{$_}; }
  544         757  
502             @data_tree_keys;
503 138         160 @{ $deferred_excludes{$data_tree} }{@data_keys_to_be_deferred}
  138         389  
504             = ($data_tree) x @data_keys_to_be_deferred;
505              
506             my $data_sub_tree =
507             $data_tree_type eq 'HASH'
508             ? $data_tree->{$data_tree_key}
509 138 100       260 : eval { $data_tree->[$data_tree_key] };
  46         67  
510              
511             # Using {%{}} to catch non-existance with fatal error. Can be
512             # taken out for space and/or speed savings at cost of "safety".
513             my $selector_sub_tree =
514 138         201 @{$matching_selector_keys} == 1
515 133         291 ? { %{ $selector_tree->{ $matching_selector_keys->[0] } } }
516             : {
517 10         8 map { %{ $selector_tree->{$_} }; }
  10         21  
518 138 100       89 @{$matching_selector_keys},
  5         6  
519             };
520              
521             push( @queue, [ $selector_sub_tree, $data_sub_tree, ] )
522 157         489 if ref $data_sub_tree && grep { $_ ne '_order_'; }
523 138 100 100     356 keys %{$selector_sub_tree};
  87         137  
524             }
525             }
526              
527             # Add deferred exclusions for all data keys if there were inclusions but
528             # none matched.
529 130 100 100     370 if ( $has_includes && !%matched_includes ) {
530             $deferred_excludes{$data_tree}->{$_} = $data_tree
531 9         43 for @data_tree_keys;
532             }
533              
534             # Execute deferred exclusions.
535 130         169 for my $data_tree_string ( keys %deferred_excludes ) {
536             my @data_tree_keys =
537 105         65 keys %{ $deferred_excludes{$data_tree_string} };
  105         213  
538 105 100       176 if (@data_tree_keys) {
539             my $data_tree =
540 92         114 $deferred_excludes{$data_tree_string}->{ $data_tree_keys[0] };
541 92         97 my $data_tree_type = ref $data_tree;
542 92 50       124 next unless $data_tree_type;
543              
544 92 100       123 if ( $data_tree_type eq 'HASH' ) {
545 66         43 delete @{$data_tree}{@data_tree_keys};
  66         242  
546             }
547             else {
548 26         24 my $ok = eval {
549 26         32 @{$data_tree}[@data_tree_keys] =
  26         62  
550             ('_to_be_trimmed_') x @data_tree_keys;
551             };
552 26 50       108 $arrays_to_be_trimmed{$data_tree} = $data_tree if $ok;
553             }
554             }
555             }
556              
557             # Trim arrays of slots that fell victim to exclusion.
558 130         500 for my $array ( values %arrays_to_be_trimmed ) {
559 37         221 @{$array} =
560 37 100       28 map { $_ eq '_to_be_trimmed_' ? () : $_; } @{$array};
  108         184  
  37         51  
561             }
562             }
563              
564 66         259 return;
565             }
566              
567             =head1 AUTHOR
568              
569             Justin DeVuyst, C
570              
571             =head1 COPYRIGHT AND LICENSE
572              
573             Copyright 2015 by Justin DeVuyst.
574              
575             This library is free software, you can redistribute it and/or modify it under
576             the same terms as Perl itself.
577              
578             =cut
579              
580             1;