File Coverage

blib/lib/FlyBy.pm
Criterion Covered Total %
statement 156 160 97.5
branch 65 74 87.8
condition 29 51 56.8
subroutine 19 21 90.4
pod 4 5 80.0
total 273 311 87.7


line stmt bran cond sub pod time code
1             package FlyBy;
2              
3 2     2   276361 use strict;
  2         4  
  2         62  
4 2     2   8 use warnings;
  2         3  
  2         48  
5 2     2   45 use 5.010;
  2         6  
6             our $VERSION = '0.095';
7              
8 2     2   1284 use Moo;
  2         23350  
  2         12  
9              
10 2     2   2671 use Carp qw(croak);
  2         5  
  2         105  
11 2     2   1288 use Parse::Lex;
  2         38128  
  2         73  
12 2     2   22 use Scalar::Util qw(reftype);
  2         3  
  2         124  
13 2     2   1223 use Set::Scalar;
  2         20536  
  2         87  
14 2     2   1108 use Try::Tiny;
  2         2667  
  2         3154  
15              
16             has index_sets => (
17                 is => 'ro',
18                 init_arg => undef,
19                 default => sub { {}; },
20             );
21              
22             has records => (
23                 is => 'ro',
24                 init_arg => undef,
25                 default => sub { []; },
26             );
27              
28             has _full_set => (
29                 is => 'ro',
30                 init_arg => undef,
31                 default => sub { Set::Scalar->new; },
32             );
33              
34             has query_lexer => (
35                 is => 'ro',
36                 init_arg => undef,
37                 lazy => 1,
38                 builder => '_build_query_lexer',
39             );
40              
41             my $negation = '!';
42              
43             sub _build_query_lexer {
44 2     2   1218     my $self = shift;
45              
46                 my @tokens = (
47                     "NOTEQUAL" => "(is not|IS NOT)",
48                     "EQUAL" => "is|IS",
49                     "AND" => "and|AND",
50                     "OR" => "or|OR",
51                     "REDUCE" => "->",
52                     "COMMA" => ",",
53                     "QUOTED_STRING" => qq~(?:\'(?:[^\\\']*(?:\\.[^\\\']*)*)\'|\"(?:[^\\\"]*(?:\\.[^\\\"]*)*)\")~, # From Text::Balanced
54                     "ERROR" => ".*",
55 2     1   20         sub { die qq!cannot analyze: "$_[1]"!; });
  1         11660  
56              
57 2         48     return Parse::Lex->new(@tokens);
58             }
59              
60             sub add_records {
61 8     8 1 14002     my ($self, @new_records) = @_;
62              
63 8         35     my $index_sets = $self->index_sets;
64 8         20     my $records = $self->records;
65              
66 8         19     foreach my $record (@new_records) {
67 13   100     223         my $whatsit = reftype($record) // 'no reference';
68 13 100       82         croak 'Records must be hash references, got: ' . $whatsit unless ($whatsit eq 'HASH');
69              
70 11         21         my $rec_index = $#$records + 1; # Even if we accidentally made this sparse, we can insert here.
71 11         16         my $add_it = 0; # Do not add until we know there is at least one defined value;
72 11         58         while (my ($k, $v) = each %$record) {
73 31 100       56             if (defined $v) {
74 28         55                 $self->_from_index($k, $v, 1)->insert($rec_index);
75 28   100     1080                 $add_it ||= 1;
76                         } else {
77 3         10                 delete $record->{$k}; # A missing key denotes an undefined value.
78                         }
79                     }
80 11 100       102         if ($add_it) {
81 10         21             $records->[$rec_index] = $record;
82 10         35             $self->_full_set->insert($rec_index);
83                     }
84                 }
85              
86 6         226     return 1;
87             }
88              
89             sub _from_index {
90 89     89   147     my ($self, $key, $value, $add_missing_key) = @_;
91 89         160     my $index_sets = $self->index_sets;
92              
93 89         83     my ($result, $negated);
94              
95 89 100       214     if (substr($value, 0, 1) eq $negation) {
96 12         14         $negated = 1;
97 12         23         $value = substr($value, 1);
98                 }
99              
100                 return $negated ? $self->_full_set : $self->_full_set->empty_clone
101 89 100 66     336         unless $add_missing_key or exists $index_sets->{$key}; # Avoiding auto-viv on request
    100          
102              
103 77 100       114     if ($add_missing_key) {
104 28   66     170         $result = $index_sets->{$key}{$value} //= $self->_full_set->empty_clone; # Sets which do not (yet) exist in the index are null.
105                 } else {
106 49   66     191         $result = $index_sets->{$key}{$value} // $self->_full_set->empty_clone; # Sets which do not (yet) exist in the index are null.
107                 }
108              
109 77 100       1619     $result = $self->_full_set->difference($result) if ($negated);
110              
111 77         3336     return $result;
112             }
113              
114             sub query {
115 48     48 1 79629     my ($self, $query_clauses, $reduce_list) = @_;
116              
117 48 100       193     if (not reftype($query_clauses)) {
118 20         22         my $err; # To let us notice parsing errors;
119 20 100       72         croak 'String queries should have a single parameter' if (defined $reduce_list);
120 19         49         ($query_clauses, $reduce_list, $err) = $self->parse_query($query_clauses);
121 19 100       94         croak $err if $err;
122                 } else {
123             # Trust the parser above, so we only verify on 'hand-made' queries.
124 28 100 50     395         croak 'Query clauses should be a non-empty hash reference.'
      33        
      66        
125                         unless ($query_clauses and (reftype($query_clauses) // '') eq 'HASH' and keys %$query_clauses);
126 27 100 100     159         croak 'Reduce list should be a non-empty array reference.'
      100        
      66        
127                         unless (not $reduce_list or ((reftype($reduce_list) // '') eq 'ARRAY' and scalar @$reduce_list));
128              
129             # Now convert the supplied hashref to an array reference we can use.
130 25         89         my %qhash = %$query_clauses;
131 25         56         $query_clauses = [map { [$_ => $qhash{$_}] } grep { defined $qhash{$_} } keys %qhash];
  31         107  
  32         87  
132                 }
133              
134 42         116     my $match_set = $self->_full_set;
135 42         79     my @qc = @$query_clauses;
136              
137 42         106     while (my $addl_clause = shift @qc) {
138 53         3098         my ($key, $value) = @$addl_clause;
139 53         105         my $whatsit = reftype($value);
140 53         50         my $change_set;
141 53 100 66     179         if ($whatsit && $whatsit eq 'ARRAY') {
142             # OR syntax.
143 9         24             my @ors = @$value;
144 9         14             $value = shift @ors;
145             # Allow for negation, even though it seems unlikely.
146 9         25             $change_set = $self->_from_index($key, $value, 0);
147 9         84             foreach my $or (@ors) {
148 8         15                 $change_set = $change_set->union($self->_from_index($key, $or, 0));
149                         }
150                     } else {
151 44         109             $change_set = $self->_from_index($key, $value, 0);
152                     }
153              
154 53         3280         $match_set = $match_set->intersection($change_set);
155                 }
156              
157 42         10902     my $records = $self->records;
158             # Sort may only be important for testing. Reconsider if large slow sets appear.
159 42         99     my @indices = sort { $a <=> $b } ($match_set->elements);
  72         257  
160 42         128     my @results;
161              
162 42 100       83     if ($reduce_list) {
163 13         26         my @keys = @$reduce_list;
164 13         22         my $key_count = scalar @keys;
165 13         15         my %seen;
166 13         26         foreach my $idx (@indices) {
167 30   50     37             my @reduced_element = map { ($records->[$idx]->{$_} // '') } @keys;
  36         132  
168 30         50             my $seen_key = join('->', @reduced_element);
169 30 100       69             if (not $seen{$seen_key}) {
170 22 100       49                 push @results, ($key_count > 1) ? \@reduced_element : @reduced_element;
171 22         64                 $seen{$seen_key}++;
172                         }
173                     }
174                 } else {
175                     @results = map {
176 29         43             { %{$records->[$_]} }
  52         47  
  52         245  
177                     } @indices;
178                 }
179              
180 42         180     return @results;
181             }
182              
183             sub parse_query {
184 19     19 0 25     my ($self, $query) = @_;
185              
186 19         23     my (%values, $err);
187 19         577     my $lexer = $self->query_lexer;
188 19     0   1928     my $parse_err = sub { return 'Improper query at: ' . shift; };
  0         0  
189              
190                 try {
191 19 100   19   929         croak 'Empty query' unless $query;
192 18         26         my @clause = ();
193 18         79         my @tokens = $lexer->analyze($query);
194 17         14901         my ($in_reduce, $negated, $in_or) = (0, 0);
195 17         49         $values{query} = [];
196                     TOKEN:
197 17         55         while (my $name = shift @tokens) {
198 112         119             my $text = shift @tokens;
199 112 100       227             if ($name eq 'EOI') {
200             # We must be done.
201 17 100 66     79                 if (@clause and $in_reduce) {
    50          
202 6         15                     $values{reduce} = [@clause];
203                             } elsif (@clause) {
204 11         10                     push @{$values{query}}, [@clause];
  11         26  
205                             }
206              
207 17         58                 last TOKEN;
208                         }
209 95 100       169             next TOKEN if ($name eq 'COMMA'); # They can put commas anywhere, we don't care.
210 93         77             my $expected_length = 2;
211 93 100       205             if ($name eq 'QUOTED_STRING') {
    100          
    100          
    100          
    100          
    50          
212 56 100       118                 my $value = ($negated) ? $negation . substr($text, 1, -1) : substr($text, 1, -1);
213 56 100       79                 if ($in_or) {
214 4 50       19                     $clause[-1] = [$clause[-1]] unless (reftype($clause[-1]));
215 4         4                     push @{$clause[-1]}, $value;
  4         9  
216                             } else {
217 52         75                     push @clause, $value;
218                             }
219 56         128                 ($in_or, $negated) = 0;
220                         } elsif ($name eq 'AND') {
221 5 50 33     21                 croak $parse_err->($text) if ($in_reduce or scalar @clause != $expected_length);
222 5         6                 push @{$values{query}}, [@clause];
  5         12  
223 5         16                 @clause = (); # Starting a new clause.
224                         } elsif ($name eq 'OR') {
225 4 50 33     20                 croak $parse_err->($text) if ($in_reduce or scalar @clause != $expected_length);
226 4         10                 $in_or = 1;
227                         } elsif ($name eq 'EQUAL') {
228 17 50 33     102                 croak $parse_err->($text) if ($in_reduce or scalar @clause != $expected_length - 1);
229                         } elsif ($name eq 'NOTEQUAL') {
230 5 50 33     31                 croak $parse_err->($text) if ($in_reduce or scalar @clause != $expected_length - 1);
231 5         16                 $negated = 1;
232                         } elsif ($name eq 'REDUCE') {
233 6 50       14                 croak $parse_err->($text) if ($in_reduce);
234 6         7                 $in_reduce = 1;
235 6 50       16                 push @{$values{query}}, [@clause] if (@clause);
  6         21  
236 6         19                 @clause = ();
237                         }
238                     }
239                 }
240                 catch {
241 2     2   685         $err = $_;
242 19         152     };
243              
244 19         529     return $values{query}, $values{reduce}, $err;
245             }
246              
247             sub _check_clause {
248 0     0   0     my ($self, $thing) = @_;
249              
250 0         0     my $whatsit = reftype $thing;
251 0   0     0     return ($whatsit and $whatsit eq 'ARRAY' and scalar @$thing == 2);
252             }
253              
254             sub all_keys {
255 1     1 1 2927     my $self = shift;
256 1         2     return (sort { $a cmp $b } keys %{$self->index_sets});
  4         13  
  1         7  
257             }
258              
259             sub values_for_key {
260 3     3 1 9     my ($self, $key) = @_;
261              
262 3         5     return (sort { $a cmp $b } keys %{$self->index_sets->{$key}});
  5         22  
  3         27  
263             }
264              
265             1;
266             __END__
267            
268             =encoding utf-8
269            
270             =head1 NAME
271            
272             FlyBy - Ad hoc denormalized querying
273            
274             =head1 SYNOPSIS
275            
276             use FlyBy;
277            
278             my $fb = FlyBy->new;
279             $fb->add_records({array => 'of'}, {hash => 'references'}, {with => 'fields'});
280             my @array_of_hash_refs = $fb->query({'key' => ['value', 'other value']});
281            
282             # Or with a 'reduction list':
283             my @array = $fb->query({'key' => 'value'}, ['some key']);
284             my @array_of_array_refs = $fb->query({'key' =>'value', 'other key' => 'other value'},
285             ['some key', 'some other key']);
286            
287             =head1 DESCRIPTION
288            
289             FlyBy is a system to allow for ad hoc querying of data which may not
290             exist in a traditional datastore at runtime
291            
292             =head1 USAGE
293            
294             =over
295            
296             =item add_records
297            
298             $fb->add_records({array => 'of'}, {hash => 'references'}, {with => 'fields'});
299            
300             Supply one or more hash references to be added to the store.
301            
302             Keys with undefined values will be silently stripped from each record. If the
303             record is then empty it will be discarded.
304            
305             `croak` on error; returns `1` on success
306            
307             =item query
308            
309             =over
310            
311             =item string
312            
313             $fb->query("'type' IS 'shark' AND 'food' IS 'seal' -> 'called', 'lives_in'");
314            
315             The query parameters are joined with `IS` for equality testing, or
316             `IS NOT` for its inverse.
317            
318             Multiple values for a given key can be combined with `OR`.
319            
320             Multiple keys are joined with AND.
321            
322             The optional reductions are prefaced with `->`.
323            
324             If no reduction is provided a list of the full record hash
325             references is returned.
326             If a reduction list of length 1 is provided, a list of the distinct
327             values for the matching key is returned.
328             If a longer reduction list is provided, a list of distinct value
329             array references (in the provided key order) is returned.
330            
331             =item raw
332            
333             $fb->query({'type' => 'shark', 'food' => 'seal'}, ['called', 'lives_in']");
334            
335             The query clause is supplied as hash reference of keys and values to
336             be `AND`-ed together for the final result.
337            
338             An array reference value is treated as a sucession of 'or'-ed values
339             for the provided key.
340            
341             All values prepended with an `!` are deemed to be a negation of the
342             rest of the string as a value.
343            
344             A second optional reduction list of strings may be provided which
345             reduces the result as above.
346            
347             =back
348            
349             Will `croak` on improperly supplied query formats.
350            
351             =item all_keys
352            
353             Returns an array with all known keys against which one might query.
354            
355             =item values_for_key
356            
357             Returns an array of all known values for a given key.
358            
359             =back
360            
361             =head1 CAVEATS
362            
363             Note that supplied keys may not begin with an `!`. Thought has been
364             given to making this configurable at creation, but it was deemed to
365             be unnecessary complexity.
366            
367             This software is in an early state. The internal representation and
368             external API are subject to deep breaking change.
369            
370             This software is not tuned for efficiency. If it is not being used
371             to resolve many queries on each instance or if the data is available
372             from a single canonical source, there are likely better solutions
373             available in CPAN.
374            
375             =head1 AUTHOR
376            
377             Binary.com
378            
379             =head1 COPYRIGHT
380            
381             Copyright 2015- Binary.com
382            
383             =head1 LICENSE
384            
385             This library is free software; you can redistribute it and/or modify
386             it under the same terms as Perl itself.
387            
388            
389             =cut
390