File Coverage

blib/lib/Search/MultiMatch.pm
Criterion Covered Total %
statement 46 46 100.0
branch 10 10 100.0
condition 7 9 77.7
subroutine 6 6 100.0
pod 3 3 100.0
total 72 74 97.3


line stmt bran cond sub pod time code
1             package Search::MultiMatch;
2              
3 2     2   92759 use 5.010;
  2         5  
4 2     2   6 use strict;
  2         2  
  2         34  
5 2     2   5 use warnings;
  2         9  
  2         694  
6              
7             =head1 NAME
8              
9             =encoding utf8
10              
11             Search::MultiMatch - An efficient, tree-based, 2D multimatcher.
12              
13             =head1 VERSION
14              
15             Version 0.01
16              
17             =cut
18              
19             our $VERSION = '0.01';
20              
21             =head1 SYNOPSIS
22              
23             B works by creating a multidimensional hash-table
24             with keys as 2D-arrays, which are stored as nodes.
25              
26             It accepts matching the stored entries with a pattern, that is
27             also a 2D-array, identifying matches by walking the table from node to node.
28              
29             use Search::MultiMatch;
30              
31             # Create a SMM object
32             my $smm = Search::MultiMatch->new();
33              
34             # Add an entry
35             $smm->add($key, $value); # key is a 2D-array
36              
37             # Search with a pattern
38             my @matches = $smm->search($pattern); # pattern is a 2D-array
39              
40             =head1 METHODS
41              
42             =head2 new
43              
44             Creates and returns a new object.
45              
46             my $smm = Search::MultiMatch->new(%opt);
47              
48             Where C<%opt> understands the following options:
49              
50             =over 2
51              
52             =item * table => {}
53              
54             The value of the C must be a multidimensional hash-like data structure.
55              
56             =back
57              
58             =cut
59              
60             sub new {
61 1     1 1 486 my ($class, %opt) = @_;
62 1   50     14 bless {table => $opt{table} // {}}, $class;
63             }
64              
65             =head2 add
66              
67             Synopsis:
68              
69             $smm->add($key, $value);
70              
71             The C<$key> must be a 2D-array, with groups in the first
72             dimension, and items in the second. The granularity of the items controls the matching.
73              
74             Example:
75              
76             my $key = [['f','o','o'],['b','a','r']];
77             my $value = 'Foo Bar';
78             $smm->add($key, $value);
79              
80             =cut
81              
82             sub add {
83 13     13 1 236 my ($self, $key, $value) = @_;
84              
85 13         18 my $table = $self->{table};
86              
87 13         19 foreach my $group (@$key) {
88 46         42 my $ref = $table;
89 46         52 foreach my $item (@$group) {
90 223   100     622 $ref = $ref->{$item} //= {};
91 223         189 push @{$ref->{values}}, \$value;
  223         529  
92             }
93             }
94              
95 13         47 $self;
96             }
97              
98             =head2 search
99              
100             Synopsis:
101              
102             my @matches = $smm->search($pattern, %opt)
103              
104             Searches and returns a list of matches, each match having a score
105             greater or equal to 1, which is the number of times it matched the pattern.
106              
107             Each returned match has the following structure:
108              
109             {
110             match => $value,
111             score => $integer,
112             }
113              
114             Where C<$value> is the original value associated with the matched key.
115              
116             The C<$pattern> must be a 2D-array, with groups in the first dimension
117             and items in the second dimension. The granularity of the items controls the matching.
118              
119             Example:
120              
121             my $pattern = [['f','o'], ['b', 'a']];
122             my @default = $smm->search($pattern);
123             my @best = $smm->search($pattern, keep => 'best');
124             my @any = $smm->search($pattern, keep => 'any');
125              
126             The B option controls which matches to be returned.
127              
128             =over 2
129              
130             =item * keep => 'best'
131              
132             Will filter the results to include only the matches with the highest score.
133              
134             =item * keep => 'any'
135              
136             Will keep any partial match, even when a certain group from the C<$pattern> do not match any of the returned matches.
137              
138             =item * keep => 'default'
139              
140             This is the default setting and it returns all the values that partially match, at least, one group in the C<$pattern>.
141              
142             =back
143              
144             In all cases, a given match is saved even when not all the pattern-items from a certain group match all the key-items of the match.
145              
146             For example, let's consider:
147              
148             my $pattern = [['f', 'o']];
149             my $key = [['f', 'o', 'o']];
150              
151             In the above scenario, the pattern will match the key, because C<'f'> and C<'o'> from the pattern will follow the path of the key.
152              
153             However, in the following case:
154              
155             my $pattern = [['f', 'o']];
156             my $key = [['foo']];
157              
158             the pattern will not match the key, because C<'foo'> is not stored on the C<'f'> node.
159              
160             =cut
161              
162             sub search {
163 5     5 1 13119 my ($self, $pattern, %opt) = @_;
164              
165 5         11 my $table = $self->{table};
166 5   100     22 my $keep = $opt{keep} // '';
167              
168 5         8 my (@matches, %seen);
169              
170 5         9 foreach my $group (@$pattern) {
171              
172 11         25 my $ref = $table;
173 11         17 foreach my $item (@$group) {
174 36 100       67 if (exists $ref->{$item}) {
175 34         54 $ref = $ref->{$item};
176             }
177             else {
178 2         4 $ref = undef;
179 2         5 last;
180             }
181             }
182              
183 11 100 66     43 if (defined($ref) and exists($ref->{values})) {
    100          
184 9         11 foreach my $match (@{$ref->{values}}) {
  9         18  
185 31 100       61 if (not exists $seen{$match}) {
186 26         41 $seen{$match} = 1;
187 26         54 push @matches, $match;
188             }
189             else {
190 5         13 ++$seen{$match};
191             }
192             }
193             }
194             elsif ($keep ne 'any') {
195 1         2 @matches = ();
196 1         2 last;
197             }
198             }
199              
200 5 100       13 if ($keep eq 'best') {
201 2         16 require List::Util;
202 2         16 my $max = List::Util::max(values %seen);
203 2         5 @matches = grep { $seen{$_} == $max } @matches;
  11         28  
204             }
205              
206 5         12 map { ; {match => $$_, score => $seen{$_}} } @matches;
  12         54  
207             }
208              
209             =head1 EXAMPLE
210              
211             This example illustrates how to add some key/value pairs to the table
212             and how to search the table with a given pattern at a later time:
213              
214             use Search::MultiMatch;
215             use Data::Dump qw(pp);
216              
217             # Creates a SMM object
218             my $smm = Search::MultiMatch->new();
219              
220             # Create a 2D-array key, by splitting the string
221             # into words, then each word into characters.
222             sub make_key {
223             [map { [split //] } split(' ', lc($_[0]))];
224             }
225              
226             my @movies = (
227             'My First Lover',
228             'A Lot Like Love',
229             'Funny Games (2007)',
230             'Cinderella Man (2005)',
231             'Pulp Fiction (1994)',
232             'Don\'t Say a Word (2001)',
233             'Secret Window (2004)',
234             'The Lookout (2007)',
235             '88 Minutes (2007)',
236             'The Mothman Prophecies',
237             'Love Actually (2003)',
238             'From Paris with Love (2010)',
239             'P.S. I Love You (2007)',
240             );
241              
242             # Add the entries
243             foreach my $movie (@movies) {
244             $smm->add(make_key($movie), $movie);
245             }
246              
247             my $pattern = make_key('i love'); # make the search-pattern
248             my @matches = $smm->search($pattern); # search for the pattern
249              
250             pp \@matches; # dump the results
251              
252             The results are:
253              
254             [
255             {match => "P.S. I Love You (2007)", score => 2},
256             {match => "My First Lover", score => 1},
257             {match => "A Lot Like Love", score => 1},
258             {match => "Love Actually (2003)", score => 1},
259             {match => "From Paris with Love (2010)", score => 1},
260             ]
261              
262             =head1 AUTHOR
263              
264             Daniel Șuteu, C<< >>
265              
266             =head1 BUGS
267              
268             Please report any bugs or feature requests to L.
269             I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
270              
271             =head1 SUPPORT
272              
273             You can find documentation for this module with the perldoc command.
274              
275             perldoc Search::MultiMatch
276              
277              
278             You can also look for information at:
279              
280             =over 4
281              
282             =item * Github
283              
284             L
285              
286             =item * AnnoCPAN: Annotated CPAN documentation
287              
288             L
289              
290             =item * CPAN Ratings
291              
292             L
293              
294             =item * Search CPAN
295              
296             L
297              
298             =back
299              
300             =head1 LICENSE AND COPYRIGHT
301              
302             Copyright 2016 Daniel Șuteu.
303              
304             This program is free software; you can redistribute it and/or modify it
305             under the terms of the the Artistic License (2.0). You may obtain a
306             copy of the full license at:
307              
308             L
309              
310             =cut
311              
312             1; # End of Search::MultiMatch