File Coverage

blib/lib/Search/MultiMatch.pm
Criterion Covered Total %
statement 47 47 100.0
branch 10 10 100.0
condition 7 9 77.7
subroutine 6 6 100.0
pod 3 3 100.0
total 73 75 97.3


line stmt bran cond sub pod time code
1             package Search::MultiMatch;
2              
3 4     4   74724 use 5.010;
  4         15  
4 4     4   21 use strict;
  4         9  
  4         82  
5 4     4   21 use warnings;
  4         11  
  4         2080  
6              
7             =encoding utf8
8              
9             =head1 NAME
10              
11             Search::MultiMatch - An efficient, tree-based, 2D multimatcher.
12              
13             =head1 VERSION
14              
15             Version 0.02
16              
17             =cut
18              
19             our $VERSION = '0.02';
20              
21             =head1 SYNOPSIS
22              
23             B works by creating a multidimensional hash-table
24             with keys as 2D-arrays, which are stored as nodes.
25              
26             It accepts matching the stored entries with a pattern, that is
27             also a 2D-array, identifying matches by walking the table from node to node.
28              
29             use Search::MultiMatch;
30              
31             # Create a SMM object
32             my $smm = Search::MultiMatch->new();
33              
34             # Add an entry
35             $smm->add($key, $value); # key is a 2D-array
36              
37             # Search with a pattern
38             my @matches = $smm->search($pattern); # pattern is a 2D-array
39              
40             =head1 METHODS
41              
42             =head2 new
43              
44             Creates and returns a new object.
45              
46             my $smm = Search::MultiMatch->new(%opt);
47              
48             Where C<%opt> understands the following options:
49              
50             =over 2
51              
52             =item * table => {}
53              
54             The value of the C must be a multidimensional hash-like data structure.
55              
56             =back
57              
58             =cut
59              
60             sub new {
61 3     3 1 1164 my ($class, %opt) = @_;
62 3   50     29 bless {table => $opt{table} // {}}, $class;
63             }
64              
65             =head2 add
66              
67             Synopsis:
68              
69             $smm->add($key, $value);
70              
71             The C<$key> must be a 2D-array, with groups in the first
72             dimension, and items in the second. The granularity of the items controls the matching.
73              
74             Example:
75              
76             my $key = [['f','o','o'],['b','a','r']];
77             my $value = 'Foo Bar';
78             $smm->add($key, $value);
79              
80             =cut
81              
82             sub add {
83 42     42 1 526 my ($self, $key, $value) = @_;
84              
85 42         62 my $vref = \$value;
86 42         98 my $table = $self->{table};
87              
88 42         65 foreach my $group (@$key) {
89 112         161 my $ref = $table;
90 112         159 foreach my $item (@$group) {
91 335   100     952 $ref = $ref->{$item} //= {};
92 335         422 push @{$ref->{$ref}}, $vref;
  335         921  
93             }
94             }
95              
96 42         121 $self;
97             }
98              
99             =head2 search
100              
101             Synopsis:
102              
103             my @matches = $smm->search($pattern, %opt)
104              
105             Searches and returns a list of matches, each match having a score
106             greater or equal to 1, which is the number of times it matched the pattern.
107              
108             Each returned match has the following structure:
109              
110             {
111             match => $value,
112             score => $integer,
113             }
114              
115             Where C<$value> is the original value associated with the matched key.
116              
117             The C<$pattern> must be a 2D-array, with groups in the first dimension
118             and items in the second dimension. The granularity of the items controls the matching.
119              
120             Example:
121              
122             my $pattern = [['f','o'], ['b', 'a']];
123             my @default = $smm->search($pattern);
124             my @best = $smm->search($pattern, keep => 'best');
125             my @any = $smm->search($pattern, keep => 'any');
126              
127             The B option controls which matches to be returned.
128              
129             =over 2
130              
131             =item * keep => 'best'
132              
133             Will filter the results to include only the matches with the highest score.
134              
135             =item * keep => 'any'
136              
137             Will keep any partial match, even when a certain group from the C<$pattern> do not match any of the returned matches.
138              
139             =item * keep => 'default'
140              
141             This is the default setting and it returns all the values that partially match, at least, one group in the C<$pattern>.
142              
143             =back
144              
145             In all cases, a given match is saved even when not all the pattern-items from a certain group match all the key-items of the match.
146              
147             For example, let's consider:
148              
149             my $pattern = [['f', 'o']];
150             my $key = [['f', 'o', 'o']];
151              
152             In the above scenario, the pattern will match the key, because C<'f'> and C<'o'> from the pattern will follow the path of the key.
153              
154             However, in the following case:
155              
156             my $pattern = [['f', 'o']];
157             my $key = [['foo']];
158              
159             the pattern will not match the key, because C<'foo'> is not stored on the C<'f'> node.
160              
161             =cut
162              
163             sub search {
164 16     16 1 12122 my ($self, $pattern, %opt) = @_;
165              
166 16         30 my $table = $self->{table};
167 16   100     53 my $keep = $opt{keep} // '';
168              
169 16         22 my (@matches, %seen);
170              
171 16         31 foreach my $group (@$pattern) {
172              
173 27         41 my $ref = $table;
174 27         40 foreach my $item (@$group) {
175 57 100       104 if (exists $ref->{$item}) {
176 51         81 $ref = $ref->{$item};
177             }
178             else {
179 6         10 $ref = undef;
180 6         12 last;
181             }
182             }
183              
184 27 100 66     114 if (defined($ref) and exists($ref->{$ref})) {
    100          
185 21         30 foreach my $match (@{$ref->{$ref}}) {
  21         50  
186 60 100       124 if (not exists $seen{$match}) {
187 52         89 $seen{$match} = 1;
188 52         152 push @matches, $match;
189             }
190             else {
191 8         17 ++$seen{$match};
192             }
193             }
194             }
195             elsif ($keep ne 'any') {
196 3         8 @matches = ();
197 3         6 last;
198             }
199             }
200              
201 16 100       38 if ($keep eq 'best') {
202 6         35 require List::Util;
203 6         26 my $max = List::Util::max(values %seen);
204 6         14 @matches = grep { $seen{$_} == $max } @matches;
  22         53  
205             }
206              
207 16         38 map { ; {match => $$_, score => $seen{$_}} } @matches;
  28         129  
208             }
209              
210             =head1 EXAMPLE
211              
212             This example illustrates how to add some key/value pairs to the table
213             and how to search the table with a given pattern at a later time:
214              
215             use Search::MultiMatch;
216             use Data::Dump qw(pp);
217              
218             # Creates a SMM object
219             my $smm = Search::MultiMatch->new();
220              
221             # Create a 2D-array key, by splitting the string
222             # into words, then each word into characters.
223             sub make_key {
224             [map { [split //] } split(' ', lc($_[0]))];
225             }
226              
227             my @movies = (
228             'My First Lover',
229             'A Lot Like Love',
230             'Funny Games (2007)',
231             'Cinderella Man (2005)',
232             'Pulp Fiction (1994)',
233             'Don\'t Say a Word (2001)',
234             'Secret Window (2004)',
235             'The Lookout (2007)',
236             '88 Minutes (2007)',
237             'The Mothman Prophecies',
238             'Love Actually (2003)',
239             'From Paris with Love (2010)',
240             'P.S. I Love You (2007)',
241             );
242              
243             # Add the entries
244             foreach my $movie (@movies) {
245             $smm->add(make_key($movie), $movie);
246             }
247              
248             my $pattern = make_key('i love'); # make the search-pattern
249             my @matches = $smm->search($pattern); # search by the pattern
250              
251             pp \@matches; # dump the results
252              
253             The results are:
254              
255             [
256             {match => "P.S. I Love You (2007)", score => 2},
257             {match => "My First Lover", score => 1},
258             {match => "A Lot Like Love", score => 1},
259             {match => "Love Actually (2003)", score => 1},
260             {match => "From Paris with Love (2010)", score => 1},
261             ]
262              
263             =head1 REPOSITORY
264              
265             L
266              
267             =head1 AUTHOR
268              
269             Daniel Șuteu, C<< >>
270              
271             =head1 LICENSE AND COPYRIGHT
272              
273             Copyright 2016 Daniel Șuteu.
274              
275             This program is free software; you can redistribute it and/or modify it
276             under the terms of the the Artistic License (2.0). You may obtain a
277             copy of the full license at:
278              
279             L
280              
281             =cut
282              
283             1; # End of Search::MultiMatch