File Coverage

blib/lib/Search/MultiMatch.pm
Criterion Covered Total %
statement 46 46 100.0
branch 10 10 100.0
condition 7 9 77.7
subroutine 6 6 100.0
pod 3 3 100.0
total 72 74 97.3


line stmt bran cond sub pod time code
1             package Search::MultiMatch;
2              
3 4     4   63012 use 5.010;
  4         14  
4 4     4   18 use strict;
  4         6  
  4         69  
5 4     4   14 use warnings;
  4         8  
  4         1757  
6              
7             =encoding utf8
8              
9             =head1 NAME
10              
11             Search::MultiMatch - An efficient, tree-based, 2D multimatcher.
12              
13             =head1 VERSION
14              
15             Version 0.03
16              
17             =cut
18              
19             our $VERSION = '0.03';
20              
21             =head1 SYNOPSIS
22              
23             B works by creating a multidimensional hash-table
24             with keys as 2D-arrays, which are stored as nodes.
25              
26             It accepts matching the stored entries with a pattern, that is
27             also a 2D-array, identifying matches by walking the table from node to node.
28              
29             use Search::MultiMatch;
30              
31             # Create a SMM object
32             my $smm = Search::MultiMatch->new();
33              
34             # Add an entry
35             $smm->add($key, $value); # key is a 2D-array
36              
37             # Search with a pattern
38             my @matches = $smm->search($pattern); # pattern is a 2D-array
39              
40             =head1 METHODS
41              
42             =head2 new
43              
44             Creates and returns a new object.
45              
46             my $smm = Search::MultiMatch->new(%opt);
47              
48             Where C<%opt> understands the following options:
49              
50             =over 2
51              
52             =item * table => {}
53              
54             The value of the C must be a multidimensional hash-like data structure.
55              
56             Starting with version 0.03, using a L database is also supported.
57              
58             =item * special_key => "\0\0\1\0\0"
59              
60             Special unique key, used internally to store the original values.
61              
62             =back
63              
64             =cut
65              
66             sub new {
67 3     3 1 985 my ($class, %opt) = @_;
68              
69             bless {
70              
71             table => $opt{table} // {},
72 3   50     25 special_key => "\0\0\1\0\0",
73              
74             }, $class;
75             }
76              
77             =head2 add
78              
79             Synopsis:
80              
81             $smm->add($key, $value);
82              
83             The C<$key> must be a 2D-array, with groups in the first
84             dimension, and items in the second. The granularity of the items controls the matching.
85              
86             Example:
87              
88             my $key = [['f','o','o'],['b','a','r']];
89             my $value = 'Foo Bar';
90             $smm->add($key, $value);
91              
92             =cut
93              
94             sub add {
95 42     42 1 408 my ($self, $key, $value) = @_;
96              
97 42         56 my $table = $self->{table};
98              
99 42         49 foreach my $group (@$key) {
100 112         117 my $ref = $table;
101 112         128 foreach my $item (@$group) {
102 335   100     734 $ref = $ref->{$item} //= {};
103 335         347 push @{$ref->{$self->{special_key}}}, $value;
  335         647  
104             }
105             }
106              
107 42         98 $self;
108             }
109              
110             =head2 search
111              
112             Synopsis:
113              
114             my @matches = $smm->search($pattern, %opt)
115              
116             Searches and returns a list of matches, each match having a score
117             greater or equal to 1, which is the number of times it matched the pattern.
118              
119             Each returned match has the following structure:
120              
121             {
122             match => $value,
123             score => $integer,
124             }
125              
126             Where C<$value> is the original value associated with the matched key.
127              
128             The C<$pattern> must be a 2D-array, with groups in the first dimension
129             and items in the second dimension. The granularity of the items controls the matching.
130              
131             Example:
132              
133             my $pattern = [['f','o'], ['b', 'a']];
134             my @default = $smm->search($pattern);
135             my @best = $smm->search($pattern, keep => 'best');
136             my @any = $smm->search($pattern, keep => 'any');
137              
138             The B option controls which matches to be returned.
139              
140             =over 2
141              
142             =item * keep => 'best'
143              
144             Will filter the results to include only the matches with the highest score.
145              
146             =item * keep => 'any'
147              
148             Will keep any partial match, even when a certain group from the C<$pattern> do not match any of the returned matches.
149              
150             =item * keep => 'default'
151              
152             This is the default setting and it returns all the values that partially match, at least, one group in the C<$pattern>.
153              
154             =back
155              
156             In all cases, a given match is saved even when not all the pattern-items from a certain group match all the key-items of the match.
157              
158             For example, let's consider:
159              
160             my $pattern = [['f', 'o']];
161             my $key = [['f', 'o', 'o']];
162              
163             In the above scenario, the pattern will match the key, because C<'f'> and C<'o'> from the pattern will follow the path of the key.
164              
165             However, in the following case:
166              
167             my $pattern = [['f', 'o']];
168             my $key = [['foo']];
169              
170             the pattern will not match the key, because C<'foo'> is not stored on the C<'f'> node.
171              
172             =cut
173              
174             sub search {
175 16     16 1 9915 my ($self, $pattern, %opt) = @_;
176              
177 16         29 my $table = $self->{table};
178 16   100     44 my $keep = $opt{keep} // '';
179              
180 16         32 my (@matches, %seen);
181              
182 16         25 foreach my $group (@$pattern) {
183              
184 27         33 my $ref = $table;
185 27         30 foreach my $item (@$group) {
186 57 100       82 if (exists $ref->{$item}) {
187 51         100 $ref = $ref->{$item};
188             }
189             else {
190 6         7 $ref = undef;
191 6         9 last;
192             }
193             }
194              
195 27 100 66     80 if (defined($ref) and exists($ref->{$self->{special_key}})) {
    100          
196 21         26 foreach my $match (@{$ref->{$self->{special_key}}}) {
  21         33  
197 60 100       81 if (not exists $seen{$match}) {
198 52         72 $seen{$match} = 1;
199 52         85 push @matches, $match;
200             }
201             else {
202 8         44 ++$seen{$match};
203             }
204             }
205             }
206             elsif ($keep ne 'any') {
207 3         7 @matches = ();
208 3         3 last;
209             }
210             }
211              
212 16 100       51 if ($keep eq 'best') {
213 6         43 require List::Util;
214 6         21 my $max = List::Util::max(values %seen);
215 6         11 @matches = grep { $seen{$_} == $max } @matches;
  22         66  
216             }
217              
218 16         38 map { ; {match => $_, score => $seen{$_}} } @matches;
  28         123  
219             }
220              
221             =head1 EXAMPLE
222              
223             This example illustrates how to add some key/value pairs to the table
224             and how to search the table with a given pattern at a later time:
225              
226             use Search::MultiMatch;
227             use Data::Dump qw(pp);
228              
229             # Creates a SMM object
230             my $smm = Search::MultiMatch->new();
231              
232             # Create a 2D-array key, by splitting the string
233             # into words, then each word into characters.
234             sub make_key {
235             [map { [split //] } split(' ', lc($_[0]))];
236             }
237              
238             my @movies = (
239             'My First Lover',
240             'A Lot Like Love',
241             'Funny Games (2007)',
242             'Cinderella Man (2005)',
243             'Pulp Fiction (1994)',
244             'Don\'t Say a Word (2001)',
245             'Secret Window (2004)',
246             'The Lookout (2007)',
247             '88 Minutes (2007)',
248             'The Mothman Prophecies',
249             'Love Actually (2003)',
250             'From Paris with Love (2010)',
251             'P.S. I Love You (2007)',
252             );
253              
254             # Add the entries
255             foreach my $movie (@movies) {
256             $smm->add(make_key($movie), $movie);
257             }
258              
259             my $pattern = make_key('i love'); # make the search-pattern
260             my @matches = $smm->search($pattern); # search by the pattern
261              
262             pp \@matches; # dump the results
263              
264             The results are:
265              
266             [
267             {match => "P.S. I Love You (2007)", score => 2},
268             {match => "My First Lover", score => 1},
269             {match => "A Lot Like Love", score => 1},
270             {match => "Love Actually (2003)", score => 1},
271             {match => "From Paris with Love (2010)", score => 1},
272             ]
273              
274             =head1 REPOSITORY
275              
276             L
277              
278             =head1 AUTHOR
279              
280             Daniel Șuteu, C<< >>
281              
282             =head1 LICENSE AND COPYRIGHT
283              
284             Copyright 2016-2022 Daniel Șuteu.
285              
286             This program is free software; you can redistribute it and/or modify it
287             under the terms of the the Artistic License (2.0). You may obtain a
288             copy of the full license at:
289              
290             L
291              
292             =cut
293              
294             1; # End of Search::MultiMatch