File Coverage

blib/lib/Suggest/PrePop.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             package Suggest::PrePop;
2              
3 1     1   185339 use strict;
  1         2  
  1         22  
4 1     1   3 use warnings;
  1         1  
  1         29  
5             our $VERSION = '1.1.0';
6              
7 1     1   1156 use Moose;
  0            
  0            
8              
9             use Cache::RedisDB;
10             use List::Util qw(uniq);
11              
12             has cache_namespace => (
13             is => 'ro',
14             isa => 'Str',
15             default => 'SUGGEST-PREPOP',
16             );
17              
18             my $key_sep = '<>';
19              
20             has 'scopes' => (
21             is => 'ro',
22             isa => 'ArrayRef',
23             lazy => 1,
24             default => sub {
25             my $self = shift;
26             my $size = length($self->_cnt_key_base . $key_sep);
27             no warnings('substr');
28             return [sort { $a cmp $b }
29             map { substr($_, $size) // '' }
30             @{$self->_redis->keys($self->_cnt_key_base . '*')}];
31             },
32             );
33              
34             has _lex_key_base => (
35             is => 'ro',
36             isa => 'Str',
37             lazy => 1,
38             default => sub {
39             my $self = shift;
40             return join($key_sep, $self->cache_namespace, 'ITEMS_BY_LEX');
41             },
42             );
43              
44             has _cnt_key_base => (
45             is => 'ro',
46             isa => 'Str',
47             lazy => 1,
48             default => sub {
49             my $self = shift;
50             return join($key_sep, $self->cache_namespace, 'ITEMS_BY_COUNT');
51             },
52             );
53              
54             has min_activity => (
55             is => 'ro',
56             isa => 'Int',
57             default => 5,
58             );
59              
60             has entries_limit => (
61             is => 'ro',
62             isa => 'Int',
63             default => 32768,
64             );
65              
66             has top_count => (
67             is => 'ro',
68             isa => 'Int',
69             default => 5,
70             );
71              
72             # Convenience
73             sub _redis { Cache::RedisDB->redis }
74              
75             sub _lex_key {
76             my ($self, $scope) = @_;
77              
78             return ($scope)
79             ? join($key_sep, $self->_lex_key_base, uc $scope)
80             : $self->_lex_key_base;
81             }
82              
83             sub _cnt_key {
84             my ($self, $scope) = @_;
85              
86             return ($scope)
87             ? join($key_sep, $self->_cnt_key_base, uc $scope)
88             : $self->_cnt_key_base;
89             }
90              
91             sub add {
92             my ($self, $item, $count, @scopes) = @_;
93              
94             $count //= 1; # Most of the time we'll just get a single entry
95             @scopes = ('') unless @scopes;
96              
97             # For now, we just assume supplied items are well-formed
98             my $redis = $self->_redis;
99              
100             my $how_many = 0;
101             foreach my $scope (@scopes) {
102             # Lexically sorted items are all zero-scored
103             $redis->zadd($self->_lex_key($scope), 0, $item);
104              
105             # Score sorted items get incremented.
106             $how_many += $redis->zincrby($self->_cnt_key($scope), $count, $item);
107             }
108              
109             return $how_many;
110             }
111              
112             sub ask {
113             my ($self, $prefix, $count, @scopes) = @_;
114              
115             $count //= $self->top_count; # If they don't say we try to find the 5 best.
116             @scopes = ('') unless @scopes;
117              
118             my $redis = $self->_redis;
119              
120             my @full;
121              
122             foreach my $scope (@scopes) {
123             push @full, grep { $_->[1] >= $self->min_activity }
124             map { [$_, $redis->zscore($self->_cnt_key($scope), $_)] } @{
125             $redis->zrangebylex(
126             $self->_lex_key($scope),
127             '[' . $prefix,
128             '[' . $prefix . "\xff"
129             ) // []};
130             }
131              
132             @full = uniq map { $_->[0] } sort { $b->[1] <=> $a->[1] } @full;
133              
134             return [splice(@full, 0, $count)];
135             }
136              
137             sub prune {
138             my ($self, $keep, @scopes) = @_;
139              
140             $keep //= $self->entries_limit;
141             @scopes = ('') unless @scopes;
142              
143             my $redis = $self->_redis;
144              
145             my $count = 0;
146              
147             foreach my $scope (@scopes) {
148             # Count key is the one from which results are collated, so even
149             # if things are out of sync, this is the one about which we care.
150             next if ($redis->zcard($self->_cnt_key($scope)) <= $keep);
151              
152             my $final_index = -1 * $keep - 1; # Range below is inclusive.
153             my @to_prune =
154             @{$redis->zrange($self->_cnt_key($scope), 0, $final_index)};
155             $count += scalar @to_prune;
156              
157             # We're going to do this the slow way to keep them in sync.
158             foreach my $item (@to_prune) {
159             $redis->zrem($self->_cnt_key($scope), $item);
160             $redis->zrem($self->_lex_key($scope), $item);
161             }
162              
163             }
164              
165             return $count;
166             }
167              
168             1;
169              
170             __END__
171              
172             =encoding utf-8
173              
174             =head1 NAME
175              
176             Suggest::PrePop - suggestions based on prefix and popularity
177              
178             =head1 SYNOPSIS
179              
180             use Suggest::PrePop;
181             my $suggestor = Suggest::Prepop->new;
182             $suggestor->add("item - complete", 10);
183             $suggestor->ask("item"); ["item - complete"];
184              
185             =head1 DESCRIPTION
186              
187             Suggest::PrePop is a suggestion engine which uses a string prefix and
188             the popularity of items to make suggestions. This is pattern is most often
189             used for suggestions of partially typed items (e.g. web search forms.)
190              
191             =head1 METHODS
192              
193             =over 4
194              
195             =item new
196              
197             Constructor. The following attributes (with defaults) may be set:
198              
199             - C<cache_namespace> ('SUGGEST-PREPOP') - C<Cache::RedisDB> namespace to use for our accounting
200              
201             - C<min_activity> (5) - The minimum number of times an item must have been seen to be suggested
202              
203             - C<entries_limit> (32768) - The count of most popular entries to maintain in a purge event
204              
205             - C<top_count> (5) - The default number of entries to return from 'ask'
206              
207             =item scopes
208              
209             Return an array reference with all currently known scopes. Lazily computed on first call.
210              
211             =item add($item, [$count], [@scopes])
212              
213             Add C<$item> to the scope indices, or increment its current popularity. Any C<$count> is taken as the number of times it was seen; defaults to 1.
214              
215             =item ask($prefix, [$count], [@scopes])
216              
217             Suggest the C<$count> most popular items n the given scopes matching the supplied C<$prefix>. Defaults to 5.
218              
219             =item prune([$count], [@scopes])
220              
221             Prune all but the C<$count> most popular items from the given scopes. Defaults to the instance C<entries_limit>.
222              
223             =back
224              
225             =head1 AUTHOR
226             Inspire
227              
228             =head1 COPYRIGHT
229             Copyright 2016- Inspire.com
230              
231             =head1 LICENSE
232              
233             This library is free software; you can redistribute it and/or modify
234             it under the same terms as Perl itself.
235              
236             =head1 SEE ALSO
237              
238             =cut