File Coverage

blib/lib/Data/Favorites.pm
Criterion Covered Total %
statement 68 68 100.0
branch 21 28 75.0
condition 14 23 60.8
subroutine 10 10 100.0
pod 6 6 100.0
total 119 135 88.1


line stmt bran cond sub pod time code
1             # Data::Favorites - tally a data stream to find recently dominant items
2              
3             #----------------------------------------------------------------------------
4             #
5             # Copyright (C) 1998-2003 Ed Halley
6             # http://www.halley.cc/ed/
7             #
8             #----------------------------------------------------------------------------
9              
10             package Data::Favorites;
11 1     1   28951 use vars qw($VERSION);
  1         3  
  1         189  
12             $VERSION = 1.00;
13              
14             =head1 NAME
15              
16             Data::Favorites - tally a data stream to find recently dominant items
17              
18             =head1 SYNOPSIS
19              
20             use Data::Favorites;
21              
22             my $faves = new Data::Favorites();
23              
24             $faves->tally($_)
25             foreach (@history);
26              
27             $faves->decay( 2 ); # everyone loses two points
28              
29             $faves->clamp( time() - 24*60*60 ); # cull everyone older than a day
30              
31             print join("\n", $faves->favorites( 5 )), "\n";
32              
33             =head1 ABSTRACT
34              
35             A Favorites structure tracks the disposition of various keys. A key's
36             disposition is a measurement of its relative predominance and freshness
37             when tallied. This is a good way to infer favorites or other
38             leadership-oriented facts from a historical data stream.
39              
40             More specifically, this structure measures how often and when various
41             keys are triggered by application-defined events. Those keys that are
42             mentioned often will accumulate a higher number of tally points. Those
43             keys that have been mentioned recently will have newer "freshness"
44             stamps. Both of these factors are metered and will affect their
45             positioning in a ranking of the keys.
46              
47             At any time, keys can be culled by freshness or by their current ranking,
48             or both. With these approaches, dispositions can be weighed over the
49             whole historical record, rather than providing a simplistic "top events
50             in the last N events" rolling count. Thus, highly popular event keys may
51             remain in the set of favorites for some time, even when the key hasn't
52             been seen very often recently. Popular items can be decayed gradually
53             rather than cut out of a simple census window.
54              
55             =cut
56              
57             #------------------------------------------------------------------
58              
59 1     1   6 use warnings;
  1         1  
  1         30  
60 1     1   6 use strict;
  1         6  
  1         39  
61 1     1   4 use Carp;
  1         2  
  1         1382  
62              
63             #------------------------------------------------------------------
64              
65             =head1 METHODS
66              
67             =head2 new()
68              
69             $faves = new Data::Favorites( );
70              
71             $faves = new Data::Favorites( \&stamper );
72              
73             Create a new favorites counter object. The counter object can tally
74             given elements, and also stamp the "freshness" of each element with the
75             numerical return from the given stamper sub. If no sub code reference is
76             given, then the C built-in function is assumed by default. It is
77             assumed that the sub returns a number which generally increases in value
78             for fresher stamps.
79              
80             =cut
81              
82             # $faves = new Data::Favorites( );
83             #
84             sub new
85             {
86 1     1 1 563 my $proto = shift;
87 1   33     9 my $class = ref($proto) || $proto;
88 1         4 my $self =
89             {
90             # key => { tally => 1, stamp => 1 }
91             dispositions => { },
92             };
93 1 50       6 $self->{stamper} = shift if ref $_[0];
94 1         3 bless($self, $class);
95 1         4 return $self;
96             }
97              
98              
99             =head2 tally()
100              
101             $times = $faves->tally( $scalar );
102              
103             $times = $faves->tally( $scalar, $times );
104              
105             Return the current number of times the given C<$scalar> has been seen, or
106             increment that count by a given number of times. The first form returns
107             C if the C<$scalar> has never been tallied.
108              
109             Items are tracked by their string form, so if the scalars are perl
110             references, take note that the whole favorites counter will not persist
111             well. A future version may use C to allow for persistable
112             tracking of object data.
113              
114             Each key in the favorites counter is marked with a timestamp via the
115             C function, or the stamper sub reference given during creation of
116             the favorites counter object. In the case of an application-supplied
117             stamper function, it will receive two arguments: this favorites counter
118             itself, and the given scalar being tallied.
119              
120             =cut
121              
122             # $count = $faves->tally( $scalar );
123             # $count = $faves->tally( $scalar, $times );
124             #
125             sub tally
126             {
127 6     6 1 12 my $self = shift;
128 6         8 my $item = shift;
129              
130             # $faves->tally() or $faves->tally(undef) are calling errors.
131             #
132 6 50       15 carp "Called tally() with an undef key" if not defined $item;
133              
134             # $faves->tally(key) returns current tally for given key.
135             # Returns undef if the key doesn't exist.
136             #
137 6   100     36 my $disp = $self->{dispositions}{$item} || { };
138 6 100 66     28 return if (not exists $disp->{tally}) and (not @_);
139 5 100       16 return $disp->{tally} if not @_;
140              
141             # $faves->tally(key, amount) boosts current tally for given key.
142             # The key is fully created with the given amount if it did not exist.
143             # The key's freshness stamp is also updated.
144             #
145 4         5 my $tally = shift;
146 4         9 $self->{dispositions}{$item} = $disp;
147             $disp->{stamp} =
148             $self->{stamper}?
149 4 50       13 eval { &{$self->{stamper}}($self, $item) } :
  4         5  
  4         13  
150             time();
151 4         19 $disp->{tally} += $tally;
152 4         24 return $disp->{tally};
153             }
154              
155              
156             =head2 fresh()
157              
158             $stamp = $faves->fresh( $scalar );
159              
160             Return the current freshness stamp for the given C<$scalar>. Returns
161             C if the C<$scalar> has never been tallied.
162              
163             Each key in the favorites counter is marked with a timestamp via the
164             C function, or the stamper sub reference given during creation of
165             the favorites counter object. In the case of an application-supplied
166             stamper callback, it will receive two arguments: this favorites counter
167             itself, and the given scalar being tallied.
168              
169             =cut
170              
171             # $stamp = $faves->fresh( $scalar );
172             #
173             sub fresh
174             {
175 1     1 1 3 my $self = shift;
176 1         2 my $item = shift;
177              
178             # $faves->fresh() or $faves->fresh(undef) are calling errors.
179             #
180 1 50       4 carp "Called fresh() with an undef key" if not defined $item;
181              
182             # $faves->fresh(key) returns current tally for given key.
183             # Returns undef if the key doesn't exist.
184             #
185 1   50     5 my $disp = $self->{dispositions}{$item} || { };
186 1   50     6 return $disp->{stamp} || undef;
187             }
188              
189              
190             =head2 decay()
191              
192             $count = $faves->decay( );
193             $count = $faves->decay( $times );
194              
195             $times = $faves->decay( $scalar );
196             $times = $faves->decay( $scalar, $times );
197              
198             In the first pair of forms, all present keys have their tally counts
199             reduced by one, or by the given number of times. In these forms, the
200             returned value is the remaining count of tracked favorite keys.
201              
202             In the latter pair of forms, an individual key C<$scalar> has its tally
203             reduced by one, or by the given number of times. These forms return the
204             remaining tally count for the given C<$scalar> key.
205              
206             The favorites counter will automatically remove any key in which the
207             tally count drops to zero or below.
208              
209             =cut
210              
211             # $count = $faves->decay( );
212             # $count = $faves->decay( $times );
213             # $times = $faves->decay( $scalar );
214             # $times = $faves->decay( $scalar, $times );
215             #
216             sub decay
217             {
218 4     4 1 762 my $self = shift;
219 4         8 my $disp = $self->{dispositions};
220 4         13 my $tally = 1;
221 4         6 my $item;
222              
223             # $faves->decay(key) decays one key by 1 tally
224             # $faves->decay(key, amount) decays one key by given amount
225             #
226 4 100 66     26 if (@_ && exists $disp->{$_[0]})
227             {
228 3         4 $item = shift;
229 3 50       8 $tally = shift if @_;
230 3 100       11 if (($disp->{$item}{tally} -= $tally) <= 0)
231             {
232 1         3 delete $disp->{$item};
233 1         3 return;
234             }
235 2         5 return $disp->{$item}{tally};
236             }
237              
238             # $faves->decay() decays all keys by 1 tally
239             # $faves->decay(amount) decays all keys by given amount
240             # Recurses to perform actual decay on each key.
241             #
242 1 50       621 $tally = shift if @_;
243 1         5 foreach $item (keys %$disp)
244             {
245 3         11 $self->decay($item, $tally);
246             }
247 1         4 return scalar keys %$disp;
248             }
249              
250              
251             =head2 clamp()
252              
253             $count = $faves->clamp( $stamp );
254              
255             Clamps the set of favorites to only the freshest tallied elements. This
256             method automatically removes any key in which the most recent tally is
257             more stale than the given timestamp value. Timestamps are assumed to be
258             numerical; lesser values represent stamps which are more stale, while
259             higher values are considered more fresh.
260              
261             =cut
262              
263             # $count = $faves->clamp( $stamp );
264             #
265             sub clamp
266             {
267 1     1 1 3 my $self = shift;
268 1         2 my $disp = $self->{dispositions};
269 1   50     4 my $stamp = shift || 0;
270              
271 1         4 my @items = keys %$disp;
272 1         4 while (@items)
273             {
274 2         4 my $item = shift @items;
275 2 100       10 delete $disp->{$item}
276             if $disp->{$item}{stamp} < $stamp;
277             }
278              
279 1         3 return scalar keys %$disp;
280             }
281              
282              
283             =head2 favorites()
284              
285             @topfaves = $faves->favorites( );
286             @topfaves = $faves->favorites( $limit );
287             $count = scalar $faves->favorites( );
288              
289             Returns the keys sorted by the strength of their tally counts. Those
290             which have equal tally counts are compared by their most recent tally
291             time; the most freshly stamped is favored. If a limit is given, the list
292             returned will not exceed the given length.
293              
294             In a scalar context, returns the current count of the tallied keys in the
295             favorites counter. If no limit argument is given, then no internal
296             sorting work needs to be performed to return the count.
297              
298             =cut
299              
300             # @faves = $faves->favorites( );
301             # @faves = $faves->favorites( $limit );
302             # $count = scalar $faves->favorites( );
303             #
304             sub favorites
305             {
306 5     5 1 688 my $self = shift;
307 5         10 my $disp = $self->{dispositions};
308 5 100 66     40 return scalar keys %$disp
309             if (not @_) and (not wantarray);
310 6 50       22 my @faves = sort
311             {
312 2         9 ($disp->{$b}{tally} <=> $disp->{$a}{tally}) ||
313             ($disp->{$b}{stamp} <=> $disp->{$a}{stamp})
314 2         3 } keys %{$disp};
315 2   66     12 my $limit = shift || @faves;
316 2 100       14 $#faves = $limit-1 if @faves > $limit;
317 2         10 return @faves;
318             }
319              
320             #------------------------------------------------------------------
321              
322             1;
323              
324             __END__