File Coverage

blib/lib/Statistics/WeightedSelection.pm
Criterion Covered Total %
statement 115 118 97.4
branch 36 40 90.0
condition 7 11 63.6
subroutine 14 14 100.0
pod 7 7 100.0
total 179 190 94.2


line stmt bran cond sub pod time code
1             package Statistics::WeightedSelection;
2             $Statistics::WeightedSelection::VERSION = '0.002';
3 1     1   25586 use strict;
  1         3  
  1         41  
4 1     1   7 use warnings;
  1         2  
  1         36  
5 1     1   5 use Carp qw/croak cluck/;
  1         7  
  1         78  
6              
7 1     1   6 use Storable qw/freeze/;
  1         1  
  1         1479  
8              
9             sub new {
10 3     3 1 3081 my ($class, %args) = @_;
11              
12 3   100     28 return bless {
13             objects => [],
14             id_lookup => {},
15             replace_after_get => $args{replace_after_get} || 0,
16             }, $class;
17             }
18              
19             sub add {
20 65     65 1 1870 my ($self, %args) = @_;
21              
22             # a non-zero number which can include a decimal
23 65         106 my $weight = $args{weight};
24              
25             # the scalar item that becomes a candidate for random selection
26 65         89 my $object = $args{object};
27              
28             # an optional id, which can be used for later removal from pool
29 65 100       781 my $id = defined $args{id} ? $args{id} : (ref $object ? freeze($object) : $object);
    100          
30              
31 65 100 100     507 unless ($weight && $object) {
32 2         221 croak 'Calls to ' . __PACKAGE__ . "::add() must include an arg named 'object'"
33             . " and a non-zero weight\n";
34             }
35              
36 63 50       283 unless ($weight =~ /^\d+(\.\d*)?/) {
37 0         0 croak 'Calls to ' . __PACKAGE__ . "::add()'s must include an arg named 'weight'"
38             . " that must be a whole integer or number with decimal\n";
39             }
40              
41             # in order to derive the starting_index (see the next stanza),
42             # we need the last item in @$self
43 63         148 my $last = $self->_get_last;
44              
45             # develops a structure that looks like the following, with each element being
46             # [
47             # {
48             # starting_index => 0,
49             # length => 40,
50             # object => 'apple',
51             # id => 'gwr3723',
52             # },
53             # {
54             # starting_index => 40,
55             # length => 16,
56             # object => 'plum',
57             # id => 'avx9716',
58             # },
59             # {
60             # starting_index => 56,
61             # length => 3.4
62             # object => 'peach',
63             # id => 'zzi1250',
64             # },
65             # {
66             # starting_index => 59.4,
67             # length => 60,
68             # object => 'mango',
69             # id => 'umn2932',
70             # },
71             # ]
72              
73 63 100       84 push @{$self->{objects}}, {
  63         344  
74             # if this is the first object, our starting index is zero, and otherwise, we add the
75             # new weight to the previous starting_index + weight for future binary search.
76             starting_index => defined $last ? $last->{starting_index} + $last->{weight} : 0,
77             weight => $weight,
78             object => $object,
79             id => $id,
80             };
81              
82             # I decided to use a hash with empty values for pointers to the index for a given
83             # id or frozen object scalar, rather than using an arrayref, which might have been
84             # more intuitive. More than one added object can have the same id, and I wanted a
85             # quick way to remove individual object pointers without having to iterate over an
86             # array somehow.
87 63         241 $self->{id_lookup}->{$id}->{$#{$self->{objects}}} = undef;
  63         180  
88              
89 63         84 return $#{$self->{objects}};
  63         197  
90             }
91              
92             sub remove {
93 12     12 1 34 my ($self, $id) = @_;
94              
95 12 100       39 unless (defined $id) {
96 1         73 croak 'Calls to ' . __PACKAGE__ . "::remove() must include an id to remove\n";
97             }
98              
99 11 100       38 $id = ref $id ? freeze($id) : $id;
100              
101             # delete the pointer altogether, as all of the objects with this id will be removed.
102 11         173 my $indexes = delete $self->{id_lookup}->{$id};
103              
104 11 50 33     44 unless ($indexes && %{$indexes}) {
  11         73  
105             # no need for this to be fatal, but it might indicate a bug in caller's code
106 0         0 cluck "Key $id contains no associated indexes currently\n";
107 0         0 return;
108             }
109              
110 11         21 my @reverse_sorted_indexes = sort {$b <=> $a} keys %{$indexes};
  10         65  
  11         70  
111              
112 11         29 my @removed;
113 11         21 for my $index (@reverse_sorted_indexes) {
114             # remove all items that were pointed to that had this id
115             # doing them in reverse, so that splicing out an earlier item won't
116             # alter the indexes of items we want to splice out later.
117 18         24 push @removed, splice(@{$self->{objects}}, $index, 1);
  18         64  
118             }
119              
120             # we've altered our items, and need to fix our object starting indexes
121             # and pointers for our keys.
122 11         48 $self->_consolidate(reverse @reverse_sorted_indexes);
123              
124             # return the removed items
125 11         21 return map {$_->{object}} @removed;
  18         99  
126             }
127              
128             sub get {
129 1410009     1410009 1 135691523 my ($self, $override_replacement) = @_;
130 1410009 100       1631470 return unless @{ $self->{objects} };
  1410009         3779885  
131              
132             # when adding the starting_index and length together of the last item in @$self,
133             # (see the generated structure note in the add() method for more info), the
134             # max random number to generate is determined.
135 1410008         2831972 my $last = $self->_get_last;
136 1410008         4115240 my $random = rand($last->{starting_index} + $last->{weight});
137              
138             # binary search to quickly find the weighted index range. the random number ($random)
139             # is tested against the range of starting_index and (starting_index + length) to
140             # determine if the number is lower or higher than the current arrayref until a match
141             # is found.
142 1410008         1502877 my $max = $#{ $self->{objects} };
  1410008         2421943  
143 1410008         1665249 my $min = 0;
144 1410008         1474741 my $index = 0;
145 1410008         2944735 while ( $max >= $min ) {
146 2197290         3096689 $index = int( ( $max + $min ) / 2 );
147 2197290         3133432 my $current_object = $self->{objects}->[$index];
148              
149 2197290 100       7033031 if ( $random < $current_object->{starting_index} ) {
    100          
150 336219         820386 $max = $index - 1;
151             }
152             elsif ( $random >= ($current_object->{starting_index} + $current_object->{weight}) ) {
153 451063         1044923 $min = $index + 1;
154             }
155             else {
156 1410008         1947758 last;
157             }
158             }
159              
160 1410008         1387177 my $random_element;
161              
162             # don't run the removal logic in the else block if the user instructed us to replace
163             # the object
164 1410008 100       2806994 if ($self->replace_after_get) {
165 10002         15080 $random_element = $self->{objects}->[$index];
166             }
167             else {
168             # remove the element in question
169 1400006         1435629 $random_element = splice(@{$self->{objects}}, $index, 1);
  1400006         2824386  
170              
171             # delete out the specific pointer to this item
172 1400006         3504865 delete $self->{id_lookup}->{$random_element->{id}}->{$index};
173 1400006 100       1419900 unless (keys %{$self->{id_lookup}->{$random_element->{id}}}) {
  1400006         4732865  
174 1324070         2538678 delete $self->{id_lookup}->{$random_element->{id}};
175             }
176              
177             # fix starting indexes and pointers for other objects.
178 1400006         3623619 $self->_consolidate($index);
179             }
180              
181             # return our selected object:
182 1410008         5990402 return $random_element->{object};
183             }
184              
185             sub replace_after_get {
186 1410011     1410011 1 1875071 my ($self, $new_setting) = @_;
187 1410011 100       2638280 $self->{replace_after_get} = $new_setting if defined $new_setting;
188 1410011         3418889 return $self->{replace_after_get};
189             }
190              
191             sub clear {
192 17     17 1 32550 my ($self) = @_;
193 17         230 $self->{objects} = [];
194 17         75 $self->{id_lookup} = {};
195 17         67 return;
196             }
197              
198             sub count {
199 1410067     1410067 1 8551947 my ($self) = @_;
200 1410067 100       1561496 return 0 if !@{ $self->{objects} };
  1410067         3952564  
201 1130062         1427409 return $#{ $self->{objects} } + 1;
  1130062         2878415  
202             }
203              
204             sub _dump {
205 1130152     1130152   1394840 my ($self) = @_;
206             # basically, don't return starting_index, as that is an internal implementation
207             # detail, and doesn't need to be shown to the user
208 2450418         11742207 return [map { {object => $_->{object}, weight => $_->{weight}, id => $_->{id}} }
  1130152         2111817  
209 1130152         1224462 @{$self->{objects}}];
210             }
211              
212             # internal method to fix id pointers and starting_indexes after a remove() or get() call
213             sub _consolidate {
214 1400017     1400017   1825569 my $self = shift;
215              
216             # disregard all indexes greater than the current length of our objects property array
217 1400017         2231481 my @removed_indexes = sort grep {$_ <= $#{ $self->{objects} }} @_;
  1400024         1545735  
  1400024         5850932  
218              
219             # separate our list of objects into segments bookended by our removed indexes
220             # say we started with something like:
221             # 0 1 2 3 4 5 6
222             # 'alan', 'nate', 'brian', 'bob', 'nate', 'ryan', 'nate'
223             #
224             # and we called remove() on 'nate', then (1, 4, 6) would be passed to this routine, which
225             # are array indexes, NOT our starting_indexes. 6 will be removed from consideration, as
226             # it was at the end of the array.
227             #
228             # our first segment (former indexes 2 and 3) would be fixed as such:
229             # well, we need to change the following as a result:
230             # update 'brian''s starting index to be the starting index of 'alan' + its weight,
231             # and subtract 1 (as we removed 1 objects before this one) from its pointer in the id lookup.
232             # update 'bob''s starting index to be the starting index of 'brian' + its weight
233             # and subtract 1 (as we removed 1 objects before this one) from its pointer in the id lookup.
234             #
235             # our second segment (former index 5) would be fixed as such:
236             # well, we need to change the following as a result:
237             # update 'ryan''s starting index to be the starting index of 'bob' + its weight
238             # and subtract 2 (as we removed 2 objects before this one) from its pointer in the id lookup.
239              
240 1400017         3185615 for my $removed_index_index (0..$#removed_indexes) {
241             # find our range bookends
242 833080         1180827 my $range_start = $removed_indexes[$removed_index_index];
243 833080         1450459 my $range_end = $removed_index_index == $#removed_indexes
244 833080 50       1509856 ? $#{ $self->{objects} }
245             : $removed_indexes[$removed_index_index + 1] - 1;
246              
247             # how many indexes were removed before our current segment, and thus the amount to subtract
248             # from the respective pointers
249 833080         1328239 my $to_subtract = @removed_indexes - $removed_index_index;
250              
251 833080         945976 my %ids_evaluated_for_range;
252 833080         1325279 for my $object_index ($range_start..$range_end) {
253 1481143         2158402 my $object = $self->{objects}->[$object_index];
254              
255             # do all of the subtractions of pointer indexes at once for each segment.
256 1481143 100       4520148 if (!$ids_evaluated_for_range{$object->{id}}++) {
257 1407334 50 33     1578295 for my $index (
  1375281         7138892  
258 1407334         4444907 grep {$_ >= $range_start && ($removed_index_index == $#removed_indexes || $_ <= $range_end)}
259             keys %{$self->{id_lookup}->{$object->{id}}}
260             ) {
261 1375281         3159560 delete $self->{id_lookup}->{$object->{id}}->{$index};
262 1375281         4714860 $self->{id_lookup}->{$object->{id}}->{$index - $to_subtract} = undef;
263             }
264             }
265              
266             # fix the starting indexes
267             $object->{starting_index} = $object_index == 0
268             ? 0
269 1481143 100       4163377 : do {
270 961403         1620371 my $previous_object = $self->{objects}->[$object_index - 1];
271 961403         4249274 $previous_object->{starting_index} + $previous_object->{weight};
272             };
273             }
274             }
275              
276 1400017         3017955 return;
277             }
278              
279              
280             # simply a utility method to get the last item in the blessed array, as it contains all
281             # the information needed to add items and generate a random number.
282             sub _get_last {
283 1410071     1410071   1717877 my ($self) = @_;
284 1410071 100       1421169 return unless @{ $self->{objects} };
  1410071         3201547  
285 1410056         2087679 return $self->{objects}->[$#{ $self->{objects} }];
  1410056         3271495  
286             }
287              
288             1;
289              
290             __END__