File Coverage

blib/lib/Set/ConsistentHash.pm
Criterion Covered Total %
statement 76 89 85.3
branch 17 26 65.3
condition 3 3 100.0
subroutine 13 17 76.4
pod 11 11 100.0
total 120 146 82.1


line stmt bran cond sub pod time code
1             package Set::ConsistentHash;
2 2     2   69278 use strict;
  2         7  
  2         97  
3 2     2   2067 use Digest::SHA1 qw(sha1);
  2         2193  
  2         178  
4 2     2   14 use Carp qw(croak);
  2         8  
  2         124  
5 2     2   10 use vars qw($VERSION);
  2         6  
  2         3827  
6             $VERSION = '0.92';
7              
8             =head1 NAME
9              
10             Set::ConsistentHash - library for doing consistent hashing
11              
12             =head1 SYNOPSIS
13              
14             my $set = Set::ConsistentHash->new;
15              
16             =head1 OVERVIEW
17              
18             Description, shamelessly stolen from Wikipedia:
19              
20             Consistent hashing is a scheme that provides hash table
21             functionality in a way that the addition or removal of one slot does
22             not significantly change the mapping of keys to slots. In contrast,
23             in most traditional hash tables, a change in the number of array
24             slots causes nearly all keys to be remapped.
25              
26             Consistent hashing was introduced in 1997 as a way of distributing
27             requests among a changing population of web servers. More recently,
28             it and similar techniques have been employed in distributed hash
29             tables.
30              
31             You're encouraged to read the original paper, linked below.
32              
33             =head1 TERMINOLOGY
34              
35             Terminology about this stuff seems to vary. For clarity, this module
36             uses the following:
37              
38             B -- The object you work with. Contains 0 or more
39             "targets", each with a weight.
40              
41             B -- A member of the set. The weight (an arbitrary number),
42             specifies how often it occurs relative to other targets.
43              
44             =head1 CLASS METHODS
45              
46             =head2 new
47              
48             $set = Set::ConsistentHash->new;
49              
50             Takes no options. Creates a new consistent hashing set with no
51             targets. You'll need to add them.
52              
53             =cut
54              
55             # creates a new consistent hashing set with no targets. you'll need to add targets.
56             sub new {
57 2     2 1 42 my $class = shift;
58 2 50       10 croak("Unknown parameters") if @_;
59 2         23 my $self = bless {
60             weights => {}, # $target => integer $weight
61             points => {}, # 32-bit value points on 'circle' => \$target
62             order => [], # 32-bit points, sorted
63             buckets => undef, # when requested, arrayref of 1024 buckets mapping to targets
64             total_weight => undef, # when requested, total weight of all targets
65             hash_func => undef, # hash function for key lookup
66             }, $class;
67 2         8 return $self;
68             }
69              
70             ############################################################################
71              
72             =head1 INSTANCE METHODS
73              
74             =cut
75              
76             ############################################################################
77              
78             =head2 targets
79              
80             Returns (alphabetically sorted) array of all targets in set.
81              
82             =cut
83              
84             sub targets {
85 1     1 1 13 my $self = shift;
86 1         3 return sort keys %{$self->{weights}};
  1         14  
87             }
88              
89             ############################################################################
90              
91             =head2 reset_targets
92              
93             Remove all targets.
94              
95             =cut
96              
97             sub reset_targets {
98 0     0 1 0 my $self = shift;
99 0         0 $self->modify_targets(map { $_ => 0 } $self->targets);
  0         0  
100             }
101             *clear = \&reset_targets;
102              
103             ############################################################################
104              
105             =head2 set_targets
106              
107             $set->set_targets(%target_to_weight);
108             $set->set_targets("foo" => 5, "bar" => 10);
109              
110             Removes all targets, then sets the provided ones with the weightings provided.
111              
112             =cut
113              
114             sub set_targets {
115 0     0 1 0 my $self = shift;
116 0         0 $self->reset_targets;
117 0         0 $self->modify_targets(@_);
118             }
119              
120             ############################################################################
121              
122             =head2 modify_targets
123              
124             $set->modify_targets(%target_to_weight);
125              
126             Without removing existing targets, modifies the weighting of provided
127             targets. A weight of undef or 0 removes an item from the set.
128              
129             =cut
130              
131             # add/modify targets. parameters are %weights: $target -> $weight
132             sub modify_targets {
133 2     2 1 19 my ($self, %weights) = @_;
134              
135             # uncache stuff:
136 2         11 $self->{total_weight} = undef;
137 2         5 $self->{buckets} = undef;
138              
139 2         14 while (my ($target, $weight) = each %weights) {
140 6 50       16 if ($weight) {
141 6         24 $self->{weights}{$target} = $weight;
142             } else {
143 0         0 delete $self->{weights}{$target};
144             }
145             }
146 2         10 $self->_redo_circle;
147             }
148              
149             ############################################################################
150              
151             =head2 set_target
152              
153             $set->set_target($target => $weight);
154              
155             A wrapper around modify_targets that sounds better for modifying a single item.
156              
157             =cut
158              
159             *set_target = \&modify_targets;
160              
161             ############################################################################
162              
163             =head2 total_weight
164              
165             Returns sum of all current targets' weights.
166              
167             =cut
168              
169             #'
170             sub total_weight {
171 3     3 1 5 my $self = shift;
172 3 100       408 return $self->{total_weight} if defined $self->{total_weight};
173 1         3 my $sum = 0;
174 1         2 foreach my $val (values %{$self->{weights}}) {
  1         4  
175 3         6 $sum += $val;
176             }
177 1         392 return $self->{total_weight} = $sum;
178             }
179              
180             ############################################################################
181              
182             =head2 percent_weight
183              
184             $weight = $set->percent_weight($target);
185             $weight = $set->percent_weight("10.0.0.2");
186              
187             Returns number in range [0,100] representing percentage of weight that provided $target has.
188              
189             =cut
190              
191             sub percent_weight {
192 3     3 1 24 my ($self, $target) = @_;
193 3 50       13 return 0 unless $self->{weights}{$target};
194 3         12 return 100 * $self->{weights}{$target} / $self->total_weight;
195             }
196              
197             ############################################################################
198              
199             =head2 set_hash_func
200              
201             $set->set_hash_func(\&your_hash_func);
202              
203             Sets the function with which keys will be hashed before looking up
204             which target they will be mapped onto.
205              
206             =cut
207              
208             sub set_hash_func {
209 0     0 1 0 my ($self, $hash_func) = @_;
210 0         0 $self->{hash_func} = $hash_func;
211             }
212              
213             ############################################################################
214              
215             =head2 get_target
216              
217             $selected_target = $set->get_target(your_hash_func($your_key));
218              
219             - or -
220              
221             $set->set_hash_func(\&your_hash_func);
222             $selected_target = $set->get_target($your_key);
223              
224             Given a key, select the target in the set to which that key is mapped.
225              
226             If you find the target (say, a server) to be dead or otherwise
227             unavailable, remove it from the set, and get the target again.
228              
229             =cut
230              
231             sub get_target {
232 0     0 1 0 my ($self, $key) = @_;
233 0 0       0 _compute_buckets($self) unless $self->{buckets};
234 0 0       0 $key = $self->{hash_func}->($key) if $self->{hash_func};
235 0         0 return $self->{buckets}->[$key % 1024];
236             }
237              
238             =head2 buckets
239              
240             $selected_target = $set->buckets->[your_hash_func($your_key) % 1024];
241              
242             Returns an arrayref of 1024 selected items from the set, in a consistent order.
243              
244             This is what you want to use to actually select items quickly in your
245             application.
246              
247             If you find the target (say, a server) to be dead, or otherwise
248             unavailable, remove it from the set, and look at that index in the
249             bucket arrayref again.
250              
251             =cut
252              
253             # returns arrayref of 1024 buckets. each array element is the $target for that bucket index.
254             sub buckets {
255 2     2 1 1030 my $self = shift;
256 2 50       13 _compute_buckets($self) unless $self->{buckets};
257 2         17 return $self->{buckets};
258             }
259              
260             ############################################################################
261              
262             =head1 INTERNALS
263              
264             =head2 _compute_buckets
265              
266             Computes and returns an array of 1024 selected items from the set,
267             in a consistent order.
268              
269             =cut
270              
271             # Computes and returns array of 1024 buckets. Each array element is the
272             # $target for that bucket index.
273             sub _compute_buckets {
274 2     2   4 my $self = shift;
275 2         5 my @buckets = ();
276 2         5 my $by = 2**22; # 2**32 / 2**10 (1024)
277 2         4 my $pt = 0;
278 2         7 for my $n (0..1023) {
279 2048         4227 $buckets[$n] = $self->target_of_point($pt);
280 2048         3794 $pt += $by;
281             }
282 2         14 return $self->{buckets} = \@buckets;
283             }
284              
285             =head2 target_of_point
286              
287             $target = $set->target_of_point($point)
288              
289             Given a $point, an integer in the range [0,2**32), returns (somewhat
290             slowly), the next target found, clockwise from that point on the circle.
291              
292             This is mostly an internal method, used to generated the 1024-element
293             cached bucket arrayref when needed. You probably don't want to use this.
294             Instead, use the B method, and run your hash function on your key,
295             generating an integer, modulous 1024, and looking up that bucket index's target.
296              
297             =cut
298              
299             # given a $point [0,2**32), returns the $target that's next going around the circle
300             sub target_of_point {
301 102048     102048 1 1002405 my ($self, $pt) = @_; # $pt is 32-bit unsigned integer
302              
303 102048         166531 my $order = $self->{order};
304 102048         154903 my $circle_pt = $self->{points};
305              
306 102048         154746 my ($lo, $hi) = (0, scalar(@$order)-1); # inclusive candidates
307              
308 102048         120426 while (1) {
309 796289         1289320 my $mid = int(($lo + $hi) / 2);
310 796289         1136891 my $val_at_mid = $order->[$mid];
311 796289 100       1520135 my $val_one_below = $mid ? $order->[$mid-1] : 0;
312              
313             # match
314 796289 100 100     2603998 return ${ $circle_pt->{$order->[$mid]} } if
  101713         431109  
315             $pt <= $val_at_mid && $pt > $val_one_below;
316              
317             # wrap-around match
318 694576 100       1354108 return ${ $circle_pt->{$order->[0]} } if
  335         1533  
319             $lo == $hi;
320              
321             # too low, go up.
322 694241 100       1150946 if ($val_at_mid < $pt) {
323 374780         460632 $lo = $mid + 1;
324 374780 50       734820 $lo = $hi if $lo > $hi;
325             }
326             # too high
327             else {
328 319461         356926 $hi = $mid - 1;
329 319461 100       647700 $hi = $lo if $hi < $lo;
330             }
331              
332 694241         1024520 next;
333             }
334             };
335              
336             ############################################################################
337             # Internal...
338             ############################################################################
339              
340             sub _redo_circle {
341 2     2   3 my $self = shift;
342              
343 2         6 my $pts = $self->{points} = {};
344 2         3 while (my ($target, $weight) = each %{$self->{weights}}) {
  8         46  
345 6         13 my $num_pts = $weight * 100;
346 6         15 foreach my $ptn (1..$num_pts) {
347 700         1276 my $key = "$target-$ptn";
348 700         2890 my $val = unpack("L", substr(sha1($key), 0, 4));
349 700         3398 $pts->{$val} = \$target;
350             }
351             }
352              
353 2         142 $self->{order} = [ sort { $a <=> $b } keys %$pts ];
  5078         6056  
354             }
355              
356              
357             =head1 REFERENCES
358              
359             L
360              
361             L
362              
363             =head1 AUTHOR
364              
365             Brad Fitzpatrick -- brad@danga.com
366              
367             =head1 CONTRIBUTING
368              
369             Bug, performance, doc, feature patch? See
370             L
371              
372             =head1 COPYRIGHT & LICENSE
373              
374             Copyright 2007, Six Apart, Ltd.
375              
376             You're granted permission to use this code under the same terms as Perl itself.
377              
378             =head1 WARRANTY
379              
380             This is free software. It comes with no warranty of any kind.
381              
382             =cut
383              
384             1;