File Coverage

blib/lib/List/Unique/DeterministicOrder.pm
Criterion Covered Total %
statement 80 83 96.3
branch 12 16 75.0
condition 3 4 75.0
subroutine 19 19 100.0
pod 10 10 100.0
total 124 132 93.9


line stmt bran cond sub pod time code
1             package List::Unique::DeterministicOrder;
2            
3 2     2   139769 use 5.010;
  2         14  
4 2     2   8 use Carp;
  2         4  
  2         92  
5 2     2   10 use strict;
  2         4  
  2         30  
6 2     2   22 use warnings;
  2         14  
  2         84  
7 2     2   11 use List::Util 1.45 qw /uniq/;
  2         27  
  2         212  
8            
9             our $VERSION = 0.002;
10            
11             #no autovivification;
12            
13             use constant {
14 2         287 _ARRAY => 0, # ordered keys
15             _HASH => 1, # unordered keys
16 2     2   12 };
  2         4  
17            
18             use overload
19 6     6   322 q{bool} => sub { !!%{ $_[0]->[_HASH] } },
  6         20  
20 2     2   2567 fallback => 1;
  2         1734  
  2         15  
21            
22             sub new {
23 4     4 1 90 my ($package, %args) = @_;
24            
25 4         10 my $self = [ [], {} ];
26            
27             # use data if we were passed some
28 4 100       10 if (my $data = $args{data}) {
29 3         14 my %hash;
30 3         24 @hash{@$data} = (0..$#$data);
31             # rebuild the lists if there were dups
32 3 50       11 if (scalar keys %hash != scalar @$data) {
33 0         0 my @uniq = uniq @$data;
34 0         0 @hash{@uniq} = (0..$#uniq);
35 0         0 $self->[_ARRAY] = \@uniq;
36             }
37             else {
38 3         11 $self->[_ARRAY] = [@$data];
39             }
40 3         7 $self->[_HASH] = \%hash;
41             }
42            
43 4         11 return bless $self, $package;
44             }
45            
46             sub clone {
47 1     1 1 6 my $self = shift;
48 1         3 my $cloned = $self->new ();
49 1         2 $cloned->[_ARRAY] = [@{$self->[_ARRAY]}];
  1         5  
50 1         3 $cloned->[_HASH] = {%{$self->[_HASH]}};
  1         9  
51 1         3 return $cloned;
52             }
53            
54             sub exists {
55 12     12 1 3173 exists $_[0]->[_HASH]{$_[1]};
56             }
57            
58             sub keys {
59             wantarray
60 1         5 ? @{$_[0]->[_ARRAY]}
61 8 100   8 1 1522 : scalar @{$_[0]->[_ARRAY]};
  7         21  
62             }
63            
64             sub push {
65 9 100   9 1 453 return if exists $_[0]->[_HASH]{$_[1]};
66            
67 6         8 push @{$_[0]->[_ARRAY]}, $_[1];
  6         13  
68 6         10 $_[0]->[_HASH]{$_[1]} = $#{$_[0]->[_ARRAY]};
  6         15  
69             }
70            
71             sub pop {
72 1     1 1 235 my $key = pop @{$_[0]->[_ARRAY]};
  1         4  
73 1         3 delete $_[0]->[_HASH]{$key};
74 1         2 $key;
75             }
76            
77             # returns undef if key not in hash
78             sub get_key_pos {
79 3     3 1 449 $_[0]->[_HASH]{$_[1]};
80             }
81            
82            
83             # returns undef if index is out of bounds
84             sub get_key_at_pos {
85 7     7 1 2154 $_[0]->[_ARRAY][$_[1]];
86             }
87            
88             # does nothing if key does not exist
89             sub delete {
90 4     4 1 17 my ($self, $key) = @_;
91            
92             # get the index while cleaning up
93 4   100     16 my $pos = CORE::delete $self->[_HASH]{$key}
94             // return;
95            
96 3         5 my $move_key = CORE::pop @{$self->[_ARRAY]};
  3         6  
97             # make sure we don't just reinsert the last item
98             # from a single item list
99 3 50       11 if ($move_key ne $key) {
100 3         5 $self->[_HASH]{$move_key} = $pos;
101 3         6 $self->[_ARRAY][$pos] = $move_key;
102             }
103            
104 3         5 return $key;
105             }
106            
107             # Delete the key at the specified position
108             # and move the last key into it.
109             # Does nothing if key does not exist
110             sub delete_key_at_pos {
111 2     2 1 416 my ($self, $pos) = @_;
112            
113 2   50     8 my $key = $self->[_ARRAY][$pos]
114             // return;
115            
116 2         4 my $move_key = CORE::pop @{$self->[_ARRAY]};
  2         6  
117 2         4 CORE::delete $self->[_HASH]{$key};
118            
119             # make sure we don't just reinsert the last item
120             # from a single item list
121 2 100       6 if ($move_key ne $key) {
122 1         3 $self->[_HASH]{$move_key} = $pos;
123 1         2 $self->[_ARRAY][$pos] = $move_key;
124             }
125            
126 2         5 return $key;
127             }
128            
129             # Delete the key at the specified position
130             # and move the last key into it.
131             # Not a true splice, but one day might work
132             # on multiple indices.
133             #sub splice {
134             # my ($self, $pos) = @_;
135             #
136             # my $key = $self->[_ARRAY][$pos]
137             # // return;
138             #
139             # my $move_key = CORE::pop @{$self->[_ARRAY]};
140             # $self->[_HASH]{$move_key} = $pos;
141             # $self->[_ARRAY][$pos] = $move_key;
142             # CORE::delete $self->[_HASH]{$key};
143             # return $key;
144             #}
145            
146            
147             sub _paranoia {
148 2     2   943 my ($self) = @_;
149            
150 2         6 my $array_len = @{$self->[_ARRAY]};
  2         5  
151 2         3 my $hash_len = CORE::keys %{$self->[_HASH]};
  2         5  
152 2 50       9 croak "array and hash key mismatch" if $array_len != $hash_len;
153            
154 2         3 foreach my $key (@{$self->[_ARRAY]}) {
  2         5  
155             croak "Key mismatch between array and hash lists"
156 10 50       19 if !CORE::exists $self->[_HASH]{$key};
157             }
158            
159 2         5 return 1;
160             }
161            
162             1;
163            
164             =head1 NAME
165            
166             List::Unique::DeterministicOrder - Store and access
167             a list of keys using a deterministic order based on
168             the sequence of insertions and deletions
169            
170             =head1 VERSION
171            
172             Version 0.01
173            
174             =cut
175            
176             =head1 SYNOPSIS
177            
178             This module provides a structure to store a list
179             of keys, without duplicates, and be able to access
180             them by either key name or index.
181            
182            
183             use List::Unique::DeterministicOrder;
184            
185             my $foo = List::Unique::DeterministicOrder->new(
186             data => [qw /foo bar quux fetangle/]
187             );
188            
189             print $foo->keys;
190             # foo bar quux fetangle
191            
192             $foo->delete ('bar')
193             print $foo->keys;
194             # foo fetangle quux
195            
196             print $foo->get_key_at_pos(2);
197             # quux
198             print $foo->get_key_at_pos(20);
199             # undef
200            
201             $foo->push ('bardungle')
202             print $foo->keys;
203             # foo fetangle quux bardungle
204            
205             # keys are stored only once,
206             # just like with a normal hash
207             $foo->push ('fetangle')
208             print $foo->keys;
209             # foo fetangle quux bardungle
210            
211             print $foo->exists ('gelert');
212             # false
213            
214             print $foo->pop;
215             # bardungle
216            
217            
218             =head1 DISCUSSION
219            
220             The algorithm used is from
221             L
222            
223             The algorithm used inserts keys at the end, but
224             swaps keys around on deletion. Hence it is
225             deterministic and repeatable, but only if the
226             sequence of insertions and deletions is replicated
227             exactly.
228            
229             So why would one use this in the first place?
230             The motivating use-case was a randomisation process
231             where keys would be selected from a pool of keys,
232             and sometimes inserted. e.g. the process might
233             select and remove the 10th key, then the 257th,
234             then insert a new key, followed by more selections
235             and removals. The randomisations needed to
236             produce the same results same for the same given
237             PRNG sequence for reproducibility purposes.
238            
239            
240             Using a hash to store the data provides rapid access,
241             but getting the nth key requires the key list be generated
242             each time, and Perl's hashes do not provide their
243             keys in a deterministic
244             order across all versions and platforms.
245             Binary searches over sorted lists proved very
246             effective for a while, but bottlenecks started
247             to manifest when the data sets became
248             much larger and the number of lists
249             became both abundant and lengthy.
250            
251             Since the order itself does not matter,
252             only the ability to replicate it, this module was written.
253            
254             One could also use L, but it has the overhead
255             of storing values, which are not needed here.
256             I also wrote this module before I benchmarked
257             against L. Regardless, this module is faster
258             for the example use-case described above - see the
259             benchmarking results in bench.pl (which is part of
260             this distribution). That said, some of the implementation
261             details have been adapted/borrowed from L.
262            
263            
264             =head1 METHODS
265            
266             Note that most methods take a single argument
267             (if any), so while the method names look
268             hash-like, this is essentially cosmetic.
269             In particular, it does not yet support splicing.
270            
271             =head2 new
272            
273             $foo->new();
274             $foo->new(data => [/a b c d e/]);
275            
276             Create a new object.
277             Optionally pass data using the data
278             keyword. Duplicate keys are
279             stored once only.
280            
281            
282             =cut
283            
284             =head2 exists
285            
286             True or false for if the key exists.
287            
288             =cut
289            
290             =head2 delete
291            
292             $foo->delete('some key');
293            
294             Deletes the key passed as an argument.
295             Returns the key name if successful, undef if
296             the key was not found.
297            
298             =cut
299            
300             =head2 delete_key_at_pos
301            
302             $foo->delete_key_at_pos(1);
303            
304             Removes a single key from the set at the specified position.
305            
306            
307             =cut
308            
309             =head2 get_key_at_pos
310            
311             $foo->get_key_at_pos(5);
312            
313             Returns the key at some position.
314            
315             =cut
316            
317             =head2 get_key_pos
318            
319             $foo->get_key_pos('quux');
320            
321             Returns the position of a key.
322            
323             =cut
324            
325             =head2 keys
326            
327             Returns the list of keys in list context,
328             and the number of keys in scalar context.
329            
330             =cut
331            
332             =head2 pop
333            
334             $foo->pop;
335            
336             Removes and returns the last key in the list.
337            
338             =cut
339            
340             =head2 push
341            
342             $foo->push('quux');
343            
344             Appends the specified key to the end of the list,
345             unless it is already in the list.
346            
347             =cut
348            
349             =head2 clone
350            
351             $foo->clone;
352            
353             Creates a clone of the object.
354            
355             =cut
356            
357            
358            
359            
360             =head1 AUTHOR
361            
362             Shawn Laffan, C<< >>
363            
364             =head1 BUGS
365            
366             Please report any bugs or feature requests via L.
367            
368            
369             =head1 ACKNOWLEDGEMENTS
370            
371             The algorithm used is from
372             L
373            
374             Some implementation details have been borrowed/adapted from L.
375            
376             =head1 SEE ALSO
377            
378             L (and modules listed in its "See Also" section)
379            
380             L
381            
382             L
383            
384            
385             =head1 LICENSE AND COPYRIGHT
386            
387             Copyright 2018 Shawn Laffan
388            
389             This program is free software; you can redistribute it and/or modify it
390             under the terms of the the Artistic License (2.0). You may obtain a
391             copy of the full license at:
392            
393             L
394            
395             Any use, modification, and distribution of the Standard or Modified
396             Versions is governed by this Artistic License. By using, modifying or
397             distributing the Package, you accept this license. Do not use, modify,
398             or distribute the Package, if you do not accept this license.
399            
400             If your Modified Version has been derived from a Modified Version made
401             by someone other than you, you are nevertheless required to ensure that
402             your Modified Version complies with the requirements of this license.
403            
404             This license does not grant you the right to use any trademark, service
405             mark, tradename, or logo of the Copyright Holder.
406            
407             This license includes the non-exclusive, worldwide, free-of-charge
408             patent license to make, have made, use, offer to sell, sell, import and
409             otherwise transfer the Package with respect to any patent claims
410             licensable by the Copyright Holder that are necessarily infringed by the
411             Package. If you institute patent litigation (including a cross-claim or
412             counterclaim) against any party alleging that the Package constitutes
413             direct or contributory patent infringement, then this Artistic License
414             to you shall terminate on the date that such litigation is filed.
415            
416             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
417             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
418             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
419             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
420             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
421             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
422             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
423             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
424            
425            
426             =cut
427