File Coverage

blib/lib/List/Unique/DeterministicOrder.pm
Criterion Covered Total %
statement 86 89 96.6
branch 12 16 75.0
condition 3 4 75.0
subroutine 20 20 100.0
pod 10 10 100.0
total 131 139 94.2


line stmt bran cond sub pod time code
1             package List::Unique::DeterministicOrder;
2            
3 2     2   140995 use 5.010;
  2         16  
4 2     2   10 use Carp;
  2         3  
  2         130  
5 2     2   10 use strict;
  2         4  
  2         36  
6 2     2   20 use warnings;
  2         15  
  2         87  
7 2     2   13 use List::Util 1.45 qw /uniq/;
  2         33  
  2         197  
8 2     2   23 use Scalar::Util qw /blessed/;
  2         5  
  2         185  
9            
10             our $VERSION = 0.004;
11            
12             #no autovivification;
13            
14             use constant {
15 2         298 _ARRAY => 0, # ordered keys
16             _HASH => 1, # unordered keys
17 2     2   22 };
  2         4  
18            
19             use overload
20 4     4   328 q{bool} => sub { !!%{ $_[0]->[_HASH] } },
  4         14  
21 2     2   2495 fallback => 1;
  2         2002  
  2         17  
22            
23             sub new {
24 4     4 1 98 my ($package, %args) = @_;
25            
26 4         11 my $self = [ [], {} ];
27            
28             # use data if we were passed some
29 4 50       12 if (my $data = $args{data}) {
30 4         7 my %hash;
31 4         35 @hash{@$data} = (0..$#$data);
32             # rebuild the lists if there were dups
33 4 50       17 if (scalar keys %hash != scalar @$data) {
34 0         0 my @uniq = uniq @$data;
35 0         0 @hash{@uniq} = (0..$#uniq);
36 0         0 $self->[_ARRAY] = \@uniq;
37             }
38             else {
39 4         15 $self->[_ARRAY] = [@$data];
40             }
41 4         9 $self->[_HASH] = \%hash;
42             }
43            
44 4         16 return bless $self, $package;
45             }
46            
47             sub clone {
48 1     1 1 7 my $self = shift;
49 1         10 my $cloned = bless [], blessed $self;
50 1         3 $cloned->[_ARRAY] = [@{$self->[_ARRAY]}];
  1         6  
51 1         3 $cloned->[_HASH] = {%{$self->[_HASH]}};
  1         19  
52 1         6 return $cloned;
53             }
54            
55             sub exists {
56 12     12 1 3696 exists $_[0]->[_HASH]{$_[1]};
57             }
58            
59             sub keys {
60             wantarray
61 1         6 ? @{$_[0]->[_ARRAY]}
62 9 100   9 1 1906 : scalar @{$_[0]->[_ARRAY]};
  8         38  
63             }
64            
65             sub push {
66 9 100   9 1 583 return if exists $_[0]->[_HASH]{$_[1]};
67            
68 6         7 push @{$_[0]->[_ARRAY]}, $_[1];
  6         15  
69 6         10 $_[0]->[_HASH]{$_[1]} = $#{$_[0]->[_ARRAY]};
  6         16  
70             }
71            
72             sub pop {
73 1     1 1 300 my $key = pop @{$_[0]->[_ARRAY]};
  1         3  
74 1         3 delete $_[0]->[_HASH]{$key};
75 1         3 $key;
76             }
77            
78             # returns undef if key not in hash
79             sub get_key_pos {
80 3     3 1 505 $_[0]->[_HASH]{$_[1]};
81             }
82            
83            
84             # returns undef if index is out of bounds
85             sub get_key_at_pos {
86 7     7 1 2568 $_[0]->[_ARRAY][$_[1]];
87             }
88            
89             # does nothing if key does not exist
90             sub delete {
91 5     5 1 29 my ($self, $key) = @_;
92            
93             # get the index while cleaning up
94 5   100     20 my $pos = CORE::delete $self->[_HASH]{$key}
95             // return;
96            
97             # No need to update lists if we are
98             # popping the last index in the array
99 1         3 return CORE::pop @{$self->[_ARRAY]}
100 4 100       9 if $pos == $#{$self->[_ARRAY]};
  4         16  
101            
102 3         4 $key = CORE::pop @{$self->[_ARRAY]};
  3         10  
103 3         7 $self->[_HASH]{$key} = $pos;
104 3         6 $self->[_ARRAY][$pos] = $key;
105            
106             # obfuscatory but lets us overwrite $key above
107             # and thus avoid an extra scalar variable destruction
108             # (assuming of course that doing so makes a meaningful difference)
109 3         7 return $_[1];
110             }
111            
112             # Delete the key at the specified position
113             # and move the last key into it.
114             # Does nothing if key does not exist
115             sub delete_key_at_pos {
116 2     2 1 497 my ($self, $pos) = @_;
117            
118 2   50     9 my $key = $self->[_ARRAY][$pos]
119             // return;
120            
121 2         3 my $move_key = CORE::pop @{$self->[_ARRAY]};
  2         8  
122 2         5 CORE::delete $self->[_HASH]{$key};
123            
124             # make sure we don't just reinsert the last item
125             # from a single item list
126             return $key
127 2 100       5 if $pos == @{$self->[_ARRAY]};
  2         7  
128            
129 1         3 $self->[_HASH]{$move_key} = $pos;
130 1         3 $self->[_ARRAY][$pos] = $move_key;
131            
132 1         2 return $key;
133             }
134            
135             # Delete the key at the specified position
136             # and move the last key into it.
137             # Not a true splice, but one day might work
138             # on multiple indices.
139             #sub splice {
140             # my ($self, $pos) = @_;
141             #
142             # my $key = $self->[_ARRAY][$pos]
143             # // return;
144             #
145             # my $move_key = CORE::pop @{$self->[_ARRAY]};
146             # $self->[_HASH]{$move_key} = $pos;
147             # $self->[_ARRAY][$pos] = $move_key;
148             # CORE::delete $self->[_HASH]{$key};
149             # return $key;
150             #}
151            
152            
153             sub _paranoia {
154 2     2   1110 my ($self) = @_;
155            
156 2         5 my $array_len = @{$self->[_ARRAY]};
  2         5  
157 2         3 my $hash_len = CORE::keys %{$self->[_HASH]};
  2         7  
158 2 50       6 croak "array and hash key mismatch" if $array_len != $hash_len;
159            
160 2         3 foreach my $key (@{$self->[_ARRAY]}) {
  2         5  
161             croak "Key mismatch between array and hash lists"
162 10 50       23 if !CORE::exists $self->[_HASH]{$key};
163             }
164            
165 2         5 return 1;
166             }
167            
168             1;
169            
170             =head1 NAME
171            
172             List::Unique::DeterministicOrder - Store and access
173             a list of keys using a deterministic order based on
174             the sequence of insertions and deletions
175            
176             =head1 VERSION
177            
178             Version 0.01
179            
180             =cut
181            
182             =head1 SYNOPSIS
183            
184             This module provides a structure to store a list
185             of keys, without duplicates, and be able to access
186             them by either key name or index.
187            
188            
189             use List::Unique::DeterministicOrder;
190            
191             my $foo = List::Unique::DeterministicOrder->new(
192             data => [qw /foo bar quux fetangle/]
193             );
194            
195             print $foo->keys;
196             # foo bar quux fetangle
197            
198             $foo->delete ('bar')
199             print $foo->keys;
200             # foo fetangle quux
201            
202             print $foo->get_key_at_pos(2);
203             # quux
204             print $foo->get_key_at_pos(20);
205             # undef
206            
207             $foo->push ('bardungle')
208             print $foo->keys;
209             # foo fetangle quux bardungle
210            
211             # keys are stored only once,
212             # just like with a normal hash
213             $foo->push ('fetangle')
214             print $foo->keys;
215             # foo fetangle quux bardungle
216            
217             print $foo->exists ('gelert');
218             # false
219            
220             print $foo->pop;
221             # bardungle
222            
223            
224             =head1 DISCUSSION
225            
226             The algorithm used is from
227             L
228            
229             The algorithm used inserts keys at the end, but
230             swaps keys around on deletion. Hence it is
231             deterministic and repeatable, but only if the
232             sequence of insertions and deletions is replicated
233             exactly.
234            
235             So why would one use this in the first place?
236             The motivating use-case was a randomisation process
237             where keys would be selected from a pool of keys,
238             and sometimes inserted. e.g. the process might
239             select and remove the 10th key, then the 257th,
240             then insert a new key, followed by more selections
241             and removals. The randomisations needed to
242             produce the same results same for the same given
243             PRNG sequence for reproducibility purposes.
244            
245            
246             Using a hash to store the data provides rapid access,
247             but getting the nth key requires the key list be generated
248             each time, and Perl's hashes do not provide their
249             keys in a deterministic
250             order across all versions and platforms.
251             Binary searches over sorted lists proved very
252             effective for a while, but bottlenecks started
253             to manifest when the data sets became
254             much larger and the number of lists
255             became both abundant and lengthy.
256            
257             Since the order itself does not matter,
258             only the ability to replicate it, this module was written.
259            
260             One could also use L, but it has the overhead
261             of storing values, which are not needed here.
262             I also wrote this module before I benchmarked
263             against L. Regardless, this module is faster
264             for the example use-case described above - see the
265             benchmarking results in bench.pl (which is part of
266             this distribution). That said, some of the implementation
267             details have been adapted/borrowed from L.
268            
269            
270             =head1 METHODS
271            
272             Note that most methods take a single argument
273             (if any), so while the method names look
274             hash-like, this is essentially cosmetic.
275             In particular, it does not yet support splicing.
276            
277             =head2 new
278            
279             $foo->new();
280             $foo->new(data => [/a b c d e/]);
281            
282             Create a new object.
283             Optionally pass data using the data
284             keyword. Duplicate keys are
285             stored once only.
286            
287            
288             =cut
289            
290             =head2 exists
291            
292             True or false for if the key exists.
293            
294             =cut
295            
296             =head2 delete
297            
298             $foo->delete('some key');
299            
300             Deletes the key passed as an argument.
301             Returns the key name if successful, undef if
302             the key was not found.
303            
304             =cut
305            
306             =head2 delete_key_at_pos
307            
308             $foo->delete_key_at_pos(1);
309            
310             Removes a single key from the set at the specified position.
311            
312            
313             =cut
314            
315             =head2 get_key_at_pos
316            
317             $foo->get_key_at_pos(5);
318            
319             Returns the key at some position.
320            
321             =cut
322            
323             =head2 get_key_pos
324            
325             $foo->get_key_pos('quux');
326            
327             Returns the position of a key.
328            
329             =cut
330            
331             =head2 keys
332            
333             Returns the list of keys in list context,
334             and the number of keys in scalar context.
335            
336             =cut
337            
338             =head2 pop
339            
340             $foo->pop;
341            
342             Removes and returns the last key in the list.
343            
344             =cut
345            
346             =head2 push
347            
348             $foo->push('quux');
349            
350             Appends the specified key to the end of the list,
351             unless it is already in the list.
352            
353             =cut
354            
355             =head2 clone
356            
357             $foo->clone;
358            
359             Creates a clone of the object.
360            
361             =cut
362            
363            
364            
365            
366             =head1 AUTHOR
367            
368             Shawn Laffan, C<< >>
369            
370             =head1 BUGS
371            
372             Please report any bugs or feature requests via L.
373            
374            
375             =head1 ACKNOWLEDGEMENTS
376            
377             The algorithm used is from
378             L
379            
380             Some implementation details have been borrowed/adapted from L.
381            
382             =head1 SEE ALSO
383            
384             L (and modules listed in its "See Also" section)
385            
386             L
387            
388             L
389            
390            
391             =head1 LICENSE AND COPYRIGHT
392            
393             Copyright 2018 Shawn Laffan
394            
395             This program is free software; you can redistribute it and/or modify it
396             under the terms of the the Artistic License (2.0). You may obtain a
397             copy of the full license at:
398            
399             L
400            
401             Any use, modification, and distribution of the Standard or Modified
402             Versions is governed by this Artistic License. By using, modifying or
403             distributing the Package, you accept this license. Do not use, modify,
404             or distribute the Package, if you do not accept this license.
405            
406             If your Modified Version has been derived from a Modified Version made
407             by someone other than you, you are nevertheless required to ensure that
408             your Modified Version complies with the requirements of this license.
409            
410             This license does not grant you the right to use any trademark, service
411             mark, tradename, or logo of the Copyright Holder.
412            
413             This license includes the non-exclusive, worldwide, free-of-charge
414             patent license to make, have made, use, offer to sell, sell, import and
415             otherwise transfer the Package with respect to any patent claims
416             licensable by the Copyright Holder that are necessarily infringed by the
417             Package. If you institute patent litigation (including a cross-claim or
418             counterclaim) against any party alleging that the Package constitutes
419             direct or contributory patent infringement, then this Artistic License
420             to you shall terminate on the date that such litigation is filed.
421            
422             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
423             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
424             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
425             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
426             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
427             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
428             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
429             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
430            
431            
432             =cut
433