File Coverage

blib/lib/List/Unique/DeterministicOrder.pm
Criterion Covered Total %
statement 83 86 96.5
branch 11 16 68.7
condition 3 4 75.0
subroutine 20 20 100.0
pod 10 10 100.0
total 127 136 93.3


line stmt bran cond sub pod time code
1             package List::Unique::DeterministicOrder;
2            
3 2     2   120436 use 5.010;
  2         17  
4 2     2   11 use Carp;
  2         4  
  2         103  
5 2     2   11 use strict;
  2         3  
  2         33  
6 2     2   19 use warnings;
  2         13  
  2         80  
7 2     2   12 use List::Util 1.45 qw /uniq/;
  2         28  
  2         190  
8 2     2   12 use Scalar::Util qw /blessed/;
  2         3  
  2         133  
9            
10             our $VERSION = 0.003;
11            
12             #no autovivification;
13            
14             use constant {
15 2         278 _ARRAY => 0, # ordered keys
16             _HASH => 1, # unordered keys
17 2     2   14 };
  2         2  
18            
19             use overload
20 4     4   302 q{bool} => sub { !!%{ $_[0]->[_HASH] } },
  4         12  
21 2     2   2027 fallback => 1;
  2         1720  
  2         13  
22            
23             sub new {
24 3     3 1 90 my ($package, %args) = @_;
25            
26 3         7 my $self = [ [], {} ];
27            
28             # use data if we were passed some
29 3 50       9 if (my $data = $args{data}) {
30 3         6 my %hash;
31 3         23 @hash{@$data} = (0..$#$data);
32             # rebuild the lists if there were dups
33 3 50       11 if (scalar keys %hash != scalar @$data) {
34 0         0 my @uniq = uniq @$data;
35 0         0 @hash{@uniq} = (0..$#uniq);
36 0         0 $self->[_ARRAY] = \@uniq;
37             }
38             else {
39 3         10 $self->[_ARRAY] = [@$data];
40             }
41 3         7 $self->[_HASH] = \%hash;
42             }
43            
44 3         10 return bless $self, $package;
45             }
46            
47             sub clone {
48 1     1 1 5 my $self = shift;
49 1         6 my $cloned = bless [], blessed $self;
50 1         3 $cloned->[_ARRAY] = [@{$self->[_ARRAY]}];
  1         5  
51 1         2 $cloned->[_HASH] = {%{$self->[_HASH]}};
  1         9  
52 1         13 return $cloned;
53             }
54            
55             sub exists {
56 12     12 1 3049 exists $_[0]->[_HASH]{$_[1]};
57             }
58            
59             sub keys {
60             wantarray
61 1         4 ? @{$_[0]->[_ARRAY]}
62 8 100   8 1 1593 : scalar @{$_[0]->[_ARRAY]};
  7         42  
63             }
64            
65             sub push {
66 9 100   9 1 456 return if exists $_[0]->[_HASH]{$_[1]};
67            
68 6         9 push @{$_[0]->[_ARRAY]}, $_[1];
  6         12  
69 6         8 $_[0]->[_HASH]{$_[1]} = $#{$_[0]->[_ARRAY]};
  6         16  
70             }
71            
72             sub pop {
73 1     1 1 217 my $key = pop @{$_[0]->[_ARRAY]};
  1         3  
74 1         3 delete $_[0]->[_HASH]{$key};
75 1         2 $key;
76             }
77            
78             # returns undef if key not in hash
79             sub get_key_pos {
80 3     3 1 419 $_[0]->[_HASH]{$_[1]};
81             }
82            
83            
84             # returns undef if index is out of bounds
85             sub get_key_at_pos {
86 7     7 1 2138 $_[0]->[_ARRAY][$_[1]];
87             }
88            
89             # does nothing if key does not exist
90             sub delete {
91 4     4 1 17 my ($self, $key) = @_;
92            
93             # get the index while cleaning up
94 4   100     14 my $pos = CORE::delete $self->[_HASH]{$key}
95             // return;
96            
97 3         6 my $move_key = CORE::pop @{$self->[_ARRAY]};
  3         5  
98             # make sure we don't just reinsert the last item
99             # from a single item list
100 3 50       10 if ($move_key ne $key) {
101 3         6 $self->[_HASH]{$move_key} = $pos;
102 3         5 $self->[_ARRAY][$pos] = $move_key;
103             }
104            
105 3         7 return $key;
106             }
107            
108             # Delete the key at the specified position
109             # and move the last key into it.
110             # Does nothing if key does not exist
111             sub delete_key_at_pos {
112 2     2 1 426 my ($self, $pos) = @_;
113            
114 2   50     7 my $key = $self->[_ARRAY][$pos]
115             // return;
116            
117 2         3 my $move_key = CORE::pop @{$self->[_ARRAY]};
  2         5  
118 2         4 CORE::delete $self->[_HASH]{$key};
119            
120             # make sure we don't just reinsert the last item
121             # from a single item list
122 2 100       6 if ($move_key ne $key) {
123 1         3 $self->[_HASH]{$move_key} = $pos;
124 1         2 $self->[_ARRAY][$pos] = $move_key;
125             }
126            
127 2         4 return $key;
128             }
129            
130             # Delete the key at the specified position
131             # and move the last key into it.
132             # Not a true splice, but one day might work
133             # on multiple indices.
134             #sub splice {
135             # my ($self, $pos) = @_;
136             #
137             # my $key = $self->[_ARRAY][$pos]
138             # // return;
139             #
140             # my $move_key = CORE::pop @{$self->[_ARRAY]};
141             # $self->[_HASH]{$move_key} = $pos;
142             # $self->[_ARRAY][$pos] = $move_key;
143             # CORE::delete $self->[_HASH]{$key};
144             # return $key;
145             #}
146            
147            
148             sub _paranoia {
149 2     2   934 my ($self) = @_;
150            
151 2         3 my $array_len = @{$self->[_ARRAY]};
  2         5  
152 2         3 my $hash_len = CORE::keys %{$self->[_HASH]};
  2         5  
153 2 50       24 croak "array and hash key mismatch" if $array_len != $hash_len;
154            
155 2         3 foreach my $key (@{$self->[_ARRAY]}) {
  2         4  
156             croak "Key mismatch between array and hash lists"
157 10 50       19 if !CORE::exists $self->[_HASH]{$key};
158             }
159            
160 2         4 return 1;
161             }
162            
163             1;
164            
165             =head1 NAME
166            
167             List::Unique::DeterministicOrder - Store and access
168             a list of keys using a deterministic order based on
169             the sequence of insertions and deletions
170            
171             =head1 VERSION
172            
173             Version 0.01
174            
175             =cut
176            
177             =head1 SYNOPSIS
178            
179             This module provides a structure to store a list
180             of keys, without duplicates, and be able to access
181             them by either key name or index.
182            
183            
184             use List::Unique::DeterministicOrder;
185            
186             my $foo = List::Unique::DeterministicOrder->new(
187             data => [qw /foo bar quux fetangle/]
188             );
189            
190             print $foo->keys;
191             # foo bar quux fetangle
192            
193             $foo->delete ('bar')
194             print $foo->keys;
195             # foo fetangle quux
196            
197             print $foo->get_key_at_pos(2);
198             # quux
199             print $foo->get_key_at_pos(20);
200             # undef
201            
202             $foo->push ('bardungle')
203             print $foo->keys;
204             # foo fetangle quux bardungle
205            
206             # keys are stored only once,
207             # just like with a normal hash
208             $foo->push ('fetangle')
209             print $foo->keys;
210             # foo fetangle quux bardungle
211            
212             print $foo->exists ('gelert');
213             # false
214            
215             print $foo->pop;
216             # bardungle
217            
218            
219             =head1 DISCUSSION
220            
221             The algorithm used is from
222             L
223            
224             The algorithm used inserts keys at the end, but
225             swaps keys around on deletion. Hence it is
226             deterministic and repeatable, but only if the
227             sequence of insertions and deletions is replicated
228             exactly.
229            
230             So why would one use this in the first place?
231             The motivating use-case was a randomisation process
232             where keys would be selected from a pool of keys,
233             and sometimes inserted. e.g. the process might
234             select and remove the 10th key, then the 257th,
235             then insert a new key, followed by more selections
236             and removals. The randomisations needed to
237             produce the same results same for the same given
238             PRNG sequence for reproducibility purposes.
239            
240            
241             Using a hash to store the data provides rapid access,
242             but getting the nth key requires the key list be generated
243             each time, and Perl's hashes do not provide their
244             keys in a deterministic
245             order across all versions and platforms.
246             Binary searches over sorted lists proved very
247             effective for a while, but bottlenecks started
248             to manifest when the data sets became
249             much larger and the number of lists
250             became both abundant and lengthy.
251            
252             Since the order itself does not matter,
253             only the ability to replicate it, this module was written.
254            
255             One could also use L, but it has the overhead
256             of storing values, which are not needed here.
257             I also wrote this module before I benchmarked
258             against L. Regardless, this module is faster
259             for the example use-case described above - see the
260             benchmarking results in bench.pl (which is part of
261             this distribution). That said, some of the implementation
262             details have been adapted/borrowed from L.
263            
264            
265             =head1 METHODS
266            
267             Note that most methods take a single argument
268             (if any), so while the method names look
269             hash-like, this is essentially cosmetic.
270             In particular, it does not yet support splicing.
271            
272             =head2 new
273            
274             $foo->new();
275             $foo->new(data => [/a b c d e/]);
276            
277             Create a new object.
278             Optionally pass data using the data
279             keyword. Duplicate keys are
280             stored once only.
281            
282            
283             =cut
284            
285             =head2 exists
286            
287             True or false for if the key exists.
288            
289             =cut
290            
291             =head2 delete
292            
293             $foo->delete('some key');
294            
295             Deletes the key passed as an argument.
296             Returns the key name if successful, undef if
297             the key was not found.
298            
299             =cut
300            
301             =head2 delete_key_at_pos
302            
303             $foo->delete_key_at_pos(1);
304            
305             Removes a single key from the set at the specified position.
306            
307            
308             =cut
309            
310             =head2 get_key_at_pos
311            
312             $foo->get_key_at_pos(5);
313            
314             Returns the key at some position.
315            
316             =cut
317            
318             =head2 get_key_pos
319            
320             $foo->get_key_pos('quux');
321            
322             Returns the position of a key.
323            
324             =cut
325            
326             =head2 keys
327            
328             Returns the list of keys in list context,
329             and the number of keys in scalar context.
330            
331             =cut
332            
333             =head2 pop
334            
335             $foo->pop;
336            
337             Removes and returns the last key in the list.
338            
339             =cut
340            
341             =head2 push
342            
343             $foo->push('quux');
344            
345             Appends the specified key to the end of the list,
346             unless it is already in the list.
347            
348             =cut
349            
350             =head2 clone
351            
352             $foo->clone;
353            
354             Creates a clone of the object.
355            
356             =cut
357            
358            
359            
360            
361             =head1 AUTHOR
362            
363             Shawn Laffan, C<< >>
364            
365             =head1 BUGS
366            
367             Please report any bugs or feature requests via L.
368            
369            
370             =head1 ACKNOWLEDGEMENTS
371            
372             The algorithm used is from
373             L
374            
375             Some implementation details have been borrowed/adapted from L.
376            
377             =head1 SEE ALSO
378            
379             L (and modules listed in its "See Also" section)
380            
381             L
382            
383             L
384            
385            
386             =head1 LICENSE AND COPYRIGHT
387            
388             Copyright 2018 Shawn Laffan
389            
390             This program is free software; you can redistribute it and/or modify it
391             under the terms of the the Artistic License (2.0). You may obtain a
392             copy of the full license at:
393            
394             L
395            
396             Any use, modification, and distribution of the Standard or Modified
397             Versions is governed by this Artistic License. By using, modifying or
398             distributing the Package, you accept this license. Do not use, modify,
399             or distribute the Package, if you do not accept this license.
400            
401             If your Modified Version has been derived from a Modified Version made
402             by someone other than you, you are nevertheless required to ensure that
403             your Modified Version complies with the requirements of this license.
404            
405             This license does not grant you the right to use any trademark, service
406             mark, tradename, or logo of the Copyright Holder.
407            
408             This license includes the non-exclusive, worldwide, free-of-charge
409             patent license to make, have made, use, offer to sell, sell, import and
410             otherwise transfer the Package with respect to any patent claims
411             licensable by the Copyright Holder that are necessarily infringed by the
412             Package. If you institute patent litigation (including a cross-claim or
413             counterclaim) against any party alleging that the Package constitutes
414             direct or contributory patent infringement, then this Artistic License
415             to you shall terminate on the date that such litigation is filed.
416            
417             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
418             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
419             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
420             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
421             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
422             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
423             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
424             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
425            
426            
427             =cut
428