File Coverage

lib/Statistics/Sampler/Multinomial/AliasMethod.pm
Criterion Covered Total %
statement 92 92 100.0
branch 17 22 77.2
condition 6 8 75.0
subroutine 15 15 100.0
pod 2 2 100.0
total 132 139 94.9


line stmt bran cond sub pod time code
1             package Statistics::Sampler::Multinomial::AliasMethod;
2            
3 1     1   93622 use 5.014;
  1         3  
4 1     1   3 use warnings;
  1         1  
  1         24  
5 1     1   3 use strict;
  1         1  
  1         31  
6            
7             our $VERSION = '0.7';
8            
9 1     1   3 use Carp;
  1         1  
  1         64  
10 1     1   504 use Ref::Util qw /is_arrayref/;
  1         428  
  1         64  
11 1     1   5 use List::Util qw /min sum/;
  1         1  
  1         72  
12 1     1   471 use List::MoreUtils qw /first_index/;
  1         6939  
  1         4  
13 1     1   456 use Scalar::Util qw /blessed/;
  1         2  
  1         43  
14            
15 1     1   443 use parent qw /Statistics::Sampler::Multinomial/;
  1         205  
  1         6  
16            
17             sub _validate_prng_object {
18 8     8   13 my ($self, $prng) = @_;
19            
20             # Math::Random::MT::Auto has boolean op overloading
21             # so make sure we don't trigger it or our tests fail
22             # i.e. don't use "if $prng" directly
23             # (and we waste a random number, but that's less of an issue)
24 8 100       17 return 1 if !defined $prng;
25            
26 7 50       24 croak 'prng arg is not an object'
27             if not blessed $prng;
28 7 50       26 croak 'prng arg does not have rand() method'
29             if not $prng->can('rand');
30            
31 7         82 return 1;
32             }
33            
34             sub _initialise_alias_tables {
35 8     8   22 my ($self, %args) = @_;
36            
37 8         11 my $probs = $self->{data};
38            
39             # caller has not promised they sum to 1
40 8 50       16 if (!$self->{data_sum_to_one}) {
41 8         27 my $sum = sum (@$probs);
42 8 50       22 if ($sum != 1) {
43 8         12 my @scaled_probs = map {$_ / $sum} @$probs;
  111         99  
44 8         17 $probs = \@scaled_probs;
45             }
46             }
47            
48             # algorithm and comments stolen/adapted from
49             # https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
50            
51 8         8 my (@smaller, @larger);
52 8         23 my @J = (0) x scalar @$probs;
53 8         19 my @q = (0) x scalar @$probs;
54 8         8 my $kk = -1;
55 8         10 my $K = scalar @$probs;
56            
57 8         12 foreach my $prob (@$probs){
58 111         67 $kk++;
59 111         86 $q[$kk] = $K * $prob;
60 111 100       116 if ($q[$kk] < 1.0) {
61 65         54 push @smaller, $kk
62             }
63             else {
64 46         37 push @larger, $kk;
65             }
66             }
67            
68             # Loop though and create little binary mixtures that
69             # appropriately allocate the larger outcomes over the
70             # overall uniform mixture.
71 8   100     29 while (scalar @smaller && scalar @larger) {
72 101         75 my $small = pop @smaller;
73 101         55 my $large = pop @larger;
74            
75 101         68 $J[$small] = $large;
76 101         85 $q[$large] = ($q[$large] + $q[$small]) - 1;
77            
78 101 100       101 if ($q[$large] < 1.0) {
79 37         65 push @smaller, $large;
80             }
81             else {
82 64         116 push @larger, $large;
83             }
84             }
85             # handle numeric stability issues
86             # courtesy http://www.keithschwarz.com/darts-dice-coins/
87 8         18 while (scalar @larger) {
88 9         8 my $g = shift @larger;
89 9         19 $q[$g] = 1;
90             }
91 8         15 while (scalar @smaller) {
92 1         4 my $l = shift @smaller;
93 1         3 $q[$l] = 1;
94             }
95            
96             # need better names for these,
97 8         12 $self->{J} = \@J;
98 8         10 $self->{q} = \@q;
99            
100 8 100       48 return if !defined wantarray;
101            
102             # should not expose these to the caller
103 3         9 my %results = (J => \@J, q => \@q);
104 3 50       20 return wantarray ? %results : \%results;
105             }
106            
107             sub draw {
108 10     10 1 7310 my ($self, $args) = @_;
109            
110 10         18 my $prng = $self->{prng};
111            
112             my $q = $self->{q}
113 10   66     26 // do {$self->_initialise_alias_tables; $self->{q}};
  4         11  
  4         8  
114            
115 10         10 my $J = $self->{J};
116 10         12 my $K = scalar @$J;
117 10         27 my $kk = int ($prng->rand * $K);
118            
119             # Draw from the binary mixture, either keeping the
120             # small one, or choosing the associated larger one.
121 10 100       30 return ($prng->rand < $q->[$kk]) ? $kk : $J->[$kk];
122             }
123            
124             sub draw_n_samples {
125 4     4 1 6561 my ($self, $n) = @_;
126            
127 4         9 my $prng = $self->{prng};
128            
129             my $q = $self->{q}
130 4   66     14 // do {$self->_initialise_alias_tables; $self->{q}};
  1         4  
  1         3  
131 4         9 my $J = $self->{J};
132 4         7 my $K = scalar @$J;
133            
134 4         12 my @draws = (0) x $K;
135 4         13 for (1..$n) {
136 41         51 my $kk = int ($prng->rand * $K);
137             # Draw from the binary mixture, either keeping the
138             # small one, or choosing the associated larger one.
139             # {SWL: could try to use Data::Alias or refaliasing here
140             # as the derefs cause overhead, albeit the big overhead
141             # is the method calls}
142             #push @draws, ($prng->rand < $q->[$kk]) ? $kk : $J->[$kk];
143 41 100       72 $draws[($prng->rand < $q->[$kk]) ? $kk : $J->[$kk]]++;
144             }
145            
146 4         9 return \@draws;
147             }
148            
149             # Cuckoo package to act as a method wrapper
150             # to use the perl PRNG stream by default.
151             package Statistics::Sampler::Multinomial::AliasMethod::DefaultPRNG {
152             sub new {
153 1     1   9 return bless {}, __PACKAGE__;
154             }
155             sub rand {
156 12     12   126 rand();
157             }
158             1;
159             }
160            
161             1;
162             __END__