File Coverage

lib/Statistics/Sampler/Multinomial/AliasMethod.pm
Criterion Covered Total %
statement 26 92 28.2
branch 0 22 0.0
condition 0 8 0.0
subroutine 9 15 60.0
pod 2 2 100.0
total 37 139 26.6


line stmt bran cond sub pod time code
1             package Statistics::Sampler::Multinomial::AliasMethod;
2            
3 1     1   95126 use 5.014;
  1         2  
4 1     1   4 use warnings;
  1         1  
  1         26  
5 1     1   3 use strict;
  1         1  
  1         32  
6            
7             our $VERSION = '0.5';
8            
9 1     1   3 use Carp;
  1         1  
  1         47  
10 1     1   414 use Ref::Util qw /is_arrayref/;
  1         491  
  1         63  
11 1     1   5 use List::Util qw /min sum/;
  1         1  
  1         70  
12 1     1   465 use List::MoreUtils qw /first_index/;
  1         6948  
  1         7  
13 1     1   444 use Scalar::Util qw /blessed/;
  1         1  
  1         51  
14            
15 1     1   436 use parent qw /Statistics::Sampler::Multinomial/;
  1         268  
  1         5  
16            
17             sub _validate_prng_object {
18 0     0     my ($self, $prng) = @_;
19            
20             # Math::Random::MT::Auto has boolean op overloading
21             # so make sure we don't trigger it or our tests fail
22             # i.e. don't use "if $prng" directly
23             # (and we waste a random number, but that's less of an issue)
24 0 0         return 1 if !defined $prng;
25            
26 0 0         croak 'prng arg is not an object'
27             if not blessed $prng;
28 0 0         croak 'prng arg does not have rand() method'
29             if not $prng->can('rand');
30            
31 0           return 1;
32             }
33            
34             sub _initialise_alias_tables {
35 0     0     my ($self, %args) = @_;
36            
37 0           my $probs = $self->{data};
38            
39             # caller has not promised they sum to 1
40 0 0         if (!$self->{data_sum_to_one}) {
41 0           my $sum = sum (@$probs);
42 0 0         if ($sum != 1) {
43 0           my @scaled_probs = map {$_ / $sum} @$probs;
  0            
44 0           $probs = \@scaled_probs;
45             }
46             }
47            
48             # algorithm and comments stolen/adapted from
49             # https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
50            
51 0           my (@smaller, @larger);
52 0           my @J = (0) x scalar @$probs;
53 0           my @q = (0) x scalar @$probs;
54 0           my $kk = -1;
55 0           my $K = scalar @$probs;
56            
57 0           foreach my $prob (@$probs){
58 0           $kk++;
59 0           $q[$kk] = $K * $prob;
60 0 0         if ($q[$kk] < 1.0) {
61 0           push @smaller, $kk
62             }
63             else {
64 0           push @larger, $kk;
65             }
66             }
67            
68             # Loop though and create little binary mixtures that
69             # appropriately allocate the larger outcomes over the
70             # overall uniform mixture.
71 0   0       while (scalar @smaller && scalar @larger) {
72 0           my $small = pop @smaller;
73 0           my $large = pop @larger;
74            
75 0           $J[$small] = $large;
76 0           $q[$large] = ($q[$large] + $q[$small]) - 1;
77            
78 0 0         if ($q[$large] < 1.0) {
79 0           push @smaller, $large;
80             }
81             else {
82 0           push @larger, $large;
83             }
84             }
85             # handle numeric stability issues
86             # courtesy http://www.keithschwarz.com/darts-dice-coins/
87 0           while (scalar @larger) {
88 0           my $g = shift @larger;
89 0           $q[$g] = 1;
90             }
91 0           while (scalar @smaller) {
92 0           my $l = shift @smaller;
93 0           $q[$l] = 1;
94             }
95            
96             # need better names for these,
97 0           $self->{J} = \@J;
98 0           $self->{q} = \@q;
99            
100 0 0         return if !defined wantarray;
101            
102             # should not expose these to the caller
103 0           my %results = (J => \@J, q => \@q);
104 0 0         return wantarray ? %results : \%results;
105             }
106            
107             sub draw {
108 0     0 1   my ($self, $args) = @_;
109            
110 0           my $prng = $self->{prng};
111            
112             my $q = $self->{q}
113 0   0       // do {$self->_initialise_alias_tables; $self->{q}};
  0            
  0            
114            
115 0           my $J = $self->{J};
116 0           my $K = scalar @$J;
117 0           my $kk = int ($prng->rand * $K);
118            
119             # Draw from the binary mixture, either keeping the
120             # small one, or choosing the associated larger one.
121 0 0         return ($prng->rand < $q->[$kk]) ? $kk : $J->[$kk];
122             }
123            
124             sub draw_n_samples {
125 0     0 1   my ($self, $n) = @_;
126            
127 0           my $prng = $self->{prng};
128            
129             my $q = $self->{q}
130 0   0       // do {$self->_initialise_alias_tables; $self->{q}};
  0            
  0            
131 0           my $J = $self->{J};
132 0           my $K = scalar @$J;
133            
134 0           my @draws = (0) x $K;
135 0           for (1..$n) {
136 0           my $kk = int ($prng->rand * $K);
137             # Draw from the binary mixture, either keeping the
138             # small one, or choosing the associated larger one.
139             # {SWL: could try to use Data::Alias or refaliasing here
140             # as the derefs cause overhead, albeit the big overhead
141             # is the method calls}
142             #push @draws, ($prng->rand < $q->[$kk]) ? $kk : $J->[$kk];
143 0 0         $draws[($prng->rand < $q->[$kk]) ? $kk : $J->[$kk]]++;
144             }
145            
146 0           return \@draws;
147             }
148            
149             # Cuckoo package to act as a method wrapper
150             # to use the perl PRNG stream by default.
151             package Statistics::Sampler::Multinomial::AliasMethod::DefaultPRNG {
152             sub new {
153 0     0     return bless {}, __PACKAGE__;
154             }
155             sub rand {
156 0     0     rand();
157             }
158             1;
159             }
160            
161             1;
162             __END__