File Coverage

blib/lib/Statistics/Test/RandomWalk.pm
Criterion Covered Total %
statement 91 96 94.7
branch 18 26 69.2
condition 1 2 50.0
subroutine 13 13 100.0
pod 3 3 100.0
total 126 140 90.0


line stmt bran cond sub pod time code
1             package Statistics::Test::RandomWalk;
2              
3 2     2   56042 use 5.006;
  2         7  
  2         199  
4 2     2   10 use strict;
  2         5  
  2         73  
5 2     2   9 use warnings;
  2         4  
  2         87  
6              
7             our $VERSION = '0.02';
8              
9 2     2   9 use Carp qw/croak/;
  2         3  
  2         151  
10 2     2   889 use Params::Util qw/_POSINT _ARRAY _CODE/;
  2         5971  
  2         126  
11 2     2   5382 use Memoize;
  2         5275  
  2         109  
12 2     2   5244 use Math::BigFloat;
  2         55405  
  2         13  
13 2     2   110628 use Statistics::Test::Sequence;
  2         13620  
  2         109  
14             use Class::XSAccessor {
15 2         28 constructor => 'new',
16             getters => {
17             rescale_factor => 'rescale',
18             },
19             setters => {
20             set_rescale_factor => 'rescale',
21             },
22 2     2   3016 };
  2         6856  
23              
24             =head1 NAME
25              
26             Statistics::Test::RandomWalk - Random Walk test for random numbers
27              
28             =head1 SYNOPSIS
29              
30             use Statistics::Test::RandomWalk;
31             my $tester = Statistics::Test::RandomWalk->new();
32             $tester->set_data( [map {rand()} 1..1000000] );
33            
34             my $no_bins = 10;
35             my ($quant, $got, $expected) = $tester->test($no_bins);
36             print $tester->data_to_report($quant, $got, $expected);
37              
38             =head1 DESCRIPTION
39              
40             This module implements a Random Walk test of a random number generator
41             as outlined in Blobel et al (Refer to the SEE ALSO section).
42              
43             Basically, it tests that the numbers C<[0,1]> generated by a
44             random number generator are distributed evenly. It divides C<[0,1]>
45             into C evenly sized bins and calculates the number of expected and
46             actual random numbers in the bin. (In fact, this counts the
47             cumulated numbers, but that works the same.)
48              
49             =head1 METHODS
50              
51             =head2 new
52              
53             Creates a new random number tester.
54              
55             =head2 set_rescale_factor
56              
57             The default range of the random numbers [0, 1) can be rescaled
58             by a constant factor. This method is the setter for that factor.
59              
60             =head2 rescale_factor
61              
62             Returns the current rescaling factor.
63              
64             =head2 set_data
65              
66             Sets the random numbers to operate on. First argument
67             must be either an array reference to an array of random
68             numbers or a code reference.
69              
70             If the first argument is a code reference, the second
71             argument must be an integer C. The code reference is
72             called C-times and its return values are used as
73             random numbers.
74              
75             The code reference semantics are particularily useful if
76             you do not want to store all random numbers in memory at
77             the same time. You can write a subroutine that, for example,
78             generates and returns batches of 100 random numbers so no
79             more than 101 of these numbers will be in memory at the
80             same time. Note that if you return 100 numbers at once and
81             pass in C, you will have a sequence of 5000 random
82             numbers.
83              
84             =cut
85              
86             sub set_data {
87 2     2 1 5698 my $self = shift;
88 2         5 my $data = shift;
89 2 100       34 if (_ARRAY($data)) {
    50          
90 1         16 $self->{data} = $data;
91 1         9 return 1;
92             }
93             elsif (_CODE($data)) {
94 1         3 $self->{data} = $data;
95 1         209 my $n = shift;
96 1 50       43 if (not _POSINT($n)) {
97 0         0 croak("'set_data' needs an integer as second argument if the first argument is a code reference.");
98             }
99 1         14 $self->{n} = $n;
100 1         4 return 1;
101             }
102             else {
103 0         0 croak("Invalid arguments to 'set_data'.");
104             }
105             }
106              
107             =head2 test
108              
109             Runs the Random Walk test on the data that was previously set using
110             C.
111              
112             First argument must be the number of bins.
113              
114             Returns three array references. First is an array of quantiles.
115             If the number of bins was ten, this (and all other returned arrays)
116             will hold ten items.
117              
118             Second are the determined numbers of random numbers below the
119             quantiles. Third are the expected counts.
120              
121             =cut
122              
123             sub test {
124 2     2 1 2203 my $self = shift;
125 2         6 my $bins = shift;
126 2 50       90 if (not _POSINT($bins)) {
127 0         0 croak("Expecting number of bins as argument to 'test'");
128             }
129 2   50     42 my $rescale_factor = $self->rescale_factor||1;
130              
131              
132 2         6 my $data = $self->{data};
133              
134 2 50       10 if (not defined $data) {
135 0         0 croak("Set data using 'set_data' first.");
136             }
137              
138 2         8 my $step = 1 / $bins * $rescale_factor;
139 2         3 my @alpha;
140 2         19 push @alpha, $_*$step for 1..$bins;
141              
142 2         10 my @bins = (0) x $bins;
143 2         3 my $numbers;
144              
145 2 100       9 if (_ARRAY($data)) {
146 1         2 $numbers = @$data;
147              
148 1         2 foreach my $i (@$data) {
149 10000         17058 foreach my $ai (0..$#alpha) {
150 55230 100       91097 if ($i < $alpha[$ai]) {
151 10000         28849 $bins[$_]++ for $ai..$#alpha;
152 10000         16000 last;
153             }
154             }
155             }
156             }
157             else { # CODE
158 1         2 my @cache;
159 1         2 my $calls = $self->{n};
160 1         2 foreach (1..$calls) {
161             # get new data
162 100         295 push @cache, $data->();
163 100         4163 while (@cache) {
164 10000         10678 $numbers++;
165 10000         11237 my $this = shift @cache;
166 10000         16270 foreach my $ai (0..$#alpha) {
167 105309 100       197272 if ($this < $alpha[$ai]) {
168 10000         43328 $bins[$_]++ for $ai..$#alpha;
169 10000         25367 last;
170             }
171             }
172             }
173             }
174             }
175              
176 2         37 my @expected_smaller = map Math::BigFloat->new($numbers)*$_/$rescale_factor, @alpha;
177              
178             return(
179 2         24163 [map $_/$rescale_factor, @alpha],
180             \@bins,
181             \@expected_smaller,
182             );
183             }
184              
185             =head2 data_to_report
186              
187             From the data returned by the C method, this
188             method creates a textual report and returns it as a string.
189              
190             Do not forget to pass in the data that was returned by C
191             or use the C method directly if you do not use
192             the data otherwise.
193              
194             =cut
195              
196             sub data_to_report {
197 2     2 1 3504 my $self = shift;
198 2         5 my $alpha = shift;
199 2         4 my $got = shift;
200 2         29 my $expected = shift;
201 2 50       6 if (grep {not _ARRAY($_)} ($alpha, $got, $expected)) {
  6         24  
202 0         0 croak("Please pass the data returned from 'test' to the 'data_to_report' method.");
203             }
204              
205 2         9 my $max_a = _max_length($alpha);
206 2 50       7 $max_a = length('Quantile') if length('Quantile') > $max_a;
207 2         5 my $max_g = _max_length($got);
208 2 50       6 $max_g = length('Got') if length('Got') > $max_g;
209 2         4 my $max_e = _max_length($expected);
210 2 50       8 $max_e = length('Expected') if length('Expected') > $max_e;
211              
212 2         4 my $str = '';
213              
214 2         18 $str .= sprintf(
215             "\%${max_a}s | \%${max_g}s | \%${max_e}s\n",
216             qw/Quantile Got Expected/
217             );
218 2         7 $str .= ('-' x (length($str)-1))."\n";
219 2         7 foreach my $i (0..$#$alpha) {
220 30         839 $str .= sprintf(
221             "\%${max_a}f | \%${max_g}u | \%${max_e}u\n",
222             $alpha->[$i], $got->[$i], $expected->[$i]
223             );
224             }
225 2         64 return $str;
226             }
227              
228             sub _max_length {
229 6     6   7 my $max = 0;
230 6         7 foreach (@{$_[0]}) {
  6         12  
231 90 100       1264 $max = length $_ if length($_) > $max;
232             }
233 6         143 return $max;
234              
235             }
236              
237             =head1 SUBROUTINES
238              
239             =head2 n_over_k
240              
241             Computes C over C. Uses Perl's big number support and
242             returns a L object.
243              
244             This sub is memoized.
245              
246             =cut
247              
248             memoize('n_over_k');
249             sub n_over_k {
250             my $n = shift;
251             my $k = shift;
252             my @bits = ((0) x $k, (1) x ($n-$k));
253             foreach my $x (1..($n-$k)) {
254             $bits[$x-1]--;
255             }
256              
257             my $o = Math::BigFloat->bone();
258             foreach my $i (0..$#bits) {
259             $o *= Math::BigFloat->new($i+1)**$bits[$i] if $bits[$i] != 0;
260             }
261              
262             return $o->ffround(0);
263             }
264              
265             1;
266              
267             __END__