File Coverage

blib/lib/Math/Round/Fair.pm
Criterion Covered Total %
statement 114 124 91.9
branch 22 30 73.3
condition 4 8 50.0
subroutine 18 19 94.7
pod 2 3 66.6
total 160 184 86.9


line stmt bran cond sub pod time code
1             package Math::Round::Fair;
2 6     6   123381 use warnings;
  6         12  
  6         213  
3 6     6   29 use strict;
  6         10  
  6         186  
4 6     6   128 use 5.005000;
  6         25  
  6         320  
5 6     6   4954 use Devel::Assert;
  6         69948  
  6         44  
6 6     6   575 use Carp;
  6         12  
  6         513  
7 6     6   31 use base qw/Exporter/;
  6         13  
  6         509  
8 6     6   29 use List::Util qw/shuffle sum min/;
  6         10  
  6         661  
9 6     6   5852 use POSIX qw/floor ceil DBL_EPSILON/;
  6         49074  
  6         42  
10              
11             our $VERSION = '0.03';
12              
13             our @EXPORT_OK = qw/round_fair round_adjacent/;
14              
15             BEGIN {
16 6     6   1092 my $debug;
17 13     13 0 902 sub DEBUG { $debug }
18              
19 6   100     45 $debug = $ENV{MATH_ROUND_FAIR_DEBUG} || 0;
20 6 50   6   7308 use Devel::Assert DEBUG() ? ('-all -verbose') : ();
  6         13  
  6         24  
21              
22             # used in assertions
23 6 100   5   16 eval q{use Perl6::Junction 'none'} if DEBUG();
  5         3375  
  5         58413  
  5         1940  
24             }
25              
26              
27             =head1 NAME
28              
29             Math::Round::Fair - distribute rounding errors fairly
30              
31             =head1 SYNOPSIS
32              
33             use Math::Round::Fair 'round_fair', 'round_adjacent';
34              
35             my $cents = 7;
36             my @weights = (1, 2, 3, 2, 1);
37             my @allocation = round_fair($cents, @weights);
38              
39             print "@allocation\n";
40              
41             # output will be one of the following:
42             # 0 1 3 2 1
43             # 0 2 2 2 1
44             # 0 2 3 1 1
45             # 0 2 3 2 0
46             # 1 1 2 2 1
47             # 1 1 3 1 1
48             # 1 1 3 2 0
49             # 1 2 2 1 1
50             # 1 2 2 2 0
51              
52             my @total;
53             for ( 1..900 ) {
54             @allocation = round_fair($cents, @weights);
55             @total[$_] += @allocation[$_] for 0..$#allocation;
56             }
57             print "@total\n";
58              
59             # output will be *near* 700 1400 2100 1400 700, e.g.:
60             # 698 1411 2096 1418 677
61              
62              
63             my @rounded = round_adjacent(0.95, 0.65, 0.41, 0.99);
64             # @rounded will be one of the following:
65             # 59% of the time: 1, 1, 0, 1
66             # 35% of the time: 1, 0, 1, 1
67             # 5% of the time: 0, 1, 1, 1
68             # 1% of the time: 1, 1, 1, 0
69              
70              
71             =head1 DESCRIPTION
72              
73             This module provides two exportable functions, C, which
74             allocates an integer value, fairly distributing rounding errors, and
75             C, which takes a list of real numbers and rounds them up, or
76             down, to an adjacent integer, fairly. Both functions return a list of fairly
77             rounded integer values.
78              
79             C and C round up, or down, randomly, where the
80             probability of rounding up is equal to the fraction to round. For example, 0.5
81             will round to 1.0 with a probability of 0.5. 0.3 will round to 1.0 3 out of 10
82             times and to zero 7 out of 10 times, on average.
83              
84             Consider the problem of distributing one indivisible item, for example a penny,
85             across three evenly weighted accounts, A, B, and C.
86              
87             Using a naive approach, none of the accounts will receive an allocation since
88             the allocated portion to each is 1/3 and 1/3 rounds to zero. We are left with
89             1 unallocated item.
90              
91             Another approach is to adjust the basis at each step. We start with 1 item to
92             allocate to 3 accounts. 1/3 rounds to 0, so account A receives no allocation,
93             and we drop it from consideration. Now, we have 2 accounts and one item to
94             allocate. 1/2 rounds to 1, so we allocate 1 item to account B. Account C
95             gets no allocation since there is nothing left to allocate.
96              
97             But what happens if we allocate one item to the same three accounts 10,000
98             times? Ideally, two accounts should end up with 3,333 items and one should end
99             up with 3,334 items.
100              
101             Using the naive approach, all three accounts receive no allocation since at
102             each round the allocation is 1/3 which rounds to zero. Using the second method,
103             account A and account C will receive no allocation, and account B will receive
104             a total allocation of 10,000 items. Account B always receives the benefit of
105             the rounding errors using the second method.
106              
107             The algorithm employed by this module uses randomness to ensure a fair distribution of
108             rounding errors. In our example problem, we start with 1 item to allocate. We
109             calculate account A's share, 1/3. Since it is less than one item, we give it a
110             1/3 chance of rounding up (and, therefore, a 2/3 chance of rounding down). It
111             wins the allocation 1/3 of the time. 2/3 of the time we continue to B. We
112             calculate B's allocation as 1/2 (since there are only 2 accounts remaining and
113             one item to allocate). B rounds up 1/2 of 2/3 (or 1/3) of the time and down
114             1/2 of 2/3 (or 1/3) of the time. If neither A nor B rounds up (which occurs
115             2/3 * 1/2, or 1/3 of the time), C's allocation is calculated as 1/1 since we
116             have one item to allocate and only one account to allocate it to. So, 1/3 of
117             the time C receives the benefit of the rounding error. We never end up with
118             any unallocated items.
119              
120             This algorithm works for any number of weighted allocations.
121              
122             =over 4
123              
124             =item round_fair($value, @weights)
125              
126             Returns a list of integer values that sum to C<$value> where each return value
127             is a portion of C<$value> allocated by the respective weights in C<@weights>.
128             The number of return values is equal to the number of elements in C<@weights>
129              
130             C<$value> must be an integer.
131              
132             =cut
133              
134             sub round_fair {
135 52     52 1 268 my $value = shift;
136              
137 52 50 33     221 croak "Value to be allocated must be an integer >= 0" unless int($value) == $value && $value >= 0;
138              
139 52 50       106 return ($value) if @_ == 1;
140 52 50       92 return (0) x @_ if $value == 0;
141              
142 52         50 my $basis = 0;
143 52         76 for my $w ( @_ ) {
144 214 50       306 croak "Weights must be > 0" unless $w > 0;
145 214         307 $basis += $w;
146             }
147              
148 52         68 my $sum = 0;
149 52         79 my @in = map { my $r = $value * $_ / $basis; $sum += $r; $r } @_;
  214         268  
  214         202  
  214         322  
150              
151             # First, create the extra entry for the total, so that the sum of
152             # the new array is zero.
153 52         75 push @in, -$sum;
154              
155 52         104 my $out = _round_adjacent_arrayref(\@in);
156 52         66 pop @$out; # Discard the entry for the total
157              
158 52         160 return @$out;
159             }
160              
161             =item round_adjacent(@input_values)
162              
163             Returns a list of integer values, each of which is numerically
164             adjacent to the corresponding element of @input_values, and whose total is
165             numerically adjacent to the total of @input_values.
166              
167             The expected value of each output value is equal to the corresponding element
168             of @input_values (within a small error margin due to the limited machine
169             precision).
170              
171             =cut
172              
173             sub round_adjacent {
174 11002 100   11002 1 549496 return () unless @_; # identity
175              
176             # First, create the extra entry for the total, so that the sum of
177             # the new array is zero.
178 11001         29270 push @_, -sum(@_);
179              
180             # use a reference to eliminate an unnecessary copy
181 11001         20226 my $out = _round_adjacent_arrayref(\@_);
182              
183 11001         13537 pop @$out; # Discard the entry for the total
184 11001         43021 return @$out;
185             }
186              
187             sub _round_adjacent_arrayref {
188 11053     11053   12493 my $in = shift;
189              
190             # Next, shuffle the order, so that the input order has no effect
191             # on the randomness characteristics.
192 6     6   5433 my @order = shuffle($[ .. $#{$in});
  6         2884  
  6         493  
  11053         11878  
  11053         47166  
193 11053         59562 @$in = map $in->[$_], @order;
194              
195 11053         22868 my $out = _round_adjacent_core($in);
196              
197 11053         10678 assert(sum(@$out) == 0);
198              
199             # put the output back into original order
200 11053         9893 my @r;
201 11053         84125 $r[$order[$_]] = $out->[$_] for $[ .. $#order;
202              
203 11053         35240 return \@r;
204             }
205              
206             # Like _round_adjacent_arrayref, except that the inputs must sum to zero, and the
207             # input order may affect the variance and correlations, etc.
208             sub _round_adjacent_core {
209 11053     11053   11717 my $in = shift;
210              
211 11053         9501 assert(scalar @$in); # @$in must not be empty
212              
213 11053         15382 my $eps1 = 4.0 * DBL_EPSILON() * (1 + @$in);
214 11053         11139 my $eps = $eps1;
215 11053         13297 my @fp = map { my $ip = floor($_); $_ - $ip } @$in;
  70270         95160  
  70270         118278  
216              
217 11053         12915 assert(none(@fp) < 0.0);
218              
219             # TBD: Maybe accuracy or fairness can be improved by
220             # re-adjusting after every iteration. This would slow it
221             # down significantly, though.
222 11053         17885 _adjust_input(\@fp);
223              
224 11053         13313 my @out;
225 11053         10873 INPUT: while() {
  11053         10188  
226 70270         77023 $eps += $eps1;
  11053         22648  
227              
228 70270         63601 assert(_check_invariants($eps, $in, \@fp));
229              
230 11053     11053   12400 # Calculate the next output. Discard the next input in the
231             # process.
232 70270         79956 my $p0 = shift @fp; # Probability of having to overpay
233 70270 100       132633 my $r0 = rand()<$p0 ? 1 : 0; # 1 if selected to overpay; else 0
234 70270         169667 push @out, floor(shift @$in) + $r0;
  11053         19906  
235 11053 100       37553  
236 70270 100       130914 last unless @fp;
  4016         7229  
237              
238 4016 50 33     18880 # Now adjust the remaining fractional parts.
239              
240             # $slack[i] = min( $p0 * $fp[i], (1-$p0) * (1-$fp[i]) ).
241 59217         61327 my @slack;
  4016         4411  
242 59217 100       57856 my $tslack = 0.0;
  4016         6827  
243 59217         60727 do {
  2616         14147  
244             @slack = map {
245 59217         67364 if ( 1 ) {
  502371         445559  
  1400         2080  
246 502371         927402 my $slack = min $p0 * $_, (1 - $_) * (1.0 - $p0);
  1400         13129  
247 502371         520563 $tslack += $slack;
248 502371         726121 $slack;
249             }
250             else {
251             # This is fewer FLOPS, but the perf benefit
252 0     0     # is only 1% on a modern system, and it leads
253             # to greater numerical errors for some reason.
254 0 0         my $add = $p0 + $_;
  0            
255 0           my $mult = $p0 * $_;
256 0           $add > 1.0 ? 1.0 - $add + $mult : $mult
257 0           }
258 0           } @fp;
259             };
260              
261 0           # See bottom of file for proof of this property:
262 59217         69392 assert($tslack + $eps >= $p0 * (1.0 - $p0));
  0            
263              
264 0           # wrapped in assert to make it a noop when DEBUG() == 0
265 59217         51205 assert(do { warn "TSLACK = $tslack\n" if DEBUG() > 1; 1 });
266              
267 59217 100       117893 if ( $tslack > $eps1 ) {
268 37168         51714 $eps += 128.0 * $eps1 * $eps / $tslack;
269             # NOTE: The expected value of gain is
270             # $p0 * ($p0 - 1.0) /$tslack +
271             # (1.0 - $p0) * $p0 / $tslack = 0
272 37168         33430 my $gain = do {
273 37168 100       57291 if ( $r0 ) {
274             # Last guy overpaid, so the probabilities for
275             # subsequent payers drop.
276 17549         23108 ($p0 - 1.0) / $tslack;
277             }
278             else {
279             # Last guy underpaid, so the probabilities for
280             # subsequent payers rise.
281 19619         23664 $p0 / $tslack;
282             }
283             };
284              
285             # NOTE: The change in the sum of @fp due to this step
286             # is $tslack * $gain, which is either $p0 or ($p0 - 1).
287             # Either way, the sum remains an integer, because it
288             # was reduced by $p0 when we shifted off the first
289             # element early in the INPUT loop iteration.
290             # Also note that each element of @fp stays in the range
291             # [0,1] because if $r0, then slack($_, $p0) * -$gain <=
292             # $p0 * $_ * (1.0 - $p0) / ($p0 * (1.0 - $p0)) ==
293             # $_, and otherwise slack($_, $p0) * $gain <=
294             # (1 - $p0) * (1 - $_) * $p0 / ($p0 * (1.0 - $p0)) ==
295             # 1 - $_.
296             # We modify in place here, for performance.
297 37168         225908 $_ += shift(@slack) * $gain for @fp;
298             }
299             }
300             assert(@$in == 0);
301             return \@out;
302             }
303              
304             sub _adjust_input {
305             my $p = shift;
306              
307             # Adjust @$p to account for numerical errors due to small
308             # difference of large numbers when the integer parts are big.
309             my $sum = sum @$p;
310             if ( $sum != floor($sum) ) {
311             my $target = floor($sum + 0.5);
312              
313             die "Total loss of precision"
314             unless abs($sum - $target) < 0.1 && $sum + 0.05 != $sum;
315              
316             my $adj = $target / $sum;
317             if ( $adj <= 1.0 ) {
318             $_ *= $adj for @$p;
319             } else {
320             $adj = (@$p - $target) / (@$p - $sum);
321             $_ = 1.0 - (1.0-$_) * $adj for @$p;
322             }
323             }
324             }
325              
326             sub _check_invariants {
327             my ( $eps, $v, $fp ) = @_;
328              
329             if ( DEBUG() > 1 ) {
330             warn sprintf "%d %f\n", floor($_), $_ for @$fp;
331             }
332              
333             assert(@$v && @$v == @$fp);
334              
335             for ( @$fp ) {
336             assert($_ >= -$eps);
337             assert($_ <= 1.0 + $eps);
338             }
339              
340             my $sum = sum @$fp;
341             assert(abs($sum - floor($sum + 0.5)) < $eps * (1 + $sum));
342              
343             1;
344             }
345              
346             1;
347              
348             __END__