File Coverage

blib/lib/Net/OnlineCode.pm
Criterion Covered Total %
statement 109 197 55.3
branch 18 50 36.0
condition 0 3 0.0
subroutine 20 30 66.6
pod 0 17 0.0
total 147 297 49.4


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             package Net::OnlineCode;
4              
5             # play nicely as a CPAN module
6              
7 4     4   32954 use strict;
  4         5  
  4         127  
8 4     4   15 use warnings;
  4         5  
  4         125  
9 4     4   16 use vars qw(@ISA @EXPORT_OK @EXPORT %EXPORT_TAGS $VERSION);
  4         6  
  4         483  
10              
11 4     4   22 use constant DEBUG => 0;
  4         4  
  4         309  
12 4     4   49 use constant ASSERT => 1;
  4         6  
  4         639  
13              
14             require Exporter;
15              
16             @ISA = qw(Exporter);
17              
18             our @export_xor = qw (xor_strings safe_xor_strings fast_xor_strings);
19             our @export_default = qw();
20              
21             %EXPORT_TAGS = ( all => [ @export_default, @export_xor ],
22             xor => [ @export_xor ],
23             );
24             @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
25             @EXPORT = ();
26             $VERSION = '0.04';
27              
28             # Use XS for fast xors (TODO: make this optional)
29             require XSLoader;
30             XSLoader::load('Net::OnlineCode', $VERSION);
31              
32             # on to our stuff ...
33              
34              
35             # Codec parameters
36             # q is the number of message blocks that each auxiliary block will
37             # link to
38             # e epsilon, the degree of "suboptimality". Unlike Reed-Solomon or
39             # Rabin's Information Dispersal Algorithm, Online Codes are not
40             # optimal. This means that slightly more data needs to be generated
41             # than either of these two codes. Also, whereas optimal codes
42             # guarantee that a certain fraction of the "check" blocks/digits
43             # suffice to reconstruct the original message, online codes only
44             # guarantee that it can be reconstructed with a certain
45             # probability
46             #
47             # Together with the number of blocks, n, these two variables define
48             # the online code such that (1+qe)n check blocks are sufficient to
49             # reconstruct the original message with a probability of 1 - (e/2) **
50             # (q+1).
51             #
52              
53 4     4   18 use Carp;
  4         6  
  4         296  
54 4     4   2062 use POSIX qw(ceil floor);
  4         22839  
  4         27  
55 4     4   6845 use Digest::SHA qw(sha1 sha1_hex);
  4         13772  
  4         389  
56 4     4   71 use Fcntl;
  4         6  
  4         10751  
57              
58              
59             # Constructor for the base class
60             #
61             # This takes the three parameters that define the Online Code scheme,
62             # corrects the value of epsilon if needed (see below) and then derives
63             # the following:
64             #
65             # * max degree variable (F)
66             # * number of auxiliary blocks (0.55 *qen)
67             # * probability distribution p_1, p2, ... , p_F
68              
69             sub new {
70              
71 2     2 0 12 my $class = shift;
72             # The default parameters used here for q and e (epsilon) are as
73             # suggested in the paper "Rateless Codes and Big Downloads" by Petar
74             # Maymounkov and David Maziere. Note that the value of e may be
75             # overridden with a higher value if the lower value doesn't satisfy
76             # max_degree(epsilon) > ceil(0.55 * q.e.mblocks)
77 2         44 my %args = (
78             e => 0.01,
79             q => 3,
80             mblocks => undef,
81             expand_aux => 0,
82             e_warning => 0,
83              
84             # We don't use or store any RNG parameter that's been
85             # passed into the constructor.
86             @_
87             );
88              
89              
90 2         6 my ($q,$e,$mblocks) = @args{qw(q e mblocks)};
91              
92 2 50       7 unless (defined $args{mblocks}) {
93 0         0 carp __PACKAGE__ . ": mblocks => (# message blocks) must be set\n";
94 0         0 return undef;
95             }
96              
97 2         3 print "Net::OnlineCode mblocks = $mblocks\n" if DEBUG;
98              
99 2         3 my $P = undef;
100 2         2 my $e_changed = 0;
101              
102             # how many auxiliary blocks would this scheme need?
103 2         6 my $ablocks = _count_auxiliary($q,$e,$mblocks);
104              
105             # does epsilon value need updating?
106 2         5 my $f = _max_degree($e);
107              
108 2 100       24 if ($f > $mblocks + $ablocks) {
109              
110 1         2 $e_changed = 1;
111              
112 1 50       5 if ($args{e_warning}) {
113 0         0 print "E CHANGED!!\nWas: $e\n";
114 0         0 print "Gave F value of $f\n";
115             }
116              
117             # use a binary search to find a new epsilon such that
118             # get_max_degree($epsilon) <= mblocks + ablocks (ie, n)
119 1         2 my $epsilon = $e;
120              
121             local *eval_f = sub {
122 11     11   11 my $t = shift;
123 11         27 return _max_degree(1/(1 + exp(-$t)));
124 1         7 };
125              
126 1         4 my $l = -log(1/$e - 1);
127 1         2 my $r = $l + 1;
128              
129             # expand right side of search until we get F <= n'
130 1         2 while (eval_f($r) > $mblocks + $ablocks) {
131             # $r = $l + ($r - $l) * 2;
132 1         2 $r = 2 * $r - $l;
133             }
134              
135             # binary search between left and right to find a suitable lower
136             # value of epsilon still satisfying F <= n'
137 1         4 while ($r - $l > 0.01) {
138 8         5 my $m = ($l + $r) / 2;
139 8 100       10 if (eval_f($m) > $mblocks + $ablocks) {
140 3         5 $l = $m;
141             } else {
142 5         10 $r = $m;
143             }
144             }
145              
146             # update e and ablocks
147 1         2 $epsilon = 1/(1 + exp(-$r));
148 1         2 $f = eval_f($r);
149             #$f=_max_degree($epsilon);
150 1 50       3 carp __PACKAGE__ . ": increased epsilon value from $e to $epsilon\n"
151             if $args{e_warning};
152 1         1 $e = $epsilon;
153 1         2 $ablocks = _count_auxiliary($q,$e,$mblocks);
154              
155 1 50       5 if ($args{e_warning}) {
156              
157 0         0 print "Is now: $e\n";
158 0         0 print "New F: $f\n";
159             }
160              
161             }
162              
163             # calculate the probability distribution
164 2         2 print "new: mblocks=$mblocks, ablocks=$ablocks, q=$q\n" if DEBUG;
165 2         7 $P = _probability_distribution($mblocks + $ablocks,$e);
166              
167 2 50       10 die "Wrong number of elements in probability distribution (got "
168             . scalar(@$P) . ", expecting $f)\n"
169             unless @$P == $f;
170              
171 2         28 my $self = { q => $q, e => $e, f => $f, P => $P,
172             mblocks => $mblocks, ablocks => $ablocks,
173             coblocks => $mblocks + $ablocks,
174             chblocks => 0, expand_aux=> $args{expand_aux},
175             e_changed => $e_changed, unique => {},
176             };
177              
178 2         3 print "expand_aux => $self->{expand_aux}\n" if DEBUG;
179              
180 2         22 bless $self, $class;
181              
182             }
183              
184             # while it probably doesn't matter too much to the encoder whether the
185             # supplied e value needed to be changed, if the receiver plugs the
186             # received value of e into the constructor and it ends up changing,
187             # there will be a problem with receiving the file.
188             sub e_changed {
189 0     0 0 0 return shift ->{e_changed};
190             }
191              
192             # convenience accessor functions
193             sub get_mblocks { # count message blocks; passed into new
194 2     2 0 10 return shift -> {mblocks};
195             }
196              
197             sub get_ablocks { # count auxiliary blocks; set in new
198 2     2 0 11 return shift -> {ablocks};
199             }
200              
201             sub get_coblocks { # count composite blocks
202 2     2 0 3 my $self = shift;
203 2         14 return $self->{mblocks} + $self->{ablocks};
204             }
205              
206             # count checkblocks
207             sub get_chblocks {
208 0     0 0 0 return shift->{chblocks}
209             }
210              
211             sub get_q { # q == reliability factor
212 0     0 0 0 return shift -> {q};
213             }
214              
215             sub get_e { # e == suboptimality factor
216 2     2 0 806 return shift -> {e};
217             }
218              
219             sub get_epsilon { # epsilon == e, as above
220 0     0 0 0 return shift -> {e};
221             }
222              
223             sub get_f { # f == max (check block) degree
224 2     2 0 40 return shift -> {f};
225             }
226              
227             sub get_P { # P == probability distribution
228 0     0 0 0 return shift -> {P}; # (array ref)
229             }
230              
231              
232             # "Private" routines
233              
234             # calculate how many auxiliary blocks need to be generated for a given
235             # code setup
236             sub _count_auxiliary {
237 3     3   5 my ($q, $e, $n) = @_;
238              
239 3         24 my $count = int(ceil(0.55 * $q * $e * $n));
240 3         6 my $delta = 0.55 * $e;
241              
242 3         3 warn "failure probability " . ($delta ** $q) . "\n" if DEBUG;
243             #$count = int(ceil($q * $delta * $n));
244              
245             # Is it better to change q or the number of aux blocks if q is too
246             # big? It's certainly easier to keep the q value and increase the
247             # number of aux blocks, as I'm doing here, and may even be the right
248             # thing to do rather than ignoring the user's q value.
249 3 50       7 if ($count < $q) {
250 0         0 $count = $q;
251             # warn "updated _count_auxiliary output value to $q\n";
252             }
253 3         4 return $count;
254             }
255              
256             # The max degree specifies the maximum number of blocks to be XORed
257             # together. This parameter is named F.
258             sub _max_degree {
259              
260 15     15   14 my $epsilon = shift;
261              
262 15         30 my $quotient = (2 * log ($epsilon / 2)) /
263             (log (1 - $epsilon / 2));
264              
265 15         11 my $delta = 0.55 * $epsilon;
266             #$quotient = (log ($epsilon) + log($delta)) / (log (1 - $epsilon));
267              
268 15         30 return int(ceil($quotient));
269             }
270              
271             # Functions relating to probability distribution
272             #
273             # From the wikipedia page:
274             #
275             # http://en.wikipedia.org/wiki/Online_codes
276             #
277             # During the inner coding step the algorithm selects some number of
278             # composite messages at random and XORs them together to form a check
279             # block. In order for the algorithm to work correctly, both the number
280             # of blocks to be XORed together and their distribution over composite
281             # blocks must follow a particular probability distribution.
282             #
283             # Consult the references for the implementation details.
284             #
285             # The probability distribution is designed to map a random number in
286             # the range [0,1) and return a degree i between 1 and F. The
287             # probability distribution depends on a single input, n, which is the
288             # number of blocks in the original message. The fixed values for q and
289             # epsilon are also used.
290             #
291             # This code includes two changes from that described in the wikipedia
292             # page.
293             #
294             # 1) Rather than returning an array of individual probabilities p_i,
295             # the array includes the cumulative probabilities. For example, if
296             # the p_i probabilities were:
297             # (0.1, 0.2, 0.3, 0.2, 0.1, 0.1)
298             # then the returned array would be:
299             # (0.1, 0.3, 0.6, 0.8, 0.9, 1) (last element always has value 1)
300             # This is done simply to make selecting a value based on the random
301             # number more efficient, but the underlying probability distribution
302             # is the same.
303             # 2) Handling edge cases. These are:
304             # a) the case where n = 1; and
305             # b) the case where F > n
306             # In both cases, the default value for epsilon cannot be used, so a
307             # more suitable value is calculated.
308             #
309             # The return value is an array containing:
310             #
311             # * the max degree F
312             # * a possibly updated value of epsilon
313             # * the F values of the (cumulative) probability distribution
314              
315             sub _probability_distribution {
316              
317 2     2   2 my ($nblocks,$epsilon) = @_; # nblocks = number of *composite* blocks!
318              
319 2         2 print "generating probability distribution from nblocks $nblocks, e $epsilon\n"
320             if DEBUG;
321              
322 2         3 my $f = _max_degree($epsilon);
323              
324             # after code reorganisation, this shouldn't happen:
325 2 50       5 if ($f > $nblocks) {
326 0         0 croak "BUG: " .__PACKAGE__ . " - epsilon still too small!\n";
327             }
328              
329             # probability distribution
330              
331             # Calculate the sum of the sequence:
332             #
333             # 1 + 1/F
334             # p_1 = 1 - ---------
335             # 1 + e
336             #
337             #
338             # F . (1 - p_1)
339             # p_i = ---------------------
340             # (F - 1) . (i^2 - i)
341             #
342             # Since the i term is the only thing that changes for each p_i, I
343             # optimise the calculation by keeping a fixed term involving only p
344             # and f with a variable one involving i, then dividing as
345             # appropriate.
346              
347 2         5 my $p1 = 1 - (1 + 1/$f)/(1 + $epsilon);
348 2         4 my $pfterm = (1-$p1) * $f / ($f - 1);
349              
350 2 50       5 die "p1 is negative\n" if $p1 < 0;
351              
352             # hard-code simple cases where f = 1 or 2
353 2 50       6 if ($f == 1) {
    50          
354 0         0 return [1];
355             } elsif ($f == 2) {
356 0         0 return [$p1, 1];
357             }
358              
359             # calculate sum(p_i) for 2 <= i < F.
360             # p_i=F is simply set to 1 to avoid rounding errors in the sum
361 2         3 my $sum = $p1;
362 2         167 my @P = ($sum);
363              
364 2         5 my $i = 2;
365 2         6 while ($i < $f) {
366 2515         2109 my $iterm = $i * ($i - 1);
367 2515         1818 my $p_i = $pfterm / $iterm;
368              
369 2515         1783 $sum += $p_i;
370              
371 2515 50       3459 die "p_$i is negative\n" if $p_i < 0;
372              
373 2515         2117 push @P, $sum;
374 2515         3453 $i++;
375             }
376              
377 2         5 if (DEBUG) {
378             # Make sure of the assumption that the sum of terms approaches 1.
379             # If the "rounding error" below is not a very small number, we
380             # know there is a problem with the assumption!
381             my $p_last = $sum + $pfterm / ($f * $f - $f);
382             my $absdiff = abs (1 - $p_last);
383             warn "Absolute difference of 1,sum to p_F = $absdiff\n" if $absdiff >1e-8;
384             }
385              
386 2         262 return [@P,1];
387             }
388              
389              
390             # Using Floyd's algorithm instead of Fisher-Yates shuffle. Picks k
391             # distinct elements from the range [start, start + n - 1]. Avoids the
392             # need to copy/re-initialise an array of size n every time we make a
393             # new check block.
394             sub floyd {
395 0     0 0 0 my ($rng, $start, $n, $k) = @_;
396 0         0 my %set;
397 0         0 my ($j, $t) = ($n - $k);
398 0         0 while ($j < $n) {
399 0         0 $t = floor($rng->rand($j + 1));
400 0 0       0 if (!exists($set{$t + $start})) {
401 0         0 $set{$t + $start} = undef;
402             } else {
403 0         0 $set{$j + $start} = undef;
404             }
405 0         0 ++$j;
406             }
407             # die "Floyd didn't pick $k elements" if ASSERT and $k != (keys %set);
408 0         0 return keys %set;
409             }
410              
411             # Routine to calculate the auxiliary block -> message block* mapping.
412             # The passed rng object must already have been seeded, and both sender
413             # and receiver should use the same seed. Returns [[..],[..],..]
414             # representing which message blocks each of the auxiliary block is
415             # composed of.
416              
417             sub auxiliary_mapping {
418              
419 0     0 0 0 my $self = shift;
420 0         0 my $rng = shift;
421              
422 0 0       0 croak "auxiliary_mapping: rng is not a reference\n" unless ref($rng);
423              
424             # hash slices: powerful, but syntax is sometimes confusing
425 0         0 my ($mblocks,$ablocks,$q) = @{$self}{"mblocks","ablocks","q"};
  0         0  
426              
427 0         0 my $aux_mapping = [];
428              
429 0         0 my $ab_string = pack "L*", ($mblocks .. $mblocks + $ablocks -1);
430              
431             # list of empty hashes
432 0         0 my @hashes;
433 0         0 for (0 .. $mblocks + $ablocks -1) { $hashes[$_] = {}; }
  0         0  
434              
435             # Use an unrolled version of Floyd's algorithm for the default case
436             # where q=3
437 0 0       0 if ($q == 3) {
438 0         0 my ($a,$b,$c);
439 0         0 for my $msg (0 .. $mblocks - 1) {
440 0         0 $a = $mblocks + floor($rng->rand($ablocks - 2));
441 0         0 $hashes[$a] ->{$msg}=undef;
442 0         0 $hashes[$msg]->{$a} =undef;
443 0         0 $b = $mblocks + floor($rng->rand($ablocks - 1));
444 0 0       0 $b = $mblocks + $ablocks - 2 if $b == $a;
445 0         0 $hashes[$b] ->{$msg}=undef;
446 0         0 $hashes[$msg]->{$b} =undef;
447 0         0 $c = $mblocks + floor($rng->rand($ablocks));
448 0 0 0     0 $c = $mblocks + $ablocks - 1 if $c == $a or $c == $b;
449 0         0 $hashes[$c] ->{$msg}=undef;
450 0         0 $hashes[$msg]->{$c} =undef;
451             }
452             } else {
453 0         0 for my $msg (0 .. $mblocks - 1) {
454 0         0 foreach my $aux (floyd($rng, $mblocks, $ablocks, $q)) {
455 0         0 $hashes[$aux]->{$msg}=undef;
456 0         0 $hashes[$msg]->{$aux}=undef;
457             }
458             }
459             }
460              
461             # convert list of hashes into a list of lists
462 0         0 for my $i (0 .. $mblocks + $ablocks -1) {
463 0         0 print "map $i: " . (join " ", keys %{$hashes[$i]}) . "\n" if DEBUG;
464 0         0 push @$aux_mapping, [ keys %{$hashes[$i]} ];
  0         0  
465             }
466              
467             # save and return aux_mapping
468 0         0 $self->{aux_mapping} = $aux_mapping;
469             }
470              
471             # Until I get the auto expand_aux working, this will have to do
472             sub blklist_to_msglist {
473              
474 0     0 0 0 my ($self,@xor_list) = @_;
475              
476 0         0 my $mblocks = $self->{mblocks};
477              
478 0         0 my %blocks;
479 0         0 while (@xor_list) {
480 0         0 my $entry = shift(@xor_list);
481 0 0       0 if ($entry < $mblocks) { # is it a message block index?
482             # toggle entry in the hash
483 0 0       0 if (exists($blocks{$entry})) {
484 0         0 delete $blocks{$entry};
485             } else {
486 0         0 $blocks{$entry}= undef;
487             }
488             } else {
489             # aux block : push all message blocks it's composed of
490 0         0 my @expansion = @{$self->{aux_mapping}->[$entry]};
  0         0  
491 0         0 if (DEBUG) {
492             print "expand_aux: expanding $entry to " .
493             (join " ", @expansion) . "\n";
494             }
495 0         0 push @xor_list, @expansion;
496             }
497             }
498 0         0 return keys %blocks;
499             }
500              
501             # Calculate the composition of a single check block based on the
502             # supplied RNG. Returns a reference to a list of composite blocks
503             # indices.
504              
505             sub checkblock_mapping {
506              
507 0     0 0 0 my $self = shift;
508 0         0 my $rng = shift;
509              
510 0 0       0 croak "rng is not an object reference\n" unless ref($rng);
511              
512 0         0 my ($mblocks,$coblocks,$P) = @{$self}{"mblocks","coblocks","P"};
  0         0  
513 0         0 my @coblocks;
514              
515             # use weighted distribution to find how many blocks to link
516 0         0 my $i = 0;
517 0         0 my $r = $rng->rand;
518 0         0 ++$i while($r > $P->[$i]); # terminates since r < P[last]
519 0         0 ++$i;
520              
521             # select i composite blocks uniformly
522 0         0 @coblocks = floyd($rng, 0, $coblocks , $i);
523              
524 0         0 if (ASSERT) {
525 0 0       0 die "checkblock_mapping: created empty check block\n!" unless @coblocks;
526             }
527              
528 0         0 print "CHECKblock mapping: " . (join " ", @coblocks) . "\n" if DEBUG;
529              
530 0         0 return \@coblocks;
531              
532             }
533              
534             # non-method sub for xoring a source string (passed by reference) with
535             # one or more target strings. I may reimplement this using XS later to
536             # make it more efficient, but will keep a pure-perl version with this
537             # name.
538             sub safe_xor_strings {
539              
540 4     4 0 17 my $source = shift;
541              
542             # die if user forgot to pass in a reference (to allow updating) or
543             # called $self->safe_xor_strings by mistake
544 4 50       14 croak "xor_strings: arg 1 should be a reference to a SCALAR!\n"
545             unless ref($source) eq "SCALAR";
546              
547 4         6 my $len = length ($$source);
548              
549 4 50       9 croak "xor_strings: source string can't have zero length!\n"
550             unless $len;
551              
552 4         9 foreach my $target (@_) {
553 4 50       11 croak "xor_strings: targets not all same size as source\n"
554             unless length($target) == $len;
555 4         11 map { substr ($$source, $_, 1) ^= substr ($target, $_, 1) }
  28         79  
556             (0 .. $len-1);
557             }
558              
559 4         18 return $$source;
560             }
561              
562             # Later, xor_strings could be replaced with an C version with reduced
563             # error checking, so make a backward-compatible version and an
564             # explicit fast/unsafe version.
565 0     0 0   sub xor_strings { safe_xor_strings(@_) }
566             #sub fast_xor_strings { safe_xor_strings(@_) } # implemented in OnlineCode.xs.
567              
568              
569             1;
570              
571             __END__