File Coverage

blib/lib/Net/OnlineCode.pm
Criterion Covered Total %
statement 110 190 57.8
branch 19 48 39.5
condition 0 3 0.0
subroutine 20 30 66.6
pod 0 17 0.0
total 149 288 51.7


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             package Net::OnlineCode;
4              
5             # play nicely as a CPAN module
6              
7 4     4   48863 use strict;
  4         10  
  4         185  
8 4     4   24 use warnings;
  4         8  
  4         176  
9 4     4   23 use vars qw(@ISA @EXPORT_OK @EXPORT %EXPORT_TAGS $VERSION);
  4         7  
  4         445  
10              
11 4     4   26 use constant DEBUG => 0;
  4         6  
  4         362  
12 4     4   20 use constant ASSERT => 1;
  4         8  
  4         972  
13              
14             require Exporter;
15              
16             @ISA = qw(Exporter);
17              
18             our @export_xor = qw (xor_strings safe_xor_strings fast_xor_strings);
19             our @export_default = qw();
20              
21             %EXPORT_TAGS = ( all => [ @export_default, @export_xor ],
22             xor => [ @export_xor ],
23             );
24             @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
25             @EXPORT = ();
26             $VERSION = '0.03';
27              
28             # Use XS for fast xors (TODO: make this optional)
29             require XSLoader;
30             XSLoader::load('Net::OnlineCode', $VERSION);
31              
32             # on to our stuff ...
33              
34              
35             # Codec parameters
36             # q is the number of message blocks that each auxiliary block will
37             # link to
38             # e epsilon, the degree of "suboptimality". Unlike Reed-Solomon or
39             # Rabin's Information Dispersal Algorithm, Online Codes are not
40             # optimal. This means that slightly more data needs to be generated
41             # than either of these two codes. Also, whereas optimal codes
42             # guarantee that a certain fraction of the "check" blocks/digits
43             # suffice to reconstruct the original message, online codes only
44             # guarantee that it can be reconstructed with a certain
45             # probability
46             #
47             # Together with the number of blocks, n, these two variables define
48             # the online code such that (1+qe)n check blocks are sufficient to
49             # reconstruct the original message with a probability of 1 - (e/2) **
50             # (q+1).
51             #
52              
53 4     4   25 use Carp;
  4         6  
  4         373  
54 4     4   3040 use POSIX qw(ceil floor);
  4         34720  
  4         35  
55 4     4   9068 use Digest::SHA qw(sha1 sha1_hex);
  4         16111  
  4         519  
56 4     4   36 use Fcntl;
  4         9  
  4         13425  
57              
58              
59             # Constructor for the base class
60             #
61             # This takes the three parameters that define the Online Code scheme,
62             # corrects the value of epsilon if needed (see below) and then derives
63             # the following:
64             #
65             # * max degree variable (F)
66             # * number of auxiliary blocks (0.55 *qen)
67             # * probability distribution p_1, p2, ... , p_F
68              
69             sub new {
70              
71 2     2 0 13 my $class = shift;
72             # The default parameters used here for q and e (epsilon) are as
73             # suggested in the paper "Rateless Codes and Big Downloads" by Petar
74             # Maymounkov and David Maziere. Note that the value of e may be
75             # overridden with a higher value if the lower value doesn't satisfy
76             # max_degree(epsilon) > ceil(0.55 * q.e.mblocks)
77 2         15 my %args = (
78             e => 0.01,
79             q => 3,
80             mblocks => undef,
81             expand_aux => 0,
82             e_warning => 0,
83              
84             # We don't use or store any RNG parameter that's been
85             # passed into the constructor.
86             @_
87             );
88              
89              
90 2         8 my ($q,$e,$mblocks) = @args{qw(q e mblocks)};
91              
92 2 50       7 unless (defined $args{mblocks}) {
93 0         0 carp __PACKAGE__ . ": mblocks => (# message blocks) must be set\n";
94 0         0 return undef;
95             }
96              
97 2         2 print "Net::OnlineCode mblocks = $mblocks\n" if DEBUG;
98              
99 2         3 my $P = undef;
100 2         3 my $e_changed = 0;
101              
102             # how many auxiliary blocks would this scheme need?
103 2         7 my $ablocks = _count_auxiliary($q,$e,$mblocks);
104              
105             # does epsilon value need updating?
106 2         4 my $f = _max_degree($e);
107              
108 2 100       27 if ($f > $mblocks + $ablocks) {
109              
110 1         2 $e_changed = 1;
111              
112 1 50       3 if ($args{e_warning}) {
113 0         0 print "E CHANGED!!\nWas: $e\n";
114 0         0 print "Gave F value of $f\n";
115             }
116              
117             # use a binary search to find a new epsilon such that
118             # get_max_degree($epsilon) <= mblocks + ablocks (ie, n)
119 1         2 my $epsilon = $e;
120              
121             local *eval_f = sub {
122 11     11   6 my $t = shift;
123 11         31 return _max_degree(1/(1 + exp(-$t)));
124 1         5 };
125              
126 1         4 my $l = -log(1/$e - 1);
127 1         2 my $r = $l + 1;
128              
129             # expand right side of search until we get F <= n'
130 1         2 while (eval_f($r) > $mblocks + $ablocks) {
131             # $r = $l + ($r - $l) * 2;
132 1         3 $r = 2 * $r - $l;
133             }
134              
135             # binary search between left and right to find a suitable lower
136             # value of epsilon still satisfying F <= n'
137 1         3 while ($r - $l > 0.01) {
138 8         8 my $m = ($l + $r) / 2;
139 8 100       10 if (eval_f($m) > $mblocks + $ablocks) {
140 3         7 $l = $m;
141             } else {
142 5         8 $r = $m;
143             }
144             }
145              
146             # update e and ablocks
147 1         3 $epsilon = 1/(1 + exp(-$r));
148 1         2 $f = eval_f($r);
149             #$f=_max_degree($epsilon);
150 1 50       3 carp __PACKAGE__ . ": increased epsilon value from $e to $epsilon\n"
151             if $args{e_warning};
152 1         2 $e = $epsilon;
153 1         2 $ablocks = _count_auxiliary($q,$e,$mblocks);
154              
155 1 50       5 if ($args{e_warning}) {
156              
157 0         0 print "Is now: $e\n";
158 0         0 print "New F: $f\n";
159             }
160              
161             }
162              
163             # calculate the probability distribution
164 2         2 print "new: mblocks=$mblocks, ablocks=$ablocks, q=$q\n" if DEBUG;
165 2         7 $P = _probability_distribution($mblocks + $ablocks,$e);
166              
167 2 50       9 die "Wrong number of elements in probability distribution (got "
168             . scalar(@$P) . ", expecting $f)\n"
169             unless @$P == $f;
170              
171 2         323 my $self = { q => $q, e => $e, f => $f, P => $P,
172             mblocks => $mblocks, ablocks => $ablocks,
173             coblocks => $mblocks + $ablocks,
174             chblocks => 0, expand_aux=> $args{expand_aux},
175             e_changed => $e_changed, unique => {},
176             fisher_string => pack("L*", (0 .. $mblocks + $ablocks -1)),
177             };
178              
179 2         68 print "expand_aux => $self->{expand_aux}\n" if DEBUG;
180              
181 2         19 bless $self, $class;
182              
183             }
184              
185             # while it probably doesn't matter too much to the encoder whether the
186             # supplied e value needed to be changed, if the receiver plugs the
187             # received value of e into the constructor and it ends up changing,
188             # there will be a problem with receiving the file.
189             sub e_changed {
190 0     0 0 0 return shift ->{e_changed};
191             }
192              
193             # convenience accessor functions
194             sub get_mblocks { # count message blocks; passed into new
195 2     2 0 11 return shift -> {mblocks};
196             }
197              
198             sub get_ablocks { # count auxiliary blocks; set in new
199 2     2 0 12 return shift -> {ablocks};
200             }
201              
202             sub get_coblocks { # count composite blocks
203 2     2 0 5 my $self = shift;
204 2         12 return $self->{mblocks} + $self->{ablocks};
205             }
206              
207             # count checkblocks
208             sub get_chblocks {
209 0     0 0 0 return shift->{chblocks}
210             }
211              
212             sub get_q { # q == reliability factor
213 0     0 0 0 return shift -> {q};
214             }
215              
216             sub get_e { # e == suboptimality factor
217 2     2 0 700 return shift -> {e};
218             }
219              
220             sub get_epsilon { # epsilon == e, as above
221 0     0 0 0 return shift -> {e};
222             }
223              
224             sub get_f { # f == max (check block) degree
225 2     2 0 12 return shift -> {f};
226             }
227              
228             sub get_P { # P == probability distribution
229 0     0 0 0 return shift -> {P}; # (array ref)
230             }
231              
232              
233             # "Private" routines
234              
235             # calculate how many auxiliary blocks need to be generated for a given
236             # code setup
237             sub _count_auxiliary {
238 3     3   3 my ($q, $e, $n) = @_;
239              
240 3         24 my $count = int(ceil(0.55 * $q * $e * $n));
241 3         4 my $delta = 0.55 * $e;
242              
243 3         4 warn "failure probability " . ($delta ** $q) . "\n" if DEBUG;
244             #$count = int(ceil($q * $delta * $n));
245              
246             # Is it better to change q or the number of aux blocks if q is too
247             # big? It's certainly easier to keep the q value and increase the
248             # number of aux blocks, as I'm doing here, and may even be the right
249             # thing to do rather than ignoring the user's q value.
250 3 50       6 if ($count < $q) {
251 0         0 $count = $q;
252 0         0 warn "updated _count_auxiliary output value to $q\n";
253             }
254 3         5 return $count;
255             }
256              
257             # The max degree specifies the maximum number of blocks to be XORed
258             # together. This parameter is named F.
259             sub _max_degree {
260              
261 15     15   11 my $epsilon = shift;
262              
263 15         35 my $quotient = (2 * log ($epsilon / 2)) /
264             (log (1 - $epsilon / 2));
265              
266 15         8 my $delta = 0.55 * $epsilon;
267             #$quotient = (log ($epsilon) + log($delta)) / (log (1 - $epsilon));
268              
269 15         36 return int(ceil($quotient));
270             }
271              
272             # Functions relating to probability distribution
273             #
274             # From the wikipedia page:
275             #
276             # http://en.wikipedia.org/wiki/Online_codes
277             #
278             # During the inner coding step the algorithm selects some number of
279             # composite messages at random and XORs them together to form a check
280             # block. In order for the algorithm to work correctly, both the number
281             # of blocks to be XORed together and their distribution over composite
282             # blocks must follow a particular probability distribution.
283             #
284             # Consult the references for the implementation details.
285             #
286             # The probability distribution is designed to map a random number in
287             # the range [0,1) and return a degree i between 1 and F. The
288             # probability distribution depends on a single input, n, which is the
289             # number of blocks in the original message. The fixed values for q and
290             # epsilon are also used.
291             #
292             # This code includes two changes from that described in the wikipedia
293             # page.
294             #
295             # 1) Rather than returning an array of individual probabilities p_i,
296             # the array includes the cumulative probabilities. For example, if
297             # the p_i probabilities were:
298             # (0.1, 0.2, 0.3, 0.2, 0.1, 0.1)
299             # then the returned array would be:
300             # (0.1, 0.3, 0.6, 0.8, 0.9, 1) (last element always has value 1)
301             # This is done simply to make selecting a value based on the random
302             # number more efficient, but the underlying probability distribution
303             # is the same.
304             # 2) Handling edge cases. These are:
305             # a) the case where n = 1; and
306             # b) the case where F > n
307             # In both cases, the default value for epsilon cannot be used, so a
308             # more suitable value is calculated.
309             #
310             # The return value is an array containing:
311             #
312             # * the max degree F
313             # * a possibly updated value of epsilon
314             # * the F values of the (cumulative) probability distribution
315              
316             sub _probability_distribution {
317              
318 2     2   2 my ($nblocks,$epsilon) = @_; # nblocks = number of *composite* blocks!
319              
320             # after code reorganisation, this shouldn't happen:
321 2 50       5 if ($nblocks == 1) {
322 0         0 croak "BUG: " . __PACKAGE__ ." - number of composite blocks = 1\n";
323 0         0 return (1, 0, 1);
324             }
325              
326 2         3 print "generating probability distribution from nblocks $nblocks, e $epsilon\n"
327             if DEBUG;
328              
329 2         2 my $f = _max_degree($epsilon);
330              
331             # after code reorganisation, this shouldn't happen:
332 2 50       6 if ($f > $nblocks) {
333 0         0 croak "BUG: " .__PACKAGE__ . " - epsilon still too small!\n";
334             }
335              
336             # probability distribution
337              
338             # Calculate the sum of the sequence:
339             #
340             # 1 + 1/F
341             # p_1 = 1 - ---------
342             # 1 + e
343             #
344             #
345             # F . (1 - p_1)
346             # p_i = ---------------------
347             # (F - 1) . (i^2 - i)
348             #
349             # Since the i term is the only thing that changes for each p_i, I
350             # optimise the calculation by keeping a fixed term involving only p
351             # and f with a variable one involving i, then dividing as
352             # appropriate.
353              
354 2         5 my $p1 = 1 - (1 + 1/$f)/(1 + $epsilon);
355 2         4 my $pfterm = (1-$p1) * $f / ($f - 1);
356              
357 2 50       6 die "p1 is negative\n" if $p1 < 0;
358              
359             # hard-code simple cases where f = 1 or 2
360 2 50       9 if ($f == 1) {
    50          
361 0         0 return [1];
362             } elsif ($f == 2) {
363 0         0 return [$p1, 1];
364             }
365              
366             # calculate sum(p_i) for 2 <= i < F.
367             # p_i=F is simply set to 1 to avoid rounding errors in the sum
368 2         3 my $sum = $p1;
369 2         3 my @P = ($sum);
370              
371 2         7 my $i = 2;
372 2         6 while ($i < $f) {
373 2515         1842 my $iterm = $i * ($i - 1);
374 2515         1749 my $p_i = $pfterm / $iterm;
375              
376 2515         1605 $sum += $p_i;
377              
378 2515 50       3122 die "p_$i is negative\n" if $p_i < 0;
379              
380 2515         1967 push @P, $sum;
381 2515         3282 $i++;
382             }
383              
384 2         4 if (DEBUG) {
385             # Make sure of the assumption that the sum of terms approaches 1.
386             # If the "rounding error" below is not a very small number, we
387             # know there is a problem with the assumption!
388             my $p_last = $sum + $pfterm / ($f * $f - $f);
389             my $absdiff = abs (1 - $p_last);
390             warn "Absolute difference of 1,sum to p_F = $absdiff\n" if $absdiff >1e-8;
391             }
392              
393 2         262 return [@P,1];
394             }
395              
396              
397             # Fisher-Yates shuffle algorithm, based on recipe 4.17 from the Perl
398             # Cookbook. Takes an input array it randomises the order (ie,
399             # shuffles) and then truncates the array to "picks" elements.
400             #
401             # This is much more efficient than the usual approach of "keep picking
402             # new elements until we get k distinct ones" particularly as k
403             # approaches the size of the array. That algorithm could make
404             # exponentially many calls to rand, whereas this just makes one call
405             # per item to be picked.
406             #
407             #
408              
409             sub fisher_yates_shuffle {
410              
411 0     0 0 0 my ($rng, $array, $picks) = @_;
412              
413 0         0 if (ASSERT) {
414 0 0       0 die "fisher_yates_shuffle: 1st arg not an RNG object\n"
415             unless ref($rng);
416             }
417              
418             # length in 32-bit words
419 0         0 my $len = length($array) >> 2;
420              
421             # Change recipe to pick subset of list
422 0 0 0     0 $picks=$len unless
423             defined($picks) and $picks >= 0;
424              
425             # algorithm fills picks into the end of the array
426 0         0 my $i=$len;
427 0         0 while (--$i >= $len - $picks) {
428 0         0 my $j=int($rng->rand($i + 1)); # range [0,$i]
429             #next if $i==$j; # not worth checking, probably
430             # @$array[$i,$j]=@$array[$j,$i]
431 0         0 my $tmp1 = substr $array, $i << 2, 4;
432 0         0 my $tmp2 = substr $array, $j << 2, 4;
433 0         0 substr $array, $i << 2, 4, $tmp2;
434 0         0 substr $array, $j << 2, 4, $tmp1;
435             }
436              
437 0         0 return (unpack "L*", substr $array, ($len - $picks) << 2);
438             }
439              
440             #
441             # Routine to calculate the auxiliary block -> message block* mapping.
442             # The passed rng object must already have been seeded, and both sender
443             # and receiver should use the same seed. Returns [[..],[..],..]
444             # representing which message blocks each of the auxiliary block is
445             # composed of.
446             #
447              
448             sub auxiliary_mapping {
449              
450 0     0 0 0 my $self = shift;
451 0         0 my $rng = shift;
452              
453 0 0       0 croak "auxiliary_mapping: rng is not a reference\n" unless ref($rng);
454              
455             #print "auxiliary_mapping: entering RNG value: " . ($rng->as_hex). "\n";
456              
457             # hash slices: powerful, but syntax is sometimes confusing
458 0         0 my ($mblocks,$ablocks,$q) = @{$self}{"mblocks","ablocks","q"};
  0         0  
459              
460             # make sure hash(ref) slice above actually did something sensible:
461             # die "weird mblocks/ablocks" unless $nblocks + $aux_blocks >= 2;
462              
463             # I made a big mistake when reading the description for creating aux
464             # blocks. What I implemented first (in the commented-out section
465             # below) was to link each of the auxiliary blocks to q message
466             # blocks. What I should have done was to link each *message block*
467             # to q auxiliary blocks. As a result, it was taking much more than
468             # the expected number of check blocks to decode the message.
469              
470             # as a result of the new algorithm, it makes sense to work out
471             # reciprocal links between message blocks and auxiliary blocks
472             # within the base class. Storing them here won't work out very well,
473             # though: the encoder doesn't care about the message block to aux
474             # block mapping, so it would be a waste of memory, but more
475             # importantly, the decoder object stores all mappings in a private
476             # GraphDecoder object (so duplicating the structure here would be a
477             # waste).
478              
479             # I will make one change to the output, though: instead of just
480             # returning the mappings for the 0.55qen auxiliary blocks, I will
481             # return a list of message block *and* auxiliary block mappings. The
482             # encoder and decoder will have to be changed: encoder immediately
483             # splices the array to remove unwanted message block mappings, while
484             # the decoder will be simplified by only having to pass the full
485             # list to the graph decoder (which will have to be modified
486             # appropriately).
487              
488 0         0 my $aux_mapping = [];
489              
490 0         0 my $ab_string = pack "L*", ($mblocks .. $mblocks + $ablocks -1);
491              
492             # list of empty hashes
493 0         0 my @hashes;
494 0         0 for (0 .. $mblocks + $ablocks -1) { $hashes[$_] = {}; }
  0         0  
495              
496 0         0 for my $msg (0 .. $mblocks - 1) {
497             # list of all aux block indices
498              
499 0         0 foreach my $aux (fisher_yates_shuffle($rng, $ab_string, $q)) {
500 0         0 $hashes[$aux]->{$msg}=undef;
501 0         0 $hashes[$msg]->{$aux}=undef;
502             }
503             }
504              
505             # convert list of hashes into a list of lists
506 0         0 for my $i (0 .. $mblocks + $ablocks -1) {
507 0         0 print "map $i: " . (join " ", keys %{$hashes[$i]}) . "\n" if DEBUG;
508 0         0 push @$aux_mapping, [ keys %{$hashes[$i]} ];
  0         0  
509             }
510              
511             # save and return aux_mapping
512 0         0 $self->{aux_mapping} = $aux_mapping;
513             }
514              
515             # Until I get the auto expand_aux working, this will have to do
516             sub blklist_to_msglist {
517              
518 0     0 0 0 my ($self,@xor_list) = @_;
519              
520 0         0 my $mblocks = $self->{mblocks};
521              
522 0         0 my %blocks;
523 0         0 while (@xor_list) {
524 0         0 my $entry = shift(@xor_list);
525 0 0       0 if ($entry < $mblocks) { # is it a message block index?
526             # toggle entry in the hash
527 0 0       0 if (exists($blocks{$entry})) {
528 0         0 delete $blocks{$entry};
529             } else {
530 0         0 $blocks{$entry}= undef;
531             }
532             } else {
533             # aux block : push all message blocks it's composed of
534 0         0 my @expansion = @{$self->{aux_mapping}->[$entry]};
  0         0  
535 0         0 if (DEBUG) {
536             print "expand_aux: expanding $entry to " .
537             (join " ", @expansion) . "\n";
538             }
539 0         0 push @xor_list, @expansion;
540             }
541             }
542 0         0 return keys %blocks;
543             }
544              
545             # Calculate the composition of a single check block based on the
546             # supplied RNG. Returns a reference to a list of composite blocks
547             # indices.
548              
549             sub checkblock_mapping {
550              
551 0     0 0 0 my $self = shift;
552 0         0 my $rng = shift;
553              
554 0 0       0 croak "rng is not an object reference\n" unless ref($rng);
555              
556 0         0 my ($mblocks,$coblocks,$P) = @{$self}{"mblocks","coblocks","P"};
  0         0  
557 0         0 my @coblocks;
558              
559             # use weighted distribution to find how many blocks to link
560 0         0 my $i = 0;
561 0         0 my $r = $rng->rand;
562 0         0 ++$i while($r > $P->[$i]); # terminates since r < P[last]
563 0         0 ++$i;
564              
565             # select i composite blocks uniformly
566 0         0 @coblocks = fisher_yates_shuffle($rng, $self->{fisher_string} , $i);
567              
568 0         0 if (ASSERT) {
569 0 0       0 die "fisher_yates_shuffle: created empty check block\n!" unless @coblocks;
570             }
571              
572 0         0 print "CHECKblock mapping: " . (join " ", @coblocks) . "\n" if DEBUG;
573              
574 0         0 return \@coblocks;
575              
576             }
577              
578             # non-method sub for xoring a source string (passed by reference) with
579             # one or more target strings. I may reimplement this using XS later to
580             # make it more efficient, but will keep a pure-perl version with this
581             # name.
582             sub safe_xor_strings {
583              
584 4     4 0 25 my $source = shift;
585              
586             # die if user forgot to pass in a reference (to allow updating) or
587             # called $self->safe_xor_strings by mistake
588 4 50       18 croak "xor_strings: arg 1 should be a reference to a SCALAR!\n"
589             unless ref($source) eq "SCALAR";
590              
591 4         8 my $len = length ($$source);
592              
593 4 50       13 croak "xor_strings: source string can't have zero length!\n"
594             unless $len;
595              
596 4         10 foreach my $target (@_) {
597 4 50       15 croak "xor_strings: targets not all same size as source\n"
598             unless length($target) == $len;
599 4         11 map { substr ($$source, $_, 1) ^= substr ($target, $_, 1) }
  28         95  
600             (0 .. $len-1);
601             }
602              
603 4         24 return $$source;
604             }
605              
606             # Later, xor_strings could be replaced with an C version with reduced
607             # error checking, so make a backward-compatible version and an
608             # explicit fast/unsafe version.
609 0     0 0   sub xor_strings { safe_xor_strings(@_) }
610             #sub fast_xor_strings { safe_xor_strings(@_) } # implemented in OnlineCode.xs.
611              
612              
613             1;
614              
615             __END__