File Coverage

blib/lib/Net/OnlineCode.pm

Criterion	Covered	Total	%
statement	109	197	55.3
branch	18	50	36.0
condition	0	3	0.0
subroutine	20	30	66.6
pod	0	17	0.0
total	147	297	49.4

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl
2
3							package Net::OnlineCode;
4
5							# play nicely as a CPAN module
6
7	4			4		32954	use strict;
	4					5
	4					127
8	4			4		15	use warnings;
	4					5
	4					125
9	4			4		16	use vars qw(@ISA @EXPORT_OK @EXPORT %EXPORT_TAGS $VERSION);
	4					6
	4					483
10
11	4			4		22	use constant DEBUG => 0;
	4					4
	4					309
12	4			4		49	use constant ASSERT => 1;
	4					6
	4					639
13
14							require Exporter;
15
16							@ISA = qw(Exporter);
17
18							our @export_xor = qw (xor_strings safe_xor_strings fast_xor_strings);
19							our @export_default = qw();
20
21							%EXPORT_TAGS = ( all => [ @export_default, @export_xor ],
22							xor => [ @export_xor ],
23							);
24							@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
25							@EXPORT = ();
26							$VERSION = '0.04';
27
28							# Use XS for fast xors (TODO: make this optional)
29							require XSLoader;
30							XSLoader::load('Net::OnlineCode', $VERSION);
31
32							# on to our stuff ...
33
34
35							# Codec parameters
36							# q is the number of message blocks that each auxiliary block will
37							# link to
38							# e epsilon, the degree of "suboptimality". Unlike Reed-Solomon or
39							# Rabin's Information Dispersal Algorithm, Online Codes are not
40							# optimal. This means that slightly more data needs to be generated
41							# than either of these two codes. Also, whereas optimal codes
42							# guarantee that a certain fraction of the "check" blocks/digits
43							# suffice to reconstruct the original message, online codes only
44							# guarantee that it can be reconstructed with a certain
45							# probability
46							#
47							# Together with the number of blocks, n, these two variables define
48							# the online code such that (1+qe)n check blocks are sufficient to
49							# reconstruct the original message with a probability of 1 - (e/2) **
50							# (q+1).
51							#
52
53	4			4		18	use Carp;
	4					6
	4					296
54	4			4		2062	use POSIX qw(ceil floor);
	4					22839
	4					27
55	4			4		6845	use Digest::SHA qw(sha1 sha1_hex);
	4					13772
	4					389
56	4			4		71	use Fcntl;
	4					6
	4					10751
57
58
59							# Constructor for the base class
60							#
61							# This takes the three parameters that define the Online Code scheme,
62							# corrects the value of epsilon if needed (see below) and then derives
63							# the following:
64							#
65							# * max degree variable (F)
66							# * number of auxiliary blocks (0.55 *qen)
67							# * probability distribution p_1, p2, ... , p_F
68
69							sub new {
70
71	2			2	0	12	my $class = shift;
72							# The default parameters used here for q and e (epsilon) are as
73							# suggested in the paper "Rateless Codes and Big Downloads" by Petar
74							# Maymounkov and David Maziere. Note that the value of e may be
75							# overridden with a higher value if the lower value doesn't satisfy
76							# max_degree(epsilon) > ceil(0.55 * q.e.mblocks)
77	2					44	my %args = (
78							e => 0.01,
79							q => 3,
80							mblocks => undef,
81							expand_aux => 0,
82							e_warning => 0,
83
84							# We don't use or store any RNG parameter that's been
85							# passed into the constructor.
86							@_
87							);
88
89
90	2					6	my ($q,$e,$mblocks) = @args{qw(q e mblocks)};
91
92	2	50				7	unless (defined $args{mblocks}) {
93	0					0	carp __PACKAGE__ . ": mblocks => (# message blocks) must be set\n";
94	0					0	return undef;
95							}
96
97	2					3	print "Net::OnlineCode mblocks = $mblocks\n" if DEBUG;
98
99	2					3	my $P = undef;
100	2					2	my $e_changed = 0;
101
102							# how many auxiliary blocks would this scheme need?
103	2					6	my $ablocks = _count_auxiliary($q,$e,$mblocks);
104
105							# does epsilon value need updating?
106	2					5	my $f = _max_degree($e);
107
108	2	100				24	if ($f > $mblocks + $ablocks) {
109
110	1					2	$e_changed = 1;
111
112	1	50				5	if ($args{e_warning}) {
113	0					0	print "E CHANGED!!\nWas: $e\n";
114	0					0	print "Gave F value of $f\n";
115							}
116
117							# use a binary search to find a new epsilon such that
118							# get_max_degree($epsilon) <= mblocks + ablocks (ie, n)
119	1					2	my $epsilon = $e;
120
121							local *eval_f = sub {
122	11			11		11	my $t = shift;
123	11					27	return _max_degree(1/(1 + exp(-$t)));
124	1					7	};
125
126	1					4	my $l = -log(1/$e - 1);
127	1					2	my $r = $l + 1;
128
129							# expand right side of search until we get F <= n'
130	1					2	while (eval_f($r) > $mblocks + $ablocks) {
131							# $r = $l + ($r - $l) * 2;
132	1					2	$r = 2 * $r - $l;
133							}
134
135							# binary search between left and right to find a suitable lower
136							# value of epsilon still satisfying F <= n'
137	1					4	while ($r - $l > 0.01) {
138	8					5	my $m = ($l + $r) / 2;
139	8	100				10	if (eval_f($m) > $mblocks + $ablocks) {
140	3					5	$l = $m;
141							} else {
142	5					10	$r = $m;
143							}
144							}
145
146							# update e and ablocks
147	1					2	$epsilon = 1/(1 + exp(-$r));
148	1					2	$f = eval_f($r);
149							#$f=_max_degree($epsilon);
150	1	50				3	carp __PACKAGE__ . ": increased epsilon value from $e to $epsilon\n"
151							if $args{e_warning};
152	1					1	$e = $epsilon;
153	1					2	$ablocks = _count_auxiliary($q,$e,$mblocks);
154
155	1	50				5	if ($args{e_warning}) {
156
157	0					0	print "Is now: $e\n";
158	0					0	print "New F: $f\n";
159							}
160
161							}
162
163							# calculate the probability distribution
164	2					2	print "new: mblocks=$mblocks, ablocks=$ablocks, q=$q\n" if DEBUG;
165	2					7	$P = _probability_distribution($mblocks + $ablocks,$e);
166
167	2	50				10	die "Wrong number of elements in probability distribution (got "
168							. scalar(@$P) . ", expecting $f)\n"
169							unless @$P == $f;
170
171	2					28	my $self = { q => $q, e => $e, f => $f, P => $P,
172							mblocks => $mblocks, ablocks => $ablocks,
173							coblocks => $mblocks + $ablocks,
174							chblocks => 0, expand_aux=> $args{expand_aux},
175							e_changed => $e_changed, unique => {},
176							};
177
178	2					3	print "expand_aux => $self->{expand_aux}\n" if DEBUG;
179
180	2					22	bless $self, $class;
181
182							}
183
184							# while it probably doesn't matter too much to the encoder whether the
185							# supplied e value needed to be changed, if the receiver plugs the
186							# received value of e into the constructor and it ends up changing,
187							# there will be a problem with receiving the file.
188							sub e_changed {
189	0			0	0	0	return shift ->{e_changed};
190							}
191
192							# convenience accessor functions
193							sub get_mblocks { # count message blocks; passed into new
194	2			2	0	10	return shift -> {mblocks};
195							}
196
197							sub get_ablocks { # count auxiliary blocks; set in new
198	2			2	0	11	return shift -> {ablocks};
199							}
200
201							sub get_coblocks { # count composite blocks
202	2			2	0	3	my $self = shift;
203	2					14	return $self->{mblocks} + $self->{ablocks};
204							}
205
206							# count checkblocks
207							sub get_chblocks {
208	0			0	0	0	return shift->{chblocks}
209							}
210
211							sub get_q { # q == reliability factor
212	0			0	0	0	return shift -> {q};
213							}
214
215							sub get_e { # e == suboptimality factor
216	2			2	0	806	return shift -> {e};
217							}
218
219							sub get_epsilon { # epsilon == e, as above
220	0			0	0	0	return shift -> {e};
221							}
222
223							sub get_f { # f == max (check block) degree
224	2			2	0	40	return shift -> {f};
225							}
226
227							sub get_P { # P == probability distribution
228	0			0	0	0	return shift -> {P}; # (array ref)
229							}
230
231
232							# "Private" routines
233
234							# calculate how many auxiliary blocks need to be generated for a given
235							# code setup
236							sub _count_auxiliary {
237	3			3		5	my ($q, $e, $n) = @_;
238
239	3					24	my $count = int(ceil(0.55 * $q * $e * $n));
240	3					6	my $delta = 0.55 * $e;
241
242	3					3	warn "failure probability " . ($delta ** $q) . "\n" if DEBUG;
243							#$count = int(ceil($q * $delta * $n));
244
245							# Is it better to change q or the number of aux blocks if q is too
246							# big? It's certainly easier to keep the q value and increase the
247							# number of aux blocks, as I'm doing here, and may even be the right
248							# thing to do rather than ignoring the user's q value.
249	3	50				7	if ($count < $q) {
250	0					0	$count = $q;
251							# warn "updated _count_auxiliary output value to $q\n";
252							}
253	3					4	return $count;
254							}
255
256							# The max degree specifies the maximum number of blocks to be XORed
257							# together. This parameter is named F.
258							sub _max_degree {
259
260	15			15		14	my $epsilon = shift;
261
262	15					30	my $quotient = (2 * log ($epsilon / 2)) /
263							(log (1 - $epsilon / 2));
264
265	15					11	my $delta = 0.55 * $epsilon;
266							#$quotient = (log ($epsilon) + log($delta)) / (log (1 - $epsilon));
267
268	15					30	return int(ceil($quotient));
269							}
270
271							# Functions relating to probability distribution
272							#
273							# From the wikipedia page:
274							#
275							# http://en.wikipedia.org/wiki/Online_codes
276							#
277							# During the inner coding step the algorithm selects some number of
278							# composite messages at random and XORs them together to form a check
279							# block. In order for the algorithm to work correctly, both the number
280							# of blocks to be XORed together and their distribution over composite
281							# blocks must follow a particular probability distribution.
282							#
283							# Consult the references for the implementation details.
284							#
285							# The probability distribution is designed to map a random number in
286							# the range [0,1) and return a degree i between 1 and F. The
287							# probability distribution depends on a single input, n, which is the
288							# number of blocks in the original message. The fixed values for q and
289							# epsilon are also used.
290							#
291							# This code includes two changes from that described in the wikipedia
292							# page.
293							#
294							# 1) Rather than returning an array of individual probabilities p_i,
295							# the array includes the cumulative probabilities. For example, if
296							# the p_i probabilities were:
297							# (0.1, 0.2, 0.3, 0.2, 0.1, 0.1)
298							# then the returned array would be:
299							# (0.1, 0.3, 0.6, 0.8, 0.9, 1) (last element always has value 1)
300							# This is done simply to make selecting a value based on the random
301							# number more efficient, but the underlying probability distribution
302							# is the same.
303							# 2) Handling edge cases. These are:
304							# a) the case where n = 1; and
305							# b) the case where F > n
306							# In both cases, the default value for epsilon cannot be used, so a
307							# more suitable value is calculated.
308							#
309							# The return value is an array containing:
310							#
311							# * the max degree F
312							# * a possibly updated value of epsilon
313							# * the F values of the (cumulative) probability distribution
314
315							sub _probability_distribution {
316
317	2			2		2	my ($nblocks,$epsilon) = @_; # nblocks = number of composite blocks!
318
319	2					2	print "generating probability distribution from nblocks $nblocks, e $epsilon\n"
320							if DEBUG;
321
322	2					3	my $f = _max_degree($epsilon);
323
324							# after code reorganisation, this shouldn't happen:
325	2	50				5	if ($f > $nblocks) {
326	0					0	croak "BUG: " .__PACKAGE__ . " - epsilon still too small!\n";
327							}
328
329							# probability distribution
330
331							# Calculate the sum of the sequence:
332							#
333							# 1 + 1/F
334							# p_1 = 1 - ---------
335							# 1 + e
336							#
337							#
338							# F . (1 - p_1)
339							# p_i = ---------------------
340							# (F - 1) . (i^2 - i)
341							#
342							# Since the i term is the only thing that changes for each p_i, I
343							# optimise the calculation by keeping a fixed term involving only p
344							# and f with a variable one involving i, then dividing as
345							# appropriate.
346
347	2					5	my $p1 = 1 - (1 + 1/$f)/(1 + $epsilon);
348	2					4	my $pfterm = (1-$p1) * $f / ($f - 1);
349
350	2	50				5	die "p1 is negative\n" if $p1 < 0;
351
352							# hard-code simple cases where f = 1 or 2
353	2	50				6	if ($f == 1) {
		50
354	0					0	return [1];
355							} elsif ($f == 2) {
356	0					0	return [$p1, 1];
357							}
358
359							# calculate sum(p_i) for 2 <= i < F.
360							# p_i=F is simply set to 1 to avoid rounding errors in the sum
361	2					3	my $sum = $p1;
362	2					167	my @P = ($sum);
363
364	2					5	my $i = 2;
365	2					6	while ($i < $f) {
366	2515					2109	my $iterm = $i * ($i - 1);
367	2515					1818	my $p_i = $pfterm / $iterm;
368
369	2515					1783	$sum += $p_i;
370
371	2515	50				3459	die "p_$i is negative\n" if $p_i < 0;
372
373	2515					2117	push @P, $sum;
374	2515					3453	$i++;
375							}
376
377	2					5	if (DEBUG) {
378							# Make sure of the assumption that the sum of terms approaches 1.
379							# If the "rounding error" below is not a very small number, we
380							# know there is a problem with the assumption!
381							my $p_last = $sum + $pfterm / ($f * $f - $f);
382							my $absdiff = abs (1 - $p_last);
383							warn "Absolute difference of 1,sum to p_F = $absdiff\n" if $absdiff >1e-8;
384							}
385
386	2					262	return [@P,1];
387							}
388
389
390							# Using Floyd's algorithm instead of Fisher-Yates shuffle. Picks k
391							# distinct elements from the range [start, start + n - 1]. Avoids the
392							# need to copy/re-initialise an array of size n every time we make a
393							# new check block.
394							sub floyd {
395	0			0	0	0	my ($rng, $start, $n, $k) = @_;
396	0					0	my %set;
397	0					0	my ($j, $t) = ($n - $k);
398	0					0	while ($j < $n) {
399	0					0	$t = floor($rng->rand($j + 1));
400	0	0				0	if (!exists($set{$t + $start})) {
401	0					0	$set{$t + $start} = undef;
402							} else {
403	0					0	$set{$j + $start} = undef;
404							}
405	0					0	++$j;
406							}
407							# die "Floyd didn't pick $k elements" if ASSERT and $k != (keys %set);
408	0					0	return keys %set;
409							}
410
411							# Routine to calculate the auxiliary block -> message block* mapping.
412							# The passed rng object must already have been seeded, and both sender
413							# and receiver should use the same seed. Returns [[..],[..],..]
414							# representing which message blocks each of the auxiliary block is
415							# composed of.
416
417							sub auxiliary_mapping {
418
419	0			0	0	0	my $self = shift;
420	0					0	my $rng = shift;
421
422	0	0				0	croak "auxiliary_mapping: rng is not a reference\n" unless ref($rng);
423
424							# hash slices: powerful, but syntax is sometimes confusing
425	0					0	my ($mblocks,$ablocks,$q) = @{$self}{"mblocks","ablocks","q"};
	0					0
426
427	0					0	my $aux_mapping = [];
428
429	0					0	my $ab_string = pack "L*", ($mblocks .. $mblocks + $ablocks -1);
430
431							# list of empty hashes
432	0					0	my @hashes;
433	0					0	for (0 .. $mblocks + $ablocks -1) { $hashes[$_] = {}; }
	0					0
434
435							# Use an unrolled version of Floyd's algorithm for the default case
436							# where q=3
437	0	0				0	if ($q == 3) {
438	0					0	my ($a,$b,$c);
439	0					0	for my $msg (0 .. $mblocks - 1) {
440	0					0	$a = $mblocks + floor($rng->rand($ablocks - 2));
441	0					0	$hashes[$a] ->{$msg}=undef;
442	0					0	$hashes[$msg]->{$a} =undef;
443	0					0	$b = $mblocks + floor($rng->rand($ablocks - 1));
444	0	0				0	$b = $mblocks + $ablocks - 2 if $b == $a;
445	0					0	$hashes[$b] ->{$msg}=undef;
446	0					0	$hashes[$msg]->{$b} =undef;
447	0					0	$c = $mblocks + floor($rng->rand($ablocks));
448	0	0	0			0	$c = $mblocks + $ablocks - 1 if $c == $a or $c == $b;
449	0					0	$hashes[$c] ->{$msg}=undef;
450	0					0	$hashes[$msg]->{$c} =undef;
451							}
452							} else {
453	0					0	for my $msg (0 .. $mblocks - 1) {
454	0					0	foreach my $aux (floyd($rng, $mblocks, $ablocks, $q)) {
455	0					0	$hashes[$aux]->{$msg}=undef;
456	0					0	$hashes[$msg]->{$aux}=undef;
457							}
458							}
459							}
460
461							# convert list of hashes into a list of lists
462	0					0	for my $i (0 .. $mblocks + $ablocks -1) {
463	0					0	print "map $i: " . (join " ", keys %{$hashes[$i]}) . "\n" if DEBUG;
464	0					0	push @$aux_mapping, [ keys %{$hashes[$i]} ];
	0					0
465							}
466
467							# save and return aux_mapping
468	0					0	$self->{aux_mapping} = $aux_mapping;
469							}
470
471							# Until I get the auto expand_aux working, this will have to do
472							sub blklist_to_msglist {
473
474	0			0	0	0	my ($self,@xor_list) = @_;
475
476	0					0	my $mblocks = $self->{mblocks};
477
478	0					0	my %blocks;
479	0					0	while (@xor_list) {
480	0					0	my $entry = shift(@xor_list);
481	0	0				0	if ($entry < $mblocks) { # is it a message block index?
482							# toggle entry in the hash
483	0	0				0	if (exists($blocks{$entry})) {
484	0					0	delete $blocks{$entry};
485							} else {
486	0					0	$blocks{$entry}= undef;
487							}
488							} else {
489							# aux block : push all message blocks it's composed of
490	0					0	my @expansion = @{$self->{aux_mapping}->[$entry]};
	0					0
491	0					0	if (DEBUG) {
492							print "expand_aux: expanding $entry to " .
493							(join " ", @expansion) . "\n";
494							}
495	0					0	push @xor_list, @expansion;
496							}
497							}
498	0					0	return keys %blocks;
499							}
500
501							# Calculate the composition of a single check block based on the
502							# supplied RNG. Returns a reference to a list of composite blocks
503							# indices.
504
505							sub checkblock_mapping {
506
507	0			0	0	0	my $self = shift;
508	0					0	my $rng = shift;
509
510	0	0				0	croak "rng is not an object reference\n" unless ref($rng);
511
512	0					0	my ($mblocks,$coblocks,$P) = @{$self}{"mblocks","coblocks","P"};
	0					0
513	0					0	my @coblocks;
514
515							# use weighted distribution to find how many blocks to link
516	0					0	my $i = 0;
517	0					0	my $r = $rng->rand;
518	0					0	++$i while($r > $P->[$i]); # terminates since r < P[last]
519	0					0	++$i;
520
521							# select i composite blocks uniformly
522	0					0	@coblocks = floyd($rng, 0, $coblocks , $i);
523
524	0					0	if (ASSERT) {
525	0	0				0	die "checkblock_mapping: created empty check block\n!" unless @coblocks;
526							}
527
528	0					0	print "CHECKblock mapping: " . (join " ", @coblocks) . "\n" if DEBUG;
529
530	0					0	return \@coblocks;
531
532							}
533
534							# non-method sub for xoring a source string (passed by reference) with
535							# one or more target strings. I may reimplement this using XS later to
536							# make it more efficient, but will keep a pure-perl version with this
537							# name.
538							sub safe_xor_strings {
539
540	4			4	0	17	my $source = shift;
541
542							# die if user forgot to pass in a reference (to allow updating) or
543							# called $self->safe_xor_strings by mistake
544	4	50				14	croak "xor_strings: arg 1 should be a reference to a SCALAR!\n"
545							unless ref($source) eq "SCALAR";
546
547	4					6	my $len = length ($$source);
548
549	4	50				9	croak "xor_strings: source string can't have zero length!\n"
550							unless $len;
551
552	4					9	foreach my $target (@_) {
553	4	50				11	croak "xor_strings: targets not all same size as source\n"
554							unless length($target) == $len;
555	4					11	map { substr ($$source, $_, 1) ^= substr ($target, $_, 1) }
	28					79
556							(0 .. $len-1);
557							}
558
559	4					18	return $$source;
560							}
561
562							# Later, xor_strings could be replaced with an C version with reduced
563							# error checking, so make a backward-compatible version and an
564							# explicit fast/unsafe version.
565	0			0	0		sub xor_strings { safe_xor_strings(@_) }
566							#sub fast_xor_strings { safe_xor_strings(@_) } # implemented in OnlineCode.xs.
567
568
569							1;
570
571							__END__