File Coverage

blib/lib/Net/OnlineCode/Bones.pm

Criterion	Covered	Total	%
statement	9	99	9.0
branch	0	32	0.0
condition	0	3	0.0
subroutine	3	16	18.7
pod	0	13	0.0
total	12	163	7.3

line	stmt	bran	cond	sub	pod	time	code
1							package Net::OnlineCode::Bones;
2
3	2			2		15182	use strict;
	2					3
	2					63
4	2			2		7	use warnings;
	2					3
	2					52
5
6	2			2		9	use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION);
	2					2
	2					2062
7
8							$VERSION = '0.04';
9
10							#
11							sub new {
12	0			0	0		my ($class, $graph, $top, $nodes) = @_;
13	0						my $bone = $nodes;
14	0						my $unknowns = scalar(@$nodes);
15
16	0	0					die "Bones: refusing to create a bone with empty node list\n"
17							unless $unknowns;
18
19							#print "new bone $top with list @$nodes\n";
20
21	0						unshift @$bone, $unknowns; # count unknowns
22	0						push @$bone, $top; # add "top" node to knowns
23
24							#print "bone after unshift/push: @$nodes\n";
25
26	0						my $index = 1;
27
28	0						while ($index <= $unknowns) {
29	0	0					if ($graph->{solution}->[$bone->[$index]]) {
30							# print "swapping bone known bone index $index with $unknowns\n";
31	0						@{$bone}[$index,$unknowns] = @{$bone}[$unknowns,$index];
	0
	0
32	0						--$unknowns;
33							} else {
34							# print "bone index $index is not known\n";
35	0						++$index;
36							}
37							}
38
39	0						$bone->[0] = $unknowns; # save updated count
40
41	0						bless $bone, $class;
42							}
43
44							# Throw the caller a bone (ahem) if they want to construct the object
45							# themself (useful in GraphDecoder constructor)
46							sub bless {
47	0			0	0		my ($class, $object) = @_;
48
49	0	0					die "Bones: bless is a class method (call with ...::Bones->bless())\n"
50							if ref($class);
51
52	0	0					die "Net::OnlineCode::Bones::bless can only bless an ARRAY reference\n"
53							unless ref($object) eq "ARRAY";
54
55							# warn "Bones got ARRAY to bless: " . (join ", ", @$object) . "\n";
56
57	0	0	0				die "Net::OnlineCode::Bones::bless was given an incorrectly constructed array\n"
58							if scalar(@$object) == 0 or $object->[0] > scalar(@$object);
59
60	0						bless $object, $class;
61							}
62
63
64							# "Firm up" a bone by turning an unknown node from the left side of
65							# the equation into a known one on the right side
66							sub firm {
67	0			0	0		my ($bone, $index) = @_;
68	0						my $unknowns = $bone->[0]--;
69
70	0						@{$bone}[$index,$unknowns] = @{$bone}[$unknowns,$index];
	0
	0
71							}
72
73
74							# The "top" node is the number of the check or aux block where the
75							# bone was first created. It's always the last value of the list
76							sub top {
77	0			0	0		my $bone = shift;
78
79	0						return $bone->[scalar(@$bone)];
80							}
81
82							# The "bottom" node will shuffle to the start of the list of unknown
83							# blocks (call only when there is just a single unknown left)
84							sub bottom {
85	0			0	0		my $bone = shift;
86
87	0	0					die "Bones: multiple bottom nodes exist\n" if $bone->[0] > 1;
88	0						return $bone->[1];
89							}
90
91							# how many unknowns on left side?
92							sub unknowns {
93	0			0	0		my $bone = shift;
94	0						return $bone->[0];
95							}
96
97							# how many knowns on right side?
98							sub knowns {
99	0			0	0		my $bone = shift;
100	0						return @$bone - $bone->[0];
101							}
102
103
104							# For extracting the actual known or unknown elements, rather than
105							# return a list or spliced part of it, return the range of the knowns
106							# part of the array for the caller to iterate over. (more efficient)
107							#
108							# Both the following subs return an inclusive range [first, last]
109							# that's suitable for iterating over with for ($first .. $last)
110							#
111
112							sub knowns_range {
113	0			0	0		my $bone = shift;
114	0						return ($bone->[0] + 1, scalar(@$bone) - 1);
115							}
116
117							# unknowns_range can return [1, 0] if there are no unknowns. Beware!
118							sub unknowns_range {
119	0			0	0		my $bone = shift;
120	0						return (1, $bone->[0]);
121							}
122
123							# The following two routines find a single unknown, shift it to the
124							# start of the array and mark all other nodes as known (used in
125							# propagation rule). They differ only in whether a node number or a
126							# graph are passed in. (modelled on C code)
127							sub known_unsolved {
128
129	0			0	0		my ($bone, $node) = @_;
130
131							# If given a node number, we just scan the list to find it
132
133	0						for (1 .. $bone->[0]) {
134	0	0					if ($node == $bone->[$_]) {
135	0	0					@{$bone}[$_,1] = @{$bone}[1,$_] if $_ != 1;
	0
	0
136	0						$bone->[0] = 1;
137	0						return $bone->[1];
138							}
139							}
140	0						die "Bones: Didn't find unsolved node $node\n";
141							}
142
143							sub unknown_unsolved {
144
145	0			0	0		my ($bone, $graph) = @_;
146
147							# If given a graph, we look up nodes in it to see if they're solved
148
149	0						for (1 .. $bone->[0]) {
150	0	0					if (!$graph->{solution}->[$bone->[$_]]) {
151	0	0					@{$bone}[$_,1] = @{$bone}[1,$_] if $_ != 1;
	0
	0
152	0						$bone->[0] = 1;
153	0						return $bone->[1];
154							}
155							}
156	0						die "Bones: Bone has no unsolved nodes\n";
157							}
158
159
160							# We can use the propagation rule from an aux block to a message
161							# block, but if the aux block itself is not solved, we end up with two
162							# unknown values in the list. This routine takes the aux block number
163							# and the single unknown down edge, marks both of them as unknown and
164							# the rest as known.
165							sub two_unknowns {
166	0			0	0		my ($bone, $graph) = @_;
167	0						my ($index, $kindex) = (1,1);
168	0						my $unknowns = $bone->[0];
169
170	0						print "two_unknowns: Looking for two unsolved in " . $bone->pp .
171							" (had $unknowns unknowns)\n";
172
173	0						while ($index <= $unknowns + 1) {
174	0						my $node = $bone->[$index];
175	0						print "two_unknowns: Considering node $node at index $index\n";
176	0	0					if ($graph->{solution}->[$node]) {
177	0						print "two_unknowns: Node $node is solved; skipping\n";
178	0						--$unknowns;
179							} else {
180	0						print "two_unknowns: Node $node is unsolved; shuffling to position $kindex\n";
181	0	0					@{$bone}[$index,$kindex] = @{$bone}[$kindex,$index]
	0
	0
182							if $index != $kindex;
183	0						++$kindex;
184							}
185	0						++$index;
186							}
187	0	0					die "Bones: didn't find two unknowns\n" unless $kindex == 3;
188
189							# swap elments if needed so that message node is first
190	0	0					@{$bone}[1,2] = @{$bone}[2,1] if $bone->[1] > $bone->[2];
	0
	0
191
192	0						$bone->[0] = 2;
193
194	0						print "two_unknowns: Final contents are " . $bone->pp . "\n";
195
196	0						return $bone->[1];
197							}
198
199							# "pretty printer": output in the form "[unknowns] <- [knowns]"
200							sub pp {
201
202	0			0	0		my $bone = shift;
203	0						my ($s, $min, $max) = ("[");
204
205							# print "raw bone is ". (join ",", @$bone) . "\n";
206
207	0						($min, $max) = $bone->unknowns_range;
208							# print "unknown range: [$min,$max]\n";
209	0	0					$s.= join ", ", map { $bone->[$_] } ($min .. $max) if $min <= $max;
	0
210
211	0						$s.= "] <- [";
212
213	0						($min, $max) = $bone->knowns_range;
214							# print "known range: [$min,$max]\n";
215	0	0					$s.= join ", ", map { $bone->[$_] } ($min .. $max) if $min <= $max;
	0
216
217	0						return $s . "]";
218
219							}
220
221							1;
222
223							=head1 NAME
224
225							Net::OnlineCode::Bones - Graph decoding internals
226
227							=head1 DESCRIPTION
228
229							This page gives an overview of how the decoding algorithm for Online
230							Codes work.
231
232							The decoding algorithm can be described in one of two ways:
233
234							=over
235
236							=item * in terms of solving a set of algebraic equations; and
237
238							=item * in terms of resolving a graph.
239
240							=back
241
242							The first of these explains I the algorithm does, while the
243							second describes I it does it.
244
245							=head2 Solving a system of algebraic equations
246
247							Recall that the Online Codes algorithm works with:
248
249							=over
250
251							=item * I blocks, which are portions of the original file
252
253							=item * I blocks, which are the XOR sum of one or more
254							I blocks.
255
256							=item * I blocks, which are the XOR sum of one or more
257							I and/or I blocks.
258
259							=back
260
261							On the encoder side, the algorithm generates I blocks by
262							using a pseudo-random number generator (PRNG). These blocks are stored
263							locally by the encoder, but are never transmitted. However, by sending
264							the seed value for the PRNG to the decoder, the decoder knows how the
265							auxiliary blocks were constructed, even though it does not know the
266							values of them. In other words, give the PRNG seed value, the decoder
267							can construct a set of equations, one for each auxiliary block:
268
269							aux = msg XOR msg XOR ...
270							1 x y ...
271							aux = ...
272							2
273							:
274
275
276							Initially, all the values in these equations are unknown on the
277							decoder side.
278
279							As for I blocks, the encoder picks a random seed value for its
280							PRNG and uses this to generate a list of message and/or check blocks
281							to XOR together to calculate the check block's value. It sends both
282							the seed used and the final XOR value to the decoder. As with
283							auxiliary blocks, the decoder can use the PRNG with the transmitted
284							seed value to construct an equation for a received check block:
285
286							chk = msg_or_aux XOR msg_or_aux XOR ...
287							1 x y
288
289							Unlike the equations constructed for auxiliary blocks, however, the
290							value of the check block is also sent to the decoder, so each equation
291							includes a single known value on the left-hand side of the
292							equation.
293
294							Before the first check block is received, the decoder has a set of
295							equations involving unknown values. As check blocks are received,
296							eventually one of them will be composed of just a single message or
297							auxiliary block. In algebraic terms, we have:
298
299							chk = msg_or_aux
300							x y
301
302							Since there is just a single unknown value in the equation, we can
303							reverse the order of it and use the new form of the equation
304
305							msg_or_aux = chk
306							y x
307
308							Since we have a single unknown value on the left side and only known
309							values on the right side, this new rule solves the value on the left.
310							Now wherever this message/aux block appears in another equation, we
311							can substitute the right side of the equation. This removes one
312							unknown value from the set of equations each time this step is taken.
313
314							Decoding progresses in this way by finding an equation with only a
315							single unknown value, solving that unknown value then substituting the
316							result into any other equation that mentions this value. This proceeds
317							until there are no unknowns left. At that point the entire file has
318							been "solved".
319
320							=head2 Solution in terms of a graph
321
322							The method of solving all of the equations above can be re-expressed
323							in terms of a graph. Nodes in the graph represent blocks, while the
324							edges capture the relation between blocks on the left side of an
325							equation and those on the right. So for example, a check block C
326							(on the left hand side of an equation) is composed of an auxiliary
327							node A and a message node M is represented by:
328
329							=over
330
331							=item * three nodes M, A and C
332
333							=item * an edge between M and C
334
335							=item * an edge between A and C
336
337							=back
338
339							There is also an additional structure imposed on the nodes in the
340							graph so that edges can be unambiguously identified as belonging to a
341							particular equation. Technically, the graph is a I
342							graph. It keeps each of the block types grouped with other blocks of
343							that type and orders the groups like so:
344
345							message blocks < auxiliary blocks < check blocks
346
347							Graphically, the example rule above could be illustrated as follows:
348
349							message auxiliary check
350
351							M <--------------------------------- C
352							/
353							A <------------/
354
355
356							This diagram could equally have been written with the check blocks on
357							the left and the message blocks on the right, or turned 90
358							degrees. It's merely a matter of convention, similar to the two ways
359							of writing out the equation as either:
360
361							C <- M xor A
362
363							or
364
365							M xor A -> C
366
367							For the remainder of the document, I'll go with the convention of
368							saying that auxiliary blocks are to the right of the message blocks
369							and check blocks are to the right of both of them. (My code uses a
370							different convention again and talks about check nodes being higher
371							than auxiliary and message nodes).
372
373							Besides information about edges, the graph also stores a status bit
374							for each node to indicate whether that node is known (solved) or
375							unknown. Check nodes are always taken to be solved since the encoder
376							sends the value of that block, whereas message and auxiliary nodes are
377							all initially unknown/unsolved.
378
379							In the explanation of the algebraic interpretation, I talked about
380							finding an equation that had just a single unknown and rearranging it
381							so that the single unknown value moves to one side and all the other
382							knowns move to the other side. There is an analoguous operation on the
383							graph, and this is named the "propagation rule".
384
385							The propagation rule involves finding a known node which has exactly
386							one unsolved neighbour on the left. In the above example, if we are
387							considering whether to propagate from node C (which is known) both M
388							and A are unknown, so the rule does not match. If, on the other hand,
389							one of M or A are known, the rule does match.
390
391							When the propagation rule matches, the solution for the newly-solved
392							node on the left becomes the XOR of the node on the right plus all the
393							other nodes emanating from that (right) node. When a node is solved in
394							this way, all edges from the node on the right are removed from the
395							graph.
396
397							In my code, the propagation rule is handled in a routine called
398							resolve().
399
400							=head2 Cascades
401
402							When matched, the propagation rule solves one extra node somewhere to
403							the left of the starting node. In the algebraic interpretation, I
404							talked about substituting a newly-solved variable into all other
405							equations where the variable appeared. There is an analogous procedure
406							in the graph-based implementation, which is implemenented in the
407							cascade() routine.
408
409							For the sake of discussion, let's assume that the message block M was
410							solved by the propagation rule and that it had the solution:
411
412							M <- A xor C
413
414							To simulate substituting M into all other equations where it appears,
415							we need to work backwards (from left to right) from node M and see if
416							any of those nodes now match the propagation rule. Since there will be
417							one rightward edge in the graph from that node for each equation the
418							left node appears in, this effectively reaches all equations that
419							could could become solvable.
420
421							In the case where the left node which has become solved is an
422							auxiliary block, the cascade() routine also queues up the auxiliary
423							node itself for checking the propagation rule.
424
425							=head2 Special handling for auxiliary nodes
426
427							Although in theory the propagation rule could be applied to unsolved
428							auxiliary nodes, in practice this has proved troublesome, so I have
429							not implemented it. Instead I have implemented a special "auxiliary
430							rule" that gives comparable results.
431
432							Recall that the propagation rule works with a single known node on the
433							right and a single unknown node on the left. It is also possible to
434							devise a rule where there is a message node on the left and an
435							unsolved aux rule on the right. If the auxiliary node has only one
436							unsolved left neighbour (ie the message node) and that message node
437							becomes solved, then the auxiliary block can be solved too.
438
439							Initially, each auxiliary block will be composed of some number of
440							message blocks:
441
442							aux = msg xor msg xor ...
443							x i j
444
445							When the last unsolved message block on the right becomes solved then
446							this equation has no more unknowns apart from the aux block
447							itself. Therefore, it can be marked as solved (with the above
448							solution) and we can cascade up from that aux block to see if it
449							solves any more equations.
450
451							=head2 Optimising by tracking counts of unsolved left nodes
452
453							When aux or check nodes are created, the number of unknown/unsolved
454							edges that they are comprised of is calculated. Whenever a node
455							becomes solved, each of the nodes that include that node in its list
456							of edges has their unsolved count decremented.
457
458							This improves performance by avoiding having to scan the node's full
459							list of leftward edges when it is considered for resolving.
460
461							=head2 "Bones" ("Bundles of Node Elements")
462
463							In a previous version of the program, edges in the graph were stored
464							by keeping track of the left (bottom) end of the edge in a hash, while
465							the right (top) end was stored in a list. I also had a separate array
466							for storing the solutions of each node. The "Bones" structure
467							essentially combines the top part of the edge and the solution into a
468							single fixed-size array. This was done to improve performance by
469							eliminating lots of list copying as the graph was processed.
470
471							The Bones structure is a fixed-sized array with three elements:
472
473							=over
474
475							=item * count of unsolved left (down) edges
476
477							=item * node ids of unsolved left (down) edges
478
479							=item * list of known node ids
480
481							=back
482
483							Bones can also be viewed as encapsulating an algebraic equation of the
484							form:
485
486							[unknown nodes] <- [known nodes]
487
488							At the start of decoding, each auxiliary node has a Bone object
489							created for it:
490
491							[aux node, message nodes] <- []
492
493							That is, the aux node and all its constituent message nodes are all
494							marked as unknown/unsolved (there are no knowns in the equation).
495
496							The "Bone" is attached to the auxiliary node and reciprocal links (the
497							other end of the edges) are created in each of the component message
498							nodes. All the nodes in the left hand side except the aux node itself
499							are considered to be the top parts of edges.
500
501							When a check node is created, its Bone is of the form:
502
503							[unsolved msg/aux nodes] <- [ check node, solved msg/aux nodes ]
504
505							The check node is placed on the right along with other known nodes
506							because the decoder knows the value of all received check nodes.
507							Reciprocal links are only created for unsolved nodes.
508
509							As can be seen, Bones have aspects of algebraic equations, but they
510							also encapsulate edge structure.
511
512							As nodes become solved by the propagation or auxiliary rule, elements
513							are shifted in the array to take this form:
514
515							[newly-solved node] <- [list of nodes to XOR to get value]
516
517							This is exactly the form of a solution to a node, so the Bone is
518							stored in the solution array. The right-hand side is also scanned and
519							any reciprocal links are deleted, as is the top part of the edge.
520
521							In summary, a Bone always represents an equation. At the start of the
522							decoding process it also encapsulates edge structure, but at the end
523							it becomes a solution for either a message block or an auxiliary block.
524
525							From version 0.04 of the Net::OnlineCode modules onwards, the resolver
526							returns a Bone object for each solved node. It will always be an array
527							of the form mentioned above, and encoded as follows:
528
529							[
530							1, # the number of "unknowns"
531							msg_or_aux, # the node that was just solved
532							list of component nodes
533							]
534