File Coverage

blib/lib/Net/OnlineCode/Bones.pm
Criterion Covered Total %
statement 9 99 9.0
branch 0 32 0.0
condition 0 3 0.0
subroutine 3 16 18.7
pod 0 13 0.0
total 12 163 7.3


line stmt bran cond sub pod time code
1             package Net::OnlineCode::Bones;
2              
3 2     2   15182 use strict;
  2         3  
  2         63  
4 2     2   7 use warnings;
  2         3  
  2         52  
5              
6 2     2   9 use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION);
  2         2  
  2         2062  
7              
8             $VERSION = '0.04';
9              
10             #
11             sub new {
12 0     0 0   my ($class, $graph, $top, $nodes) = @_;
13 0           my $bone = $nodes;
14 0           my $unknowns = scalar(@$nodes);
15              
16 0 0         die "Bones: refusing to create a bone with empty node list\n"
17             unless $unknowns;
18              
19             #print "new bone $top with list @$nodes\n";
20              
21 0           unshift @$bone, $unknowns; # count unknowns
22 0           push @$bone, $top; # add "top" node to knowns
23              
24             #print "bone after unshift/push: @$nodes\n";
25              
26 0           my $index = 1;
27              
28 0           while ($index <= $unknowns) {
29 0 0         if ($graph->{solution}->[$bone->[$index]]) {
30             # print "swapping bone known bone index $index with $unknowns\n";
31 0           @{$bone}[$index,$unknowns] = @{$bone}[$unknowns,$index];
  0            
  0            
32 0           --$unknowns;
33             } else {
34             # print "bone index $index is not known\n";
35 0           ++$index;
36             }
37             }
38              
39 0           $bone->[0] = $unknowns; # save updated count
40              
41 0           bless $bone, $class;
42             }
43              
44             # Throw the caller a bone (ahem) if they want to construct the object
45             # themself (useful in GraphDecoder constructor)
46             sub bless {
47 0     0 0   my ($class, $object) = @_;
48              
49 0 0         die "Bones: bless is a class method (call with ...::Bones->bless())\n"
50             if ref($class);
51              
52 0 0         die "Net::OnlineCode::Bones::bless can only bless an ARRAY reference\n"
53             unless ref($object) eq "ARRAY";
54              
55             # warn "Bones got ARRAY to bless: " . (join ", ", @$object) . "\n";
56              
57 0 0 0       die "Net::OnlineCode::Bones::bless was given an incorrectly constructed array\n"
58             if scalar(@$object) == 0 or $object->[0] > scalar(@$object);
59              
60 0           bless $object, $class;
61             }
62              
63              
64             # "Firm up" a bone by turning an unknown node from the left side of
65             # the equation into a known one on the right side
66             sub firm {
67 0     0 0   my ($bone, $index) = @_;
68 0           my $unknowns = $bone->[0]--;
69              
70 0           @{$bone}[$index,$unknowns] = @{$bone}[$unknowns,$index];
  0            
  0            
71             }
72            
73              
74             # The "top" node is the number of the check or aux block where the
75             # bone was first created. It's always the last value of the list
76             sub top {
77 0     0 0   my $bone = shift;
78              
79 0           return $bone->[scalar(@$bone)];
80             }
81              
82             # The "bottom" node will shuffle to the start of the list of unknown
83             # blocks (call only when there is just a single unknown left)
84             sub bottom {
85 0     0 0   my $bone = shift;
86              
87 0 0         die "Bones: multiple bottom nodes exist\n" if $bone->[0] > 1;
88 0           return $bone->[1];
89             }
90              
91             # how many unknowns on left side?
92             sub unknowns {
93 0     0 0   my $bone = shift;
94 0           return $bone->[0];
95             }
96              
97             # how many knowns on right side?
98             sub knowns {
99 0     0 0   my $bone = shift;
100 0           return @$bone - $bone->[0];
101             }
102              
103              
104             # For extracting the actual known or unknown elements, rather than
105             # return a list or spliced part of it, return the range of the knowns
106             # part of the array for the caller to iterate over. (more efficient)
107             #
108             # Both the following subs return an inclusive range [first, last]
109             # that's suitable for iterating over with for ($first .. $last)
110             #
111              
112             sub knowns_range {
113 0     0 0   my $bone = shift;
114 0           return ($bone->[0] + 1, scalar(@$bone) - 1);
115             }
116              
117             # unknowns_range can return [1, 0] if there are no unknowns. Beware!
118             sub unknowns_range {
119 0     0 0   my $bone = shift;
120 0           return (1, $bone->[0]);
121             }
122              
123             # The following two routines find a single unknown, shift it to the
124             # start of the array and mark all other nodes as known (used in
125             # propagation rule). They differ only in whether a node number or a
126             # graph are passed in. (modelled on C code)
127             sub known_unsolved {
128              
129 0     0 0   my ($bone, $node) = @_;
130              
131             # If given a node number, we just scan the list to find it
132              
133 0           for (1 .. $bone->[0]) {
134 0 0         if ($node == $bone->[$_]) {
135 0 0         @{$bone}[$_,1] = @{$bone}[1,$_] if $_ != 1;
  0            
  0            
136 0           $bone->[0] = 1;
137 0           return $bone->[1];
138             }
139             }
140 0           die "Bones: Didn't find unsolved node $node\n";
141             }
142              
143             sub unknown_unsolved {
144              
145 0     0 0   my ($bone, $graph) = @_;
146              
147             # If given a graph, we look up nodes in it to see if they're solved
148              
149 0           for (1 .. $bone->[0]) {
150 0 0         if (!$graph->{solution}->[$bone->[$_]]) {
151 0 0         @{$bone}[$_,1] = @{$bone}[1,$_] if $_ != 1;
  0            
  0            
152 0           $bone->[0] = 1;
153 0           return $bone->[1];
154             }
155             }
156 0           die "Bones: Bone has no unsolved nodes\n";
157             }
158              
159              
160             # We can use the propagation rule from an aux block to a message
161             # block, but if the aux block itself is not solved, we end up with two
162             # unknown values in the list. This routine takes the aux block number
163             # and the single unknown down edge, marks both of them as unknown and
164             # the rest as known.
165             sub two_unknowns {
166 0     0 0   my ($bone, $graph) = @_;
167 0           my ($index, $kindex) = (1,1);
168 0           my $unknowns = $bone->[0];
169              
170 0           print "two_unknowns: Looking for two unsolved in " . $bone->pp .
171             " (had $unknowns unknowns)\n";
172              
173 0           while ($index <= $unknowns + 1) {
174 0           my $node = $bone->[$index];
175 0           print "two_unknowns: Considering node $node at index $index\n";
176 0 0         if ($graph->{solution}->[$node]) {
177 0           print "two_unknowns: Node $node is solved; skipping\n";
178 0           --$unknowns;
179             } else {
180 0           print "two_unknowns: Node $node is unsolved; shuffling to position $kindex\n";
181 0 0         @{$bone}[$index,$kindex] = @{$bone}[$kindex,$index]
  0            
  0            
182             if $index != $kindex;
183 0           ++$kindex;
184             }
185 0           ++$index;
186             }
187 0 0         die "Bones: didn't find two unknowns\n" unless $kindex == 3;
188              
189             # swap elments if needed so that message node is first
190 0 0         @{$bone}[1,2] = @{$bone}[2,1] if $bone->[1] > $bone->[2];
  0            
  0            
191            
192 0           $bone->[0] = 2;
193              
194 0           print "two_unknowns: Final contents are " . $bone->pp . "\n";
195              
196 0           return $bone->[1];
197             }
198              
199             # "pretty printer": output in the form "[unknowns] <- [knowns]"
200             sub pp {
201              
202 0     0 0   my $bone = shift;
203 0           my ($s, $min, $max) = ("[");
204              
205             # print "raw bone is ". (join ",", @$bone) . "\n";
206              
207 0           ($min, $max) = $bone->unknowns_range;
208             # print "unknown range: [$min,$max]\n";
209 0 0         $s.= join ", ", map { $bone->[$_] } ($min .. $max) if $min <= $max;
  0            
210              
211 0           $s.= "] <- [";
212              
213 0           ($min, $max) = $bone->knowns_range;
214             # print "known range: [$min,$max]\n";
215 0 0         $s.= join ", ", map { $bone->[$_] } ($min .. $max) if $min <= $max;
  0            
216              
217 0           return $s . "]";
218              
219             }
220              
221             1;
222              
223             =head1 NAME
224              
225             Net::OnlineCode::Bones - Graph decoding internals
226              
227             =head1 DESCRIPTION
228              
229             This page gives an overview of how the decoding algorithm for Online
230             Codes work.
231              
232             The decoding algorithm can be described in one of two ways:
233              
234             =over
235              
236             =item * in terms of solving a set of algebraic equations; and
237              
238             =item * in terms of resolving a graph.
239              
240             =back
241              
242             The first of these explains I the algorithm does, while the
243             second describes I it does it.
244              
245             =head2 Solving a system of algebraic equations
246              
247             Recall that the Online Codes algorithm works with:
248              
249             =over
250              
251             =item * I blocks, which are portions of the original file
252              
253             =item * I blocks, which are the XOR sum of one or more
254             I blocks.
255              
256             =item * I blocks, which are the XOR sum of one or more
257             I and/or I blocks.
258              
259             =back
260              
261             On the encoder side, the algorithm generates I blocks by
262             using a pseudo-random number generator (PRNG). These blocks are stored
263             locally by the encoder, but are never transmitted. However, by sending
264             the seed value for the PRNG to the decoder, the decoder knows how the
265             auxiliary blocks were constructed, even though it does not know the
266             values of them. In other words, give the PRNG seed value, the decoder
267             can construct a set of equations, one for each auxiliary block:
268              
269             aux = msg XOR msg XOR ...
270             1 x y ...
271             aux = ...
272             2
273             :
274              
275              
276             Initially, all the values in these equations are unknown on the
277             decoder side.
278              
279             As for I blocks, the encoder picks a random seed value for its
280             PRNG and uses this to generate a list of message and/or check blocks
281             to XOR together to calculate the check block's value. It sends both
282             the seed used and the final XOR value to the decoder. As with
283             auxiliary blocks, the decoder can use the PRNG with the transmitted
284             seed value to construct an equation for a received check block:
285              
286             chk = msg_or_aux XOR msg_or_aux XOR ...
287             1 x y
288              
289             Unlike the equations constructed for auxiliary blocks, however, the
290             value of the check block is also sent to the decoder, so each equation
291             includes a single known value on the left-hand side of the
292             equation.
293              
294             Before the first check block is received, the decoder has a set of
295             equations involving unknown values. As check blocks are received,
296             eventually one of them will be composed of just a single message or
297             auxiliary block. In algebraic terms, we have:
298              
299             chk = msg_or_aux
300             x y
301              
302             Since there is just a single unknown value in the equation, we can
303             reverse the order of it and use the new form of the equation
304              
305             msg_or_aux = chk
306             y x
307              
308             Since we have a single unknown value on the left side and only known
309             values on the right side, this new rule solves the value on the left.
310             Now wherever this message/aux block appears in another equation, we
311             can substitute the right side of the equation. This removes one
312             unknown value from the set of equations each time this step is taken.
313              
314             Decoding progresses in this way by finding an equation with only a
315             single unknown value, solving that unknown value then substituting the
316             result into any other equation that mentions this value. This proceeds
317             until there are no unknowns left. At that point the entire file has
318             been "solved".
319              
320             =head2 Solution in terms of a graph
321              
322             The method of solving all of the equations above can be re-expressed
323             in terms of a graph. Nodes in the graph represent blocks, while the
324             edges capture the relation between blocks on the left side of an
325             equation and those on the right. So for example, a check block C
326             (on the left hand side of an equation) is composed of an auxiliary
327             node A and a message node M is represented by:
328              
329             =over
330              
331             =item * three nodes M, A and C
332              
333             =item * an edge between M and C
334              
335             =item * an edge between A and C
336              
337             =back
338              
339             There is also an additional structure imposed on the nodes in the
340             graph so that edges can be unambiguously identified as belonging to a
341             particular equation. Technically, the graph is a I
342             graph. It keeps each of the block types grouped with other blocks of
343             that type and orders the groups like so:
344              
345             message blocks < auxiliary blocks < check blocks
346              
347             Graphically, the example rule above could be illustrated as follows:
348              
349             message auxiliary check
350            
351             M <--------------------------------- C
352             /
353             A <------------/
354            
355              
356             This diagram could equally have been written with the check blocks on
357             the left and the message blocks on the right, or turned 90
358             degrees. It's merely a matter of convention, similar to the two ways
359             of writing out the equation as either:
360              
361             C <- M xor A
362              
363             or
364              
365             M xor A -> C
366              
367             For the remainder of the document, I'll go with the convention of
368             saying that auxiliary blocks are to the right of the message blocks
369             and check blocks are to the right of both of them. (My code uses a
370             different convention again and talks about check nodes being higher
371             than auxiliary and message nodes).
372              
373             Besides information about edges, the graph also stores a status bit
374             for each node to indicate whether that node is known (solved) or
375             unknown. Check nodes are always taken to be solved since the encoder
376             sends the value of that block, whereas message and auxiliary nodes are
377             all initially unknown/unsolved.
378              
379             In the explanation of the algebraic interpretation, I talked about
380             finding an equation that had just a single unknown and rearranging it
381             so that the single unknown value moves to one side and all the other
382             knowns move to the other side. There is an analoguous operation on the
383             graph, and this is named the "propagation rule".
384              
385             The propagation rule involves finding a known node which has exactly
386             one unsolved neighbour on the left. In the above example, if we are
387             considering whether to propagate from node C (which is known) both M
388             and A are unknown, so the rule does not match. If, on the other hand,
389             one of M or A are known, the rule does match.
390              
391             When the propagation rule matches, the solution for the newly-solved
392             node on the left becomes the XOR of the node on the right plus all the
393             other nodes emanating from that (right) node. When a node is solved in
394             this way, all edges from the node on the right are removed from the
395             graph.
396              
397             In my code, the propagation rule is handled in a routine called
398             resolve().
399              
400             =head2 Cascades
401              
402             When matched, the propagation rule solves one extra node somewhere to
403             the left of the starting node. In the algebraic interpretation, I
404             talked about substituting a newly-solved variable into all other
405             equations where the variable appeared. There is an analogous procedure
406             in the graph-based implementation, which is implemenented in the
407             cascade() routine.
408              
409             For the sake of discussion, let's assume that the message block M was
410             solved by the propagation rule and that it had the solution:
411              
412             M <- A xor C
413              
414             To simulate substituting M into all other equations where it appears,
415             we need to work backwards (from left to right) from node M and see if
416             any of those nodes now match the propagation rule. Since there will be
417             one rightward edge in the graph from that node for each equation the
418             left node appears in, this effectively reaches all equations that
419             could could become solvable.
420              
421             In the case where the left node which has become solved is an
422             auxiliary block, the cascade() routine also queues up the auxiliary
423             node itself for checking the propagation rule.
424              
425             =head2 Special handling for auxiliary nodes
426              
427             Although in theory the propagation rule could be applied to unsolved
428             auxiliary nodes, in practice this has proved troublesome, so I have
429             not implemented it. Instead I have implemented a special "auxiliary
430             rule" that gives comparable results.
431              
432             Recall that the propagation rule works with a single known node on the
433             right and a single unknown node on the left. It is also possible to
434             devise a rule where there is a message node on the left and an
435             unsolved aux rule on the right. If the auxiliary node has only one
436             unsolved left neighbour (ie the message node) and that message node
437             becomes solved, then the auxiliary block can be solved too.
438              
439             Initially, each auxiliary block will be composed of some number of
440             message blocks:
441              
442             aux = msg xor msg xor ...
443             x i j
444              
445             When the last unsolved message block on the right becomes solved then
446             this equation has no more unknowns apart from the aux block
447             itself. Therefore, it can be marked as solved (with the above
448             solution) and we can cascade up from that aux block to see if it
449             solves any more equations.
450              
451             =head2 Optimising by tracking counts of unsolved left nodes
452              
453             When aux or check nodes are created, the number of unknown/unsolved
454             edges that they are comprised of is calculated. Whenever a node
455             becomes solved, each of the nodes that include that node in its list
456             of edges has their unsolved count decremented.
457              
458             This improves performance by avoiding having to scan the node's full
459             list of leftward edges when it is considered for resolving.
460              
461             =head2 "Bones" ("Bundles of Node Elements")
462              
463             In a previous version of the program, edges in the graph were stored
464             by keeping track of the left (bottom) end of the edge in a hash, while
465             the right (top) end was stored in a list. I also had a separate array
466             for storing the solutions of each node. The "Bones" structure
467             essentially combines the top part of the edge and the solution into a
468             single fixed-size array. This was done to improve performance by
469             eliminating lots of list copying as the graph was processed.
470              
471             The Bones structure is a fixed-sized array with three elements:
472              
473             =over
474              
475             =item * count of unsolved left (down) edges
476              
477             =item * node ids of unsolved left (down) edges
478              
479             =item * list of known node ids
480              
481             =back
482              
483             Bones can also be viewed as encapsulating an algebraic equation of the
484             form:
485              
486             [unknown nodes] <- [known nodes]
487              
488             At the start of decoding, each auxiliary node has a Bone object
489             created for it:
490              
491             [aux node, message nodes] <- []
492              
493             That is, the aux node and all its constituent message nodes are all
494             marked as unknown/unsolved (there are no knowns in the equation).
495              
496             The "Bone" is attached to the auxiliary node and reciprocal links (the
497             other end of the edges) are created in each of the component message
498             nodes. All the nodes in the left hand side except the aux node itself
499             are considered to be the top parts of edges.
500              
501             When a check node is created, its Bone is of the form:
502              
503             [unsolved msg/aux nodes] <- [ check node, solved msg/aux nodes ]
504              
505             The check node is placed on the right along with other known nodes
506             because the decoder knows the value of all received check nodes.
507             Reciprocal links are only created for unsolved nodes.
508              
509             As can be seen, Bones have aspects of algebraic equations, but they
510             also encapsulate edge structure.
511              
512             As nodes become solved by the propagation or auxiliary rule, elements
513             are shifted in the array to take this form:
514              
515             [newly-solved node] <- [list of nodes to XOR to get value]
516              
517             This is exactly the form of a solution to a node, so the Bone is
518             stored in the solution array. The right-hand side is also scanned and
519             any reciprocal links are deleted, as is the top part of the edge.
520              
521             In summary, a Bone always represents an equation. At the start of the
522             decoding process it also encapsulates edge structure, but at the end
523             it becomes a solution for either a message block or an auxiliary block.
524              
525             From version 0.04 of the Net::OnlineCode modules onwards, the resolver
526             returns a Bone object for each solved node. It will always be an array
527             of the form mentioned above, and encoded as follows:
528              
529             [
530             1, # the number of "unknowns"
531             msg_or_aux, # the node that was just solved
532             list of component nodes
533             ]
534