| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 1 |  |  | 1 |  | 345 | use strict; use warnings; | 
|  | 1 |  |  | 1 |  | 1 |  | 
|  | 1 |  |  |  |  | 18 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | package UDCode; | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | our $VERSION = '1.04'; | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 1 |  |  | 1 |  | 4 | BEGIN { require Exporter; *import = \&Exporter::import } | 
|  | 1 |  |  |  |  | 304 |  | 
| 8 |  |  |  |  |  |  | our @EXPORT = qw(is_udcode ud_pair); | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 NAME | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | UDCode - Does a set of code words form a uniquely decodable code? | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | use UDCode; | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | if (is_udcode(@words)) { ... } | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | my ($x1, $x2) = ud_pair(@words); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | A I  is a set of strings, called the I. A code is  | 
| 25 |  |  |  |  |  |  | I if any string I  that is a concatenation of  | 
| 26 |  |  |  |  |  |  | code words is so in I. | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | For example, the code C<('ab', 'abba', 'b')> is I uniquely | 
| 29 |  |  |  |  |  |  | decodable, because C<'abba' . 'b' eq 'ab' . 'b' . 'ab'>. But the code | 
| 30 |  |  |  |  |  |  | C<('a', 'ab', 'abb')> I uniquely decodable, because there is no such | 
| 31 |  |  |  |  |  |  | pair of sequences of code words. | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | This module provides a pair of functions to tell whether a set of | 
| 34 |  |  |  |  |  |  | code words is a uniquely decodable code, and to find an example of | 
| 35 |  |  |  |  |  |  | sequences of code words whose concatenations are the same, if there is | 
| 36 |  |  |  |  |  |  | such a pair. | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =head1 INTERFACE | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | =head2 C | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | C returns true if and only if the specified code is | 
| 43 |  |  |  |  |  |  | uniquely decodable. | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =cut | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | sub is_udcode { | 
| 48 | 17 |  |  | 17 | 1 | 20281 | my $N = my ($a, $b) = ud_pair(@_); | 
| 49 | 17 |  |  |  |  | 68 | return $N == 0; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =head2 C | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | If C<@words> is not a uniquely decodable code, then C | 
| 55 |  |  |  |  |  |  | returns a proof of that fact, in the form of two distinct sequences of | 
| 56 |  |  |  |  |  |  | code words whose concatenations are equal. | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | If C<@words> is not uniquely decodable, then C returns | 
| 59 |  |  |  |  |  |  | references to two arrays of code words, C<$a>, and C<$b>, such that: | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | join("", @$a) eq join("", @$b) | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | For example, given C<@words = qw(ab abba b)>, C might return | 
| 64 |  |  |  |  |  |  | the two arrays C<["ab", "b", "ab"]> and C<["abba", "b"]>. | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | If C<@words> is uniquely decodable, C returns false. | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | =cut | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | sub ud_pair { | 
| 71 |  |  |  |  |  |  | # Code words | 
| 72 | 34 |  |  | 34 | 1 | 6794 | my @c = @_; | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | # $h{$x} = [$y, $z]  means that $x$y eq $z | 
| 75 | 34 |  |  |  |  | 37 | my %h; | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | # Queue | 
| 78 |  |  |  |  |  |  | my @q; | 
| 79 |  |  |  |  |  |  |  | 
| 80 | 34 |  |  |  |  | 48 | for my $c1 (@c) { | 
| 81 | 114 |  |  |  |  | 106 | for my $c2 (@c) { | 
| 82 | 482 | 100 |  |  |  | 565 | next if $c1 eq $c2; | 
| 83 | 328 | 100 |  |  |  | 314 | if (is_prefix_of($c1, $c2)) { | 
| 84 | 18 |  |  |  |  | 23 | my $x = subtract($c1, $c2); | 
| 85 | 18 |  |  |  |  | 37 | $h{$x} = [[$c1], [$c2]]; | 
| 86 | 18 |  |  |  |  | 31 | push @q, $x; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 | 34 |  |  |  |  | 50 | while (@q) { | 
| 92 | 40 |  |  |  |  | 41 | my $x = shift @q; | 
| 93 | 40 | 50 |  |  |  | 48 | return unless defined $x; | 
| 94 |  |  |  |  |  |  |  | 
| 95 | 40 |  |  |  |  | 34 | my ($a, $b) = @{$h{$x}}; | 
|  | 40 |  |  |  |  | 52 |  | 
| 96 | 40 |  |  |  |  | 41 | for my $c (@c) { | 
| 97 | 170 | 50 |  |  |  | 174 | die unless defined $b;      # Can't happen | 
| 98 |  |  |  |  |  |  | # $a$x eq $b | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 170 |  |  |  |  | 137 | my $y; | 
| 101 | 170 | 100 |  |  |  | 162 | if (is_prefix_of($c, $x)) { | 
|  |  | 100 |  |  |  |  |  | 
| 102 | 28 |  |  |  |  | 33 | $y = subtract($c, $x); | 
| 103 | 28 | 100 |  |  |  | 40 | next if exists $h{$y};  # already tried this | 
| 104 | 26 |  |  |  |  | 42 | $h{$y} = [[@$a, $c], $b]; | 
| 105 | 26 |  |  |  |  | 30 | push @q, $y; | 
| 106 |  |  |  |  |  |  | } elsif (is_prefix_of($x, $c)) { | 
| 107 | 16 |  |  |  |  | 16 | $y = subtract($x, $c); | 
| 108 | 16 | 100 |  |  |  | 27 | next if exists $h{$y};  # already tried this | 
| 109 | 12 |  |  |  |  | 24 | $h{$y} = [$b, [@$a, $c]]; | 
| 110 | 12 |  |  |  |  | 16 | push @q, $y; | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 164 | 100 | 100 |  |  | 280 | return @{$h{""}} if defined($y) && $y eq ""; | 
|  | 12 |  |  |  |  | 66 |  | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  | } | 
| 116 | 22 |  |  |  |  | 37 | return;                       # failure | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | sub is_prefix_of { | 
| 120 | 640 |  |  | 640 | 0 | 958 | index($_[1], $_[0]) == 0; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | sub subtract { | 
| 124 | 62 |  |  | 62 | 0 | 83 | substr($_[1], length($_[0])); | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | =head1 AUTHOR | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | Mark Jason Dominus | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | This software is hereby released into the public domain.  You may use, | 
| 134 |  |  |  |  |  |  | modify, or distribute it for any purpose whatsoever without restriction. | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | =cut | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | 1; | 
| 139 |  |  |  |  |  |  |  |