| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ### The POD is at the end. ### | 
| 2 |  |  |  |  |  |  | require 5.000; | 
| 3 |  |  |  |  |  |  | package Games::Dissociate; | 
| 4 | 3 |  |  | 3 |  | 36368 | use strict; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 205 |  | 
| 5 |  |  |  |  |  |  | require Exporter; | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 3 |  |  | 3 |  | 17 | use vars qw(@ISA @EXPORT @EXPORT_OK $Debug $VERSION); | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 303 |  | 
| 8 | 3 |  |  | 3 |  | 18 | use Carp; | 
|  | 3 |  |  |  |  | 11 |  | 
|  | 3 |  |  |  |  | 2982 |  | 
| 9 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 10 |  |  |  |  |  |  | @EXPORT = qw(dissociate_filter dissociate); | 
| 11 |  |  |  |  |  |  | $VERSION = 1.0; | 
| 12 |  |  |  |  |  |  | $Debug = 0; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | ########################################################################### | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub dissociate_filter { | 
| 17 | 0 |  |  | 0 | 1 | 0 | require Text::Wrap; | 
| 18 | 0 |  |  |  |  | 0 | require Getopt::Std; | 
| 19 | 0 |  |  |  |  | 0 | my %o; | 
| 20 | 0 | 0 |  |  |  | 0 | if(@ARGV) { | 
| 21 | 0 | 0 |  |  |  | 0 | Getopt::Std::getopts('c:w:m:', \%o) | 
| 22 |  |  |  |  |  |  | or die "Options: | 
| 23 |  |  |  |  |  |  | -cNUMBER | 
| 24 |  |  |  |  |  |  | Run a by-character dissociation with that number of | 
| 25 |  |  |  |  |  |  | characters as the group size. | 
| 26 |  |  |  |  |  |  | -wNUMBER | 
| 27 |  |  |  |  |  |  | Run a by-word dissociation with that number of | 
| 28 |  |  |  |  |  |  | words as the group size. | 
| 29 |  |  |  |  |  |  | -mNUMBER | 
| 30 |  |  |  |  |  |  | Specifies how many iterations the dissociator loop should make. | 
| 31 |  |  |  |  |  |  | "; | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 0 |  |  |  |  | 0 | my $o; | 
| 35 |  |  |  |  |  |  | my $max; | 
| 36 | 0 | 0 |  |  |  | 0 | if($_[0]) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 37 | 0 |  |  |  |  | 0 | $o = $_[0]; | 
| 38 |  |  |  |  |  |  | } elsif($o{'w'}){ | 
| 39 | 0 |  |  |  |  | 0 | $o = - abs($o{'w'}); | 
| 40 |  |  |  |  |  |  | } elsif($o{'c'}){ | 
| 41 | 0 |  |  |  |  | 0 | $o =   abs($o{'c'}); | 
| 42 |  |  |  |  |  |  | } | 
| 43 | 0 |  | 0 |  |  | 0 | $o ||= 2; | 
| 44 |  |  |  |  |  |  |  | 
| 45 | 0 | 0 |  |  |  | 0 | if($_[1]) { | 
|  |  | 0 |  |  |  |  |  | 
| 46 | 0 |  |  |  |  | 0 | $max = $_[1]; | 
| 47 |  |  |  |  |  |  | } elsif ($o{'m'}) { | 
| 48 | 0 |  |  |  |  | 0 | $max = abs($o{'m'}); | 
| 49 |  |  |  |  |  |  | } | 
| 50 | 0 |  | 0 |  |  | 0 | $max ||= 100; | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 0 | 0 |  |  |  | 0 | print "group_length: $o.  max_length: $max\n" if $Debug; | 
| 53 | 0 |  |  |  |  | 0 | print Text::Wrap::wrap(   '','', dissociate(join('', <>), $o, $max)  ), "\n"; | 
| 54 | 0 |  |  |  |  | 0 | return; | 
| 55 |  |  |  |  |  |  | } | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | #========================================================================== | 
| 58 |  |  |  |  |  |  | sub dissociate { | 
| 59 | 5 |  |  | 5 | 1 | 712 | my $in = $_[0]; | 
| 60 | 5 |  | 50 |  |  | 34 | my $arg = int($_[1] || 2); | 
| 61 | 5 |  | 50 |  |  | 30 | my $iteration_limit = $_[2] || 100; | 
| 62 | 5 |  |  |  |  | 8 | my @out; | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 5 |  |  |  |  | 13 | my $by_word = ($arg < 0); | 
| 65 | 5 |  |  |  |  | 9 | my $degree = abs($arg); | 
| 66 | 5 |  |  |  |  | 8 | my $last_match_point; | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 5 | 50 |  |  |  | 19 | $degree = 2 if $degree == 1; | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 3 |  |  | 3 |  | 6098 | use locale; | 
|  | 3 |  |  |  |  | 789 |  | 
|  | 3 |  |  |  |  | 16 |  | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 5 |  |  |  |  | 335 | $in =~ tr<\cm\cj \t>< >s; | 
| 73 | 5 | 50 |  |  |  | 19 | die "No input\n" unless length $in; | 
| 74 | 5 |  |  |  |  | 9 | study $in; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 5 |  |  |  |  | 7 | my $new_matcher; | 
| 77 | 5 | 50 |  |  |  | 12 | if($by_word) { | 
| 78 | 0 |  |  |  |  | 0 | $new_matcher = "\\W+(" . join("\\W+", ("\\w+") x $degree) . ")"; | 
| 79 |  |  |  |  |  |  | } else { | 
| 80 | 5 |  |  |  |  | 20 | $new_matcher = "(" . ('.' x $degree) . ")"; | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | # In use in the loop. | 
| 84 | 5 |  |  |  |  | 8 | my($re, @orig, $matched, $remainder, | 
| 85 |  |  |  |  |  |  | $i, $last_matched, $iteration); | 
| 86 | 5 |  |  |  |  | 6 | $iteration = 0; | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 5 |  |  |  |  | 5 | $last_match_point = -1; | 
| 89 | 5 |  |  |  |  | 13 | while($iteration < $iteration_limit) { | 
| 90 | 500 |  |  |  |  | 537 | ++$iteration; | 
| 91 | 500 | 100 |  |  |  | 954 | if($last_matched) { # last thing we matched -- '' means take a stab | 
| 92 | 495 |  |  |  |  | 618 | $last_match_point = pos($in); | 
| 93 | 495 | 50 |  |  |  | 1048 | if($by_word) { # By word... | 
| 94 | 0 |  |  |  |  | 0 | @orig = map(quotemeta($_), $last_matched =~ m/(\w+)/sg ); | 
| 95 | 0 |  |  |  |  | 0 | $re =  "\\b" | 
| 96 |  |  |  |  |  |  | . join("\\W+",  @orig) # overlap | 
| 97 |  |  |  |  |  |  | . "\\W+(" | 
| 98 |  |  |  |  |  |  | . join("\\W+", ("\\w+") x  $degree) # new tokens | 
| 99 |  |  |  |  |  |  | . ")(\\W+)" | 
| 100 |  |  |  |  |  |  | ; | 
| 101 | 0 |  |  |  |  | 0 | $matched = $remainder = ''; | 
| 102 | 0 |  |  |  |  | 0 | $last_match_point = pos($in); | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 0 | 0 | 0 |  |  | 0 | if($in =~ m/$re/sig  ||  $in =~ m/$re/sig) { | 
| 105 | 0 |  |  |  |  | 0 | $matched = $1; | 
| 106 | 0 |  |  |  |  | 0 | $remainder = $2; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | } else { # By char... | 
| 110 | 495 |  |  |  |  | 3881 | @orig = map(quotemeta($_), $last_matched =~ m/(.)/sg ); | 
| 111 | 495 |  |  |  |  | 2130 | $re =    join('', @orig)     # overlap | 
| 112 |  |  |  |  |  |  | . '(' | 
| 113 |  |  |  |  |  |  | . ("." x  $degree)    # new tokens | 
| 114 |  |  |  |  |  |  | . ')'; | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 495 | 50 | 66 |  |  | 8240 | $matched = $1  if $in =~ m/$re/sig  ||  $in =~ m/$re/sig; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 | 495 | 50 | 33 |  |  | 2878 | if( $last_match_point == pos($in)  # This was a hapax legomenon. | 
| 120 |  |  |  |  |  |  | ||  pos($in) == 0              # We didn't match anything. | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | # hm, this seems to be not just unnecessary, but BAD. | 
| 123 |  |  |  |  |  |  | #  ||  abs($last_match_point - pos($in)) < length($in) | 
| 124 |  |  |  |  |  |  | ) | 
| 125 |  |  |  |  |  |  | { | 
| 126 | 0 | 0 |  |  |  | 0 | print "Hm, dead end at pos ", (0 + pos($in)), "\n" if $Debug; | 
| 127 |  |  |  |  |  |  |  | 
| 128 | 0 |  |  |  |  | 0 | $last_matched = ''; | 
| 129 | 0 |  |  |  |  | 0 | next; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 495 |  |  |  |  | 1054 | $last_matched = $matched; | 
| 133 | 495 | 50 |  |  |  | 1040 | print "Matched ($matched) at ", pos($in), "\n" if $Debug; | 
| 134 | 495 | 50 |  |  |  | 1049 | push @out, $by_word ? ($last_matched . $remainder) | 
| 135 |  |  |  |  |  |  | :  $last_matched; | 
| 136 | 495 |  |  |  |  | 1240 | next; | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | } else { | 
| 139 |  |  |  |  |  |  | # We don't have a last_matched -- take a stab. | 
| 140 | 5 |  |  |  |  | 6 | my($frame, $frame_size); | 
| 141 | 5 |  |  |  |  | 17 | pos($in) = 0;  # Ever necessary? | 
| 142 | 5 | 50 |  |  |  | 13 | if($by_word) { | 
| 143 | 0 |  |  |  |  | 0 | $frame_size =  ($degree + 3) * 8; | 
| 144 |  |  |  |  |  |  | # Generously assume 8 chars per word. | 
| 145 |  |  |  |  |  |  | } else { | 
| 146 | 5 |  |  |  |  | 16 | $frame_size =  ($degree + 1) * 4; | 
| 147 |  |  |  |  |  |  | # Generously assume 4 bytes per "." char. | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 5 |  |  |  |  | 61 | my $i = int(rand(length($in) - $frame_size)); | 
| 151 | 5 |  |  |  |  | 11 | pos($in) = $i; | 
| 152 | 5 | 50 |  |  |  | 16 | print "Taking a stab at pos $i\n" if $Debug; | 
| 153 | 5 | 50 | 33 |  |  | 124 | if(   $in =~ m/$new_matcher/isg | 
| 154 |  |  |  |  |  |  | || $in =~ m/$new_matcher/isg )  # Yes, try TWICE!  Magic, wooo. | 
| 155 |  |  |  |  |  |  | { | 
| 156 | 5 |  |  |  |  | 29 | $last_matched = $1; | 
| 157 |  |  |  |  |  |  | } else { | 
| 158 | 0 | 0 |  |  |  | 0 | print "Can't get an initial $degree-token match" if $Debug; | 
| 159 | 0 |  |  |  |  | 0 | last; | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | } # end while | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 5 |  |  |  |  | 154 | return join('', @out); | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | #========================================================================== | 
| 169 |  |  |  |  |  |  | 1; | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | __END__ |