File Coverage

blib/lib/Games/Dissociate.pm
Criterion Covered Total %
statement 52 84 61.9
branch 13 46 28.2
condition 6 20 30.0
subroutine 5 6 83.3
pod 2 2 100.0
total 78 158 49.3


line stmt bran cond sub pod time code
1             ### The POD is at the end. ###
2             require 5.000;
3             package Games::Dissociate;
4 3     3   36368 use strict;
  3         7  
  3         205  
5             require Exporter;
6              
7 3     3   17 use vars qw(@ISA @EXPORT @EXPORT_OK $Debug $VERSION);
  3         8  
  3         303  
8 3     3   18 use Carp;
  3         11  
  3         2982  
9             @ISA = qw(Exporter);
10             @EXPORT = qw(dissociate_filter dissociate);
11             $VERSION = 1.0;
12             $Debug = 0;
13              
14             ###########################################################################
15              
16             sub dissociate_filter {
17 0     0 1 0 require Text::Wrap;
18 0         0 require Getopt::Std;
19 0         0 my %o;
20 0 0       0 if(@ARGV) {
21 0 0       0 Getopt::Std::getopts('c:w:m:', \%o)
22             or die "Options:
23             -cNUMBER
24             Run a by-character dissociation with that number of
25             characters as the group size.
26             -wNUMBER
27             Run a by-word dissociation with that number of
28             words as the group size.
29             -mNUMBER
30             Specifies how many iterations the dissociator loop should make.
31             ";
32             }
33              
34 0         0 my $o;
35             my $max;
36 0 0       0 if($_[0]) {
    0          
    0          
37 0         0 $o = $_[0];
38             } elsif($o{'w'}){
39 0         0 $o = - abs($o{'w'});
40             } elsif($o{'c'}){
41 0         0 $o = abs($o{'c'});
42             }
43 0   0     0 $o ||= 2;
44              
45 0 0       0 if($_[1]) {
    0          
46 0         0 $max = $_[1];
47             } elsif ($o{'m'}) {
48 0         0 $max = abs($o{'m'});
49             }
50 0   0     0 $max ||= 100;
51              
52 0 0       0 print "group_length: $o. max_length: $max\n" if $Debug;
53 0         0 print Text::Wrap::wrap( '','', dissociate(join('', <>), $o, $max) ), "\n";
54 0         0 return;
55             }
56              
57             #==========================================================================
58             sub dissociate {
59 5     5 1 712 my $in = $_[0];
60 5   50     34 my $arg = int($_[1] || 2);
61 5   50     30 my $iteration_limit = $_[2] || 100;
62 5         8 my @out;
63              
64 5         13 my $by_word = ($arg < 0);
65 5         9 my $degree = abs($arg);
66 5         8 my $last_match_point;
67              
68 5 50       19 $degree = 2 if $degree == 1;
69              
70 3     3   6098 use locale;
  3         789  
  3         16  
71              
72 5         335 $in =~ tr<\cm\cj \t>< >s;
73 5 50       19 die "No input\n" unless length $in;
74 5         9 study $in;
75              
76 5         7 my $new_matcher;
77 5 50       12 if($by_word) {
78 0         0 $new_matcher = "\\W+(" . join("\\W+", ("\\w+") x $degree) . ")";
79             } else {
80 5         20 $new_matcher = "(" . ('.' x $degree) . ")";
81             }
82              
83             # In use in the loop.
84 5         8 my($re, @orig, $matched, $remainder,
85             $i, $last_matched, $iteration);
86 5         6 $iteration = 0;
87              
88 5         5 $last_match_point = -1;
89 5         13 while($iteration < $iteration_limit) {
90 500         537 ++$iteration;
91 500 100       954 if($last_matched) { # last thing we matched -- '' means take a stab
92 495         618 $last_match_point = pos($in);
93 495 50       1048 if($by_word) { # By word...
94 0         0 @orig = map(quotemeta($_), $last_matched =~ m/(\w+)/sg );
95 0         0 $re = "\\b"
96             . join("\\W+", @orig) # overlap
97             . "\\W+("
98             . join("\\W+", ("\\w+") x $degree) # new tokens
99             . ")(\\W+)"
100             ;
101 0         0 $matched = $remainder = '';
102 0         0 $last_match_point = pos($in);
103              
104 0 0 0     0 if($in =~ m/$re/sig || $in =~ m/$re/sig) {
105 0         0 $matched = $1;
106 0         0 $remainder = $2;
107             }
108              
109             } else { # By char...
110 495         3881 @orig = map(quotemeta($_), $last_matched =~ m/(.)/sg );
111 495         2130 $re = join('', @orig) # overlap
112             . '('
113             . ("." x $degree) # new tokens
114             . ')';
115              
116 495 50 66     8240 $matched = $1 if $in =~ m/$re/sig || $in =~ m/$re/sig;
117             }
118              
119 495 50 33     2878 if( $last_match_point == pos($in) # This was a hapax legomenon.
120             || pos($in) == 0 # We didn't match anything.
121              
122             # hm, this seems to be not just unnecessary, but BAD.
123             # || abs($last_match_point - pos($in)) < length($in)
124             )
125             {
126 0 0       0 print "Hm, dead end at pos ", (0 + pos($in)), "\n" if $Debug;
127              
128 0         0 $last_matched = '';
129 0         0 next;
130             }
131              
132 495         1054 $last_matched = $matched;
133 495 50       1040 print "Matched ($matched) at ", pos($in), "\n" if $Debug;
134 495 50       1049 push @out, $by_word ? ($last_matched . $remainder)
135             : $last_matched;
136 495         1240 next;
137              
138             } else {
139             # We don't have a last_matched -- take a stab.
140 5         6 my($frame, $frame_size);
141 5         17 pos($in) = 0; # Ever necessary?
142 5 50       13 if($by_word) {
143 0         0 $frame_size = ($degree + 3) * 8;
144             # Generously assume 8 chars per word.
145             } else {
146 5         16 $frame_size = ($degree + 1) * 4;
147             # Generously assume 4 bytes per "." char.
148             }
149              
150 5         61 my $i = int(rand(length($in) - $frame_size));
151 5         11 pos($in) = $i;
152 5 50       16 print "Taking a stab at pos $i\n" if $Debug;
153 5 50 33     124 if( $in =~ m/$new_matcher/isg
154             || $in =~ m/$new_matcher/isg ) # Yes, try TWICE! Magic, wooo.
155             {
156 5         29 $last_matched = $1;
157             } else {
158 0 0       0 print "Can't get an initial $degree-token match" if $Debug;
159 0         0 last;
160             }
161             }
162              
163             } # end while
164              
165 5         154 return join('', @out);
166             }
167              
168             #==========================================================================
169             1;
170              
171             __END__