| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::TokenParse; |
|
2
|
|
|
|
|
|
|
$Lingua::TokenParse::VERSION = '0.1602'; |
|
3
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:GENE'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# ABSTRACT: DEPRECATED in favor of Lingua::Word::Parser |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '0.1602'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
775
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
29
|
|
|
10
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
27
|
|
|
11
|
1
|
|
|
1
|
|
8
|
use Carp qw(croak); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
55
|
|
|
12
|
1
|
|
|
1
|
|
613
|
use Storable qw(retrieve store); |
|
|
1
|
|
|
|
|
3155
|
|
|
|
1
|
|
|
|
|
61
|
|
|
13
|
1
|
|
|
1
|
|
428
|
use Math::BaseCalc (); |
|
|
1
|
|
|
|
|
1505
|
|
|
|
1
|
|
|
|
|
2256
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub new { |
|
16
|
2
|
|
|
2
|
1
|
546
|
my $proto = shift; |
|
17
|
2
|
|
33
|
|
|
11
|
my $class = ref $proto || $proto; |
|
18
|
2
|
|
|
|
|
17
|
my $self = { |
|
19
|
|
|
|
|
|
|
verbose => 0, |
|
20
|
|
|
|
|
|
|
# The word to parse! |
|
21
|
|
|
|
|
|
|
word => undef, |
|
22
|
|
|
|
|
|
|
# We need to use this. |
|
23
|
|
|
|
|
|
|
word_length => 0, |
|
24
|
|
|
|
|
|
|
# Known tokens. |
|
25
|
|
|
|
|
|
|
lexicon => {}, |
|
26
|
|
|
|
|
|
|
# Local lexicon cache file name. |
|
27
|
|
|
|
|
|
|
lexicon_file => '', # ?: 'lexicon-' . time(), |
|
28
|
|
|
|
|
|
|
# All word parts. |
|
29
|
|
|
|
|
|
|
parts => [], |
|
30
|
|
|
|
|
|
|
# All possible parts combinations. |
|
31
|
|
|
|
|
|
|
combinations => [], |
|
32
|
|
|
|
|
|
|
# Scored list of the known parts combinations. |
|
33
|
|
|
|
|
|
|
knowns => {}, |
|
34
|
|
|
|
|
|
|
# Definitions of the known and unknown fragments in knowns. |
|
35
|
|
|
|
|
|
|
definitions => {}, |
|
36
|
|
|
|
|
|
|
# Fragment definition separator. |
|
37
|
|
|
|
|
|
|
separator => ' + ', |
|
38
|
|
|
|
|
|
|
# Known-but-not-defined definition output string. |
|
39
|
|
|
|
|
|
|
not_defined => '.', |
|
40
|
|
|
|
|
|
|
# Unknown definition output string. |
|
41
|
|
|
|
|
|
|
unknown => '?', |
|
42
|
|
|
|
|
|
|
# Known trimming regexp rules. |
|
43
|
|
|
|
|
|
|
constraints => [], |
|
44
|
|
|
|
|
|
|
@_, # slurp anything else and override defaults. |
|
45
|
|
|
|
|
|
|
}; |
|
46
|
2
|
|
|
|
|
4
|
bless $self, $class; |
|
47
|
2
|
|
|
|
|
6
|
$self->_init(); |
|
48
|
2
|
|
|
|
|
6
|
return $self; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub _init { |
|
52
|
2
|
|
|
2
|
|
3
|
my $self = shift; |
|
53
|
2
|
50
|
|
|
|
8
|
warn "Entering _init()\n" if $self->{verbose}; |
|
54
|
2
|
100
|
|
|
|
7
|
$self->word( $self->{word} ) if $self->{word}; |
|
55
|
|
|
|
|
|
|
# Retrieve our lexicon cache if a filename was set. |
|
56
|
2
|
|
|
|
|
5
|
$self->lexicon_cache; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub DESTROY { |
|
60
|
2
|
|
|
2
|
|
739
|
my $self = shift; |
|
61
|
|
|
|
|
|
|
# Cache our lexicon if a filename has been given. |
|
62
|
|
|
|
|
|
|
$self->lexicon_cache( $self->{lexicon_file} ) |
|
63
|
2
|
100
|
|
|
|
11
|
if $self->{lexicon_file}; |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub verbose { |
|
67
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
68
|
0
|
0
|
|
|
|
0
|
$self->{verbose} = shift if @_; |
|
69
|
0
|
|
|
|
|
0
|
return $self->{verbose}; |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub word { |
|
73
|
|
|
|
|
|
|
# WORD: This method is the only place where word_length is set. |
|
74
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
|
75
|
1
|
50
|
|
|
|
3
|
warn "Entering word()\n" if $self->{verbose}; |
|
76
|
1
|
50
|
|
|
|
4
|
if( @_ ) { |
|
77
|
1
|
|
|
|
|
1
|
$self->{word} = shift; |
|
78
|
1
|
|
|
|
|
3
|
$self->{word_length} = length $self->{word}; |
|
79
|
|
|
|
|
|
|
printf "\tword = %s\n\tlength = %d\n", |
|
80
|
|
|
|
|
|
|
$self->{word}, $self->{word_length} |
|
81
|
1
|
50
|
|
|
|
3
|
if $self->{verbose}; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
1
|
|
|
|
|
1
|
return $self->{word}; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub lexicon { |
|
87
|
964
|
|
|
964
|
1
|
2075
|
my $self = shift; |
|
88
|
964
|
100
|
|
|
|
1585
|
if( @_ ) { |
|
89
|
2
|
0
|
33
|
|
|
17
|
$self->{lexicon} = @_ == 1 && ref $_[0] eq 'HASH' |
|
|
|
50
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
? shift |
|
91
|
|
|
|
|
|
|
: @_ % 2 == 0 |
|
92
|
|
|
|
|
|
|
? { @_ } |
|
93
|
|
|
|
|
|
|
: {}; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
964
|
|
|
|
|
1641
|
return $self->{lexicon}; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub parts { |
|
99
|
2
|
|
|
2
|
1
|
360
|
my $self = shift; |
|
100
|
2
|
100
|
|
|
|
7
|
$self->{parts} = shift if @_; |
|
101
|
2
|
|
|
|
|
30
|
return $self->{parts}; |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub combinations { |
|
105
|
195
|
|
|
195
|
1
|
288
|
my $self = shift; |
|
106
|
195
|
100
|
|
|
|
341
|
$self->{combinations} = shift if @_; |
|
107
|
195
|
|
|
|
|
493
|
return $self->{combinations}; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub knowns { |
|
111
|
3
|
|
|
3
|
1
|
622
|
my $self = shift; |
|
112
|
3
|
100
|
|
|
|
9
|
$self->{knowns} = shift if @_; |
|
113
|
3
|
|
|
|
|
19
|
return $self->{knowns}; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub definitions { |
|
117
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
118
|
2
|
100
|
|
|
|
7
|
$self->{definitions} = shift if @_; |
|
119
|
2
|
|
|
|
|
6
|
return $self->{definitions}; |
|
120
|
|
|
|
|
|
|
} |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub separator { |
|
123
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
124
|
0
|
0
|
|
|
|
0
|
$self->{separator} = shift if @_; |
|
125
|
0
|
|
|
|
|
0
|
return $self->{separator}; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub not_defined { |
|
129
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
130
|
0
|
0
|
|
|
|
0
|
$self->{not_defined} = shift if @_; |
|
131
|
0
|
|
|
|
|
0
|
return $self->{not_defined}; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub unknown { |
|
135
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
136
|
0
|
0
|
|
|
|
0
|
$self->{unknown} = shift if @_; |
|
137
|
0
|
|
|
|
|
0
|
return $self->{unknown}; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub constraints { |
|
141
|
46
|
|
|
46
|
1
|
418
|
my $self = shift; |
|
142
|
46
|
100
|
|
|
|
81
|
$self->{constraints} = shift if @_; |
|
143
|
46
|
|
|
|
|
82
|
return $self->{constraints}; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub parse { |
|
147
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
|
148
|
1
|
50
|
|
|
|
4
|
warn "Entering parse()\n" if $self->{verbose}; |
|
149
|
1
|
50
|
|
|
|
2
|
$self->word( shift ) if @_; |
|
150
|
1
|
50
|
|
|
|
3
|
croak 'No word provided.' unless defined $self->{word}; |
|
151
|
1
|
50
|
|
|
|
1
|
croak 'No lexicon defined.' unless keys %{ $self->{lexicon} }; |
|
|
1
|
|
|
|
|
6
|
|
|
152
|
|
|
|
|
|
|
# Reset our data structures. |
|
153
|
1
|
|
|
|
|
4
|
$self->parts([]); |
|
154
|
1
|
|
|
|
|
3
|
$self->definitions({}); |
|
155
|
1
|
|
|
|
|
3
|
$self->combinations([]); |
|
156
|
1
|
|
|
|
|
2
|
$self->knowns({}); |
|
157
|
|
|
|
|
|
|
# Build new ones based on the word. |
|
158
|
1
|
|
|
|
|
3
|
$self->build_parts; |
|
159
|
1
|
|
|
|
|
2
|
$self->build_definitions; |
|
160
|
1
|
|
|
|
|
3
|
$self->build_combinations; |
|
161
|
1
|
|
|
|
|
15
|
$self->build_knowns; |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub build_parts { |
|
165
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
|
166
|
1
|
50
|
|
|
|
3
|
warn "Entering build_parts()\n" if $self->{verbose}; |
|
167
|
|
|
|
|
|
|
|
|
168
|
1
|
|
|
|
|
4
|
for my $i (0 .. $self->{word_length} - 1) { |
|
169
|
9
|
|
|
|
|
16
|
for my $j (1 .. $self->{word_length} - $i) { |
|
170
|
45
|
|
|
|
|
73
|
my $part = substr $self->{word}, $i, $j; |
|
171
|
45
|
|
|
|
|
105
|
push @{ $self->{parts} }, $part |
|
172
|
45
|
|
|
|
|
146
|
unless grep { $part =~ /$_/ } |
|
173
|
45
|
50
|
|
|
|
59
|
@{ $self->constraints }; |
|
|
45
|
|
|
|
|
68
|
|
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
1
|
50
|
|
|
|
3
|
if($self->{verbose}) { |
|
178
|
|
|
|
|
|
|
# XXX This is ugly. |
|
179
|
0
|
|
|
|
|
0
|
my $last = 0; |
|
180
|
0
|
|
|
|
|
0
|
for my $part (@{ $self->{parts} }) { |
|
|
0
|
|
|
|
|
0
|
|
|
181
|
0
|
0
|
|
|
|
0
|
print '', |
|
|
|
0
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
($last ? $last > length( $part ) ? "\n\t" : ', ' : "\t"), |
|
183
|
|
|
|
|
|
|
$part; |
|
184
|
0
|
|
|
|
|
0
|
$last = length $part; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
0
|
0
|
|
|
|
0
|
print "\n" if @{ $self->{parts} }; |
|
|
0
|
|
|
|
|
0
|
|
|
187
|
|
|
|
|
|
|
} |
|
188
|
1
|
|
|
|
|
2
|
return $self->{parts}; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# Save a known combination entry => definition table. |
|
192
|
|
|
|
|
|
|
sub build_definitions { |
|
193
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
194
|
1
|
50
|
|
|
|
3
|
warn "Entering build_definitions()\n" if $self->{verbose}; |
|
195
|
1
|
|
|
|
|
1
|
for my $part (@{ $self->{parts} }) { |
|
|
1
|
|
|
|
|
3
|
|
|
196
|
|
|
|
|
|
|
$self->{definitions}{$part} = $self->{lexicon}{$part} |
|
197
|
45
|
100
|
|
|
|
83
|
if $self->{lexicon}{$part}; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
0
|
|
|
|
|
0
|
warn "\t", join( "\n\t", sort keys %{ $self->definitions } ), "\n" |
|
200
|
1
|
50
|
|
|
|
25
|
if $self->{verbose}; |
|
201
|
1
|
|
|
|
|
5
|
return $self->{definitions}; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub build_combinations { |
|
205
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
206
|
1
|
50
|
|
|
|
2
|
warn "Entering build_combinations()\n" if $self->{verbose}; |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# field size for binary iteration (digits of precision) |
|
209
|
1
|
|
|
|
|
2
|
my $y = $self->{word_length} - 1; |
|
210
|
|
|
|
|
|
|
# total number of zero-based combinations |
|
211
|
1
|
|
|
|
|
9
|
my $z = 2 ** $y - 1; |
|
212
|
|
|
|
|
|
|
# field size for the count |
|
213
|
1
|
|
|
|
|
3
|
my $lz = length $z; |
|
214
|
|
|
|
|
|
|
# field size for a combination |
|
215
|
1
|
|
|
|
|
2
|
my $m = $self->{word_length} + $y; |
|
216
|
|
|
|
|
|
|
warn sprintf |
|
217
|
|
|
|
|
|
|
"\tTotal combinations: %d\n\tConstrained combinations:\n", |
|
218
|
|
|
|
|
|
|
$z + 1 |
|
219
|
1
|
50
|
|
|
|
2
|
if $self->{verbose}; |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# Truth is a single partition character: the lowly dot. |
|
222
|
1
|
|
|
|
|
8
|
my $c = Math::BaseCalc->new( digits => [ 0, '.' ] ); |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# Build a word part combination for each iteration. |
|
225
|
1
|
|
|
|
|
46
|
for my $n ( 0 .. $z ) { |
|
226
|
|
|
|
|
|
|
# Iterate in base two. |
|
227
|
256
|
|
|
|
|
587
|
my $i = $c->to_base( $n ); |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# Get the binary digits as an array. |
|
230
|
256
|
|
|
|
|
7306
|
my @i = split //, sprintf( '%0'.$y.'s', $i ); |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Join the character and digit arrays into a partitioned word. |
|
233
|
256
|
|
|
|
|
408
|
my $t = ''; |
|
234
|
|
|
|
|
|
|
# ..by stepping over the characters and peeling off a digit. |
|
235
|
256
|
|
|
|
|
777
|
for( split //, $self->{word} ) { |
|
236
|
|
|
|
|
|
|
# Zero values become ''. Haha! Truth prevails. |
|
237
|
2304
|
|
100
|
|
|
5575
|
$t .= $_ . (shift( @i ) || ''); |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
256
|
100
|
|
|
|
435
|
unless( grep { $t =~ /$_/ } @{ $self->{constraints} } ) { |
|
|
256
|
|
|
|
|
1165
|
|
|
|
256
|
|
|
|
|
467
|
|
|
241
|
|
|
|
|
|
|
# Preach it. |
|
242
|
|
|
|
|
|
|
printf "\t%".$lz.'d) %0'.$y.'s => %'.$m."s\n", $n, $i, $t |
|
243
|
192
|
50
|
|
|
|
376
|
if $self->{verbose}; |
|
244
|
192
|
|
|
|
|
235
|
push @{ $self->combinations }, $t; |
|
|
192
|
|
|
|
|
293
|
|
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
1
|
|
|
|
|
10
|
return $self->{combinations}; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub build_knowns { |
|
252
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
|
253
|
1
|
50
|
|
|
|
2
|
return unless scalar keys %{ $self->{lexicon} }; |
|
|
1
|
|
|
|
|
5
|
|
|
254
|
1
|
50
|
|
|
|
4
|
warn "Entering build_knowns()\n" if $self->{verbose}; |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Save the familiarity value for each "raw" combination. |
|
257
|
1
|
|
|
|
|
2
|
for my $combo (@{ $self->{combinations} }) { |
|
|
1
|
|
|
|
|
3
|
|
|
258
|
|
|
|
|
|
|
# Skip combinations that have already been seen. |
|
259
|
192
|
50
|
|
|
|
371
|
next if exists $self->{knowns}{$combo}; |
|
260
|
|
|
|
|
|
|
|
|
261
|
192
|
|
|
|
|
296
|
my ($sum, $frag_sum, $char_sum) = (0, 0, 0); |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Get the bits of the combination. |
|
264
|
192
|
|
|
|
|
425
|
my @chunks = split /\./, $combo; |
|
265
|
192
|
|
|
|
|
301
|
for (@chunks) { |
|
266
|
|
|
|
|
|
|
# XXX Uh.. Magically handle hyphens in lexicon entries. |
|
267
|
960
|
|
|
|
|
1449
|
($_, my $combo_seen) = _hyphenate($_, $self->lexicon, 0); |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Sum the combination familiarity values. |
|
270
|
960
|
100
|
|
|
|
1871
|
if ($combo_seen) { |
|
271
|
108
|
|
|
|
|
128
|
$frag_sum++; |
|
272
|
108
|
|
|
|
|
171
|
$char_sum += length; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
} |
|
275
|
|
|
|
|
|
|
# XXX Huh? Why? Can $_ change or something? |
|
276
|
|
|
|
|
|
|
# Stick our combination back together. |
|
277
|
192
|
|
|
|
|
354
|
$combo = join '.', @chunks; |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Save this combination and its familiarity ratios. |
|
280
|
192
|
|
|
|
|
309
|
my $x = $frag_sum / @chunks; |
|
281
|
192
|
|
|
|
|
281
|
my $y = $char_sum / $self->{word_length}; |
|
282
|
192
|
50
|
|
|
|
328
|
warn "\t$combo: [$x, $y]\n" if $self->{verbose}; |
|
283
|
192
|
100
|
66
|
|
|
471
|
if( $x || $y ) { |
|
284
|
85
|
|
|
|
|
297
|
$self->{knowns}{$combo} = [ $x, $y ]; |
|
285
|
|
|
|
|
|
|
} |
|
286
|
|
|
|
|
|
|
else { |
|
287
|
107
|
|
|
|
|
230
|
delete $self->{knowns}{$combo}; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
1
|
|
|
|
|
3
|
return $self->{knowns}; |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Reduce the number of known combinations by concatinating adjacent |
|
295
|
|
|
|
|
|
|
# unknowns (and then removing any duplicates produced). |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#sub learn { |
|
298
|
|
|
|
|
|
|
# my ($self, %args) = @_; |
|
299
|
|
|
|
|
|
|
# Get the list of (partially) unknown stem combinations. |
|
300
|
|
|
|
|
|
|
# Loop through each looking in %args or prompting for a definition. |
|
301
|
|
|
|
|
|
|
#} |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# Update the given string with its actual lexicon value and increment |
|
304
|
|
|
|
|
|
|
# the seen flag. |
|
305
|
|
|
|
|
|
|
sub _hyphenate { |
|
306
|
960
|
|
|
960
|
|
1514
|
my ($string, $lexicon, $combo_seen) = @_; |
|
307
|
|
|
|
|
|
|
|
|
308
|
960
|
100
|
|
|
|
2442
|
if (exists $lexicon->{$string}) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
309
|
108
|
50
|
|
|
|
195
|
$combo_seen++ if defined $combo_seen; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
|
|
|
|
|
|
elsif (exists $lexicon->{"-$string"}) { |
|
312
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
|
313
|
0
|
|
|
|
|
0
|
$string = "-$string"; |
|
314
|
|
|
|
|
|
|
} |
|
315
|
|
|
|
|
|
|
elsif (exists $lexicon->{"$string-"}) { |
|
316
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
|
317
|
0
|
|
|
|
|
0
|
$string = "$string-"; |
|
318
|
|
|
|
|
|
|
} |
|
319
|
|
|
|
|
|
|
|
|
320
|
960
|
50
|
|
|
|
2097
|
return wantarray ? ($string, $combo_seen) : $string; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub output_knowns { |
|
324
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
325
|
0
|
|
|
|
|
0
|
my @out = (); |
|
326
|
0
|
|
|
|
|
0
|
my $header = < |
|
327
|
|
|
|
|
|
|
Combination [frag familiarity, char familiarity] |
|
328
|
|
|
|
|
|
|
Fragment definitions |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
HEADER |
|
331
|
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
0
|
for my $known ( |
|
333
|
|
|
|
|
|
|
reverse sort { |
|
334
|
|
|
|
|
|
|
$self->{knowns}{$a}[0] <=> $self->{knowns}{$b}[0] || |
|
335
|
0
|
0
|
|
|
|
0
|
$self->{knowns}{$a}[1] <=> $self->{knowns}{$b}[1] |
|
336
|
0
|
|
|
|
|
0
|
} keys %{ $self->{knowns} } |
|
337
|
|
|
|
|
|
|
) { |
|
338
|
0
|
|
|
|
|
0
|
my @definition; |
|
339
|
0
|
|
|
|
|
0
|
for my $chunk (split /\./, $known) { |
|
340
|
|
|
|
|
|
|
push @definition, |
|
341
|
|
|
|
|
|
|
defined $self->{definitions}{$chunk} |
|
342
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
|
343
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
|
344
|
|
|
|
|
|
|
: $self->{not_defined} |
|
345
|
0
|
0
|
|
|
|
0
|
: $self->{unknown}; |
|
|
|
0
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
push @out, sprintf qq/%s [%s]\n%s/, |
|
349
|
|
|
|
|
|
|
$known, |
|
350
|
0
|
|
|
|
|
0
|
join (', ', map { sprintf '%0.2f', $_ } |
|
351
|
0
|
|
|
|
|
0
|
@{ $self->{knowns}{$known} }), |
|
352
|
0
|
|
|
|
|
0
|
join ($self->{separator}, @definition); |
|
353
|
|
|
|
|
|
|
} |
|
354
|
|
|
|
|
|
|
|
|
355
|
0
|
0
|
|
|
|
0
|
return wantarray ? @out : $header . join "\n\n", @out; |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Naive, no locking read/write. If you run a production environment, |
|
359
|
|
|
|
|
|
|
# you know what to do. |
|
360
|
|
|
|
|
|
|
sub lexicon_cache { |
|
361
|
5
|
|
|
5
|
1
|
444
|
my( $self, $file, $value ) = @_; |
|
362
|
5
|
50
|
|
|
|
13
|
warn "Entering lexicon_cache()\n" if $self->{verbose}; |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
# Set the file and the lexicon_file attribute if we are told to. |
|
365
|
5
|
100
|
100
|
|
|
25
|
if( $file && $file eq 'lexicon_file' && $value ) { |
|
|
|
|
66
|
|
|
|
|
|
366
|
1
|
|
|
|
|
3
|
$self->{lexicon_file} = $value; |
|
367
|
1
|
|
|
|
|
3
|
$file = $value; |
|
368
|
|
|
|
|
|
|
} |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# If there is no file try to use the lexicon_file. |
|
371
|
5
|
|
66
|
|
|
16
|
$file ||= $self->{lexicon_file}; |
|
372
|
|
|
|
|
|
|
# Otherwise, bail out! |
|
373
|
|
|
|
|
|
|
warn( "No lexicon cache file set\n" ) and return |
|
374
|
5
|
50
|
0
|
|
|
12
|
if $self->{verbose} && !$file; |
|
|
|
|
33
|
|
|
|
|
|
375
|
|
|
|
|
|
|
|
|
376
|
5
|
100
|
|
|
|
16
|
if( $file ) { |
|
377
|
|
|
|
|
|
|
# Store 'em if you got 'em. |
|
378
|
3
|
100
|
|
|
|
3
|
if( keys %{ $self->{lexicon} } ) { |
|
|
3
|
|
|
|
|
11
|
|
|
379
|
2
|
50
|
|
|
|
15
|
warn "store( $self->{lexicon}, $file )\n" if $self->{verbose}; |
|
380
|
2
|
|
|
|
|
9
|
store( $self->{lexicon}, $file ); |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
# ..Retrieve 'em if not. |
|
383
|
|
|
|
|
|
|
else { |
|
384
|
1
|
50
|
33
|
|
|
5
|
warn "retrieve( $file )\n" if $self->{verbose} && -e $file; |
|
385
|
1
|
50
|
|
|
|
22
|
$self->lexicon( retrieve( $file ) ) if -e $file; |
|
386
|
|
|
|
|
|
|
} |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
1; |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
__END__ |