| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package String::KeyboardDistance; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5.005_62; |
|
4
|
1
|
|
|
1
|
|
1174
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
32
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
3713
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Exporter; |
|
8
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
String::KeyboardDistance - String Comparison Algorithm |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use String::KeyboardDistance qw(:all); |
|
17
|
|
|
|
|
|
|
my $s1 = 'Apple'; |
|
18
|
|
|
|
|
|
|
my $s2 = 'Wople'; |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# compute a match probability |
|
21
|
|
|
|
|
|
|
my $pr = qwerty_keyboard_distance_match('Apple','Wople'); |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# find the keyboard distance between two strings |
|
24
|
|
|
|
|
|
|
my $dst = qwerty_keyboard_distance('IBM','HAL'); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# find the keyboard distance between two characters |
|
27
|
|
|
|
|
|
|
$dst = qwerty_char_distance('a','v'); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
print "maximum distance: $qwerty_max_distance\n"; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This module implmements a version of keyboard distance for fuzzy |
|
34
|
|
|
|
|
|
|
string matching. Keyboard distance is a measure of the physical |
|
35
|
|
|
|
|
|
|
distance between two keys on a keyboard. For example, 'g' has a |
|
36
|
|
|
|
|
|
|
distance of 1 from the keys 'r', 't', 'y', 'f', 'h', 'v', 'b', |
|
37
|
|
|
|
|
|
|
and 'n'. Immediate diagonals (like ''r, 'y', 'v', and 'n') are |
|
38
|
|
|
|
|
|
|
considered to have a distance of 1 instead of 1.414 to help to |
|
39
|
|
|
|
|
|
|
prevent horizontal/vertical bias. |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
A match probability between two strings is computed from the total |
|
42
|
|
|
|
|
|
|
distances between corresponding characters divided by the length of the |
|
43
|
|
|
|
|
|
|
longer string multiplied by the maximum distance between the two furthest |
|
44
|
|
|
|
|
|
|
keys on the keyboard. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
The functions in this module use a simple grid of keys. For the qwerty |
|
47
|
|
|
|
|
|
|
mapping, the grid is similar to the following: |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
| 0 1 2 3 4 5 6 7 8 9 10 11 12 13 |
|
50
|
|
|
|
|
|
|
--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
|
51
|
|
|
|
|
|
|
0 | ` | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | - | = | | |
|
52
|
|
|
|
|
|
|
1 | | q | w | e | r | t | y | u | i | o | p | [ | ] | \ | |
|
53
|
|
|
|
|
|
|
2 | | a | s | d | f | g | h | j | k | l | ; | ' | | | |
|
54
|
|
|
|
|
|
|
3 | | z | x | c | v | b | n | m | , | . | / | | | | |
|
55
|
|
|
|
|
|
|
--+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The grids for both qwerty and dvorak are based on PC style keyboards. |
|
58
|
|
|
|
|
|
|
Shifted characters have the same coordinates (a and A, 6 and ^). |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head2 EXPORT |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This module exports no symbols by default. The following functions |
|
63
|
|
|
|
|
|
|
are available for export through EXPORT_OK: |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
build_qwerty_map |
|
66
|
|
|
|
|
|
|
max_qwerty_distance |
|
67
|
|
|
|
|
|
|
qwerty_char_distance |
|
68
|
|
|
|
|
|
|
qwerty_keyboard_distance |
|
69
|
|
|
|
|
|
|
qwerty_keyboard_distance_match |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
build_dvorak_map |
|
72
|
|
|
|
|
|
|
max_dvorak_distance |
|
73
|
|
|
|
|
|
|
dvorak_char_distance |
|
74
|
|
|
|
|
|
|
dvorak_keyboard_distance |
|
75
|
|
|
|
|
|
|
dvorak_keyboard_distance_match |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
grid_distance |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
The following varialbes are availalbe for export through EXPORT_OK: |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
@qwerty_grid |
|
82
|
|
|
|
|
|
|
$qwerty_map |
|
83
|
|
|
|
|
|
|
$qwerty_max_distance |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
@dvorak_grid |
|
86
|
|
|
|
|
|
|
$dvorak_map |
|
87
|
|
|
|
|
|
|
$dvorak_max_distance |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Additionaly, this module supports the following export tags: |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
:Functions - import all functions |
|
92
|
|
|
|
|
|
|
:Variables - import all variables |
|
93
|
|
|
|
|
|
|
:all - import both functions and variables |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
our @EXP_SUBS = qw( |
|
98
|
|
|
|
|
|
|
build_qwerty_map |
|
99
|
|
|
|
|
|
|
max_qwerty_distance |
|
100
|
|
|
|
|
|
|
qwerty_char_distance |
|
101
|
|
|
|
|
|
|
qwerty_keyboard_distance |
|
102
|
|
|
|
|
|
|
qwerty_keyboard_distance_match |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
build_dvorak_map |
|
105
|
|
|
|
|
|
|
max_dvorak_distance |
|
106
|
|
|
|
|
|
|
dvorak_char_distance |
|
107
|
|
|
|
|
|
|
dvorak_keyboard_distance |
|
108
|
|
|
|
|
|
|
dvorak_keyboard_distance_match |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
grid_distance |
|
111
|
|
|
|
|
|
|
); |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
our @EXP_VARS = qw( |
|
114
|
|
|
|
|
|
|
@qwerty_grid |
|
115
|
|
|
|
|
|
|
$qwerty_map |
|
116
|
|
|
|
|
|
|
$qwerty_max_distance |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
@dvorak_grid |
|
119
|
|
|
|
|
|
|
$dvorak_map |
|
120
|
|
|
|
|
|
|
$dvorak_max_distance |
|
121
|
|
|
|
|
|
|
); |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
|
124
|
|
|
|
|
|
|
'all' => [ @EXP_SUBS, @EXP_VARS ], |
|
125
|
|
|
|
|
|
|
'Functions' => \@EXP_SUBS, |
|
126
|
|
|
|
|
|
|
'Variables' => \@EXP_VARS, |
|
127
|
|
|
|
|
|
|
); |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
|
130
|
|
|
|
|
|
|
our @EXPORT = qw(); |
|
131
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
our @qwerty_grid = ( |
|
134
|
|
|
|
|
|
|
[[ split(//,q{`1234567890-= }) ],[ split(//,q{~!@#$%^&*()_+ })]], |
|
135
|
|
|
|
|
|
|
[[ split(//,q{ qwertyuiop[]\\})],[ split(//,q( QWERTYUIOP{}|))]], |
|
136
|
|
|
|
|
|
|
[[ split(//,q{ asdfghjkl;' }) ],[ split(//,q{ ASDFGHJKL:" })]], |
|
137
|
|
|
|
|
|
|
[[ split(//,q{ zxcvbnm,./ }) ],[ split(//,q{ ZXCVBNM<>? })]], |
|
138
|
|
|
|
|
|
|
); |
|
139
|
|
|
|
|
|
|
our $qwerty_map = build_qwerty_map(); |
|
140
|
|
|
|
|
|
|
our $qwerty_max_distance = max_qwerty_distance(); |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
our @dvorak_grid = ( |
|
143
|
|
|
|
|
|
|
[[ split(//,q{`1234567890[] }) ],[ split(//,q"~!@#$%^&*(){} ")]], |
|
144
|
|
|
|
|
|
|
[[ split(//,q{ ',.pyfgcrl/=\\})],[ split(//,q{ "<>PYFGCRL?+|})]], |
|
145
|
|
|
|
|
|
|
[[ split(//,q{ aoeuidhtns- }) ],[ split(//,q{ AOEUIDHTNS_ })]], |
|
146
|
|
|
|
|
|
|
[[ split(//,q{ ;qjkxbmwvz }) ],[ split(//,q{ :QJKXBMWVZ })]], |
|
147
|
|
|
|
|
|
|
); |
|
148
|
|
|
|
|
|
|
our $dvorak_map = build_dvorak_map(); |
|
149
|
|
|
|
|
|
|
our $dvorak_max_distance = max_dvorak_distance(); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 API REFERENCE |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 build_qwerty_map |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
param: array reference to receive the map [optional] |
|
158
|
|
|
|
|
|
|
return: the map (array reference) |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
This function builds a map of each character to its corresponding |
|
161
|
|
|
|
|
|
|
location on the keyboard. The location is derived by looking at the |
|
162
|
|
|
|
|
|
|
keyboard as a simple grid in which the location of keys on the keyboard |
|
163
|
|
|
|
|
|
|
are considereed to be the same as their shifted values. The following |
|
164
|
|
|
|
|
|
|
keys are considered to have the same location: |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
1 and ! |
|
167
|
|
|
|
|
|
|
r and r |
|
168
|
|
|
|
|
|
|
/ and ? |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
The map is an array, where the index is the value returned by chr() for |
|
171
|
|
|
|
|
|
|
the character at that location. The value is an array ref describing |
|
172
|
|
|
|
|
|
|
the location of the character on the keyboard. The first element is the |
|
173
|
|
|
|
|
|
|
y position, the second is the x, and the third represents the shift value |
|
174
|
|
|
|
|
|
|
(0 for non-shifted, 1 for shifted). All non-keyable characters, including |
|
175
|
|
|
|
|
|
|
tabs and spaces, will have undef values in the map, meaning they have no |
|
176
|
|
|
|
|
|
|
point on the grid. These non-key characterss should be considered to be |
|
177
|
|
|
|
|
|
|
the maximum distance away from any other character except themselves. |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The map is constructed at run-time from the @qwerty_grid package global, |
|
180
|
|
|
|
|
|
|
and cached in the $qwerty_map package global. |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=cut |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub build_qwerty_map |
|
185
|
|
|
|
|
|
|
{ |
|
186
|
1
|
|
50
|
1
|
1
|
18
|
my $map = shift||[]; |
|
187
|
1
|
|
|
|
|
3
|
my($i,$j); |
|
188
|
1
|
|
|
|
|
5
|
for($i = 0; $i < @qwerty_grid; ++$i) { |
|
189
|
4
|
|
|
|
|
6
|
for($j = 0; $j < @{$qwerty_grid[$i][0]}; ++$j) { |
|
|
58
|
|
|
|
|
133
|
|
|
190
|
54
|
50
|
|
|
|
102
|
next unless defined $qwerty_grid[$i][0][$j]; |
|
191
|
54
|
100
|
|
|
|
114
|
next if ' ' eq $qwerty_grid[$i][0][$j]; |
|
192
|
|
|
|
|
|
|
# print "building map0: $i,$j: ",$qwerty_grid[$i][0][$j],':', |
|
193
|
|
|
|
|
|
|
# ord($qwerty_grid[$i][0][$j]),"\n"; |
|
194
|
47
|
|
|
|
|
127
|
$map->[ord $qwerty_grid[$i][0][$j]] = [$i,$j,0]; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
4
|
|
|
|
|
7
|
for($j = 0; $j < @{$qwerty_grid[$i][1]}; ++$j) { |
|
|
60
|
|
|
|
|
137
|
|
|
197
|
56
|
50
|
|
|
|
106
|
next unless defined $qwerty_grid[$i][1][$j]; |
|
198
|
56
|
100
|
|
|
|
118
|
next if ' ' eq $qwerty_grid[$i][1][$j]; |
|
199
|
|
|
|
|
|
|
#print "building map1: $i,$j: ",$qwerty_grid[$i][1][$j],':', |
|
200
|
|
|
|
|
|
|
# ord($qwerty_grid[$i][1][$j]),"\n"; |
|
201
|
47
|
|
|
|
|
135
|
$map->[ord $qwerty_grid[$i][1][$j]] = [$i,$j,1]; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
} |
|
204
|
1
|
|
|
|
|
3
|
return $map; |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 build_dvorak_map |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
param: array reference to receive the map [optional] |
|
210
|
|
|
|
|
|
|
return: the map (array reference) |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This function is identical to build_qwerty_map, with the exception |
|
213
|
|
|
|
|
|
|
that it builds a map for dvorak keyboards, and the map is constructed |
|
214
|
|
|
|
|
|
|
from the @dvorak_grid package global, and cached in the $dvorak_map |
|
215
|
|
|
|
|
|
|
package global. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub build_dvorak_map |
|
220
|
|
|
|
|
|
|
{ |
|
221
|
1
|
|
50
|
1
|
1
|
7
|
my $map = shift||[]; |
|
222
|
1
|
|
|
|
|
2
|
my($i,$j); |
|
223
|
1
|
|
|
|
|
3
|
for($i = 0; $i < @dvorak_grid; ++$i) { |
|
224
|
4
|
|
|
|
|
5
|
for($j = 0; $j < @{$dvorak_grid[$i][0]}; ++$j) { |
|
|
60
|
|
|
|
|
149
|
|
|
225
|
56
|
50
|
|
|
|
101
|
next unless defined $dvorak_grid[$i][0][$j]; |
|
226
|
56
|
100
|
|
|
|
104
|
next if ' ' eq $dvorak_grid[$i][0][$j]; |
|
227
|
|
|
|
|
|
|
# print "building map0: $i,$j: ",$dvorak_grid[$i][0][$j],':', |
|
228
|
|
|
|
|
|
|
# ord($dvorak_grid[$i][0][$j]),"\n"; |
|
229
|
47
|
|
|
|
|
107
|
$map->[ord $dvorak_grid[$i][0][$j]] = [$i,$j,0]; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
4
|
|
|
|
|
7
|
for($j = 0; $j < @{$dvorak_grid[$i][1]}; ++$j) { |
|
|
60
|
|
|
|
|
100
|
|
|
232
|
56
|
50
|
|
|
|
87
|
next unless defined $dvorak_grid[$i][1][$j]; |
|
233
|
56
|
100
|
|
|
|
92
|
next if ' ' eq $dvorak_grid[$i][1][$j]; |
|
234
|
|
|
|
|
|
|
#print "building map1: $i,$j: ",$dvorak_grid[$i][1][$j],':', |
|
235
|
|
|
|
|
|
|
# ord($dvorak_grid[$i][1][$j]),"\n"; |
|
236
|
47
|
|
|
|
|
101
|
$map->[ord $dvorak_grid[$i][1][$j]] = [$i,$j,1]; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
} |
|
239
|
1
|
|
|
|
|
3
|
return $map; |
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head2 max_qwerty_distance |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
return: maximum distance |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
This function dynamicly computes the maximum distance between |
|
247
|
|
|
|
|
|
|
keys in the qwerty map. The maximum key distance is stored in |
|
248
|
|
|
|
|
|
|
the $qwerty_max_distance package global. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=cut |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
sub max_qwerty_distance |
|
253
|
|
|
|
|
|
|
{ |
|
254
|
1
|
|
|
1
|
1
|
2
|
my $max = 0; |
|
255
|
1
|
|
|
|
|
5
|
for(my $i = 0; $i < @$qwerty_map; ++$i) { |
|
256
|
127
|
100
|
|
|
|
263
|
next unless $qwerty_map->[$i]; |
|
257
|
94
|
|
|
|
|
208
|
for(my $j = 0; $j < @$qwerty_map; ++$j) { |
|
258
|
11938
|
100
|
|
|
|
28376
|
next unless $qwerty_map->[$j]; |
|
259
|
8836
|
100
|
|
|
|
15487
|
next if $i == $j; |
|
260
|
8742
|
|
|
|
|
19487
|
my $dst = qwerty_char_distance(chr($i),chr($j)); |
|
261
|
8742
|
100
|
|
|
|
36715
|
$max = $dst if $dst > $max; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
} |
|
264
|
1
|
|
|
|
|
4
|
return $max; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 max_dvorak_distance |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
return: maximum distance |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
This function dynamicly computes the maximum distance between |
|
272
|
|
|
|
|
|
|
keys in the dvorak map. The maximum key distance is stored in |
|
273
|
|
|
|
|
|
|
the $dvorak_max_distance package global. |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=cut |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub max_dvorak_distance |
|
278
|
|
|
|
|
|
|
{ |
|
279
|
1
|
|
|
1
|
1
|
2
|
my $max = 0; |
|
280
|
1
|
|
|
|
|
5
|
for(my $i = 0; $i < @$dvorak_map; ++$i) { |
|
281
|
127
|
100
|
|
|
|
240
|
next unless $dvorak_map->[$i]; |
|
282
|
94
|
|
|
|
|
176
|
for(my $j = 0; $j < @$dvorak_map; ++$j) { |
|
283
|
11938
|
100
|
|
|
|
22795
|
next unless $dvorak_map->[$j]; |
|
284
|
8836
|
100
|
|
|
|
13632
|
next if $i == $j; |
|
285
|
8742
|
|
|
|
|
16372
|
my $dst = dvorak_char_distance(chr($i),chr($j)); |
|
286
|
8742
|
100
|
|
|
|
27221
|
$max = $dst if $dst > $max; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
} |
|
289
|
1
|
|
|
|
|
7
|
return $max; |
|
290
|
|
|
|
|
|
|
} |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head2 qwerty_char_distance |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
param: char 1 |
|
295
|
|
|
|
|
|
|
param: char 2 |
|
296
|
|
|
|
|
|
|
return: distance |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
This function computes the distance between the two characters passed |
|
299
|
|
|
|
|
|
|
on a qwerty keyboard. |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=cut |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub qwerty_char_distance |
|
304
|
|
|
|
|
|
|
{ |
|
305
|
8926
|
100
|
|
8926
|
1
|
18158
|
return 0 if $_[0] eq $_[1]; # return 0 if same, regardless of map |
|
306
|
|
|
|
|
|
|
# if either of the chars are not in the map, return the max distance |
|
307
|
8789
|
50
|
|
|
|
16127
|
return $qwerty_max_distance unless $qwerty_map->[ord $_[0]]; |
|
308
|
8789
|
50
|
|
|
|
15827
|
return $qwerty_max_distance unless $qwerty_map->[ord $_[1]]; |
|
309
|
8789
|
|
|
|
|
13668
|
return grid_distance( |
|
310
|
8789
|
|
|
|
|
17877
|
@{$qwerty_map->[ord $_[0]]}[0,1], |
|
311
|
8789
|
|
|
|
|
9352
|
@{$qwerty_map->[ord $_[1]]}[0,1], |
|
312
|
|
|
|
|
|
|
); |
|
313
|
|
|
|
|
|
|
} |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head2 dvorak_char_distance |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
param: char 1 |
|
318
|
|
|
|
|
|
|
param: char 2 |
|
319
|
|
|
|
|
|
|
return: distance |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
This function computes the distance between the two characters passed |
|
322
|
|
|
|
|
|
|
on a dvorak keyboard. |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub dvorak_char_distance |
|
327
|
|
|
|
|
|
|
{ |
|
328
|
8926
|
100
|
|
8926
|
1
|
15625
|
return 0 if $_[0] eq $_[1]; # return 0 if same, regardless of map |
|
329
|
|
|
|
|
|
|
# if either of the chars are not in the map, return the max distance |
|
330
|
8789
|
50
|
|
|
|
14668
|
return $dvorak_max_distance unless $dvorak_map->[ord $_[0]]; |
|
331
|
8789
|
50
|
|
|
|
13633
|
return $dvorak_max_distance unless $dvorak_map->[ord $_[1]]; |
|
332
|
8789
|
|
|
|
|
11952
|
return grid_distance( |
|
333
|
8789
|
|
|
|
|
15861
|
@{$dvorak_map->[ord $_[0]]}[0,1], |
|
334
|
8789
|
|
|
|
|
7889
|
@{$dvorak_map->[ord $_[1]]}[0,1], |
|
335
|
|
|
|
|
|
|
); |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head2 grid_distance |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
param: x1 - point 1's x coordinate |
|
341
|
|
|
|
|
|
|
param: y1 - point 1's y coordinate |
|
342
|
|
|
|
|
|
|
param: x2 - point 2's x coordinate |
|
343
|
|
|
|
|
|
|
param: y2 - point 2's y coordinate |
|
344
|
|
|
|
|
|
|
return: distance between points |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
This function returns the distance between two points. If the two |
|
347
|
|
|
|
|
|
|
points have an x distance of 1, and a y distance of 1, then they |
|
348
|
|
|
|
|
|
|
are considered to be a distance of 1 apart. This is meant to help |
|
349
|
|
|
|
|
|
|
prevent horizontal/vertical bias in the distancing function. Otherwise |
|
350
|
|
|
|
|
|
|
we use the following formula: |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sqrt( (x1 - x2)**2 + (y1 - y2)**2 ); |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=cut |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub grid_distance |
|
357
|
|
|
|
|
|
|
{ |
|
358
|
17578
|
100
|
100
|
17578
|
1
|
44300
|
return 0 if($_[0] == $_[2] && $_[1] == $_[3]); # points are same |
|
359
|
17390
|
100
|
100
|
|
|
53776
|
return 1 if(abs($_[0] - $_[2]) == 1 && abs($_[1] - $_[3]) == 1); # same as 1 |
|
360
|
16358
|
|
|
|
|
37192
|
sqrt(($_[0] - $_[2])**2 + ($_[1] - $_[3])**2); |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head2 qwerty_keyboard_distance |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
param: string1 |
|
366
|
|
|
|
|
|
|
param: string2 |
|
367
|
|
|
|
|
|
|
return: distance |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Returns the sum of the distances between corresponding characters |
|
370
|
|
|
|
|
|
|
in the two strings. If one string is longer than the other the |
|
371
|
|
|
|
|
|
|
remaining characters are counted as having the same value as the |
|
372
|
|
|
|
|
|
|
maximum distance. |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=cut |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub qwerty_keyboard_distance |
|
377
|
|
|
|
|
|
|
{ |
|
378
|
29
|
|
|
29
|
1
|
35
|
my($s1,$s2) = @_; |
|
379
|
29
|
|
|
|
|
38
|
my($l1,$l2) = (length($s1),length($s2)); |
|
380
|
29
|
100
|
|
|
|
40
|
my $short = $l1 < $l2 ? $s1 : $s2; |
|
381
|
29
|
100
|
|
|
|
46
|
my $long = $l1 < $l2 ? $s2 : $s1; |
|
382
|
29
|
100
|
|
|
|
38
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
|
383
|
29
|
100
|
|
|
|
40
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
|
384
|
|
|
|
|
|
|
|
|
385
|
29
|
|
|
|
|
29
|
my $tot = 0; |
|
386
|
29
|
|
|
|
|
24
|
my $i; |
|
387
|
29
|
|
|
|
|
62
|
for($i = 0; $i < $ls; ++$i) { |
|
388
|
|
|
|
|
|
|
#print "calling distance(",substr($short,$i,1),',',$long,$i,1,")\n"; |
|
389
|
184
|
|
|
|
|
367
|
$tot += abs(qwerty_char_distance(substr($short,$i,1),substr($long,$i,1))); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
|
|
392
|
29
|
|
|
|
|
59
|
while($i < $ll) { |
|
393
|
12
|
|
|
|
|
12
|
$tot += $qwerty_max_distance; |
|
394
|
12
|
|
|
|
|
19
|
++$i; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
29
|
|
|
|
|
60
|
return $tot; |
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=head2 qwerty_keyboard_distance_match |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
param: string1 |
|
403
|
|
|
|
|
|
|
param: string2 |
|
404
|
|
|
|
|
|
|
return: probability of match |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
The probability of a match is: |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Pr = 1 - ( D / (L * M) ) |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Where D is the distance between the two strings, L is the length of |
|
411
|
|
|
|
|
|
|
the longer string, and M is the maximum character distance. |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=cut |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub qwerty_keyboard_distance_match |
|
416
|
|
|
|
|
|
|
{ |
|
417
|
29
|
|
|
29
|
1
|
1114
|
my($s1,$s2) = @_; |
|
418
|
29
|
|
|
|
|
64
|
my($l1,$l2) = (length($s1),length($s2)); |
|
419
|
29
|
100
|
|
|
|
58
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
|
420
|
29
|
100
|
|
|
|
37
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
|
421
|
29
|
|
|
|
|
47
|
my $dst = qwerty_keyboard_distance($s1,$s2); |
|
422
|
29
|
|
|
|
|
90
|
return (1 - ($dst/($ll*$qwerty_max_distance))); |
|
423
|
|
|
|
|
|
|
} |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 dvorak_keyboard_distance |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
param: string1 |
|
428
|
|
|
|
|
|
|
param: string2 |
|
429
|
|
|
|
|
|
|
return: distance |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Returns the sum of the distances between corresponding characters |
|
432
|
|
|
|
|
|
|
in the two strings. If one string is longer than the other the |
|
433
|
|
|
|
|
|
|
remaining characters are counted as having the same value as the |
|
434
|
|
|
|
|
|
|
maximum distance. |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub dvorak_keyboard_distance |
|
439
|
|
|
|
|
|
|
{ |
|
440
|
29
|
|
|
29
|
1
|
32
|
my($s1,$s2) = @_; |
|
441
|
29
|
|
|
|
|
34
|
my($l1,$l2) = (length($s1),length($s2)); |
|
442
|
29
|
100
|
|
|
|
43
|
my $short = $l1 < $l2 ? $s1 : $s2; |
|
443
|
29
|
100
|
|
|
|
40
|
my $long = $l1 < $l2 ? $s2 : $s1; |
|
444
|
29
|
100
|
|
|
|
49
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
|
445
|
29
|
100
|
|
|
|
36
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
|
446
|
|
|
|
|
|
|
|
|
447
|
29
|
|
|
|
|
27
|
my $tot = 0; |
|
448
|
29
|
|
|
|
|
28
|
my $i = 0; |
|
449
|
29
|
|
|
|
|
55
|
for($i = 0; $i < $ls; ++$i) { |
|
450
|
|
|
|
|
|
|
#print "calling distance(",substr($short,$i,1),',',$long,$i,1,")\n"; |
|
451
|
184
|
|
|
|
|
376
|
$tot += abs(dvorak_char_distance(substr($short,$i,1),substr($long,$i,1))); |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
29
|
|
|
|
|
54
|
while($i < $ll) { |
|
455
|
12
|
|
|
|
|
14
|
$tot += $dvorak_max_distance; |
|
456
|
12
|
|
|
|
|
22
|
++$i; |
|
457
|
|
|
|
|
|
|
} |
|
458
|
29
|
|
|
|
|
56
|
return $tot; |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 dvorak_keyboard_distance_match |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
param: string1 |
|
464
|
|
|
|
|
|
|
param: string2 |
|
465
|
|
|
|
|
|
|
return: probability of match |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
The probability of a match is computed in the same way as |
|
468
|
|
|
|
|
|
|
for qwerty_keyboard_distance_match(). |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub dvorak_keyboard_distance_match |
|
473
|
|
|
|
|
|
|
{ |
|
474
|
29
|
|
|
29
|
1
|
82
|
my($s1,$s2) = @_; |
|
475
|
29
|
|
|
|
|
37
|
my($l1,$l2) = (length($s1),length($s2)); |
|
476
|
29
|
100
|
|
|
|
51
|
my $ls = $l1 < $l2 ? $l1 : $l2; |
|
477
|
29
|
100
|
|
|
|
43
|
my $ll = $l1 < $l2 ? $l2 : $l1; |
|
478
|
29
|
|
|
|
|
40
|
my $dst = dvorak_keyboard_distance($s1,$s2); |
|
479
|
29
|
|
|
|
|
74
|
return 1 - ($dst/($ll*$dvorak_max_distance)); |
|
480
|
|
|
|
|
|
|
} |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
1; |
|
483
|
|
|
|
|
|
|
__END__ |