| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
=head1 NAME |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Lingua::EN::NameParse::Grammar - grammar tree for Lingua::EN::NameParse |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Internal functions called from NameParse.pm module |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Grammar tree of personal name syntax for the Lingua::EN::NameParse module. |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
The grammar defined here is for use with the Parse::RecDescent module. |
|
14
|
|
|
|
|
|
|
Note that parsing is done depth first, meaning match the shortest string first. |
|
15
|
|
|
|
|
|
|
To avoid premature matches, when one rule is a subset of another longer rule, |
|
16
|
|
|
|
|
|
|
it must appear after the longer rule. See the Parse::RecDescent documentation |
|
17
|
|
|
|
|
|
|
for more details. |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 AUTHOR |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Lingua::EN::NameParse::Grammar was written by Kim Ryan. |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Copyright (c) 2018 Kim Ryan. All rights reserved. |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
29
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
|
33
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
package Lingua::EN::NameParse::Grammar; |
|
36
|
2
|
|
|
2
|
|
15
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
55
|
|
|
37
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
952
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = '1.38'; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Rules that define valid orderings of a name's components |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $rules_start = q{ full_name : }; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my $rules_joint_names = |
|
47
|
|
|
|
|
|
|
q{ |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# A (?) refers to an optional component, occurring 0 or more times. |
|
50
|
|
|
|
|
|
|
# Optional items are returned as an array, which for our case will |
|
51
|
|
|
|
|
|
|
# always consist of one element, when they exist. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
title given_name surname conjunction title given_name surname non_matching(?) |
|
54
|
|
|
|
|
|
|
{ |
|
55
|
|
|
|
|
|
|
# block of code to define actions upon successful completion of a |
|
56
|
|
|
|
|
|
|
# 'production' or rule |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Two separate people |
|
59
|
|
|
|
|
|
|
$return = |
|
60
|
|
|
|
|
|
|
{ |
|
61
|
|
|
|
|
|
|
# Parse::RecDescent lets you return a single scalar, which we use as |
|
62
|
|
|
|
|
|
|
# an anonymous hash reference |
|
63
|
|
|
|
|
|
|
title_1 => $item[1], |
|
64
|
|
|
|
|
|
|
given_name_1 => $item[2], |
|
65
|
|
|
|
|
|
|
surname_1 => $item[3], |
|
66
|
|
|
|
|
|
|
conjunction_1 => $item[4], |
|
67
|
|
|
|
|
|
|
title_2 => $item[5], |
|
68
|
|
|
|
|
|
|
given_name_2 => $item[6], |
|
69
|
|
|
|
|
|
|
surname_2 => $item[7], |
|
70
|
|
|
|
|
|
|
non_matching => $item[8][0], |
|
71
|
|
|
|
|
|
|
number => 2, |
|
72
|
|
|
|
|
|
|
type => 'Mr_John_Smith_&_Ms_Mary_Jones' |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
| |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
title initials surname conjunction title initials surname non_matching(?) |
|
79
|
|
|
|
|
|
|
{ |
|
80
|
|
|
|
|
|
|
$return = |
|
81
|
|
|
|
|
|
|
{ |
|
82
|
|
|
|
|
|
|
title_1 => $item[1], |
|
83
|
|
|
|
|
|
|
initials_1 => $item[2], |
|
84
|
|
|
|
|
|
|
surname_1 => $item[3], |
|
85
|
|
|
|
|
|
|
conjunction_1 => $item[4], |
|
86
|
|
|
|
|
|
|
title_2 => $item[5], |
|
87
|
|
|
|
|
|
|
initials_2 => $item[6], |
|
88
|
|
|
|
|
|
|
surname_2 => $item[7], |
|
89
|
|
|
|
|
|
|
non_matching => $item[8][0], |
|
90
|
|
|
|
|
|
|
number => 2, |
|
91
|
|
|
|
|
|
|
type => 'Mr_A_Smith_&_Ms_B_Jones' |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
| |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
title initials conjunction title initials surname non_matching(?) |
|
97
|
|
|
|
|
|
|
{ |
|
98
|
|
|
|
|
|
|
# Two related people, own initials, shared surname |
|
99
|
|
|
|
|
|
|
$return = |
|
100
|
|
|
|
|
|
|
{ |
|
101
|
|
|
|
|
|
|
title_1 => $item[1], |
|
102
|
|
|
|
|
|
|
initials_1 => $item[2], |
|
103
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
|
104
|
|
|
|
|
|
|
title_2 => $item[4], |
|
105
|
|
|
|
|
|
|
initials_2 => $item[5], |
|
106
|
|
|
|
|
|
|
surname_1 => $item[6], |
|
107
|
|
|
|
|
|
|
non_matching => $item[7][0], |
|
108
|
|
|
|
|
|
|
number => 2, |
|
109
|
|
|
|
|
|
|
type => 'Mr_A_&_Ms_B_Smith' |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
| |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
title initials conjunction initials surname non_matching(?) |
|
115
|
|
|
|
|
|
|
{ |
|
116
|
|
|
|
|
|
|
# Two related people, shared title, separate initials, |
|
117
|
|
|
|
|
|
|
# shared surname. Example, father and son, sisters |
|
118
|
|
|
|
|
|
|
$return = |
|
119
|
|
|
|
|
|
|
{ |
|
120
|
|
|
|
|
|
|
title_1 => $item[1], |
|
121
|
|
|
|
|
|
|
initials_1 => $item[2], |
|
122
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
|
123
|
|
|
|
|
|
|
initials_2 => $item[4], |
|
124
|
|
|
|
|
|
|
surname_1 => $item[5], |
|
125
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
126
|
|
|
|
|
|
|
number => 2, |
|
127
|
|
|
|
|
|
|
type => 'Mr_A_&_B_Smith' |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
| |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
title conjunction title initials conjunction initials surname non_matching(?) |
|
134
|
|
|
|
|
|
|
{ |
|
135
|
|
|
|
|
|
|
# Two related people, own initials, shared surname |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
$return = |
|
138
|
|
|
|
|
|
|
{ |
|
139
|
|
|
|
|
|
|
title_1 => $item[1], |
|
140
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
|
141
|
|
|
|
|
|
|
title_2 => $item[3], |
|
142
|
|
|
|
|
|
|
initials_1 => $item[4], |
|
143
|
|
|
|
|
|
|
conjunction_2 => $item[5], |
|
144
|
|
|
|
|
|
|
initials_2 => $item[6], |
|
145
|
|
|
|
|
|
|
surname_1 => $item[7], |
|
146
|
|
|
|
|
|
|
non_matching => $item[8][0], |
|
147
|
|
|
|
|
|
|
number => 2, |
|
148
|
|
|
|
|
|
|
type => 'Mr_&_Ms_A_&_B_Smith' |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
| |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
title conjunction title initials surname non_matching(?) |
|
155
|
|
|
|
|
|
|
{ |
|
156
|
|
|
|
|
|
|
# Two related people, shared initials, shared surname |
|
157
|
|
|
|
|
|
|
$return = |
|
158
|
|
|
|
|
|
|
{ |
|
159
|
|
|
|
|
|
|
title_1 => $item[1], |
|
160
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
|
161
|
|
|
|
|
|
|
title_2 => $item[3], |
|
162
|
|
|
|
|
|
|
initials_1 => $item[4], |
|
163
|
|
|
|
|
|
|
surname_1 => $item[5], |
|
164
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
165
|
|
|
|
|
|
|
number => 2, |
|
166
|
|
|
|
|
|
|
type => 'Mr_&_Ms_A_Smith' |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
| |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
given_name surname conjunction given_name surname non_matching(?) |
|
172
|
|
|
|
|
|
|
{ |
|
173
|
|
|
|
|
|
|
$return = |
|
174
|
|
|
|
|
|
|
{ |
|
175
|
|
|
|
|
|
|
given_name_1 => $item[1], |
|
176
|
|
|
|
|
|
|
surname_1 => $item[2], |
|
177
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
|
178
|
|
|
|
|
|
|
given_name_2 => $item[4], |
|
179
|
|
|
|
|
|
|
surname_2 => $item[5], |
|
180
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
181
|
|
|
|
|
|
|
number => 2, |
|
182
|
|
|
|
|
|
|
type => 'John_Smith_&_Mary_Jones' |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
| |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
initials surname conjunction initials surname non_matching(?) |
|
188
|
|
|
|
|
|
|
{ |
|
189
|
|
|
|
|
|
|
$return = |
|
190
|
|
|
|
|
|
|
{ |
|
191
|
|
|
|
|
|
|
initials_1 => $item[1], |
|
192
|
|
|
|
|
|
|
surname_1 => $item[2], |
|
193
|
|
|
|
|
|
|
conjunction_1 => $item[3], |
|
194
|
|
|
|
|
|
|
initials_2 => $item[4], |
|
195
|
|
|
|
|
|
|
surname_2 => $item[5], |
|
196
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
197
|
|
|
|
|
|
|
number => 2, |
|
198
|
|
|
|
|
|
|
type => 'A_Smith_&_B_Jones' |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
| |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
given_name conjunction given_name surname non_matching(?) |
|
204
|
|
|
|
|
|
|
{ |
|
205
|
|
|
|
|
|
|
$return = |
|
206
|
|
|
|
|
|
|
{ |
|
207
|
|
|
|
|
|
|
given_name_1 => $item[1], |
|
208
|
|
|
|
|
|
|
conjunction_1 => $item[2], |
|
209
|
|
|
|
|
|
|
given_name_2 => $item[3], |
|
210
|
|
|
|
|
|
|
surname_2 => $item[4], |
|
211
|
|
|
|
|
|
|
non_matching => $item[5][0], |
|
212
|
|
|
|
|
|
|
number => 2, |
|
213
|
|
|
|
|
|
|
type => 'John_&_Mary_Smith' |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
| |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
}; |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
my $rules_single_names = |
|
221
|
|
|
|
|
|
|
q{ |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
precursor(?) title given_name_standard middle_name surname suffix(?) non_matching(?) |
|
224
|
|
|
|
|
|
|
{ |
|
225
|
|
|
|
|
|
|
$return = |
|
226
|
|
|
|
|
|
|
{ |
|
227
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
228
|
|
|
|
|
|
|
title_1 => $item[2], |
|
229
|
|
|
|
|
|
|
given_name_1 => $item[3], |
|
230
|
|
|
|
|
|
|
middle_name => $item[4], |
|
231
|
|
|
|
|
|
|
surname_1 => $item[5], |
|
232
|
|
|
|
|
|
|
suffix => $item[6][0], |
|
233
|
|
|
|
|
|
|
non_matching => $item[7][0], |
|
234
|
|
|
|
|
|
|
number => 1, |
|
235
|
|
|
|
|
|
|
type => 'Mr_John_Adam_Smith' |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
| |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
precursor(?) title given_name_standard single_initial surname suffix(?) non_matching(?) |
|
241
|
|
|
|
|
|
|
{ |
|
242
|
|
|
|
|
|
|
$return = |
|
243
|
|
|
|
|
|
|
{ |
|
244
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
245
|
|
|
|
|
|
|
title_1 => $item[2], |
|
246
|
|
|
|
|
|
|
given_name_1 => $item[3], |
|
247
|
|
|
|
|
|
|
initials_1 => $item[4], |
|
248
|
|
|
|
|
|
|
surname_1 => $item[5], |
|
249
|
|
|
|
|
|
|
suffix => $item[6][0], |
|
250
|
|
|
|
|
|
|
non_matching => $item[7][0], |
|
251
|
|
|
|
|
|
|
number => 1, |
|
252
|
|
|
|
|
|
|
type => 'Mr_John_A_Smith' |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
| |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
precursor(?) title given_name surname suffix(?) non_matching(?) |
|
258
|
|
|
|
|
|
|
{ |
|
259
|
|
|
|
|
|
|
$return = |
|
260
|
|
|
|
|
|
|
{ |
|
261
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
262
|
|
|
|
|
|
|
title_1 => $item[2], |
|
263
|
|
|
|
|
|
|
given_name_1 => $item[3], |
|
264
|
|
|
|
|
|
|
surname_1 => $item[4], |
|
265
|
|
|
|
|
|
|
suffix => $item[5][0], |
|
266
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
267
|
|
|
|
|
|
|
number => 1, |
|
268
|
|
|
|
|
|
|
type => 'Mr_John_Smith' |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
| |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
precursor(?) title initials surname suffix(?) non_matching(?) |
|
274
|
|
|
|
|
|
|
{ |
|
275
|
|
|
|
|
|
|
$return = |
|
276
|
|
|
|
|
|
|
{ |
|
277
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
278
|
|
|
|
|
|
|
title_1 => $item[2], |
|
279
|
|
|
|
|
|
|
initials_1 => $item[3], |
|
280
|
|
|
|
|
|
|
surname_1 => $item[4], |
|
281
|
|
|
|
|
|
|
suffix => $item[5][0], |
|
282
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
283
|
|
|
|
|
|
|
number => 1, |
|
284
|
|
|
|
|
|
|
type => 'Mr_A_Smith' |
|
285
|
|
|
|
|
|
|
} |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
| |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
precursor(?) given_name_standard middle_name surname suffix(?) non_matching(?) |
|
290
|
|
|
|
|
|
|
{ |
|
291
|
|
|
|
|
|
|
$return = |
|
292
|
|
|
|
|
|
|
{ |
|
293
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
294
|
|
|
|
|
|
|
given_name_1 => $item[2], |
|
295
|
|
|
|
|
|
|
middle_name => $item[3], |
|
296
|
|
|
|
|
|
|
surname_1 => $item[4], |
|
297
|
|
|
|
|
|
|
suffix => $item[5][0], |
|
298
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
299
|
|
|
|
|
|
|
number => 1, |
|
300
|
|
|
|
|
|
|
type => 'John_Adam_Smith' |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
} |
|
303
|
|
|
|
|
|
|
| |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
precursor(?) given_name_standard single_initial surname suffix(?) non_matching(?) |
|
306
|
|
|
|
|
|
|
{ |
|
307
|
|
|
|
|
|
|
$return = |
|
308
|
|
|
|
|
|
|
{ |
|
309
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
310
|
|
|
|
|
|
|
given_name_1 => $item[2], |
|
311
|
|
|
|
|
|
|
initials_1 => $item[3], |
|
312
|
|
|
|
|
|
|
surname_1 => $item[4], |
|
313
|
|
|
|
|
|
|
suffix => $item[5][0], |
|
314
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
315
|
|
|
|
|
|
|
number => 1, |
|
316
|
|
|
|
|
|
|
type => 'John_A_Smith' |
|
317
|
|
|
|
|
|
|
} |
|
318
|
|
|
|
|
|
|
} |
|
319
|
|
|
|
|
|
|
| |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
precursor(?) single_initial middle_name surname suffix(?) non_matching(?) |
|
322
|
|
|
|
|
|
|
{ |
|
323
|
|
|
|
|
|
|
$return = |
|
324
|
|
|
|
|
|
|
{ |
|
325
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
326
|
|
|
|
|
|
|
initials_1 => $item[2], |
|
327
|
|
|
|
|
|
|
middle_name => $item[3], |
|
328
|
|
|
|
|
|
|
surname_1 => $item[4], |
|
329
|
|
|
|
|
|
|
suffix => $item[5][0], |
|
330
|
|
|
|
|
|
|
non_matching => $item[6][0], |
|
331
|
|
|
|
|
|
|
number => 1, |
|
332
|
|
|
|
|
|
|
type => 'J_Adam_Smith' |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
| |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
precursor(?) given_name surname suffix(?) non_matching(?) |
|
338
|
|
|
|
|
|
|
{ |
|
339
|
|
|
|
|
|
|
$return = |
|
340
|
|
|
|
|
|
|
{ |
|
341
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
342
|
|
|
|
|
|
|
given_name_1 => $item[2], |
|
343
|
|
|
|
|
|
|
surname_1 => $item[3], |
|
344
|
|
|
|
|
|
|
suffix => $item[4][0], |
|
345
|
|
|
|
|
|
|
non_matching => $item[5][0], |
|
346
|
|
|
|
|
|
|
number => 1, |
|
347
|
|
|
|
|
|
|
type => 'John_Smith' |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
| |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
precursor(?) initials surname suffix(?) non_matching(?) |
|
353
|
|
|
|
|
|
|
{ |
|
354
|
|
|
|
|
|
|
$return = |
|
355
|
|
|
|
|
|
|
{ |
|
356
|
|
|
|
|
|
|
precursor => $item[1][0], |
|
357
|
|
|
|
|
|
|
initials_1 => $item[2], |
|
358
|
|
|
|
|
|
|
surname_1 => $item[3], |
|
359
|
|
|
|
|
|
|
suffix => $item[4][0], |
|
360
|
|
|
|
|
|
|
non_matching => $item[5][0], |
|
361
|
|
|
|
|
|
|
number => 1, |
|
362
|
|
|
|
|
|
|
type => 'A_Smith' |
|
363
|
|
|
|
|
|
|
} |
|
364
|
|
|
|
|
|
|
} |
|
365
|
|
|
|
|
|
|
| |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
given_name_standard non_matching(?) |
|
368
|
|
|
|
|
|
|
{ |
|
369
|
|
|
|
|
|
|
$return = |
|
370
|
|
|
|
|
|
|
{ |
|
371
|
|
|
|
|
|
|
given_name_1 => $item[1], |
|
372
|
|
|
|
|
|
|
non_matching => $item[2][0], |
|
373
|
|
|
|
|
|
|
number => 1, |
|
374
|
|
|
|
|
|
|
type => 'John' |
|
375
|
|
|
|
|
|
|
} |
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
| |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
non_matching(?) |
|
380
|
|
|
|
|
|
|
{ |
|
381
|
|
|
|
|
|
|
$return = |
|
382
|
|
|
|
|
|
|
{ |
|
383
|
|
|
|
|
|
|
non_matching => $item[1][0], |
|
384
|
|
|
|
|
|
|
number => 0, |
|
385
|
|
|
|
|
|
|
type => 'unknown' |
|
386
|
|
|
|
|
|
|
} |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
}; |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
391
|
|
|
|
|
|
|
# Individual components that a name can be composed from. Components are |
|
392
|
|
|
|
|
|
|
# expressed as literals or Perl regular expressions. |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
my $titles = |
|
396
|
|
|
|
|
|
|
q{ |
|
397
|
|
|
|
|
|
|
title : /(MR|MS|M\/S|MRS|MISS|DR) / |
|
398
|
|
|
|
|
|
|
}; |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
my $extended_titles = |
|
401
|
|
|
|
|
|
|
q{ |
|
402
|
|
|
|
|
|
|
| |
|
403
|
|
|
|
|
|
|
/( |
|
404
|
|
|
|
|
|
|
SIR| |
|
405
|
|
|
|
|
|
|
MESSRS| # Plural or Mr |
|
406
|
|
|
|
|
|
|
MADAME?| |
|
407
|
|
|
|
|
|
|
MME| # Madame |
|
408
|
|
|
|
|
|
|
MISTER| |
|
409
|
|
|
|
|
|
|
MASTER| |
|
410
|
|
|
|
|
|
|
MAST| |
|
411
|
|
|
|
|
|
|
MS?GR| # Monsignor |
|
412
|
|
|
|
|
|
|
COUNT| |
|
413
|
|
|
|
|
|
|
COUNTESS| |
|
414
|
|
|
|
|
|
|
DUKE| |
|
415
|
|
|
|
|
|
|
DUCHESS| |
|
416
|
|
|
|
|
|
|
LORD| |
|
417
|
|
|
|
|
|
|
LADY| |
|
418
|
|
|
|
|
|
|
MARQUESS| |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# Medical |
|
421
|
|
|
|
|
|
|
DOCTOR|SISTER|MATRON| |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Legal |
|
424
|
|
|
|
|
|
|
JUDGE| |
|
425
|
|
|
|
|
|
|
JUSTICE| |
|
426
|
|
|
|
|
|
|
MAGISTRATE| |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# Police |
|
429
|
|
|
|
|
|
|
DET|INSP|CONST| |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# Military |
|
432
|
|
|
|
|
|
|
BRIGDIER|BRIG| |
|
433
|
|
|
|
|
|
|
CAPTAIN|CAPT| |
|
434
|
|
|
|
|
|
|
COLONEL|COL| |
|
435
|
|
|
|
|
|
|
COMMANDER IN CHIEF|COMMANDER| |
|
436
|
|
|
|
|
|
|
COMMODORE| |
|
437
|
|
|
|
|
|
|
CDR| # Commander, Commodore |
|
438
|
|
|
|
|
|
|
FIELD\ MARSHALL| |
|
439
|
|
|
|
|
|
|
FLIGHT\ OFFICER| FL OFF| |
|
440
|
|
|
|
|
|
|
FLIGHT\ LIEUTENANT|FLT LT| |
|
441
|
|
|
|
|
|
|
PILOT\ OFFICER| |
|
442
|
|
|
|
|
|
|
GENERAL\ OF\ THE\ ARMY|GENERAL|GEN| |
|
443
|
|
|
|
|
|
|
PTE|PVT|PRIVATE| |
|
444
|
|
|
|
|
|
|
SGT|SARGENT| |
|
445
|
|
|
|
|
|
|
AIR\ COMMANDER| |
|
446
|
|
|
|
|
|
|
AIR\ COMMODORE| |
|
447
|
|
|
|
|
|
|
AIR\ MARSHALL| |
|
448
|
|
|
|
|
|
|
LIEUTENANT\ COLONEL|LT\ COL| |
|
449
|
|
|
|
|
|
|
LT\ GEN| |
|
450
|
|
|
|
|
|
|
LT\ CDR| |
|
451
|
|
|
|
|
|
|
LIEUTENANT|LT|LEUT|LIEUT| |
|
452
|
|
|
|
|
|
|
MAJOR GENERAL|MAJ GEN| |
|
453
|
|
|
|
|
|
|
MAJOR|MAJ| |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# Religious |
|
456
|
|
|
|
|
|
|
RABBI| |
|
457
|
|
|
|
|
|
|
BISHOP| |
|
458
|
|
|
|
|
|
|
BROTHER| |
|
459
|
|
|
|
|
|
|
CHAPLAIN| |
|
460
|
|
|
|
|
|
|
FATHER| |
|
461
|
|
|
|
|
|
|
PASTOR| |
|
462
|
|
|
|
|
|
|
MOTHER\ SUPERIOR|MOTHER| |
|
463
|
|
|
|
|
|
|
MOST\ REVER[E|A]ND| |
|
464
|
|
|
|
|
|
|
MT\ REVD|V\ REVD|REVD| |
|
465
|
|
|
|
|
|
|
MUFTI| |
|
466
|
|
|
|
|
|
|
REVER[E|A]ND| |
|
467
|
|
|
|
|
|
|
REVD| |
|
468
|
|
|
|
|
|
|
REV| |
|
469
|
|
|
|
|
|
|
SHEIKH?| |
|
470
|
|
|
|
|
|
|
VERY\ REVER[E|A]ND| |
|
471
|
|
|
|
|
|
|
VICAR| |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# Other |
|
476
|
|
|
|
|
|
|
AMBASSADOR| |
|
477
|
|
|
|
|
|
|
PROFESSOR| |
|
478
|
|
|
|
|
|
|
PROF| |
|
479
|
|
|
|
|
|
|
ALDERMAN|ALD| |
|
480
|
|
|
|
|
|
|
COUNCILLOR |
|
481
|
|
|
|
|
|
|
)\ /x |
|
482
|
|
|
|
|
|
|
}; |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
my $common = |
|
485
|
|
|
|
|
|
|
q{ |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
precursor : |
|
488
|
|
|
|
|
|
|
/( |
|
489
|
|
|
|
|
|
|
ESTATE\ OF\ THE\ LATE| |
|
490
|
|
|
|
|
|
|
ESTATE\ OF| |
|
491
|
|
|
|
|
|
|
HIS\ EXCELLENCY| |
|
492
|
|
|
|
|
|
|
HIS\ HONOU?R| |
|
493
|
|
|
|
|
|
|
HER\ EXCELLENCY| |
|
494
|
|
|
|
|
|
|
HER\ HONOU?R| |
|
495
|
|
|
|
|
|
|
THE\ RIGHT HONOU?RABLE| |
|
496
|
|
|
|
|
|
|
THE\ HONOU?RABLE| |
|
497
|
|
|
|
|
|
|
RIGHT\ HONOU?RABLE| |
|
498
|
|
|
|
|
|
|
THE\ RT\ HON| |
|
499
|
|
|
|
|
|
|
THE\ HON| |
|
500
|
|
|
|
|
|
|
RT\ HON |
|
501
|
|
|
|
|
|
|
)\ /x |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
conjunction : /AND |& / |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# Used in the John_A_Smith and J_Adam_Smith name types, as well as when intials are set to 1 |
|
506
|
|
|
|
|
|
|
single_initial: /[A-Z] / |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
# Examples are Jo-Anne, D'Artagnan, O'Shaugnessy La'Keishia, T-Bone |
|
509
|
|
|
|
|
|
|
split_given_name : /[A-Z]{1,}['|-][A-Z]{2,} / |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
constonant: /[A-DF-HJ-NP-TV-Z]]/ |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# For use with John_Adam_Smith and John_A_Smith name types |
|
514
|
|
|
|
|
|
|
given_name_standard: |
|
515
|
|
|
|
|
|
|
/[AEIOU]/ constonant / / | |
|
516
|
|
|
|
|
|
|
constonant /[AEIOUY] / | |
|
517
|
|
|
|
|
|
|
/[A-Z]{2,} / | |
|
518
|
|
|
|
|
|
|
split_given_name |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# Patronymic, place name and other surname prefixes |
|
522
|
|
|
|
|
|
|
prefix: |
|
523
|
|
|
|
|
|
|
/( |
|
524
|
|
|
|
|
|
|
[A|E]L| # ARABIC, GREEK, |
|
525
|
|
|
|
|
|
|
AP| # WELSH |
|
526
|
|
|
|
|
|
|
BEN| # HEBREW |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
DELLA|DELLE|DALLE| # ITALIAN |
|
529
|
|
|
|
|
|
|
DELA| |
|
530
|
|
|
|
|
|
|
DELL?| |
|
531
|
|
|
|
|
|
|
DE\ LA| |
|
532
|
|
|
|
|
|
|
DE\ LOS| |
|
533
|
|
|
|
|
|
|
DE| |
|
534
|
|
|
|
|
|
|
D[A|I|U]| |
|
535
|
|
|
|
|
|
|
L[A|E|O]| |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
ST| # ABBREVIATION FOR SAINT |
|
538
|
|
|
|
|
|
|
SAN| # SPANISH |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# DUTCH |
|
541
|
|
|
|
|
|
|
DEN| |
|
542
|
|
|
|
|
|
|
VON\ DER| |
|
543
|
|
|
|
|
|
|
VON| |
|
544
|
|
|
|
|
|
|
VAN\ DE[N|R]| |
|
545
|
|
|
|
|
|
|
VAN |
|
546
|
|
|
|
|
|
|
)\ /x |
|
547
|
|
|
|
|
|
|
| |
|
548
|
|
|
|
|
|
|
/[D|L|O]'/ # ITALIAN, IRISH OR FRENCH, abbreviation for 'the', 'of' etc |
|
549
|
|
|
|
|
|
|
| |
|
550
|
|
|
|
|
|
|
/D[A|E]LL'/ |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
middle_name: |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
# Dont grab surname prefix too early. For example, John Van Dam could be |
|
555
|
|
|
|
|
|
|
# interpreted as middle name of Van and Surname of Dam. So exclude prefixs |
|
556
|
|
|
|
|
|
|
# from middle names |
|
557
|
|
|
|
|
|
|
...!prefix given_name |
|
558
|
|
|
|
|
|
|
{ |
|
559
|
|
|
|
|
|
|
$return = $item[2]; |
|
560
|
|
|
|
|
|
|
} |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# Use look-ahead to avoid ambiguity between surname and suffix. For example, |
|
564
|
|
|
|
|
|
|
# John Smith Snr, would detect Snr as the surname and Smith as the middle name |
|
565
|
|
|
|
|
|
|
surname : ...!suffix first_surname second_surname(?) |
|
566
|
|
|
|
|
|
|
{ |
|
567
|
|
|
|
|
|
|
if ( $item[2] and $item[3][0] ) |
|
568
|
|
|
|
|
|
|
{ |
|
569
|
|
|
|
|
|
|
$return = "$item[2]$item[3][0]"; |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
else |
|
572
|
|
|
|
|
|
|
{ |
|
573
|
|
|
|
|
|
|
$return = $item[2]; |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
first_surname : prefix name |
|
578
|
|
|
|
|
|
|
{ |
|
579
|
|
|
|
|
|
|
$return = "$item[1]$item[2]"; |
|
580
|
|
|
|
|
|
|
} |
|
581
|
|
|
|
|
|
|
| |
|
582
|
|
|
|
|
|
|
name |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
second_surname : '-' name |
|
586
|
|
|
|
|
|
|
{ |
|
587
|
|
|
|
|
|
|
if ( $item[1] and $item[2] ) |
|
588
|
|
|
|
|
|
|
{ |
|
589
|
|
|
|
|
|
|
$return = "$item[1]$item[2]"; |
|
590
|
|
|
|
|
|
|
} |
|
591
|
|
|
|
|
|
|
} |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# Note space will not occur for first part of a hphenated surname |
|
594
|
|
|
|
|
|
|
# AddressParse::_valid_name will do further check on name context |
|
595
|
|
|
|
|
|
|
name : /[A-Z]{2,} ?/ |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
suffix: |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
/( |
|
601
|
|
|
|
|
|
|
ESQUIRE| |
|
602
|
|
|
|
|
|
|
ESQ | |
|
603
|
|
|
|
|
|
|
SN?R| # Senior |
|
604
|
|
|
|
|
|
|
JN?R| # Junior |
|
605
|
|
|
|
|
|
|
PHD | |
|
606
|
|
|
|
|
|
|
MD | |
|
607
|
|
|
|
|
|
|
LLB | |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
XI{1,3}| # 11th, 12th, 13th |
|
610
|
|
|
|
|
|
|
X | # 10th |
|
611
|
|
|
|
|
|
|
IV | # 4th |
|
612
|
|
|
|
|
|
|
VI{1,3} | # 6th, 7th, 8th |
|
613
|
|
|
|
|
|
|
V | # 5th |
|
614
|
|
|
|
|
|
|
IX | # 9th |
|
615
|
|
|
|
|
|
|
I{1,3} # 1st, 2nd, 3rd |
|
616
|
|
|
|
|
|
|
)\ /x |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
# One or more characters. |
|
620
|
|
|
|
|
|
|
non_matching: /.*/ |
|
621
|
|
|
|
|
|
|
}; |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# Define given name combinations, specifying the minimum number of letters. |
|
624
|
|
|
|
|
|
|
# The correct pair of rules is determined by the 'initials' key in the hash |
|
625
|
|
|
|
|
|
|
# passed to the 'new' method. |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
my $given_name_min_2 = q{ given_name :/[A-Z]{2,} / | split_given_name }; |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
# Joe, Jo-Anne ... |
|
631
|
|
|
|
|
|
|
my $given_name_min_3 = |
|
632
|
|
|
|
|
|
|
q{ |
|
633
|
|
|
|
|
|
|
given_name: /[A-Z]{3,} / | split_given_name |
|
634
|
|
|
|
|
|
|
}; |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# John ... |
|
638
|
|
|
|
|
|
|
my $given_name_min_4 = |
|
639
|
|
|
|
|
|
|
q{ |
|
640
|
|
|
|
|
|
|
given_name: /[A-Z]{4,} / | split_given_name |
|
641
|
|
|
|
|
|
|
}; |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# Define initials combinations specifying the minimum and maximum letters. |
|
645
|
|
|
|
|
|
|
# Order from most complex to simplest, to avoid premature matching. |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# 'A' |
|
648
|
|
|
|
|
|
|
my $initials_1 = q{ initials : single_initial }; |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
#'AB' 'A B' |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
my $initials_2 = |
|
653
|
|
|
|
|
|
|
q{ |
|
654
|
|
|
|
|
|
|
initials: /([A-Z] ){1,2}/ | /([A-Z]){1,2} / |
|
655
|
|
|
|
|
|
|
}; |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
# 'ABC' or 'A B C' |
|
658
|
|
|
|
|
|
|
my $initials_3 = |
|
659
|
|
|
|
|
|
|
q{ |
|
660
|
|
|
|
|
|
|
initials: /([A-Z] ){1,3}/ | /([A-Z]){1,3} / |
|
661
|
|
|
|
|
|
|
}; |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
665
|
|
|
|
|
|
|
# Assemble correct combination for grammar tree. |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub _create
{
    # Build the Parse::RecDescent grammar text for the requested options.
    #
    # Argument: a hash reference. The keys read here are
    #   joint_names     - true to include the two-person productions
    #   extended_titles - true to append the extended title alternatives
    #   initials        - limit on the number of letters treated as initials
    #
    # Side effect: $name->{initials} is normalised in place to an integer
    # between 1 and 3.
    #
    # Returns the complete grammar as a single string.
    my $name = shift;

    # Normalise the initials limit before it is used to select rules.
    # An undefined value is treated like any other value below 1, and a
    # fractional value is truncated; previously a value such as 2.5 passed
    # the range checks but matched none of the branches below, leaving the
    # grammar without 'given_name' and 'initials' rules.
    my $initials = defined $name->{initials} ? int( $name->{initials} ) : 1;
    $initials = 3 if $initials > 3;
    $initials = 1 if $initials < 1;
    $name->{initials} = $initials;

    my $grammar = $rules_start;

    # The joint name text ends with an alternation bar, so the single name
    # productions appended next continue the same 'full_name' rule.
    if ( $name->{joint_names} )
    {
        $grammar .= $rules_joint_names;
    }
    $grammar .= $rules_single_names;

    $grammar .= $common;

    # The extended titles text starts with an alternation bar, so it must
    # directly follow the basic 'title' rule that it extends.
    $grammar .= $titles;
    if ( $name->{extended_titles} )
    {
        $grammar .= $extended_titles;
    }

    # Define limit of when a string is treated as an initial, or
    # a given name. For example, if initials are set to 2, MR TO SMITH
    # will have initials of T & O and no given name, but MR TOM SMITH will
    # have no initials, and a given name of Tom.
    if ( $initials == 1 )
    {
        $grammar .= $given_name_min_2 . $initials_1;
    }
    elsif ( $initials == 2 )
    {
        $grammar .= $initials_2 . $given_name_min_3;
    }
    else
    {
        # Only 3 remains after the normalisation above
        $grammar .= $given_name_min_4 . $initials_3;
    }

    return($grammar);
}
|
716
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
717
|
|
|
|
|
|
|
1; |