| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::Stem::Patch::EO; |
|
2
|
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
82389
|
use v5.8.1; |
|
|
4
|
|
|
|
|
14
|
|
|
|
4
|
|
|
|
|
158
|
|
|
4
|
4
|
|
|
4
|
|
18
|
use utf8; |
|
|
4
|
|
|
|
|
27
|
|
|
|
4
|
|
|
|
|
24
|
|
|
5
|
4
|
|
|
4
|
|
78
|
use strict; |
|
|
4
|
|
|
|
|
6
|
|
|
|
4
|
|
|
|
|
116
|
|
|
6
|
4
|
|
|
4
|
|
16
|
use warnings; |
|
|
4
|
|
|
|
|
5
|
|
|
|
4
|
|
|
|
|
135
|
|
|
7
|
4
|
|
|
4
|
|
1890
|
use parent 'Exporter'; |
|
|
4
|
|
|
|
|
1029
|
|
|
|
4
|
|
|
|
|
34
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw( stem stem_eo stem_aggressive stem_eo_aggressive ); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
*stem_eo = \&stem; |
|
13
|
|
|
|
|
|
|
*stem_eo_aggressive = \&stem_aggressive; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my %protect = ( |
|
16
|
|
|
|
|
|
|
correlative => { map { $_ => 1 } map { |
|
17
|
|
|
|
|
|
|
my $start = $_; |
|
18
|
|
|
|
|
|
|
map { $start . $_ } qw( a al am e el es o om u ) |
|
19
|
|
|
|
|
|
|
} qw( ki ti i ĉi neni ) }, |
|
20
|
|
|
|
|
|
|
root => { map { $_ => 1 } qw( |
|
21
|
|
|
|
|
|
|
ĉar ĉi ĉu kaj ke la minus plus se |
|
22
|
|
|
|
|
|
|
ĉe da de el ekster en ĝis je kun na po pri pro sen tra |
|
23
|
|
|
|
|
|
|
ajn do ja jen ju ne pli tamen tre tro |
|
24
|
|
|
|
|
|
|
ci ĝi ili li mi ni oni ri si ŝi ŝli vi |
|
25
|
|
|
|
|
|
|
unu du tri kvin |
|
26
|
|
|
|
|
|
|
ĵus nun plu tuj |
|
27
|
|
|
|
|
|
|
amen bis boj fi ha he ho hu hura nu ve |
|
28
|
|
|
|
|
|
|
) }, |
|
29
|
|
|
|
|
|
|
simple => { map { my $root = $_; map { $root . $_ => 1 } qw( a e i o ) } qw( |
|
30
|
|
|
|
|
|
|
abrikot absint arogant artrit azot balustrad bant bat biskvit blat boat |
|
31
|
|
|
|
|
|
|
bot briliant cit ĉokolad dat degrad delikat diamant diskont dorlot dot |
|
32
|
|
|
|
|
|
|
ekscit elefant ermit etat evit flat font frat front frot gad gant genot |
|
33
|
|
|
|
|
|
|
glad glat glit grad granat granit grat grenad grot hepat hipokrit hont |
|
34
|
|
|
|
|
|
|
horizont imit incit iniciat intermit invit kalikot kamlot kant kapot |
|
35
|
|
|
|
|
|
|
karot kat klimat komitat kompat konfit konsonant konstant konstat |
|
36
|
|
|
|
|
|
|
kontant kot krad kravat kvant kvit lad lekant leŭtenant limonad lit lot |
|
37
|
|
|
|
|
|
|
markot marmot mat medit merit milit miozot monat mont muskat not oblat |
|
38
|
|
|
|
|
|
|
palat parad parazit pat perlamot pilot pint pirit plad plant plat plot |
|
39
|
|
|
|
|
|
|
pont pot predikat privat profit rabat rabot rad rakont rat renkont rilat |
|
40
|
|
|
|
|
|
|
rot sabat salat sat ŝat skarlat soldat spat spirit spit sprit stat ŝtat |
|
41
|
|
|
|
|
|
|
strat subit sublimat svat ŝvit terebint tint trikot trot universitat |
|
42
|
|
|
|
|
|
|
vant vat vizit volont zenit |
|
43
|
|
|
|
|
|
|
almilit bofrat ciferplat esperant malŝat manplat |
|
44
|
|
|
|
|
|
|
) }, |
|
45
|
|
|
|
|
|
|
); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub stem { |
|
48
|
4
|
|
|
4
|
0
|
22
|
my $word = lc shift; |
|
|
4
|
|
|
674
|
|
6
|
|
|
|
4
|
|
|
|
|
66
|
|
|
|
674
|
|
|
|
|
64652
|
|
|
49
|
|
|
|
|
|
|
|
|
50
|
674
|
|
|
|
|
80029
|
for ($word) { |
|
51
|
|
|
|
|
|
|
# standalone roots |
|
52
|
674
|
100
|
|
|
|
1835
|
last if $protect{root}{$word}; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# l’ l' → la |
|
55
|
544
|
100
|
|
|
|
1649
|
last if s{ (?<= ^ l ) [’'] $}{a}x; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# un’ un' unuj → unu |
|
58
|
540
|
100
|
|
|
|
1063
|
last if s{ (?<= ^ un ) [’'] $}{u}x; |
|
59
|
536
|
100
|
|
|
|
939
|
last if s{ (?<= ^ unu ) j $}{}x; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# -’ -' → -o |
|
62
|
534
|
|
|
|
|
842
|
s{ [’'] $}{o}x; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# nouns, adjectives, -u correlatives: |
|
65
|
|
|
|
|
|
|
# -oj -on -ojn → o |
|
66
|
|
|
|
|
|
|
# -aj -an -ajn → a |
|
67
|
|
|
|
|
|
|
# -uj -un -ujn → u |
|
68
|
534
|
|
|
|
|
1049
|
s{ (?<= [aou] ) (?: [jn] | jn ) $}{}x; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# correlatives: -en → -e |
|
71
|
534
|
|
|
|
|
690
|
s{^ ( (?: [ĉkt] | nen )? ie ) n $}{$1}x; |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# correlative roots |
|
74
|
534
|
100
|
|
|
|
1176
|
last if $protect{correlative}{$word}; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# accusative pronouns: -in → -i |
|
77
|
344
|
100
|
|
|
|
629
|
last if s{ (?<= i ) n $}{}x; |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# accusative adverbs: -en → -o |
|
80
|
318
|
|
|
|
|
319
|
s{ en $}{o}x; |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# verbs: -is -as -os -us -u → -i |
|
83
|
318
|
|
|
|
|
580
|
s{ (?: [aiou] s | u ) $}{i}x; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# lexical aspect: ek- el- |
|
86
|
318
|
|
|
|
|
290
|
s{^ ek (?! scit ) }{}x; |
|
87
|
318
|
|
|
|
|
276
|
s{^ el (?! efant ) }{}x; |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# simple words: root plus single suffix |
|
90
|
318
|
100
|
|
|
|
602
|
last if $protect{simple}{$word}; |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# imperfective verbs & action nouns: -adi -ado → -i |
|
93
|
310
|
100
|
|
|
|
493
|
last if s{ ad [io] $}{i}x; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# compound verbs: |
|
96
|
|
|
|
|
|
|
# -inti -anti -onti -iti -ati -oti → -i |
|
97
|
|
|
|
|
|
|
# -inte -ante -onte -ite -ate -ote → -i |
|
98
|
|
|
|
|
|
|
# -inta -anta -onta -ita -ata -ota → -i |
|
99
|
302
|
100
|
|
|
|
833
|
last if s{ (?: [aio] n? t ) [aei] $}{i}x; |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# participle nouns: |
|
102
|
|
|
|
|
|
|
# -into -anto -onto → -anto |
|
103
|
|
|
|
|
|
|
# -ito -ato -oto → -ato |
|
104
|
170
|
100
|
|
|
|
508
|
last if s{ [aio] ( n? ) to $}{a$1to}x; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
674
|
|
|
|
|
1585
|
return $word; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub stem_aggressive { |
|
111
|
332
|
|
|
332
|
0
|
583
|
my $word = stem(shift); |
|
112
|
332
|
|
|
|
|
297
|
my $copy = $word; |
|
113
|
|
|
|
|
|
|
|
|
114
|
332
|
|
|
|
|
337
|
for ($word) { |
|
115
|
|
|
|
|
|
|
# protected words |
|
116
|
332
|
100
|
100
|
|
|
1287
|
last if $protect{root}{$word} |
|
117
|
|
|
|
|
|
|
|| $protect{correlative}{$word}; |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# remove final suffix |
|
120
|
155
|
|
|
|
|
310
|
s{ [aeio] $}{}x; |
|
121
|
|
|
|
|
|
|
|
|
122
|
155
|
100
|
|
|
|
275
|
last if $protect{simple}{$copy}; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# remove suffix for participle nouns: |
|
125
|
|
|
|
|
|
|
# -int- -ant- -ont- -it- -at- -ot- |
|
126
|
151
|
|
|
|
|
234
|
s{ [aio] n? t $}{}x; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
332
|
|
|
|
|
1140
|
return $word; |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
1; |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
__END__ |