line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Stem::Patch::EO; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
145856
|
use v5.8.1; |
|
4
|
|
|
|
|
16
|
|
|
4
|
|
|
|
|
179
|
|
4
|
4
|
|
|
4
|
|
23
|
use utf8; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
34
|
|
5
|
4
|
|
|
4
|
|
86
|
use strict; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
134
|
|
6
|
4
|
|
|
4
|
|
20
|
use warnings; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
215
|
|
7
|
4
|
|
|
4
|
|
3863
|
use parent 'Exporter'; |
|
4
|
|
|
|
|
1356
|
|
|
4
|
|
|
|
|
23
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw( stem stem_eo stem_aggressive stem_eo_aggressive ); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
*stem_eo = \&stem; |
13
|
|
|
|
|
|
|
*stem_eo_aggressive = \&stem_aggressive; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my %protect = ( |
16
|
|
|
|
|
|
|
correlative => { map { $_ => 1 } map { |
17
|
|
|
|
|
|
|
my $start = $_; |
18
|
|
|
|
|
|
|
map { $start . $_ } qw( a al am e el es o om u ) |
19
|
|
|
|
|
|
|
} qw( ki ti i ĉi neni ) }, |
20
|
|
|
|
|
|
|
root => { map { $_ => 1 } qw( |
21
|
|
|
|
|
|
|
ĉar ĉi ĉu kaj ke la minus plus se |
22
|
|
|
|
|
|
|
ĉe da de el ekster en ĝis je kun na po pri pro sen tra |
23
|
|
|
|
|
|
|
ajn do ja jen ju ne pli tamen tre tro |
24
|
|
|
|
|
|
|
ci ĝi ili li mi ni oni ri si ŝi ŝli vi |
25
|
|
|
|
|
|
|
unu du tri kvin |
26
|
|
|
|
|
|
|
ĵus nun plu tuj |
27
|
|
|
|
|
|
|
amen bis boj fi ha he ho hu hura nu ve |
28
|
|
|
|
|
|
|
) }, |
29
|
|
|
|
|
|
|
simple => { map { my $root = $_; map { $root . $_ => 1 } qw( a e i o ) } qw( |
30
|
|
|
|
|
|
|
abrikot absint arogant artrit azot balustrad bant bat biskvit blat boat |
31
|
|
|
|
|
|
|
bot briliant cit ĉokolad dat degrad delikat diamant diskont dorlot dot |
32
|
|
|
|
|
|
|
ekscit elefant ermit etat evit flat font frat front frot gad gant genot |
33
|
|
|
|
|
|
|
glad glat glit grad granat granit grat grenad grot hepat hipokrit hont |
34
|
|
|
|
|
|
|
horizont imit incit iniciat intermit invit kalikot kamlot kant kapot |
35
|
|
|
|
|
|
|
karot kat klimat komitat kompat konfit konsonant konstant konstat |
36
|
|
|
|
|
|
|
kontant kot krad kravat kvant kvit lad lekant leŭtenant limonad lit lot |
37
|
|
|
|
|
|
|
markot marmot mat medit merit milit miozot monat mont muskat not oblat |
38
|
|
|
|
|
|
|
palat parad parazit pat perlamot pilot pint pirit plad plant plat plot |
39
|
|
|
|
|
|
|
pont pot predikat privat profit rabat rabot rad rakont rat renkont rilat |
40
|
|
|
|
|
|
|
rot sabat salat sat ŝat skarlat soldat spat spirit spit sprit stat ŝtat |
41
|
|
|
|
|
|
|
strat subit sublimat svat ŝvit terebint tint trikot trot universitat |
42
|
|
|
|
|
|
|
vant vat vizit volont zenit |
43
|
|
|
|
|
|
|
almilit bofrat ciferplat esperant malŝat manplat |
44
|
|
|
|
|
|
|
) }, |
45
|
|
|
|
|
|
|
); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub stem { |
48
|
4
|
|
|
4
|
0
|
43
|
my $word = lc shift; |
|
4
|
|
|
674
|
|
8
|
|
|
4
|
|
|
|
|
74
|
|
|
674
|
|
|
|
|
96903
|
|
49
|
|
|
|
|
|
|
|
50
|
674
|
|
|
|
|
134298
|
for ($word) { |
51
|
|
|
|
|
|
|
# standalone roots |
52
|
674
|
100
|
|
|
|
2513
|
last if $protect{root}{$word}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# l’ l' → la |
55
|
544
|
100
|
|
|
|
2350
|
last if s{ (?<= ^ l ) [’'] $}{a}x; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# un’ un' unuj → unu |
58
|
540
|
100
|
|
|
|
1487
|
last if s{ (?<= ^ un ) [’'] $}{u}x; |
59
|
536
|
100
|
|
|
|
1304
|
last if s{ (?<= ^ unu ) j $}{}x; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# -’ -' → -o |
62
|
534
|
|
|
|
|
1266
|
s{ [’'] $}{o}x; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# nouns, adjectives, -u correlatives: |
65
|
|
|
|
|
|
|
# -oj -on -ojn → o |
66
|
|
|
|
|
|
|
# -aj -an -ajn → a |
67
|
|
|
|
|
|
|
# -uj -un -ujn → u |
68
|
534
|
|
|
|
|
1595
|
s{ (?<= [aou] ) (?: [jn] | jn ) $}{}x; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# correlatives: -en → -e |
71
|
534
|
|
|
|
|
854
|
s{^ ( (?: [ĉkt] | nen )? ie ) n $}{$1}x; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# correlative roots |
74
|
534
|
100
|
|
|
|
1810
|
last if $protect{correlative}{$word}; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# accusative pronouns: -in → -i |
77
|
344
|
100
|
|
|
|
1055
|
last if s{ (?<= i ) n $}{}x; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# accusative adverbs: -en → -o |
80
|
318
|
|
|
|
|
430
|
s{ en $}{o}x; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# verbs: -is -as -os -us -u → -i |
83
|
318
|
|
|
|
|
796
|
s{ (?: [aiou] s | u ) $}{i}x; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# lexical aspect: ek- el- |
86
|
318
|
|
|
|
|
411
|
s{^ ek (?! scit ) }{}x; |
87
|
318
|
|
|
|
|
369
|
s{^ el (?! efant ) }{}x; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# simple words: root plus single suffix |
90
|
318
|
100
|
|
|
|
803
|
last if $protect{simple}{$word}; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# imperfective verbs & action nouns: -adi -ado → -i |
93
|
310
|
100
|
|
|
|
10267
|
last if s{ ad [io] $}{i}x; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# compound verbs: |
96
|
|
|
|
|
|
|
# -inti -anti -onti -iti -ati -oti → -i |
97
|
|
|
|
|
|
|
# -inte -ante -onte -ite -ate -ote → -i |
98
|
|
|
|
|
|
|
# -inta -anta -onta -ita -ata -ota → -i |
99
|
302
|
100
|
|
|
|
1209
|
last if s{ (?: [aio] n? t ) [aei] $}{i}x; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# participle nouns: |
102
|
|
|
|
|
|
|
# -into -anto -onto → -anto |
103
|
|
|
|
|
|
|
# -ito -ato -oto → -ato |
104
|
170
|
100
|
|
|
|
742
|
last if s{ [aio] ( n? ) to $}{a$1to}x; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
674
|
|
|
|
|
2264
|
return $word; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub stem_aggressive { |
111
|
332
|
|
|
332
|
0
|
794
|
my $word = stem(shift); |
112
|
332
|
|
|
|
|
469
|
my $copy = $word; |
113
|
|
|
|
|
|
|
|
114
|
332
|
|
|
|
|
547
|
for ($word) { |
115
|
|
|
|
|
|
|
# protected words |
116
|
332
|
100
|
100
|
|
|
1693
|
last if $protect{root}{$word} |
117
|
|
|
|
|
|
|
|| $protect{correlative}{$word}; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# remove final suffix |
120
|
155
|
|
|
|
|
418
|
s{ [aeio] $}{}x; |
121
|
|
|
|
|
|
|
|
122
|
155
|
100
|
|
|
|
412
|
last if $protect{simple}{$copy}; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# remove suffix for participle nouns: |
125
|
|
|
|
|
|
|
# -int- -ant- -ont- -it- -at- -ot- |
126
|
151
|
|
|
|
|
360
|
s{ [aio] n? t $}{}x; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
332
|
|
|
|
|
1466
|
return $word; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
1; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
__END__ |