| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Acme::Ikamusume; |
|
2
|
3
|
|
|
3
|
|
84577
|
use 5.010001; |
|
|
3
|
|
|
|
|
7
|
|
|
3
|
3
|
|
|
3
|
|
13
|
use strict; |
|
|
3
|
|
|
|
|
655
|
|
|
|
3
|
|
|
|
|
62
|
|
|
4
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
|
3
|
|
|
|
|
655
|
|
|
|
3
|
|
|
|
|
67
|
|
|
5
|
3
|
|
|
3
|
|
918
|
use utf8; |
|
|
3
|
|
|
|
|
14
|
|
|
|
3
|
|
|
|
|
10
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
|
7
|
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
1363
|
use File::ShareDir qw/dist_file/; |
|
|
3
|
|
|
|
|
12507
|
|
|
|
3
|
|
|
|
|
181
|
|
|
9
|
3
|
|
|
3
|
|
1256
|
use Lingua::JA::Kana; |
|
|
3
|
|
|
|
|
107280
|
|
|
|
3
|
|
|
|
|
248
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
3
|
|
|
3
|
|
1101
|
use Text::Mecabist; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub geso { |
|
14
|
|
|
|
|
|
|
my $self = bless { }, shift; |
|
15
|
|
|
|
|
|
|
my $text = shift // ""; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $parser = Text::Mecabist->new({ |
|
18
|
|
|
|
|
|
|
userdic => dist_file('Acme-Ikamusume', Text::Mecabist->encoding->name .'.dic'), |
|
19
|
|
|
|
|
|
|
}); |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $doc = $parser->parse($text, sub { |
|
22
|
|
|
|
|
|
|
my $node = shift; |
|
23
|
|
|
|
|
|
|
return if not $node->readable; |
|
24
|
|
|
|
|
|
|
return if not $node->reading; |
|
25
|
|
|
|
|
|
|
$self->apply_rules($node); |
|
26
|
|
|
|
|
|
|
}); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
return $doc->join('text'); |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub godan { |
|
32
|
|
|
|
|
|
|
my ($verb, $from, $to) = @_; |
|
33
|
|
|
|
|
|
|
if (my ($kana) = $verb =~ /(\p{InHiragana})$/) { |
|
34
|
|
|
|
|
|
|
$kana = kana2romaji($kana); |
|
35
|
|
|
|
|
|
|
$kana =~ s/^sh/s/; |
|
36
|
|
|
|
|
|
|
$kana =~ s/^ch/t/; |
|
37
|
|
|
|
|
|
|
$kana =~ s/^ts/t/; |
|
38
|
|
|
|
|
|
|
$kana =~ s/$from$/$to/; |
|
39
|
|
|
|
|
|
|
$kana =~ s/^a$/wa/; |
|
40
|
|
|
|
|
|
|
$kana =~ s/ti/chi/; |
|
41
|
|
|
|
|
|
|
$kana =~ s/tu/tsu/; |
|
42
|
|
|
|
|
|
|
$kana = romaji2hiragana($kana); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
$verb =~ s/.$/$kana/; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
$verb; |
|
47
|
|
|
|
|
|
|
} |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
our @rules = ( |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# userdic extra field |
|
52
|
|
|
|
|
|
|
sub { |
|
53
|
|
|
|
|
|
|
my $node = shift; |
|
54
|
|
|
|
|
|
|
my $word = $node->extra1 or return; |
|
55
|
|
|
|
|
|
|
$node->text($word); |
|
56
|
|
|
|
|
|
|
}, |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# IKA: inflection |
|
59
|
|
|
|
|
|
|
sub { |
|
60
|
|
|
|
|
|
|
my $node = shift; |
|
61
|
|
|
|
|
|
|
return if not $node->extra2; |
|
62
|
|
|
|
|
|
|
return if not $node->extra2 =~ /inflection/; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
if ($prev->is('名詞')) { |
|
67
|
|
|
|
|
|
|
$node->text('じゃなイカ'); |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
elsif ($prev->is('副詞')) { |
|
70
|
|
|
|
|
|
|
$node->text('でゲソか'); |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
elsif ($prev->is('助動詞') and |
|
73
|
|
|
|
|
|
|
$prev->surface eq 'です' and |
|
74
|
|
|
|
|
|
|
$prev->prev and $prev->prev->text !~ /^[いイ]{2}$/) { |
|
75
|
|
|
|
|
|
|
$prev->text('じゃなイ'); |
|
76
|
|
|
|
|
|
|
$node->text('カ'); |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
if ($prev->text =~ /(?:イー?カ|ゲソ)$/) { |
|
80
|
|
|
|
|
|
|
return; |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
elsif ($prev->inflection_type =~ /五段/) { |
|
83
|
|
|
|
|
|
|
$prev->text(godan($prev->text, '.' => 'a')); |
|
84
|
|
|
|
|
|
|
$node->text('なイカ'); |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
elsif ($prev->inflection_type =~ /一段|カ変|サ変/) { |
|
87
|
|
|
|
|
|
|
$node->text('なイカ'); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
}, |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# formal MASU to casual |
|
92
|
|
|
|
|
|
|
sub { |
|
93
|
|
|
|
|
|
|
my $node = shift; |
|
94
|
|
|
|
|
|
|
if ($node->lemma eq 'ます' and |
|
95
|
|
|
|
|
|
|
$node->is('助動詞') and |
|
96
|
|
|
|
|
|
|
$node->prev && $node->prev->is('動詞')) { |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
if ($node->is('基本形')) { # ます |
|
99
|
|
|
|
|
|
|
$node->prev->text($node->prev->lemma); |
|
100
|
|
|
|
|
|
|
$node->text(''); |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
if ($node->next->pos =~ /^助詞/) { |
|
103
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
if ($node->is('連用形') and # ます |
|
107
|
|
|
|
|
|
|
$node->pos3 !~ /五段/) { # 五段 => { -i/っ/ん/い } |
|
108
|
|
|
|
|
|
|
$node->text(''); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
}, |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# no honorific |
|
114
|
|
|
|
|
|
|
sub { |
|
115
|
|
|
|
|
|
|
my $node = shift; |
|
116
|
|
|
|
|
|
|
if ($node->feature =~ /^名詞,接尾,人名,/ and |
|
117
|
|
|
|
|
|
|
$node->prev->text ne 'イカ娘') { |
|
118
|
|
|
|
|
|
|
$node->text(''); |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
}, |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# IKA/GESO: replace |
|
123
|
|
|
|
|
|
|
sub { |
|
124
|
|
|
|
|
|
|
my $node = shift; |
|
125
|
|
|
|
|
|
|
my $text = $node->text; |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
$text =~ s/い[いー]か(.)/イーカ$1/g; |
|
128
|
|
|
|
|
|
|
$text =~ s/いか/イカ/g; |
|
129
|
|
|
|
|
|
|
$text =~ s/げそ/ゲソ/g; |
|
130
|
|
|
|
|
|
|
$node->text($text); |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
return if $text =~ /イー?カ|ゲソ/; |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
my $curr = katakana2hiragana($node->reading || ""); |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/い[いー]か(.)/イーカ$1/g; |
|
137
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/いか/イカ/g; |
|
138
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/げそ/ゲソ/g; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $next = katakana2hiragana(($node->next and $node->next->reading) || ""); |
|
141
|
|
|
|
|
|
|
my $prev = katakana2hiragana(join "", |
|
142
|
|
|
|
|
|
|
$node->prev && $node->prev->prev && $node->prev->prev->text, |
|
143
|
|
|
|
|
|
|
$node->prev && $node->prev->text); |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^か./ && $curr =~ s/い[いー]$/イー/; |
|
146
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /い[いー]$/ && $curr =~ s/^か(.)/カ$1/; |
|
147
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /[いイ]$/ && $curr =~ s/^か/カ/; |
|
148
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^か/ && $curr =~ s/い$/イ/; |
|
149
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^そ/ && $curr =~ s/げ$/ゲ/; |
|
150
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /げ$/ && $curr =~ s/^そ/ソ/; |
|
151
|
|
|
|
|
|
|
}, |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# IKA/GESO: DA + postp |
|
154
|
|
|
|
|
|
|
sub { |
|
155
|
|
|
|
|
|
|
my $node = shift; |
|
156
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
if ($prev->surface eq 'だ' and |
|
159
|
|
|
|
|
|
|
$prev->text eq 'でゲソ' and |
|
160
|
|
|
|
|
|
|
( |
|
161
|
|
|
|
|
|
|
$node->pos =~ /助詞|助動詞/ or |
|
162
|
|
|
|
|
|
|
$node->is('接尾') |
|
163
|
|
|
|
|
|
|
) |
|
164
|
|
|
|
|
|
|
) { |
|
165
|
|
|
|
|
|
|
my $kana = kana2romaji($node->text); |
|
166
|
|
|
|
|
|
|
if ($kana =~/^(?:ze|n[aeo]|yo|wa)/) { |
|
167
|
|
|
|
|
|
|
$node->text(''); |
|
168
|
|
|
|
|
|
|
$prev->text('じゃなイカ'); |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
if ($kana =~ /^zo/) { |
|
171
|
|
|
|
|
|
|
$node->text(''); |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
}, |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub { |
|
177
|
|
|
|
|
|
|
my $node = shift; |
|
178
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
|
179
|
|
|
|
|
|
|
my $latest = join "", |
|
180
|
|
|
|
|
|
|
$prev->prev && $prev->prev->text, |
|
181
|
|
|
|
|
|
|
$prev && $prev->text, |
|
182
|
|
|
|
|
|
|
$node->text; |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
if ($node->is('終助詞') and |
|
185
|
|
|
|
|
|
|
$latest =~ /(?:でゲソ|じゃなイカ)[よなね]$/) { |
|
186
|
|
|
|
|
|
|
$node->text(''); |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
}, |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# IKA: IIKA |
|
191
|
|
|
|
|
|
|
sub { |
|
192
|
|
|
|
|
|
|
my $node = shift; |
|
193
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if ($prev->text !~ /^(?:[いイ]{2})$/) { |
|
196
|
|
|
|
|
|
|
return; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
if ($node->surface =~ /^(?:です|でしょう)$/ and |
|
199
|
|
|
|
|
|
|
$node->next->surface =~ /^か/) { |
|
200
|
|
|
|
|
|
|
$prev->text('いイ'); |
|
201
|
|
|
|
|
|
|
$node->text(''); |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
if ($node->surface eq 'でしょうか') { |
|
204
|
|
|
|
|
|
|
$prev->text('いイ'); |
|
205
|
|
|
|
|
|
|
$node->text('カ'); |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
}, |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# GESO/IKA: eos |
|
210
|
|
|
|
|
|
|
sub { |
|
211
|
|
|
|
|
|
|
my $node = shift; |
|
212
|
|
|
|
|
|
|
my $next = $node->next or return; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
if ($next->stat == 3 or # MECAB_EOS_NODE |
|
215
|
|
|
|
|
|
|
( |
|
216
|
|
|
|
|
|
|
$next->is('記号') and |
|
217
|
|
|
|
|
|
|
$next->pos1 =~ /句点|括弧閉|GESO可/ |
|
218
|
|
|
|
|
|
|
) |
|
219
|
|
|
|
|
|
|
) { |
|
220
|
|
|
|
|
|
|
if ($node->pos =~ /^(?:その他|記号|助詞|接頭詞|接続詞|連体詞)/) { |
|
221
|
|
|
|
|
|
|
return; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
if ($node->is('助動詞') and |
|
225
|
|
|
|
|
|
|
$node->prev and $node->prev->text eq 'じゃ' and |
|
226
|
|
|
|
|
|
|
$node->surface eq 'ない') { |
|
227
|
|
|
|
|
|
|
$node->text('なイカ'); |
|
228
|
|
|
|
|
|
|
return; |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
if ($node->pos =~ /^助動詞/ and |
|
232
|
|
|
|
|
|
|
$node->prev and $node->prev->text =~ /(?:ゲソ|イー?カ)/) { |
|
233
|
|
|
|
|
|
|
return; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
my $latest = join "", |
|
237
|
|
|
|
|
|
|
$node->prev && $node->prev->text, |
|
238
|
|
|
|
|
|
|
$node->text; |
|
239
|
|
|
|
|
|
|
if ($latest =~ /(?:ゲソ|イー?カ)$/) { |
|
240
|
|
|
|
|
|
|
return; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
if ($node->is('動詞') and |
|
247
|
|
|
|
|
|
|
$node->inflection_form =~ '基本形' and |
|
248
|
|
|
|
|
|
|
$next->pos =~ /^助詞/) { |
|
249
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
}, |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# EBI: accent |
|
254
|
|
|
|
|
|
|
sub { |
|
255
|
|
|
|
|
|
|
my $node = shift; |
|
256
|
|
|
|
|
|
|
my $text = $node->text; |
|
257
|
|
|
|
|
|
|
my @ebi_accent = qw(! ♪ ♪ ♫ ♬ ♡); |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
$text =~ s{(エビ|えび|海老)}{ |
|
260
|
|
|
|
|
|
|
$1 . $ebi_accent[ int rand scalar @ebi_accent ]; |
|
261
|
|
|
|
|
|
|
}e; |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
$node->text($text); |
|
264
|
|
|
|
|
|
|
}, |
|
265
|
|
|
|
|
|
|
); |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub apply_rules { |
|
268
|
|
|
|
|
|
|
my ($self, $node) = @_; |
|
269
|
|
|
|
|
|
|
for my $rule (@rules) { |
|
270
|
|
|
|
|
|
|
$rule->($node); |
|
271
|
|
|
|
|
|
|
} |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
1; |
|
275
|
|
|
|
|
|
|
__END__ |