| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::JA::Kana; |
|
2
|
2
|
|
|
2
|
|
46164
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
61
|
|
|
3
|
2
|
|
|
2
|
|
10
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
59
|
|
|
4
|
2
|
|
|
2
|
|
1941
|
use utf8; |
|
|
2
|
|
|
|
|
24
|
|
|
|
2
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Module version built from the RCS "Revision" keyword: the digit groups
# captured from the keyword text are formatted as MAJOR.MINOR with a
# zero-padded two-digit minor part.  Kept on a single line on purpose.
our $VERSION = sprintf( '%d.%02d', q$Revision: 0.7 $ =~ m/(\d+)/g );
|
7
|
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
144
|
use re (); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
51
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
|
10
|
2
|
|
|
2
|
|
11
|
use base qw/Exporter/; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
7695
|
|
|
11
|
|
|
|
|
|
|
# Functions exported into the caller's namespace by default (via Exporter).
our @EXPORT = qw(
    hira2kata
    hiragana2katakana
    kata2hira
    katakana2hiragana
    romaji2hiragana
    romaji2katakana
    kana2romaji
    hankaku2zenkaku
    zenkaku2hankaku
);
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# 1 when the optional Regexp::Assemble module could be loaded, 0 otherwise.
# The pattern builders in this file consult this flag to choose between
# Regexp::Assemble and a plain joined alternation.
#
# Success is decided by the value of the eval block itself (the trailing
# "1"), not by inspecting $@ afterwards, and a block eval is used instead
# of compiling a string at run time.
our $USE_REGEXP_ASSEMBLE = eval { require Regexp::Assemble; 1 } ? 1 : 0;
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Case-insensitive single-letter classes for romaji text.
#   $Re_Vowels     : the five vowels.
#   $Re_Consonants : consonant letters; note the absence of "n" and "m",
#                    which are deliberately not part of this class.
our ( $Re_Vowels, $Re_Consonants ) = (
    qr/[aeiou]/i,
    qr/[bcdfghjklpqrstvwxyz]/i,
);
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Katakana => romaji lookup table.  Keys are one- or two-character katakana
# sequences, values are their lower-case romaji spelling.  The list is laid
# out as "kana romaji" pairs, one kana row per line.
our %Kata2Hepburn = qw(
    ア    a      イ    i      ウ    u      エ    e      オ    o
    ァ    xa     ィ    xi     ゥ    xu     ェ    xe     ォ    xo
    カ    ka     キ    ki     ク    ku     ケ    ke     コ    ko
    ガ    ga     ギ    gi     グ    gu     ゲ    ge     ゴ    go
    キャ  kya    キュ  kyu    キョ  kyo
    ギャ  gya    ギュ  gyu    ギョ  gyo
    サ    sa     シ    shi    ス    su     セ    se     ソ    so
    ザ    za     ジ    ji     ズ    zu     ゼ    ze     ゾ    zo
    シャ  sha    シュ  shu    ショ  sho
    ジャ  ja     ジュ  ju     ジョ  jo
    タ    ta     チ    chi    ツ    tsu    テ    te     ト    to
    ティ  ti     トゥ  tu
    ダ    da     ディ  di     ドゥ  du     デ    de     ド    do
    ヂ    dhi    ヅ    dhu
    チャ  cha    チュ  chu    チェ  che    チョ  cho
    ヂャ  dha    ヂュ  dhu    ヂェ  dhe    ヂョ  dho
    ナ    na     ニ    ni     ヌ    nu     ネ    ne     ノ    no
    ニャ  nya    ニュ  nyu    ニョ  nyo
    ハ    ha     ヒ    hi     フ    fu     ヘ    he     ホ    ho
    ヒャ  hya    ヒュ  hyu    ヒョ  hyo
    バ    ba     ビ    bi     ブ    bu     ベ    be     ボ    bo
    ビャ  bya    ビュ  byu    ビョ  byo
    パ    pa     ピ    pi     プ    pu     ペ    pe     ポ    po
    ピャ  pya    ピュ  pyu    ピョ  pyo
    ファ  fa     フィ  fi     フェ  fe     フォ  fo
    マ    ma     ミ    mi     ム    mu     メ    me     モ    mo
    ミャ  mya    ミュ  myu    ミョ  myo
    ヤ    ya     ユ    yu     イェ  ye     ヨ    yo
    ャ    xya    ュ    xyu    ョ    xyo
    ラ    ra     リ    ri     ル    ru     レ    re     ロ    ro
    リャ  rya    リュ  ryu    リョ  ryo
    ワ    wa     ヰ    wi     ヱ    we     ヲ    wo
    ウァ  wa     ウィ  wi     ウェ  we     ウォ  wo
    ヴァ  va     ヴィ  vi     ヴ    vu     ヴェ  ve     ヴォ  vo
    ン    n
);
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Kana => romaji table covering both scripts: every katakana entry of
# %Kata2Hepburn plus the same entry with its key converted to hiragana.
# The romaji value is run through the same conversion too, which leaves the
# ASCII romaji used in that table untouched.
our %Kana2Hepburn = %Kata2Hepburn;
for my $kata ( keys %Kata2Hepburn ) {
    my $romaji = $Kata2Hepburn{$kata};
    $Kana2Hepburn{ katakana2hiragana($kata) } = katakana2hiragana($romaji);
}
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Compiled pattern matching any single key of %Kana2Hepburn.
#
# With Regexp::Assemble available the keys are handed to it and the
# assembled pattern is used as-is.  Otherwise the keys are joined into one
# alternation.  Perl tries alternatives left to right and takes the first
# one that matches, so the keys are ordered longest first: a two-character
# key such as "キャ" must be tried before its one-character prefix "キ",
# otherwise the result would depend on hash ordering.  The secondary string
# comparison only makes the generated pattern reproducible.
our $Re_Kana2Hepburn = do {
    if ($USE_REGEXP_ASSEMBLE) {
        my $ra = Regexp::Assemble->new();
        $ra->add($_) for keys %Kana2Hepburn;
        $ra->re;
    }
    else {
        my $str = join '|',
            sort { length($b) <=> length($a) || $a cmp $b }
            keys %Kana2Hepburn;
        qr/(?:$str)/;
    }
};
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Romaji => katakana lookup table.  Keys are lower-case romaji syllables
# (several spellings of the same kana are accepted, e.g. "shi"/"si",
# "cha"/"tya", "ra"/"la"); values are the katakana they produce.
#
# The list is flattened into a hash, so when a key occurs twice the later
# pair wins.
# NOTE(review): "dhu" is listed twice (ヅ, then ヂュ), so it resolves to
# ヂュ; confirm which mapping is intended.
#
# The "dya" row spells its keys dyu/dye/dyo.  Spelling them tyu/tye/tyo
# would overwrite the チュ/チェ/チョ entries of the "tya" row and leave the
# d-forms without any entry.
our %Romaji2Kata = qw(
    a ア    i イ    u ウ    e エ    o オ
    xa ァ   xi ィ   xu ゥ   xe ェ   xo ォ
    ka カ   ki キ   ku ク   ke ケ   ko コ
    ga ガ   gi ギ   gu グ   ge ゲ   go ゴ
    kya キャ   kyu キュ   kyo キョ
    gya ギャ   gyu ギュ   gyo ギョ
    sa サ   shi シ  su ス   se セ   so ソ
    si シ
    za ザ   ji ジ   zu ズ   ze ゼ   zo ゾ
    zi ジ
    sha シャ   shu シュ   sho ショ
    ja ジャ    ju ジュ    jo ジョ
    sya シャ   syu シュ   syo ショ
    ta タ   chi チ  tsu ツ  te テ   to ト
    xtu ッ
    ti ティ    tu トゥ
    da ダ   di ディ du ドゥ de デ   do ド
    dhi ヂ  dhu ヅ
    cha チャ   chu チュ   che チェ   cho チョ
    tya チャ   tyu チュ   tye チェ   tyo チョ
    dha ヂャ   dhu ヂュ   dhe ヂェ   dho ヂョ
    dya ヂャ   dyu ヂュ   dye ヂェ   dyo ヂョ
    na ナ   ni ニ   nu ヌ   ne ネ   no ノ
    nya ニャ   nyu ニュ   nyo ニョ
    ha ハ   hi ヒ   fu フ   he ヘ   ho ホ
    hu フ
    hya ヒャ   hyu ヒュ   hyo ヒョ
    ba バ   bi ビ   bu ブ   be ベ   bo ボ
    bya ビャ   byu ビュ   byo ビョ
    pa パ   pi ピ   pu プ   pe ペ   po ポ
    pya ピャ   pyu ピュ   pyo ピョ
    fa ファ    fi フィ    fe フェ    fo フォ
    ma マ   mi ミ   mu ム   me メ   mo モ
    mya ミャ   myu ミュ   myo ミョ
    ya ヤ   yu ユ   ye イェ yo ヨ
    xya ャ  xyu ュ  xyo ョ
    ra ラ   ri リ   ru ル   re レ   ro ロ
    rya リャ   ryu リュ   ryo リョ
    la ラ   li リ   lu ル   le レ   lo ロ
    wa ワ   wo ヲ
    wi ウィ    we ウェ
    va ヴァ    vi ヴィ    vu ヴ   ve ヴェ    vo ヴォ
);
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Compiled, case-insensitive pattern matching any key of %Romaji2Kata.
#
# Both branches only produce the pattern *text*; it is compiled once, with
# the /i flag, at the end of the block.
our $Re_Romaji2Kata = do {
    my $body;

    if ($USE_REGEXP_ASSEMBLE) {
        my $assembler = Regexp::Assemble->new();
        for my $romaji ( keys %Romaji2Kata ) {
            $assembler->add($romaji);
        }
        $body = $assembler->re;

        # The assembled regexp carries its own flag wrapper; unwrap it so
        # the text can be recompiled with /i.
        if ( $] >= 5.009005 ) {
            # First element of the returned list is the bare pattern text.
            ($body) = re::regexp_pattern($body);
        }
        else {
            substr( $body, 0, 8, '' );     # remove '(?-xism:'
            substr( $body, -1, 1, '' );    # and ')';
        }
    }
    else {
        # Longest keys first, so that e.g. "kya" is tried before "ka".
        my @longest_first =
            sort { length($b) <=> length($a) } keys %Romaji2Kata;
        $body = '(?:' . join( '|', @longest_first ) . ')';
    }

    qr/$body/i;    # and recompile with i
};
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# The table and pattern used by kana2romaji(): a copy of the
# %Kana2Hepburn table and the $Re_Kana2Hepburn pattern.
our ( %Kana2Romaji, $Re_Kana2Romaji );
%Kana2Romaji    = %Kana2Hepburn;
$Re_Kana2Romaji = $Re_Kana2Hepburn;
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Convert katakana to hiragana.
#   argument: the text to convert (a character string)
#   returns : a converted copy; the characters ァ..ン and ヴ are replaced by
#             ぁ..ん and ゔ, everything else is passed through unchanged
sub katakana2hiragana {
    my ($text) = @_;
    $text =~ tr/ァ-ンヴ/ぁ-んゔ/;
    return $text;
}
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Convert hiragana to katakana.
#   argument: the text to convert (a character string)
#   returns : a converted copy; the characters ぁ..ん and ゔ are replaced by
#             ァ..ン and ヴ, everything else is passed through unchanged
sub hiragana2katakana {
    my ($text) = @_;
    $text =~ tr/ぁ-んゔ/ァ-ンヴ/;
    return $text;
}
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Short names for the two script converters, installed as glob aliases so
# that both spellings refer to the very same subroutine.
{
    # Each alias glob is mentioned only once in this file.
    no warnings 'once';

    *hira2kata = \&hiragana2katakana;
    *kata2hira = \&katakana2hiragana;
}
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Convert romaji text to katakana.
#   argument: the romaji text (matched case-insensitively)
#   returns : a converted copy; anything that no pass recognises is left
#             as it is
sub romaji2katakana {
    my ($text) = @_;

    # Pass 1: a doubled consonant becomes small tsu followed by a single
    # consonant (tta -> ッta).  $Re_Consonants has no n or m, so "nn" and
    # "mm" are not touched here.
    $text =~ s{ ($Re_Consonants) \1 }{ッ$1}msxgi;

    # Pass 2: replace every romaji syllable known to %Romaji2Kata; the
    # lookup key is lower-cased, and the matched text is kept when the
    # lookup yields nothing.
    $text =~ s{ ($Re_Romaji2Kata) }{ $Romaji2Kata{ lc $1 } || $1 }msxgei;

    # Pass 3: an m or n still standing directly after a katakana character
    # becomes ン.
    $text =~ s{ ([ァ-ン]) [mn] }{${1}ン}msxgi;

    return $text;
}
|
178
|
|
|
|
|
|
|
|
|
179
|
2
|
|
|
2
|
1
|
227
|
# Convert romaji text to hiragana by going through katakana first.
#   argument: the romaji text
#   returns : the result of romaji2katakana() passed on to
#             katakana2hiragana()
sub romaji2hiragana {
    my $katakana = romaji2katakana(shift);
    return katakana2hiragana($katakana);
}
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Convert kana text (hiragana and/or katakana) to romaji.
#   argument: the kana text
#   returns : a converted copy; anything that no pass recognises is left
#             as it is
sub kana2romaji {
    my ($text) = @_;

    # Pass 1: replace every kana sequence known to %Kana2Romaji; the
    # matched text is kept when the lookup yields nothing.
    $text =~ s{ ($Re_Kana2Romaji) }{ $Kana2Romaji{$1} || $1 }msxge;

    # Pass 2: small tsu in front of a consonant doubles that consonant
    # (ッta -> tta).
    $text =~ s{ [っッ] ($Re_Consonants) }{$1$1}msxg;

    # Pass 3: the long-vowel mark after a vowel doubles that vowel
    # (oー -> oo).
    $text =~ s{ ($Re_Vowels) ー }{$1$1}msxg;

    return $text;
}
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Manual smoke test: runs only when this file is executed directly rather
# than loaded as a module.  Diagnostics go to STDERR via warn, conversion
# results go to STDOUT, one per line.
if ( $0 eq __FILE__ ) {
    warn $USE_REGEXP_ASSEMBLE;
    binmode STDOUT, ':utf8';
    local $\ = "\n";    # every print below ends its line

    warn $Re_Romaji2Kata;
    print romaji2katakana($_) for "Dan Kogai", "shimbashi", "konnichiwa";
    print romaji2hiragana($_) for "Dan Kogai", "shimbashi";

    warn $Re_Kana2Romaji;
    print kana2romaji($_) for "ダンコガイ", "マイッタ", "シンバシ";

    print romaji2hiragana("ryoukai");    # RT#39590
    print romaji2hiragana("virama");     # RT#45402
}
|
210
|
|
|
|
|
|
|
|
|
211
|
2
|
|
|
2
|
|
54439
|
use Encode; |
|
|
2
|
|
|
|
|
29138
|
|
|
|
2
|
|
|
|
|
239
|
|
|
212
|
2
|
|
|
2
|
|
1848
|
use Encode::JP::H2Z; |
|
|
2
|
|
|
|
|
7713
|
|
|
|
2
|
|
|
|
|
542
|
|
|
213
|
|
|
|
|
|
|
# EUC-JP encoding object shared by the width converters below: the
# Encode::JP::H2Z routines are applied to EUC-JP encoded octets, so text
# is encoded before and decoded after the conversion.
my $eucjp = Encode::find_encoding('eucjp');

# Convert half-width (hankaku) kana to full-width (zenkaku) kana.
#   argument: the text to convert (a character string)
#   returns : the converted character string
sub hankaku2zenkaku {
    my ($text) = @_;
    my $octets = $eucjp->encode($text);
    Encode::JP::H2Z::h2z( \$octets );    # rewrites $octets in place
    return $eucjp->decode($octets);
}
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Convert full-width (zenkaku) kana to half-width (hankaku) kana.
#   argument: the text to convert (a character string)
#   returns : the converted character string
# The text is encoded with the file-level $eucjp encoding object, converted
# in place by Encode::JP::H2Z::z2h, and decoded again.
sub zenkaku2hankaku {
    my ($text) = @_;
    my $octets = $eucjp->encode($text);
    Encode::JP::H2Z::z2h( \$octets );    # rewrites $octets in place
    return $eucjp->decode($octets);
}
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
1; # End of Lingua::JA::Kana |
|
228
|
|
|
|
|
|
|
__END__ |