line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::KO::TypoCorrector; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
22672
|
use 5.008001; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
68
|
|
4
|
2
|
|
|
2
|
|
8
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
127
|
|
5
|
2
|
|
|
2
|
|
22
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
930
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Exporter; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @EXPORT = qw( to_hangul ); |
12
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [@EXPORT ] ); |
13
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $en_h = "rRseEfaqQtTdwWczxvg"; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $reg_h = "[" . $en_h . "]"; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $en_b = { |
22
|
|
|
|
|
|
|
k => 0, #ㅏ |
23
|
|
|
|
|
|
|
o => 1, #ㅐ |
24
|
|
|
|
|
|
|
i => 2, #ㅑ |
25
|
|
|
|
|
|
|
O => 3, #ㅒ |
26
|
|
|
|
|
|
|
j => 4, #ㅓ |
27
|
|
|
|
|
|
|
p => 5, #ㅖ |
28
|
|
|
|
|
|
|
u => 6, #ㅕ |
29
|
|
|
|
|
|
|
P => 7, #ㅖ |
30
|
|
|
|
|
|
|
h => 8, #ㅗ |
31
|
|
|
|
|
|
|
hk => 9, #ㅘ |
32
|
|
|
|
|
|
|
ho => 10, #ㅙ |
33
|
|
|
|
|
|
|
hl => 11, #ㅚ |
34
|
|
|
|
|
|
|
y => 12, #ㅛ |
35
|
|
|
|
|
|
|
n => 13, #ㅜ |
36
|
|
|
|
|
|
|
nj => 14, #ㅝ |
37
|
|
|
|
|
|
|
np => 15, #ㅞ |
38
|
|
|
|
|
|
|
nl => 16, #ㅟ |
39
|
|
|
|
|
|
|
b => 17, #ㅠ |
40
|
|
|
|
|
|
|
m => 18, #ㅡ |
41
|
|
|
|
|
|
|
ml => 19, #ㅢ |
42
|
|
|
|
|
|
|
l => 20, # |
43
|
|
|
|
|
|
|
}; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $reg_b = "hk|ho|hl|nj|np|nl|ml|k|o|i|O|j|p|u|P|h|y|n|b|m|l"; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $en_f = { |
48
|
|
|
|
|
|
|
"" => 0, # 받침없음 |
49
|
|
|
|
|
|
|
r => 1, # ㄱ |
50
|
|
|
|
|
|
|
R => 2, # ㄲ |
51
|
|
|
|
|
|
|
rt => 3, # ㄱㅅ |
52
|
|
|
|
|
|
|
s => 4, # ㄴ |
53
|
|
|
|
|
|
|
sw => 5, # ㄴㅈ |
54
|
|
|
|
|
|
|
sg => 6, # ㄴㅎ |
55
|
|
|
|
|
|
|
e => 7, # ㄷ |
56
|
|
|
|
|
|
|
f => 8, # ㄹ |
57
|
|
|
|
|
|
|
fr => 9, # ㄹㄱ |
58
|
|
|
|
|
|
|
fa => 10, # ㄹㅁ |
59
|
|
|
|
|
|
|
fq => 11, # ㄹㅂ |
60
|
|
|
|
|
|
|
ft => 12, # ㄹㅅ |
61
|
|
|
|
|
|
|
fx => 13, # ㄹㅌ |
62
|
|
|
|
|
|
|
fv => 14, # ㄹㅍ |
63
|
|
|
|
|
|
|
fg => 15, # ㄹㅎ |
64
|
|
|
|
|
|
|
a => 16, # ㅁ |
65
|
|
|
|
|
|
|
q => 17, # ㅂ |
66
|
|
|
|
|
|
|
qt => 18, # ㅂㅅ |
67
|
|
|
|
|
|
|
t => 19, # ㅅ |
68
|
|
|
|
|
|
|
T => 20, # ㅆ |
69
|
|
|
|
|
|
|
d => 21, # ㅇ |
70
|
|
|
|
|
|
|
w => 22, # ㅈ |
71
|
|
|
|
|
|
|
c => 23, # ㅊ |
72
|
|
|
|
|
|
|
z => 24, # ㅋ |
73
|
|
|
|
|
|
|
x => 25, # ㅌ |
74
|
|
|
|
|
|
|
v => 26, # ㅍ |
75
|
|
|
|
|
|
|
g => 27, # ㅎ |
76
|
|
|
|
|
|
|
}; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my $reg_f = "rt|sw|sg|fr|fa|fq|ft|fx|fv|fg|qt|r|R|s|e|f|a|q|t|T|d|w|c|z|x|v|g|"; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
my $reg_exp = "(".$reg_h.")(".$reg_b.")((?:".$reg_f.")(?=(?:".$reg_h.")(?:".$reg_b."))|(?:".$reg_f."))"; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub to_hangul { |
83
|
2
|
|
|
2
|
0
|
13
|
my $text = shift; |
84
|
2
|
|
|
|
|
197
|
$text =~ s/$reg_exp/replace($1,$2,$3)/ge; |
|
10
|
|
|
|
|
23
|
|
85
|
2
|
|
|
|
|
18
|
return $text; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub replace { |
89
|
10
|
|
|
10
|
0
|
27
|
my ($h,$b,$f) = @_; |
90
|
10
|
|
|
|
|
73
|
return chr(index($en_h, $h)*21*28 + $en_b->{$b} * 28 + $en_f->{$f} + 44032); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
1; |
94
|
|
|
|
|
|
|
__END__ |