File Coverage

blib/lib/Lingua/KO/Hangul/Util.pm
Criterion Covered Total %
statement 170 170 100.0
branch 54 54 100.0
condition 37 38 97.3
subroutine 40 40 100.0
pod 13 13 100.0
total 314 315 99.6


line stmt bran cond sub pod time code
1             package Lingua::KO::Hangul::Util;
2              
3 6     6   13957 use 5.006001;
  6         25  
  6         261  
4 6     6   33 use strict;
  6         11  
  6         176  
5 6     6   28 use warnings;
  6         14  
  6         1680  
6              
7             require Exporter;
8              
9             our $VERSION = '0.27';
10             our $PACKAGE = __PACKAGE__;
11              
12             our @EXPORT = qw(
13             decomposeHangul
14             composeHangul
15             getHangulName
16             parseHangulName
17             getHangulComposite
18             );
19             our @EXPORT_OK = qw(
20             decomposeSyllable
21             composeSyllable
22             decomposeJamo
23             composeJamo
24             decomposeFull
25             getSyllableType
26             isStandardForm
27             insertFiller
28             );
29             our %EXPORT_TAGS = (
30             'all' => [ @EXPORT, @EXPORT_OK ],
31             );
32              
33             ##### The above part is common to XS and PP #####
34              
35             our @ISA = qw(Exporter);
36 6     6   46 use Carp;
  6         11  
  6         1284  
37              
38             #####
39              
40             my @JamoL = ( # Initial (HANGUL CHOSEONG)
41             "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
42             "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H",
43             );
44              
45             my @JamoV = ( # Medial (HANGUL JUNGSEONG)
46             "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
47             "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
48             "YU", "EU", "YI", "I",
49             );
50              
51             my @JamoT = ( # Final (HANGUL JONGSEONG)
52             "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
53             "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
54             "S", "SS", "NG", "J", "C", "K", "T", "P", "H",
55             );
56              
57             my $BlockName = "HANGUL SYLLABLE ";
58              
59             #####
60              
61 6     6   33 use constant SBase => 0xAC00;
  6         10  
  6         530  
62 6     6   28 use constant SFinal => 0xD7A3; # SBase -1 + SCount
  6         9  
  6         243  
63 6     6   26 use constant SCount => 11172; # LCount * NCount
  6         1153  
  6         353  
64 6     6   45 use constant NCount => 588; # VCount * TCount
  6         9  
  6         263  
65 6     6   27 use constant LBase => 0x1100;
  6         10  
  6         294  
66 6     6   76 use constant LFinal => 0x1112;
  6         11  
  6         350  
67 6     6   28 use constant LCount => 19; # scalar @JamoL
  6         8  
  6         227  
68 6     6   25 use constant VBase => 0x1161;
  6         8  
  6         250  
69 6     6   26 use constant VFinal => 0x1175;
  6         8  
  6         235  
70 6     6   45 use constant VCount => 21; # scalar @JamoV
  6         22  
  6         349  
71 6     6   30 use constant TBase => 0x11A7;
  6         7  
  6         239  
72 6     6   27 use constant TFinal => 0x11C2;
  6         14  
  6         300  
73 6     6   25 use constant TCount => 28; # scalar @JamoT
  6         8  
  6         266  
74 6     6   28 use constant JBase => 0x1100;
  6         11  
  6         258  
75 6     6   26 use constant JFinal => 0x11FF;
  6         11  
  6         282  
76 6     6   30 use constant JCount => 256;
  6         8  
  6         231  
77              
78 6     6   28 use constant JamoLIni => 0x1100;
  6         9  
  6         238  
79 6     6   28 use constant JamoLFin => 0x1159;
  6         8  
  6         1373  
80 6     6   31 use constant JamoLFill => 0x115F;
  6         8  
  6         1434  
81 6     6   31 use constant JamoVIni => 0x1160;
  6         9  
  6         247  
82 6     6   45 use constant JamoVFin => 0x11A2;
  6         23  
  6         248  
83 6     6   28 use constant JamoTIni => 0x11A8;
  6         12  
  6         275  
84 6     6   27 use constant JamoTFin => 0x11F9;
  6         9  
  6         31957  
85              
86             my(%CodeL, %CodeV, %CodeT);
87             @CodeL{@JamoL} = 0 .. LCount-1;
88             @CodeV{@JamoV} = 0 .. VCount-1;
89             @CodeT{@JamoT} = 0 .. TCount-1;
90              
91             my $IsJ = sub { JBase <= $_[0] && $_[0] <= JFinal };
92             my $IsS = sub { SBase <= $_[0] && $_[0] <= SFinal };
93             my $IsL = sub { LBase <= $_[0] && $_[0] <= LFinal };
94             my $IsV = sub { VBase <= $_[0] && $_[0] <= VFinal };
95             my $IsT = sub { TBase < $_[0] && $_[0] <= TFinal };
96             # TBase <= $_[0] is false!
97             my $IsLV = sub {
98             SBase <= $_[0] && $_[0] <= SFinal && (($_[0] - SBase ) % TCount) == 0;
99             };
100              
101             #####
102              
103             # separator is a semicolon, ';'.
104             my %Map12; # ("integer;integer" => integer)
105             my %Map123; # ("integer;integer;integer" => integer)
106              
107             my %Decomp = (
108             0x1101 => [0x1100, 0x1100],
109             0x1104 => [0x1103, 0x1103],
110             0x1108 => [0x1107, 0x1107],
111             0x110A => [0x1109, 0x1109],
112             0x110D => [0x110C, 0x110C],
113             0x1113 => [0x1102, 0x1100],
114             0x1114 => [0x1102, 0x1102],
115             0x1115 => [0x1102, 0x1103],
116             0x1116 => [0x1102, 0x1107],
117             0x1117 => [0x1103, 0x1100],
118             0x1118 => [0x1105, 0x1102],
119             0x1119 => [0x1105, 0x1105],
120             0x111A => [0x1105, 0x1112],
121             0x111B => [0x1105, 0x110B],
122             0x111C => [0x1106, 0x1107],
123             0x111D => [0x1106, 0x110B],
124             0x111E => [0x1107, 0x1100],
125             0x111F => [0x1107, 0x1102],
126             0x1120 => [0x1107, 0x1103],
127             0x1121 => [0x1107, 0x1109],
128             0x1122 => [0x1107, 0x1109, 0x1100],
129             0x1123 => [0x1107, 0x1109, 0x1103],
130             0x1124 => [0x1107, 0x1109, 0x1107],
131             0x1125 => [0x1107, 0x1109, 0x1109],
132             0x1126 => [0x1107, 0x1109, 0x110C],
133             0x1127 => [0x1107, 0x110C],
134             0x1128 => [0x1107, 0x110E],
135             0x1129 => [0x1107, 0x1110],
136             0x112A => [0x1107, 0x1111],
137             0x112B => [0x1107, 0x110B],
138             0x112C => [0x1107, 0x1107, 0x110B],
139             0x112D => [0x1109, 0x1100],
140             0x112E => [0x1109, 0x1102],
141             0x112F => [0x1109, 0x1103],
142             0x1130 => [0x1109, 0x1105],
143             0x1131 => [0x1109, 0x1106],
144             0x1132 => [0x1109, 0x1107],
145             0x1133 => [0x1109, 0x1107, 0x1100],
146             0x1134 => [0x1109, 0x1109, 0x1109],
147             0x1135 => [0x1109, 0x110B],
148             0x1136 => [0x1109, 0x110C],
149             0x1137 => [0x1109, 0x110E],
150             0x1138 => [0x1109, 0x110F],
151             0x1139 => [0x1109, 0x1110],
152             0x113A => [0x1109, 0x1111],
153             0x113B => [0x1109, 0x1112],
154             0x113D => [0x113C, 0x113C],
155             0x113F => [0x113E, 0x113E],
156             0x1141 => [0x110B, 0x1100],
157             0x1142 => [0x110B, 0x1103],
158             0x1143 => [0x110B, 0x1106],
159             0x1144 => [0x110B, 0x1107],
160             0x1145 => [0x110B, 0x1109],
161             0x1146 => [0x110B, 0x1140],
162             0x1147 => [0x110B, 0x110B],
163             0x1148 => [0x110B, 0x110C],
164             0x1149 => [0x110B, 0x110E],
165             0x114A => [0x110B, 0x1110],
166             0x114B => [0x110B, 0x1111],
167             0x114D => [0x110C, 0x110B],
168             0x114F => [0x114E, 0x114E],
169             0x1151 => [0x1150, 0x1150],
170             0x1152 => [0x110E, 0x110F],
171             0x1153 => [0x110E, 0x1112],
172             0x1156 => [0x1111, 0x1107],
173             0x1157 => [0x1111, 0x110B],
174             0x1158 => [0x1112, 0x1112],
175             0x1162 => [0x1161, 0x1175],
176             0x1164 => [0x1163, 0x1175],
177             0x1166 => [0x1165, 0x1175],
178             0x1168 => [0x1167, 0x1175],
179             0x116A => [0x1169, 0x1161],
180             0x116B => [0x1169, 0x1161, 0x1175],
181             0x116C => [0x1169, 0x1175],
182             0x116F => [0x116E, 0x1165],
183             0x1170 => [0x116E, 0x1165, 0x1175],
184             0x1171 => [0x116E, 0x1175],
185             0x1174 => [0x1173, 0x1175],
186             0x1176 => [0x1161, 0x1169],
187             0x1177 => [0x1161, 0x116E],
188             0x1178 => [0x1163, 0x1169],
189             0x1179 => [0x1163, 0x116D],
190             0x117A => [0x1165, 0x1169],
191             0x117B => [0x1165, 0x116E],
192             0x117C => [0x1165, 0x1173],
193             0x117D => [0x1167, 0x1169],
194             0x117E => [0x1167, 0x116E],
195             0x117F => [0x1169, 0x1165],
196             0x1180 => [0x1169, 0x1165, 0x1175],
197             0x1181 => [0x1169, 0x1167, 0x1175],
198             0x1182 => [0x1169, 0x1169],
199             0x1183 => [0x1169, 0x116E],
200             0x1184 => [0x116D, 0x1163],
201             0x1185 => [0x116D, 0x1163, 0x1175],
202             0x1186 => [0x116D, 0x1167],
203             0x1187 => [0x116D, 0x1169],
204             0x1188 => [0x116D, 0x1175],
205             0x1189 => [0x116E, 0x1161],
206             0x118A => [0x116E, 0x1161, 0x1175],
207             0x118B => [0x116E, 0x1165, 0x1173],
208             0x118C => [0x116E, 0x1167, 0x1175],
209             0x118D => [0x116E, 0x116E],
210             0x118E => [0x1172, 0x1161],
211             0x118F => [0x1172, 0x1165],
212             0x1190 => [0x1172, 0x1165, 0x1175],
213             0x1191 => [0x1172, 0x1167],
214             0x1192 => [0x1172, 0x1167, 0x1175],
215             0x1193 => [0x1172, 0x116E],
216             0x1194 => [0x1172, 0x1175],
217             0x1195 => [0x1173, 0x116E],
218             0x1196 => [0x1173, 0x1173],
219             0x1197 => [0x1173, 0x1175, 0x116E],
220             0x1198 => [0x1175, 0x1161],
221             0x1199 => [0x1175, 0x1163],
222             0x119A => [0x1175, 0x1169],
223             0x119B => [0x1175, 0x116E],
224             0x119C => [0x1175, 0x1173],
225             0x119D => [0x1175, 0x119E],
226             0x119F => [0x119E, 0x1165],
227             0x11A0 => [0x119E, 0x116E],
228             0x11A1 => [0x119E, 0x1175],
229             0x11A2 => [0x119E, 0x119E],
230             0x11A9 => [0x11A8, 0x11A8],
231             0x11AA => [0x11A8, 0x11BA],
232             0x11AC => [0x11AB, 0x11BD],
233             0x11AD => [0x11AB, 0x11C2],
234             0x11B0 => [0x11AF, 0x11A8],
235             0x11B1 => [0x11AF, 0x11B7],
236             0x11B2 => [0x11AF, 0x11B8],
237             0x11B3 => [0x11AF, 0x11BA],
238             0x11B4 => [0x11AF, 0x11C0],
239             0x11B5 => [0x11AF, 0x11C1],
240             0x11B6 => [0x11AF, 0x11C2],
241             0x11B9 => [0x11B8, 0x11BA],
242             0x11BB => [0x11BA, 0x11BA],
243             0x11C3 => [0x11A8, 0x11AF],
244             0x11C4 => [0x11A8, 0x11BA, 0x11A8],
245             0x11C5 => [0x11AB, 0x11A8],
246             0x11C6 => [0x11AB, 0x11AE],
247             0x11C7 => [0x11AB, 0x11BA],
248             0x11C8 => [0x11AB, 0x11EB],
249             0x11C9 => [0x11AB, 0x11C0],
250             0x11CA => [0x11AE, 0x11A8],
251             0x11CB => [0x11AE, 0x11AF],
252             0x11CC => [0x11AF, 0x11A8, 0x11BA],
253             0x11CD => [0x11AF, 0x11AB],
254             0x11CE => [0x11AF, 0x11AE],
255             0x11CF => [0x11AF, 0x11AE, 0x11C2],
256             0x11D0 => [0x11AF, 0x11AF],
257             0x11D1 => [0x11AF, 0x11B7, 0x11A8],
258             0x11D2 => [0x11AF, 0x11B7, 0x11BA],
259             0x11D3 => [0x11AF, 0x11B8, 0x11BA],
260             0x11D4 => [0x11AF, 0x11B8, 0x11C2],
261             0x11D5 => [0x11AF, 0x11B8, 0x11BC],
262             0x11D6 => [0x11AF, 0x11BA, 0x11BA],
263             0x11D7 => [0x11AF, 0x11EB],
264             0x11D8 => [0x11AF, 0x11BF],
265             0x11D9 => [0x11AF, 0x11F9],
266             0x11DA => [0x11B7, 0x11A8],
267             0x11DB => [0x11B7, 0x11AF],
268             0x11DC => [0x11B7, 0x11B8],
269             0x11DD => [0x11B7, 0x11BA],
270             0x11DE => [0x11B7, 0x11BA, 0x11BA],
271             0x11DF => [0x11B7, 0x11EB],
272             0x11E0 => [0x11B7, 0x11BE],
273             0x11E1 => [0x11B7, 0x11C2],
274             0x11E2 => [0x11B7, 0x11BC],
275             0x11E3 => [0x11B8, 0x11AF],
276             0x11E4 => [0x11B8, 0x11C1],
277             0x11E5 => [0x11B8, 0x11C2],
278             0x11E6 => [0x11B8, 0x11BC],
279             0x11E7 => [0x11BA, 0x11A8],
280             0x11E8 => [0x11BA, 0x11AE],
281             0x11E9 => [0x11BA, 0x11AF],
282             0x11EA => [0x11BA, 0x11B8],
283             0x11EC => [0x11BC, 0x11A8],
284             0x11ED => [0x11BC, 0x11A8, 0x11A8],
285             0x11EE => [0x11BC, 0x11BC],
286             0x11EF => [0x11BC, 0x11BF],
287             0x11F1 => [0x11F0, 0x11BA],
288             0x11F2 => [0x11F0, 0x11EB],
289             0x11F3 => [0x11C1, 0x11B8],
290             0x11F4 => [0x11C1, 0x11BC],
291             0x11F5 => [0x11C2, 0x11AB],
292             0x11F6 => [0x11C2, 0x11AF],
293             0x11F7 => [0x11C2, 0x11B7],
294             0x11F8 => [0x11C2, 0x11B8],
295             );
296              
297             foreach my $char (sort {$a <=> $b} keys %Decomp) {
298             $char or croak("$PACKAGE : composition to NULL is not allowed");
299             my @dec = @{ $Decomp{$char} };
300             @dec == 2 || @dec == 3 or
301             croak(sprintf("$PACKAGE : weird decomposition [%04X]", $char));
302             if (@dec == 2) {
303             $Map12{"$dec[0];$dec[1]"} = $char;
304             } else {
305             $Map123{"$dec[0];$dec[1];$dec[2]"} = $char;
306             }
307             }
308              
309             #####
310              
311             sub getSyllableType($) {
312 251     251 1 742 my $u = shift;
313             return
314 251 100 100     2568 JamoLIni <= $u && $u <= JamoLFin || $u == JamoLFill ? "L" :
    100 100        
    100 100        
    100 100        
    100          
315             JamoVIni <= $u && $u <= JamoVFin ? "V" :
316             JamoTIni <= $u && $u <= JamoTFin ? "T" :
317             SBase <= $u && $u <= SFinal ?
318             ($u - SBase) % TCount ? "LVT" : "LV" : "NA";
319             }
320              
321             my %Fillers = (
322             "LT" => [ 0x1160, 0x115F, 0x1160 ],
323             "LNA" => [ 0x1160 ],
324             "TV" => [ 0x115F ],
325             "LVTV" => [ 0x115F ],
326             "NAV" => [ 0x115F ],
327             "NAT" => [ 0x115F, 0x1160 ],
328             );
329              
330             sub isStandardForm($) {
331 50     50 1 624 my $str = shift(@_).pack('U*');
332              
333 50         65 my $ptype = 'NA';
334 50         172 foreach my $ch (unpack('U*', $str)) {
335 109         180 my $ctype = getSyllableType($ch);
336 109 100       344 return "" if $Fillers{"$ptype$ctype"};
337 96         234 $ptype = $ctype;
338             }
339 37 100       192 return $ptype eq "L" ? "" : 1;
340             }
341              
342             sub insertFiller($) {
343 51     51 1 8148 my $str = shift(@_).pack('U*');
344 51         72 my $ptype = 'NA';
345 51         59 my(@ret);
346 51         226 foreach my $ch (unpack('U*', $str)) {
347 110         163 my $ctype = getSyllableType($ch);
348 13         32 $Fillers{"$ptype$ctype"} and
349 110 100       252 push(@ret, @{ $Fillers{"$ptype$ctype"} });
350 110         116 push @ret, $ch;
351 110         214 $ptype = $ctype;
352             }
353 51 100       121 $ptype eq "L" and push(@ret, @{ $Fillers{"LNA"} });
  4         12  
354 51         279 return pack('U*', @ret);
355             }
356              
357             sub getHangulName ($) {
358 11184     11184 1 44410 my $u = shift;
359 11184 100       16227 return undef unless &$IsS($u);
360 11177         15281 my $sindex = $u - SBase;
361 11177         12846 my $lindex = int( $sindex / NCount);
362 11177         16360 my $vindex = int(($sindex % NCount) / TCount);
363 11177         10312 my $tindex = $sindex % TCount;
364 11177         30946 return "$BlockName$JamoL[$lindex]$JamoV[$vindex]$JamoT[$tindex]";
365             }
366              
367             sub parseHangulName ($) {
368 11205     11205 1 12318 my $arg = shift;
369 11205 100       32438 return undef unless $arg =~ s/$BlockName//o;
370 11196 100       37520 return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/;
371 11193 100 100     69395 return undef unless exists $CodeL{$1}
      100        
372             && exists $CodeV{$2} && exists $CodeT{$3};
373 11177         41338 return SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3};
374             }
375              
376             sub getHangulComposite ($$) {
377 13 100 100 13 1 401 if (&$IsL($_[0]) && &$IsV($_[1])) {
378 2         4 my $lindex = $_[0] - LBase;
379 2         3 my $vindex = $_[1] - VBase;
380 2         11 return (SBase + ($lindex * VCount + $vindex) * TCount);
381             }
382 11 100 100     29 if (&$IsLV($_[0]) && &$IsT($_[1])) {
383 2         10 return($_[0] + $_[1] - TBase);
384             }
385 9         34 return undef;
386             }
387              
388             sub decomposeJamo ($) {
389 274     274 1 15480 my $str = shift(@_).pack('U*');
390 274         330 my(@ret);
391 274         613 foreach my $ch (unpack('U*', $str)) {
392 297 100       991 push @ret, $Decomp{$ch} ? @{ $Decomp{$ch} } : ($ch);
  197         660  
393             }
394 274         1398 return pack('U*', @ret);
395             }
396              
397             sub decomposeSyllable ($) {
398 11190     11190 1 67516 my $str = shift(@_).pack('U*');
399 11190         11461 my(@ret);
400 11190         21788 foreach my $ch (unpack('U*', $str)) {
401 11202         19363 my @r = decomposeHangul($ch);
402 11202 100       38898 push @ret, @r ? @r : ($ch);
403             }
404 11190         39247 return pack('U*', @ret);
405             }
406              
407             sub decomposeHangul ($) {
408 22383     22383 1 63154 my $code = shift;
409 22383 100       39795 return unless &$IsS($code);
410 22364         32079 my $sindex = $code - SBase;
411 22364         28936 my $lindex = int( $sindex / NCount);
412 22364         36515 my $vindex = int(($sindex % NCount) / TCount);
413 22364         23240 my $tindex = $sindex % TCount;
414 22364 100       55794 my @ret = (
415             LBase + $lindex,
416             VBase + $vindex,
417             $tindex ? (TBase + $tindex) : (),
418             );
419 22364 100       73140 wantarray ? @ret : pack('U*', @ret);
420             }
421              
422             sub composeJamo ($) {
423 265     265 1 16697 my $str = shift(@_).pack('U*');
424 265         815 my @tmp = unpack('U*', $str);
425 265         1462 for (my $i = 0; $i < @tmp; $i++) {
426 271 100       585 next unless &$IsJ($tmp[$i]);
427              
428 264 100 100     4481 if ($tmp[$i + 2] && $Map123{"$tmp[$i];$tmp[$i+1];$tmp[$i+2]"}) {
    100 100        
429 31         91 $tmp[$i] = $Map123{"$tmp[$i];$tmp[$i+1];$tmp[$i+2]"};
430 31         58 $tmp[$i+1] = $tmp[$i+2] = undef;
431 31         85 $i += 2;
432             }
433             elsif ($tmp[$i + 1] && $Map12{"$tmp[$i];$tmp[$i+1]"}) {
434 162         1210 $tmp[$i] = $Map12{"$tmp[$i];$tmp[$i+1]"};
435 162         232 $tmp[$i+1] = undef;
436 162         438 $i ++;
437             }
438             }
439 265         1419 return pack 'U*', grep defined, @tmp;
440             }
441              
442             sub composeSyllable ($) {
443 22370     22370 1 33271 my $str = shift(@_).pack('U*');
444 22370         21961 my(@ret);
445 22370         42732 foreach my $ch (unpack('U*', $str)) {
446 66310 100 50     154014 push(@ret, $ch) and next unless @ret;
447              
448             # 1. check to see if $ret[-1] is L and $ch is V.
449              
450 43943 100 100     75772 if (&$IsL($ret[-1]) && &$IsV($ch)) {
451 22355         25172 $ret[-1] -= LBase; # LIndex
452 22355         22368 $ch -= VBase; # VIndex
453 22355         35730 $ret[-1] = SBase + ($ret[-1] * VCount + $ch) * TCount;
454 22355         43103 next; # discard $ch
455             }
456              
457             # 2. check to see if $ret[-1] is LV and $ch is T.
458              
459 21588 100 100     46882 if (&$IsLV($ret[-1]) && &$IsT($ch)) {
460 21555         27861 $ret[-1] += $ch - TBase; # + TIndex
461 21555         48857 next; # discard $ch
462             }
463              
464             # 3. just append $ch
465 33         65 push(@ret, $ch);
466             }
467 22370         71273 return pack('U*', @ret);
468             }
469              
470             ##### The below part is common to XS and PP #####
471              
472 9     9 1 150 sub decomposeFull ($) { decomposeJamo(decomposeSyllable(shift)) }
473              
474             sub composeHangul ($) {
475 11189     11189 1 19299 my $ret = composeSyllable(shift);
476 11189 100       32964 wantarray ? unpack('U*', $ret) : $ret;
477             }
478              
479             1;
480             __END__
481              
482             =head1 NAME
483              
484             Lingua::KO::Hangul::Util - utility functions for Hangul in Unicode
485              
486             =head1 SYNOPSIS
487              
488             use Lingua::KO::Hangul::Util qw(:all);
489              
490             decomposeSyllable("\x{AC00}"); # "\x{1100}\x{1161}"
491             composeSyllable("\x{1100}\x{1161}"); # "\x{AC00}"
492             decomposeJamo("\x{1101}"); # "\x{1100}\x{1100}"
493             composeJamo("\x{1100}\x{1100}"); # "\x{1101}"
494              
495             getHangulName(0xAC00); # "HANGUL SYLLABLE GA"
496             parseHangulName("HANGUL SYLLABLE GA"); # 0xAC00
497              
498             =head1 DESCRIPTION
499              
500             A Hangul syllable consists of Hangul jamo (Hangul letters).
501              
502             Hangul letters are classified into three classes:
503              
504             CHOSEONG (the initial sound) as a leading consonant (L),
505             JUNGSEONG (the medial sound) as a vowel (V),
506             JONGSEONG (the final sound) as a trailing consonant (T).
507              
508             Any Hangul syllable is a composition of (i) L + V, or (ii) L + V + T.
509              
510             =head2 Composition and Decomposition
511              
512             =over 4
513              
514             =item C<$resultant_string = decomposeSyllable($string)>
515              
516             It decomposes a precomposed syllable (C<LV> or C<LVT>)
517             to a sequence of conjoining jamo (C<L + V> or C<L + V + T>)
518             and returns the result as a string.
519              
520             Any characters other than Hangul syllables are not affected.
521              
522             =item C<$resultant_string = composeSyllable($string)>
523              
524             It composes a sequence of conjoining jamo (C<L + V> or C<L + V + T>)
525             to a precomposed syllable (C<LV> or C<LVT>) if possible,
526             and returns the result as a string.
527             A syllable C<LV> and final jamo C<T> are also composed.
528              
529             Any characters other than Hangul jamo and syllables are not affected.
530              
531             =item C<$resultant_string = decomposeJamo($string)>
532              
533             It decomposes a complex jamo to a sequence of simple jamo if possible,
534             and returns the result as a string.
535             Any characters other than complex jamo are not affected.
536              
537             e.g.
538             CHOSEONG SIOS-PIEUP to CHOSEONG SIOS + PIEUP
539             JUNGSEONG AE to JUNGSEONG A + I
540             JUNGSEONG WE to JUNGSEONG U + EO + I
541             JONGSEONG SSANGSIOS to JONGSEONG SIOS + SIOS
542              
543             =item C<$resultant_string = composeJamo($string)>
544              
545             It composes a sequence of simple jamo (C<L1 + L2>, C<V1 + V2 + V3>, etc.)
546             to a complex jamo if possible,
547             and returns the result as a string.
548             Any characters other than simple jamo are not affected.
549              
550             e.g.
551             CHOSEONG SIOS + PIEUP to CHOSEONG SIOS-PIEUP
552             JUNGSEONG A + I to JUNGSEONG AE
553             JUNGSEONG U + EO + I to JUNGSEONG WE
554             JONGSEONG SIOS + SIOS to JONGSEONG SSANGSIOS
555              
556             =item C<$resultant_string = decomposeFull($string)>
557              
558             It decomposes a syllable/complex jamo to a sequence of simple jamo.
559             Equivalent to C<decomposeJamo(decomposeSyllable($string))>.
560              
561             =back
562              
563             =head2 Composition and Decomposition (Old-interface, deprecated!)
564              
565             =over 4
566              
567             =item C<$string_decomposed = decomposeHangul($code_point)>
568              
569             =item C<@codepoints = decomposeHangul($code_point)>
570              
571             If the specified code point is of a Hangul syllable,
572             it returns a list of code points (in a list context)
573             or a string (in a scalar context) of its decomposition.
574              
575             decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA.
576             returns "\x{1100}\x{1161}" or (0x1100, 0x1161);
577              
578             decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL.
579             returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF);
580              
581             Otherwise, returns false (empty string or empty list).
582              
583             decomposeHangul(0x0041) # outside Hangul syllables
584             returns empty string or empty list.
585              
586             =item C<$string_composed = composeHangul($src_string)>
587              
588             =item C<@code_points_composed = composeHangul($src_string)>
589              
590             Any sequence of an initial jamo C<L> and a medial jamo C<V>
591             is composed to a syllable C<LV>;
592             then any sequence of a syllable C<LV> and a final jamo C<T>
593             is composed to a syllable C<LVT>.
594              
595             Any characters other than Hangul jamo and syllables are not affected.
596              
597             composeHangul("\x{1100}\x{1173}\x{11AF}.")
598             # returns "\x{AE00}." or (0xAE00,0x2E);
599              
600             =item C<$code_point_composite = getHangulComposite($code_point_here, $code_point_next)>
601              
602             It returns the codepoint of the composite
603             if both two code points, C<$code_point_here> and C<$code_point_next>,
604             are in Hangul, and composable.
605              
606             Otherwise, returns C<undef>.
607              
608             =back
609              
610             =head2 Hangul Syllable Name
611              
612             The following functions handle only a precomposed Hangul syllable
613             (from C<U+AC00> to C<U+D7A3>), but not a Hangul jamo
614             or other Hangul-related character.
615              
616             Names of Hangul syllables have a format of C<"HANGUL SYLLABLE %s">.
617              
618             =over 4
619              
620             =item C<$name = getHangulName($code_point)>
621              
622             If the specified code point is of a Hangul syllable,
623             it returns its name; otherwise it returns undef.
624              
625             getHangulName(0xAC00) returns "HANGUL SYLLABLE GA";
626             getHangulName(0x0041) returns undef.
627              
628             =item C<$codepoint = parseHangulName($name)>
629              
630             If the specified name is of a Hangul syllable,
631             it returns its code point; otherwise it returns undef.
632              
633             parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00;
634              
635             parseHangulName("LATIN SMALL LETTER A") returns undef;
636              
637             parseHangulName("HANGUL SYLLABLE PERL") returns undef;
638             # Regrettably, HANGUL SYLLABLE PERL does not exist :-)
639              
640             =back
641              
642             =head2 Standard Korean Syllable Block
643              
644             A standard Korean syllable block consists of C<L+ V+ T*>
645             (a sequence of one or more L, one or more V, and zero or more T)
646             according to the conjoining jamo behavior revised in Unicode 3.2 (cf. UAX #28).
647             A sequence of C<L> followed by C<T> is not a syllable block without C<V>,
648             but consists of two nonstandard syllable blocks: one without C<V>, and another
649             without C<L> and C<V>.
650              
651             =over 4
652              
653             =item C<$bool = isStandardForm($string)>
654              
655             It returns a boolean indicating whether the string is encoded in
656             the standard form, that is, whether it is free of nonstandard sequences.
657             It returns true only if the string contains no nonstandard sequence.
658              
659             =item C<$resultant_string = insertFiller($string)>
660              
661             It transforms the string into standard form by inserting fillers
662             into each syllable block that needs them and returns the result as a string.
663             Choseong filler (C<Lf>, C<U+115F>) is inserted into a syllable block
664             without C<L>. Jungseong filler (C<Vf>, C<U+1160>) is inserted into
665             a syllable block without C<V>.
666              
667             =item C<$type = getSyllableType($code_point)>
668              
669             It returns the Hangul syllable type (cf. F<HangulSyllableType.txt>)
670             for the specified code point as a string:
671             C<"L"> for leading jamo, C<"V"> for vowel jamo, C<"T"> for trailing jamo,
672             C<"LV"> for LV syllables, C<"LVT"> for LVT syllables, and C<"NA">
673             for other code points (as B<N>ot B<A>pplicable).
674              
675             =back
676              
677             =head1 EXPORT
678              
679             By default:
680              
681             decomposeHangul
682             composeHangul
683             getHangulName
684             parseHangulName
685             getHangulComposite
686              
687             On request:
688              
689             decomposeSyllable
690             composeSyllable
691             decomposeJamo
692             composeJamo
693             decomposeFull
694             isStandardForm
695             insertFiller
696             getSyllableType
697              
698             =head1 CAVEAT
699              
700             This module does not support Hangul jamo assigned in Unicode 5.2.0 (2009).
701              
702             A list of Hangul characters this module supports:
703              
704             1100..1159 ; 1.1 # [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
705             115F..11A2 ; 1.1 # [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
706             11A8..11F9 ; 1.1 # [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
707             AC00..D7A3 ; 2.0 # [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
708              
709             =head1 AUTHOR
710              
711             SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
712              
713             Copyright(C) 2001, 2003, 2005, SADAHIRO Tomoyuki. Japan.
714             All rights reserved.
715              
716             This module is free software; you can redistribute it
717             and/or modify it under the same terms as Perl itself.
718              
719             =head1 SEE ALSO
720              
721             =over 4
722              
723             =item Unicode Normalization Forms (UAX #15)
724              
725             L<http://www.unicode.org/reports/tr15/>
726              
727             =item Conjoining Jamo Behavior (revision) in UAX #28
728              
729             L<http://www.unicode.org/reports/tr28/#3_11_conjoining_jamo_behavior>
730              
731             =item Hangul Syllable Type
732              
733             L<http://www.unicode.org/Public/UNIDATA/HangulSyllableType.txt>
734              
735             =item Jamo Decomposition in Old Unicode
736              
737             L<http://www.unicode.org/Public/2.1-Update3/UnicodeData-2.1.8.txt>
738              
739             =item ISO/IEC JTC1/SC22/WG20 N954
740              
741             Paper by K. KIM:
742             New canonical decomposition and composition processes for Hangeul
743              
744             L<http://std.dkuug.dk/JTC1/SC22/WG20/docs/N954.PDF>
745              
746             (summary: L<http://std.dkuug.dk/JTC1/SC22/WG20/docs/N953.PDF>)
747             (cf. L<http://std.dkuug.dk/JTC1/SC22/WG20/docs/documents.html>)
748              
749             =back
750              
751             =cut