File Coverage

blib/lib/Lingua/MY/Zawgyi2Unicode.pm
Criterion Covered Total %
statement 143 144 99.3
branch 3 4 75.0
condition n/a
subroutine 9 9 100.0
pod 2 3 66.6
total 157 160 98.1


line stmt bran cond sub pod time code
package Lingua::MY::Zawgyi2Unicode;

# Strictures are enabled before anything else so that the export-list
# assignments below are compiled under strict and warnings too.
use strict;
use warnings;

use Readonly;
use Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Inherit import() from Exporter. The three public functions are exported
# by default (@EXPORT) and can also be requested by name (@EXPORT_OK).
@ISA = qw(Exporter);
@EXPORT = qw(isZawgyi isBurmese convert);
@EXPORT_OK = qw(isZawgyi isBurmese convert);

our $VERSION = '0.001';
15              
16              
17             =head1 NAME
18              
19             Lingua::MY::Zawgyi2Unicode - convert Burmese text from Zawgyi encoding to Unicode (UTF-8)
20              
21             =head1 VERSION
22              
23             0.001
24              
25             =head1 SYNOPSIS
26              
27             use Lingua::MY::Zawgyi2Unicode;
28              
29             # /.../
30              
31             # check if the $string is Burmese (fast operation)
32             # and if so, also check if the $string is in
33             # zawgyi encoding.
34              
35             if (isBurmese($string) and isZawgyi($string)) {
36             $string = convert($string);
37             }
38              
39             =head1 DESCRIPTION
40              
41             A Perl implementation to convert Burmese text in Zawgyi to Unicode (UTF-8). Inspiration, algorithms, and bits of code have been cherry-picked from the L project and L project.
42              
43             =head1 FUNCTIONS
44              
45             =head2 isBurmese
46              
47             Check if a string is Burmese text, either Zawgyi or Unicode. Considered a quick operation.
48              
49             =head2 isZawgyi
50              
51             Check if a string is Zawgyi. This function is slower than isBurmese, so it makes sense to check with isBurmese before calling this.
52              
53             =head2 convert
54              
55             This function converts the supplied string to Unicode and returns the result. Do not call this function with a (Burmese) Unicode string; if uncertain of the encoding, check with the above is-functions.
56              
57             =head1 SOURCE
58              
59             L
60              
61              
62             =head1 HOMEPAGE
63              
64             L
65              
66              
67             =head1 AUTHOR
68              
69             Joakim Lagerqvist, C<< >>
70              
71             =head1 BUGS
72              
73             Please report any bugs or feature requests to C, or through
74             the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
75              
76              
77             =head1 COPYRIGHT & LICENSE
78              
79             Copyright 2009 Joakim Lagerqvist, all rights reserved.
80             This program is free software; you can redistribute it and/or modify it
81             under the same terms as Perl itself.
82              
83             =cut
84              
85              
# Detection pattern used by isZawgyi(): an alternation of code point
# sequences, each one treated as evidence that a word is Zawgyi-encoded.
# The per-alternative notes below name the sequence being matched.
#
# Maintenance notes:
#   * The pattern is compiled with plain /x. Whitespace between tokens is
#     ignored, but a blank inside a bracketed class is still a literal
#     space, so the "... + medial ra" class below also accepts a space in
#     front of U+103B.
#     NOTE(review): this looks intentional, because isZawgyi() prepends a
#     space to every word after the first - confirm before removing it.
#   * Every alternative except the last must end with "|". A missing
#     separator silently glues two rules into one longer sequence.
#   * Keep the regex delimiter and sigil characters out of the comments
#     inside the pattern; they are still seen by the quote-like parser.
Readonly my $zawgyire => qr/
    # e + medial ra
    \x{1031}\x{103b} |
    # beginning e or medial ra
    ^\x{1031} | ^\x{103b} |
    # independent vowel, dependent vowel, tone, medial ra wa ha (no ya
    # because 103a+103b is valid in unicode), digit,
    # symbol + medial ra
    [\x{1022}-\x{1030}\x{1032}-\x{1039}\x{103b}-\x{103d}\x{1040}-\x{104f} ] \x{103b} |
    # end with asat
    \x{1039}$ |
    # medial ha + medial wa
    \x{103d}\x{103c} |
    # medial ra + medial wa
    \x{103b}\x{103c} |
    # consonant + asat + ya ra wa ha independent vowel e dot below
    # visarga asat medial ra digit symbol
    [\x{1000}-\x{1021}]\x{1039}[\x{101a}\x{101b}\x{101d}\x{101f}\x{1022}-\x{102a}\x{1031}\x{1037}-\x{1039}\x{103b}\x{1040}-\x{104f}] |
    # II + I, II, ae
    \x{102e}[\x{102d}\x{103e}\x{1032}] |
    # ae + I II
    \x{1032}[\x{102d}\x{102e}] |
    # I II, II I, I I, II II
    [\x{102d}\x{102e}][\x{102d}\x{102e}] |
    # shan digit + vowel
    [\x{1090}-\x{1099}][\x{102b}-\x{1030}\x{1032}\x{1037}\x{103c}-\x{103e}] |
    # consonant + medial ya + dependent vowel tone asat
    [\x{1000}-\x{102a}]\x{103a}[\x{102c}-\x{102e}\x{1032}-\x{1036}] |
    # independent vowel, dependent vowel, tone, digit + e
    # (medials deliberately not included)
    [\x{1023}-\x{1030}\x{1032}-\x{1039}\x{1040}-\x{104f}]\x{1031} |
    # other shapes of medial ra + consonant not in Shan consonant
    [\x{107e}-\x{1084}][\x{1001}\x{1003}\x{1005}-\x{100f}\x{1012}-\x{1014}\x{1016}-\x{1018}\x{101f}] |
    # u + asat
    \x{1025}\x{1039} |
    # eain-dray
    [\x{1081}\x{1083}]\x{108f} |
    # short na + stack characters
    # (the trailing separator was missing here, which fused this rule with
    # the next one so that neither could match on its own)
    \x{108f}[\x{1060}-\x{108d}] |
    # I, II, ae, dot below, dot above + asat (typing error)
    [\x{102d}-\x{1030}\x{1032}\x{1036}\x{1037}]\x{1039} |
    # aa + asat
    \x{102c}\x{1039} |
    # ya + medial wa
    \x{101b}\x{103c} |
    # non digit + zero + i vowel (zero + i vowel is valid in numeric usage)
    [^\x{1040}-\x{1049}]\x{1040}\x{102d} |
    # e + zero + vowel
    \x{1031}?\x{1040}[\x{102b}\x{105a}\x{102e}-\x{1030}\x{1032}\x{1036}-\x{1038}] |
    # e + seven + vowel
    \x{1031}?\x{1047}[\x{102c}-\x{1030}\x{1032}\x{1036}-\x{1038}] |
    # U, UU, AI + (zawgyi) dot below
    [\x{102f}\x{1030}\x{1032}]\x{1094} |
    # virama + (zawgyi) medial ra
    \x{1039}[\x{107e}-\x{1084}]
/x;
140              
# isBurmese($str)
#
# Quick check for Burmese text: reports whether $str contains at least one
# code point in the range U+1000..U+1021. The check does not distinguish
# Zawgyi from Unicode encoded text.
#
# Parameters:
#   $str - the string to inspect; may be undefined.
#
# Returns 1 when such a code point is present, 0 otherwise (including for
# an undefined or empty argument).
sub isBurmese {
    my $str = shift;

    # An undefined argument cannot contain Burmese text; answer directly
    # instead of triggering an "uninitialized value" warning in the match.
    return 0 unless defined $str;

    return $str =~ /[\x{1000}-\x{1021}]/ ? 1 : 0;
}
148              
# isZawgyi($str)
#
# Reports whether $str looks Zawgyi-encoded. The text is cut into segments
# at line breaks, tabs and the listed Unicode space characters; each segment
# is then cut into words at whitespace. Every word is tested against the
# file-level pattern $zawgyire, and every word after the first one in a
# segment is tested with a single leading space put back in front of it.
#
# Returns 1 on the first matching word, 0 when nothing matches.
sub isZawgyi {
    my ($text) = @_;

    my $segment_break = qr/[\f\n\r\t\v\x{00a0}\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}\x{feff}]/;

    foreach my $segment (split $segment_break, $text) {
        my @tokens = split /\s/, $segment;
        foreach my $index (0 .. $#tokens) {
            # Only the first word of a segment is tested without a leading space.
            my $lead      = $index == 0 ? '' : ' ';
            my $candidate = $lead . $tokens[$index];
            return 1 if $candidate =~ /$zawgyire/;
        }
    }

    return 0;
}
165              
# convert($zawgyi)
#
# Rewrites a Zawgyi-encoded string into Unicode order and code points by
# applying a fixed sequence of global substitutions (rule set taken from
# the Rabbit converter). The rules are order dependent: many of them
# consume the output of earlier ones, so do not reorder or merge them.
#
# Parameters:
#   $zawgyi - the Zawgyi-encoded text. The caller's variable is not
#             modified; the substitutions run on a private copy.
#
# Returns the converted string.
sub convert {
    my ($zawgyi) = @_;

    # Optional capture groups (for example the second and third group of the
    # "e + consonant" rule) are undefined when they do not take part in a
    # match and are still interpolated into the replacement text.
    no warnings 'uninitialized';

    # Drop zero-width spaces.
    $zawgyi =~ s/\x{200b}//g;

    # Shift the medial and asat code points one slot at a time. Each rule
    # frees the code point that the following rule writes to, hence the
    # descending order.
    $zawgyi =~ s/(\x{103d}|\x{1087})/\x{103e}/g;
    $zawgyi =~ s/\x{103c}/\x{103d}/g;
    $zawgyi =~ s/(\x{103b}|\x{107e}|\x{107f}|\x{1080}|\x{1081}|\x{1082}|\x{1083}|\x{1084})/\x{103c}/g;
    $zawgyi =~ s/(\x{103a}|\x{107d})/\x{103b}/g;
    $zawgyi =~ s/\x{1039}/\x{103a}/g;

    # Expand code points from the U+1060..U+1097 block into U+1039 plus a
    # base letter (or another explicit sequence).
    $zawgyi =~ s/(\x{1066}|\x{1067})/\x{1039}\x{1006}/g;
    $zawgyi =~ s/\x{106a}/\x{1009}/g;
    $zawgyi =~ s/\x{106b}/\x{100a}/g;
    $zawgyi =~ s/\x{106c}/\x{1039}\x{100b}/g;
    $zawgyi =~ s/\x{106d}/\x{1039}\x{100c}/g;
    $zawgyi =~ s/\x{106e}/\x{100d}\x{1039}\x{100d}/g;
    $zawgyi =~ s/\x{106f}/\x{100d}\x{1039}\x{100e}/g;
    $zawgyi =~ s/\x{1070}/\x{1039}\x{100f}/g;
    $zawgyi =~ s/(\x{1071}|\x{1072})/\x{1039}\x{1010}/g;
    $zawgyi =~ s/\x{1060}/\x{1039}\x{1000}/g;
    $zawgyi =~ s/\x{1061}/\x{1039}\x{1001}/g;
    $zawgyi =~ s/\x{1062}/\x{1039}\x{1002}/g;
    $zawgyi =~ s/\x{1063}/\x{1039}\x{1003}/g;
    $zawgyi =~ s/\x{1065}/\x{1039}\x{1005}/g;
    $zawgyi =~ s/\x{1068}/\x{1039}\x{1007}/g;
    $zawgyi =~ s/\x{1069}/\x{1039}\x{1008}/g;
    $zawgyi =~ s/(\x{1073}|\x{1074})/\x{1039}\x{1011}/g;
    $zawgyi =~ s/\x{1075}/\x{1039}\x{1012}/g;
    $zawgyi =~ s/\x{1076}/\x{1039}\x{1013}/g;
    $zawgyi =~ s/\x{1077}/\x{1039}\x{1014}/g;
    $zawgyi =~ s/\x{1078}/\x{1039}\x{1015}/g;
    $zawgyi =~ s/\x{1079}/\x{1039}\x{1016}/g;
    $zawgyi =~ s/\x{107a}/\x{1039}\x{1017}/g;
    $zawgyi =~ s/\x{107c}/\x{1039}\x{1019}/g;
    $zawgyi =~ s/\x{1085}/\x{1039}\x{101c}/g;
    $zawgyi =~ s/\x{1033}/\x{102f}/g;
    $zawgyi =~ s/\x{1034}/\x{1030}/g;
    $zawgyi =~ s/\x{103f}/\x{1030}/g;
    $zawgyi =~ s/\x{1086}/\x{103f}/g;
    $zawgyi =~ s/\x{1036}\x{1088}/\x{1088}\x{1036}/g;
    $zawgyi =~ s/\x{1088}/\x{103e}\x{102f}/g;
    $zawgyi =~ s/\x{1089}/\x{103e}\x{1030}/g;
    $zawgyi =~ s/\x{108a}/\x{103d}\x{103e}/g;

    # Marks written after a base letter that expand to a U+1004 U+103A
    # U+1039 prefix placed in front of that letter.
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{1064}/\x{1004}\x{103a}\x{1039}$1/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{108b}/\x{1004}\x{103a}\x{1039}$1\x{102d}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{108c}/\x{1004}\x{103a}\x{1039}$1\x{102e}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{108d}/\x{1004}\x{103a}\x{1039}$1\x{1036}/g;
    $zawgyi =~ s/\x{108e}/\x{102d}\x{1036}/g;
    $zawgyi =~ s/\x{108f}/\x{1014}/g;
    $zawgyi =~ s/\x{1090}/\x{101b}/g;
    $zawgyi =~ s/\x{1091}/\x{100f}\x{1039}\x{100d}/g;
    $zawgyi =~ s/\x{1019}\x{102c}(\x{107b}|\x{1093})/\x{1019}\x{1039}\x{1018}\x{102c}/g;
    $zawgyi =~ s/(\x{107b}|\x{1093})/\x{1039}\x{1018}/g;
    $zawgyi =~ s/(\x{1094}|\x{1095})/\x{1037}/g;
    $zawgyi =~ s/\x{1096}/\x{1039}\x{1010}\x{103d}/g;
    $zawgyi =~ s/\x{1097}/\x{100b}\x{1039}\x{100b}/g;

    # Move U+103C from in front of its base letter to behind it.
    $zawgyi =~ s/\x{103c}([\x{1000}-\x{1021}])([\x{1000}-\x{1021}])?/$1\x{103c}$2/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{103c}\x{103a}/\x{103c}$1\x{103a}/g;

    # Digit seven (U+1047) and digit zero (U+1040) typed in place of the
    # letters U+101B and U+101D: replace them where the context shows a
    # letter was meant.
    $zawgyi =~ s/\x{1047}(?=[\x{102c}-\x{1030}\x{1032}\x{1036}-\x{1038}\x{103d}\x{1038}])/\x{101b}/g;
    $zawgyi =~ s/\x{1031}\x{1047}/\x{1031}\x{101b}/g;
    $zawgyi =~ s/\x{1040}(\x{102e}|\x{102f}|\x{102d}\x{102f}|\x{1030}|\x{1036}|\x{103d}|\x{103e})/\x{101d}$1/g;
    $zawgyi =~ s/([^\x{1040}\x{1041}\x{1042}\x{1043}\x{1044}\x{1045}\x{1046}\x{1047}\x{1048}\x{1049}])\x{1040}\x{102b}/$1\x{101d}\x{102b}/g;
    $zawgyi =~ s/([\x{1040}\x{1041}\x{1042}\x{1043}\x{1044}\x{1045}\x{1046}\x{1047}\x{1048}\x{1049}])\x{1040}\x{102b}(?!\x{1038})/$1\x{101d}\x{102b}/g;
    $zawgyi =~ s/^\x{1040}(?=\x{102b})/\x{101d}/g;
    $zawgyi =~ s/\x{1040}\x{102d}(?!\x{0020}?\/)/\x{101d}\x{102d}/g;
    $zawgyi =~ s/([^\x{1040}-\x{1049}])\x{1040}([^\x{1040}-\x{1049}]|[\x{104a}\x{104b}])/$1\x{101d}$2/g;
    # Zero directly before a form feed, newline or carriage return. The
    # class previously read [\\f\\n\\r], which matches a literal backslash
    # or the letters f, n and r instead of the control characters.
    $zawgyi =~ s/([^\x{1040}-\x{1049}])\x{1040}(?=[\f\n\r])/$1\x{101d}/g;
    $zawgyi =~ s/([^\x{1040}-\x{1049}])\x{1040}$/$1\x{101d}/g;

    # Move U+1031 from in front of the base letter to behind the letter and
    # any marks captured with it.
    $zawgyi =~ s/\x{1031}([\x{1000}-\x{1021}])(\x{103e})?(\x{103b})?/$1$2$3\x{1031}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{1031}([\x{103b}\x{103c}\x{103d}\x{103e}]+)/$1$2\x{1031}/g;

    # Reorder adjacent marks, collapse duplicates and patch individual
    # letter sequences.
    $zawgyi =~ s/\x{1032}\x{103d}/\x{103d}\x{1032}/g;
    $zawgyi =~ s/\x{103d}\x{103b}/\x{103b}\x{103d}/g;
    $zawgyi =~ s/\x{103a}\x{1037}/\x{1037}\x{103a}/g;
    $zawgyi =~ s/\x{102f}(\x{102d}|\x{102e}|\x{1036}|\x{1037})\x{102f}/\x{102f}$1/g;
    $zawgyi =~ s/\x{102f}\x{102f}/\x{102f}/g;
    $zawgyi =~ s/(\x{102f}|\x{1030})(\x{102d}|\x{102e})/$2$1/g;
    $zawgyi =~ s/(\x{103e})(\x{103b}|\x{103c})/$2$1/g;
    $zawgyi =~ s/\x{1025}(\x{103a}|\x{102c})/\x{1009}$1/g;
    $zawgyi =~ s/\x{1025}\x{102e}/\x{1026}/g;
    $zawgyi =~ s/\x{1005}\x{103b}/\x{1008}/g;
    $zawgyi =~ s/\x{1036}(\x{102f}|\x{1030})/$1\x{1036}/g;
    $zawgyi =~ s/\x{1031}\x{1037}\x{103e}/\x{103e}\x{1031}\x{1037}/g;
    $zawgyi =~ s/\x{1031}\x{103e}\x{102c}/\x{103e}\x{1031}\x{102c}/g;
    $zawgyi =~ s/\x{105a}/\x{102b}\x{103a}/g;
    $zawgyi =~ s/\x{1031}\x{103b}\x{103e}/\x{103b}\x{103e}\x{1031}/g;
    $zawgyi =~ s/(\x{102d}|\x{102e})(\x{103d}|\x{103e})/$2$1/g;
    $zawgyi =~ s/\x{102c}\x{1039}([\x{1000}-\x{1021}])/\x{1039}$1\x{102c}/g;
    $zawgyi =~ s/\x{103c}\x{1004}\x{103a}\x{1039}([\x{1000}-\x{1021}])/\x{1004}\x{103a}\x{1039}$1\x{103c}/g;
    $zawgyi =~ s/\x{1039}\x{103c}\x{103a}\x{1039}([\x{1000}-\x{1021}])/\x{103a}\x{1039}$1\x{103c}/g;
    $zawgyi =~ s/\x{103c}\x{1039}([\x{1000}-\x{1021}])/\x{1039}$1\x{103c}/g;
    $zawgyi =~ s/\x{1036}\x{1039}([\x{1000}-\x{1021}])/\x{1039}$1\x{1036}/g;
    $zawgyi =~ s/\x{1092}/\x{100b}\x{1039}\x{100c}/g;
    $zawgyi =~ s/\x{104e}/\x{104e}\x{1004}\x{103a}\x{1038}/g;
    $zawgyi =~ s/\x{1040}(\x{102b}|\x{102c}|\x{1036})/\x{101d}$1/g;
    $zawgyi =~ s/\x{1025}\x{1039}/\x{1009}\x{1039}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{103c}\x{1031}\x{103d}/$1\x{103c}\x{103d}\x{1031}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{103b}\x{1031}\x{103d}(\x{103e})?/$1\x{103b}\x{103d}$2\x{1031}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{103d}\x{1031}\x{103b}/$1\x{103b}\x{103d}\x{1031}/g;
    $zawgyi =~ s/([\x{1000}-\x{1021}])\x{1031}(\x{1039}[\x{1000}-\x{1021}])/$1$2\x{1031}/g;
    $zawgyi =~ s/\x{1038}\x{103a}/\x{103a}\x{1038}/g;
    $zawgyi =~ s/\x{102d}\x{103a}|\x{103a}\x{102d}/\x{102d}/g;
    $zawgyi =~ s/\x{102d}\x{102f}\x{103a}/\x{102d}\x{102f}/g;
    $zawgyi =~ s/ \x{1037}/\x{1037}/g;
    $zawgyi =~ s/\x{1037}\x{1036}/\x{1036}\x{1037}/g;
    $zawgyi =~ s/\x{102d}\x{102d}/\x{102d}/g;
    $zawgyi =~ s/\x{102e}\x{102e}/\x{102e}/g;
    $zawgyi =~ s/\x{102d}\x{102e}|\x{102e}\x{102d}/\x{102e}/g;
    $zawgyi =~ s/\x{102f}\x{102f}/\x{102f}/g;
    $zawgyi =~ s/\x{102f}\x{102d}/\x{102d}\x{102f}/g;
    $zawgyi =~ s/\x{1037}\x{1037}/\x{1037}/g;
    $zawgyi =~ s/\x{1032}\x{1032}/\x{1032}/g;
    $zawgyi =~ s/\x{1044}\x{1004}\x{103a}\x{1038}/\x{104e}\x{1004}\x{103a}\x{1038}/g;
    $zawgyi =~ s/\x{103a}\x{103a}/\x{103a}/g;
    $zawgyi =~ s/ \x{1037}/\x{1037}/g;

    return $zawgyi;
}
285              
286             1;