File Coverage

blib/lib/Lingua/LO/NLP/Romanize/PCGN.pm

Criterion	Covered	Total	%
statement	67	67	100.0
branch	17	18	94.4
condition	12	14	85.7
subroutine	16	16	100.0
pod	2	2	100.0
total	114	117	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::LO::NLP::Romanize::PCGN;
2	5			5		1001	use strict;
	5					9
	5					126
3	5			5		21	use warnings;
	5					8
	5					97
4	5			5		68	use 5.012000;
	5					15
5	5			5		19	use utf8;
	5					8
	5					20
6	5			5		113	use feature qw/ unicode_strings say /;
	5					8
	5					349
7	5			5		24	use charnames qw/ :full lao /;
	5					8
	5					25
8	5			5		2296	use version 0.77; our $VERSION = version->declare('v1.0.1');
	5					84
	5					32
9	5			5		452	use Carp;
	5					12
	5					325
10	5			5		27	use List::Util 1.33 'none';
	5					84
	5					393
11	5			5		328	use Lingua::LO::NLP::Analyze;
	5					9
	5					40
12	5			5		151	use parent 'Lingua::LO::NLP::Romanize';
	5					8
	5					26
13
14							=encoding utf8
15
16							=head1 NAME
17
18							Lingua::LO::NLP::Romanize::PCGN - Romanize Lao syllables according to the PCGN standard
19
20							=head1 FUNCTION
21
22							This class is not supposed to be used directly. Rather use
23							L as a factory:
24
25							my $o = Lingua::LO::NLP::Romanize->new(variant => 'PCGN');
26
27							=cut
28
29							my %CONSONANTS = (
30							'ກ' => 'k',
31							'ຂ' => 'kh',
32							'ຄ' => 'kh',
33							'ງ' => 'ng',
34							'ຈ' => 'ch',
35							'ສ' => 's',
36							'ຊ' => 'x',
37							'ຍ' => [qw/ gn y /],
38							'ດ' => [qw/ d t /],
39							'ຕ' => 't',
40							'ຖ' => 'th',
41							'ທ' => 'th',
42							'ນ' => 'n',
43							'ບ' => [qw/ b p /],
44							'ປ' => 'p',
45							'ຜ' => 'ph',
46							'ຝ' => 'f',
47							'ພ' => 'ph',
48							'ຟ' => 'f',
49							'ມ' => 'm',
50							'ຢ' => 'y',
51							'ລ' => 'l',
52							"\N{LAO SEMIVOWEL SIGN LO}" => 'l',
53							'ວ' => [qw/ v o /],
54							'ຫ' => 'h',
55							'ອ' => '',
56							'ຮ' => 'h',
57							'ຣ' => 'r',
58							'ໜ' => 'n',
59							'ໝ' => 'm',
60							'ຫຼ' => 'l',
61							'ຫຍ' => 'gn',
62							'ຫນ' => 'n',
63							'ຫມ' => 'm',
64							'ຫຣ' => 'r',
65							'ຫລ' => 'l',
66							'ຫວ' => 'v',
67							);
68
69							my %CONS_VOWELS = map { $_ => 1 } qw/ ຍ ຽ ອ ວ /;
70
71							my %VOWELS = (
72							### Monophthongs
73							'Xະ' => 'a',
74							'Xັ' => 'a',
75							'Xາ' => 'a',
76							'Xາວ' => 'ao',
77
78							'Xິ' => 'i',
79							'Xີ' => 'i',
80							'Xິວ' => 'iou', # TODO correct?
81							'Xີວ' => 'iou', # TODO correct?
82
83							'Xຶ' => 'u',
84							'Xື' => 'u',
85
86							'Xຸ' => 'ou',
87							'Xູ' => 'ou',
88
89							'ເXະ' => 'é',
90							'ເXັ' => 'é',
91							'ເX' => 'é',
92
93							'ແXະ' => 'è',
94							'ແXັ' => 'è',
95							'ແX' => 'è',
96							'ແXວ' => 'èo',
97
98							'ໂXະ' => 'ô',
99							'Xົ' => 'ô',
100							'ໂX' => 'ô',
101							'ໂXຍ' => 'ôy', # TODO correct?
102							'Xອຍ' => 'oy',
103
104							'ເXາະ' => 'o',
105							'Xັອ' => 'o',
106							'Xໍ' => 'o',
107							'Xອ' => 'o',
108
109							'ເXິ' => 'eu',
110							'ເXີ' => 'eu',
111							'ເXື' => 'eu', # TODO correct?
112
113							'ເXັຍ' => 'ia', # /iə/
114							'Xັຽ' => 'ia', # /iə/
115							'ເXຍ' => 'ia', # /iːə/
116							'Xຽ' => 'ia', # /iːə/
117							'Xຽວ' => 'iao',
118
119							'ເXີຍ' => 'euy',
120							'ເXຶອ' => 'ua',
121							'ເXືອ' => 'ua',
122							'ເXືອຍ' => 'uai',
123							'Xົວະ' => 'oua',
124							'Xົວ' => 'oua',
125							'Xວ' => 'oua',
126							'Xວຍ' => 'ouai',
127
128							'ໄX' => 'ai',
129							'ໃX' => 'ai',
130							'Xາຍ' => 'ay',
131							'Xັຍ' => 'ay', # /aj/
132
133							'ເXົາ' => 'ao',
134							'Xຳ' => 'am', # composed U+0EB3
135							'Xໍາ' => 'am',
136							);
137							{
138							# Replace "X" in %VOWELS keys with DOTTED CIRCLE. Makes code easier to edit.
139							my %v;
140							foreach my $v (keys %VOWELS) {
141							(my $w = $v) =~ s/X/\N{DOTTED CIRCLE}/;
142							$v{$w} = $VOWELS{$v};
143							}
144							%VOWELS = %v;
145							}
146
147							=head2 new
148
149							You don't call this constructor directly bu via L.
150
151							=cut
152
153							sub new {
154	6			6	1	21	my ($class, %args) = @_;
155							return bless {
156	71			71		469	romanize_vowel => sub { $VOWELS{ $_[0] } },
157	6					42	}, $class;
158							}
159
160							sub _romanize_syllable {
161	99			99		206	my ($self, $c) = @_;
162	99					159	my ($endcons, $result);
163	99					2217	my $parse = $c->parse;
164	99					1989	my $vowel = $c->vowel;
165
166	99					1827	my $cons = $c->consonant;
167	99					1865	my $h = $c->h;
168	99					1922	my $sv = $c->semivowel;
169	99	100	100			639	if($cons eq 'ຫ' and $sv) {
170							# ຫ with semivowel. Drop the ຫ and use the semivowel as consonant
171	2					10	$result = $self->romanize_consonant($sv, 0);
172	2					5	undef $sv;
173							} else {
174							# The regular case
175	97					253	$result = $self->romanize_consonant($cons, 0);
176	97	100				221	$sv = $self->romanize_consonant($sv, 1) if $sv;
177							}
178
179	99					1579	$endcons = $c->end_consonant;
180	99	100				499	if(defined $endcons) {
181	46	100	66			190	if(exists $CONS_VOWELS{ $endcons } and not defined $parse->{vowel3}) {
182	17					44	$vowel .= $endcons; # consonant can be used as a vowel
183	17					36	$endcons = '';
184							} else {
185	29					71	$endcons = $self->romanize_consonant($endcons, 1);
186							}
187							} else {
188	53					97	$endcons = ''; # avoid special-casing later
189							}
190
191							# TODO remove debug
192	99	50				282	carp sprintf("Missing VOWELS def for `%s' in `%s'", $vowel, $c->syllable) unless defined $VOWELS{ $vowel };
193
194	99		100			378	$sv //= '';
195	99					1715	my $rom_vowel = $self->{romanize_vowel}->($vowel, $c->tone);
196	99	100	100	59		483	if($parse->{vowel0} and none { defined } @$parse{qw/ vowel1 vowel2 vowel3 /}) {
	59					143
197	15					45	$result .= $rom_vowel . $sv . $endcons;
198							} else {
199	84					196	$result .= $sv . $rom_vowel . $endcons;
200							}
201							# Duplication sign
202	99	100	66			341	if(defined $parse->{extra} and $parse->{extra} eq 'ໆ') {
203	6	100				22	$result .= ($self->{hyphen} eq ' ' ? '-' : $self->{hyphen}) . "$result";
204							}
205	99					667	return $result;
206							}
207
208							=head2 romanize_consonant
209
210							$o->romanize_consonant($consonant, $position_in_syllable);
211
212							Get the appropriate consonant romanization for its position in the syllable.
213							Implemented as a method so it can be overridden in subclasses. C<$position>
214							must be 0 for initial/core position, 1 for end consonants.
215
216							=cut
217
218							sub romanize_consonant {
219	97			97	1	210	my (undef, $cons, $position) = @_;
220	97					231	my $consdata = $CONSONANTS{ $cons };
221	97	100				288	return ref $consdata ? $consdata->[$position] : $consdata;
222							}
223
224							1;
225