File Coverage

blib/lib/Lingua/LO/NLP.pm
Criterion Covered Total %
statement 38 49 77.5
branch 0 4 0.0
condition 1 2 50.0
subroutine 13 14 92.8
pod 5 5 100.0
total 57 74 77.0


line stmt bran cond sub pod time code
1             package Lingua::LO::NLP;
2 2     2   95149 use strict;
  2         7  
  2         51  
3 2     2   9 use warnings;
  2         3  
  2         40  
4 2     2   29 use 5.012000;
  2         6  
5 2     2   8 use utf8;
  2         4  
  2         27  
6 2     2   69 use feature 'unicode_strings';
  2         5  
  2         177  
7 2     2   235 use version 0.77; our $VERSION = version->declare('v1.0.3');
  2         1387  
  2         14  
8 2     2   479 use Lingua::LO::NLP::Syllabify;
  2         5  
  2         18  
9 2     2   349 use Lingua::LO::NLP::Analyze;
  2         6  
  2         12  
10 2     2   385 use Lingua::LO::NLP::Romanize;
  2         4  
  2         10  
11              
12             =encoding utf8
13              
14             =head1 NAME
15              
16             Lingua::LO::NLP - Various Lao text processing functions
17              
18             =head1 SYNOPSIS
19              
20             use utf8;
21             use 5.10.1;
22             use open qw/ :std :encoding(UTF-8) /;
23             use Lingua::LO::NLP;
24             use Data::Dumper;
25              
26             my $lao = Lingua::LO::NLP->new;
27              
28             my @syllables = $lao->split_to_syllables("ສະບາຍດີ"); # qw( ສະ ບາຍ ດີ )
29             print Dumper(\@syllables);
30              
31             for my $syl (@syllables) {
32             my $analysis = $lao->analyze_syllable($syl);
33             printf "%s: %s\n", $analysis->syllable, $analysis->tone;
34             # ສະ: TONE_HIGH_STOP
35             # ບາຍ: TONE_LOW
36             # ດີ: TONE_LOW
37             }
38              
39             say $lao->romanize("ສະບາຍດີ", variant => 'PCGN', hyphen => "\N{HYPHEN}"); # sa‐bay‐di
40             say $lao->romanize("ສະບາຍດີ", variant => 'IPA'); # saʔ baːj diː
41              
42             =head1 DESCRIPTION
43              
44             This module provides various functions for processing Lao text. Currently it can
45              
46             =over 4
47              
48             =item
49              
50             split Lao text (usually written without blanks between words) into syllables
51              
52             =item
53              
54             analyze syllables with regards to core and end consonants, vowels, tone and
55             other properties
56              
57             =item
58              
59             romanize Lao text according to the PCGN standard or to IPA (experimental)
60              
61             =back
62              
63             These functions are basically just shortcuts to the functionality of some
64             specialized modules: L<Lingua::LO::NLP::Syllabify>,
65             L<Lingua::LO::NLP::Analyze> and L<Lingua::LO::NLP::Romanize>. If
66             you need only one of them, you can shave off a little overhead by using those
67             directly.
68              
69             =head1 METHODS
70              
71             =head2 new
72              
73             new(option => value, ...)
74              
75             =head3 Options
76              
77             =over 4
78              
79             =item * C<normalize>: passed to L<Lingua::LO::NLP::Syllabify/new> and L<Lingua::LO::NLP::Analyze/new>.
80              
81             =back
82              
83             =cut
84             sub new {
85 2     2 1 77 my $class = shift;
86 2         4 my %opts = @_;
87 2         10 return bless \%opts, $class;
88             }
89              
90             =head2 split_to_syllables
91              
92             my @syllables = $object->split_to_syllables( $text, %options );
93              
94             Split Lao text into its syllables using a regexp modelled after PHISSAMAY,
95             DALALOY and DURRANI: I<Syllabification of Lao Script for Line Breaking>. Takes
96             as its only mandatory parameter a character string to split and optionally a
97             number of named options; see L<Lingua::LO::NLP::Syllabify/new> for those.
98             Returns a list of syllables.
99              
100             =cut
101             sub split_to_syllables {
102 1     1 1 6 my $self = shift;
103 1         2 my $text = shift;
104              
105             return Lingua::LO::NLP::Syllabify->new(
106             $text,
107             normalize => $self->{normalize},
108             @_
109 1         10 )->get_syllables;
110             }
111              
112             =head2 analyze_syllable
113              
114             my $classified = $object->analyze_syllable( $syllable, %options );
115              
116             Returns a L<Lingua::LO::NLP::Analyze> object that allows you to query
117             various syllable properties such as core consonant, tone mark, vowel length and
118             tone. See there for details.
119              
120             =cut
121             sub analyze_syllable {
122 1     1 1 1216 my $self = shift;
123 1         2 my $syllable = shift;
124             return Lingua::LO::NLP::Analyze->new(
125             $syllable,
126             normalize => $self->{normalize},
127             @_
128 1         10 );
129             }
130              
131             =head2 romanize
132              
133             $object->romanize( $lao, %options );
134              
135             Returns a romanized version of the text passed in as C<$lao>. See
136             L<Lingua::LO::NLP::Romanize/new> for options. The default C<variant> is 'PCGN'.
137              
138             =cut
139             sub romanize {
140 2     2 1 583 my (undef, $lao, %options) = @_;
141 2   50     13 $options{variant} //= 'PCGN';
142 2         11 return Lingua::LO::NLP::Romanize->new(%options)->romanize( $lao );
143             }
144              
145             =head2 analyze_text
146              
147             my @syllables = $object->analyze_text( $text, %options );
148              
149             Split Lao text into its syllables and analyze them, returning an array of
150             hashes. Each hash has at least a key 'analysis' with a
151             L<Lingua::LO::NLP::Analyze> object as a value. If the C<romanize> option is set
152             to a true value, it also has a "romanization" key. In this case, the C<variant>
153             option (see L<Lingua::LO::NLP::Romanize/new>) is also required.
154              
155             =cut
156             sub analyze_text {
157 0     0 1   my $self = shift;
158 0           my $text = shift;
159 0           my %opts = @_;
160 0           my $romanizer;
161 0 0         $romanizer = Lingua::LO::NLP::Romanize->new( %opts ) if delete $opts{romanize};
162              
163             my @result = Lingua::LO::NLP::Syllabify->new(
164             $text,
165             normalize => $self->{normalize},
166 0           %opts
167             )->get_syllables;
168              
169 0 0         if($romanizer) {
170             return map {
171 0           {
172 0           analysis => $_,
173             romanization => $romanizer->romanize_syllable($_)
174             }
175             } @result;
176             } else {
177 0           return map { { analysis => $_ } } @result;
  0            
178             }
179             }
180              
181             =head1 SEE ALSO
182              
183             L<Lingua::LO::Romanize> is the module that inspired this one. It has some
184             issues with ambiguous syllable boundaries as in "ໃນວົງ" though.
185              
186             =head1 AUTHOR
187              
188             Matthias Bethke, E<lt>matthias@towiski.deE<gt>
189              
190             =head1 COPYRIGHT AND LICENSE
191              
192             Copyright (C) 2016-2017 by Matthias Bethke
193              
194             This library is free software; you can redistribute it and/or modify it under
195             the same terms as Perl itself, either Perl version 5.14.2 or, at your option,
196             any later version of Perl 5 you may have available. Significant portions of the
197             code are (C) PostgreSQL Global Development Group and The Regents of the
198             University of California. All modified versions must retain the file COPYRIGHT
199             included in the distribution.
200              
201             =cut
202              
203             1;