File Coverage

blib/lib/Lingua/EN/Inflect/Phrase.pm
Criterion Covered Total %
statement 113 113 100.0
branch 74 76 97.3
condition 57 65 87.6
subroutine 12 12 100.0
pod 2 2 100.0
total 258 268 96.2


line stmt bran cond sub pod time code
1             package Lingua::EN::Inflect::Phrase;
2             our $AUTHORITY = 'cpan:AVAR';
3             $Lingua::EN::Inflect::Phrase::VERSION = '0.20';
4 4     4   230370 use strict;
  4         43  
  4         97  
5 4     4   18 use warnings;
  4         9  
  4         96  
6 4     4   18 use Exporter 'import';
  4         8  
  4         104  
7 4     4   2634 use Lingua::EN::Inflect;
  4         109949  
  4         301  
8 4     4   2011 use Lingua::EN::Inflect::Number;
  4         2320  
  4         27  
9 4     4   1960 use Lingua::EN::Tagger;
  4         212236  
  4         165  
10 4     4   1388 use Lingua::EN::FindNumber '$number_re';
  4         14615  
  4         403  
11 4     4   1417 use Lingua::EN::Number::IsOrdinal 'is_ordinal';
  4         1889  
  4         5489  
12              
13             =head1 NAME
14              
15             Lingua::EN::Inflect::Phrase - Inflect short English Phrases
16              
17             =cut
18              
19             =head1 SYNOPSIS
20              
21             use Lingua::EN::Inflect::Phrase;
22             use Test::More tests => 2;
23              
24             my $plural = Lingua::EN::Inflect::Phrase::to_PL('green egg and ham');
25              
26             is $plural, 'green eggs and ham';
27              
28             my $singular = Lingua::EN::Inflect::Phrase::to_S('green eggs and ham');
29              
30             is $singular, 'green egg and ham';
31              
32             =head1 DESCRIPTION
33              
34             Attempts to pluralize or singularize short English phrases.
35              
36             Does not throw exceptions at present, if you attempt to pluralize an already
37             pluralized phrase, it will leave it unchanged (and vice versa.)
38              
39             The behavior of this module is subject to change as I tweak the heuristics, as
40             some things get fixed others might regress. The processing of natural language
41             is a messy business.
42              
43             If it doesn't work, please email or submit to RT the example you tried, and
44             I'll try to fix it.
45              
46             =head1 OPTIONS
47              
48             By default, this module prefers to treat words as nouns (sometimes words can be
49             interpreted as a verb or a noun without context.) This is better for things
50             such as database table/column names, which is what this module is primarily
51             for.
52              
53             This behavior can be switched with the variable C<$prefer_nouns>. The default
54             is C<1>.
55              
56             For example:
57              
58             {
59             local $Lingua::EN::Inflect::Phrase::prefer_nouns = 0;
60             is Lingua::EN::Inflect::Phrase::to_S('sources split'), 'source splits';
61             }
62             {
63             local $Lingua::EN::Inflect::Phrase::prefer_nouns = 1;
64             is Lingua::EN::Inflect::Phrase::to_S('source splits'), 'source split';
65             }
66              
67             =head1 OPTIONAL EXPORTS
68              
69             L, L
70              
71             =cut
72              
73             our @EXPORT_OK = qw/to_PL to_S/;
74              
75             =head1 SUBROUTINES
76              
77             =cut
78              
79             our $prefer_nouns = 1;
80              
81             my $MAYBE_NOUN = qr{(\S+)/(?:NN[PS]?|CD|JJ)\b};
82             my $MAYBE_NOUN_TAG = qr{/(?:NN[PS]?|CD|JJ)\b};
83             my $NOUN_OR_VERB = qr{(\S+)/(?:NN[PS]?|CD|JJ|VB[A-Z]?)\b};
84             my $NOUN_OR_VERB_TAG = qr{/(?:NN[PS]?|CD|JJ|VB[A-Z]?)\b};
85             my $VERB_TAG = qr{/VB[A-z]?\b};
86              
87             my $PREPOSITION_OR_CONJUNCTION_TAG = qr{/(?:CC|IN)\b};
88              
89             my $tagger;
90              
91             sub _inflect_noun {
92 402     402   17580 my ($noun, $want_plural, $is_plural) = @_;
93              
94 402         638 my $want_singular = not $want_plural;
95              
96 402 100       1340 $is_plural = Lingua::EN::Inflect::Number::number($noun) ne 's'
97             unless defined $is_plural;
98              
99             # fix "people" and "heroes" and a few others
100 402 100 100     190573 if ($noun =~ /^(?:people|person)\z/i) {
    100 100        
    100 100        
    100          
    100          
    100          
    100          
    100          
101 4 100       16 return $want_singular ? 'person' : 'people';
102             }
103             elsif ($noun =~ /^hero(?:es)?\z/i) {
104 4 100       14 return $want_singular ? 'hero' : 'heroes';
105             }
106             elsif ($want_singular && lc($noun) eq 'aliases') {
107 1         4 return 'alias';
108             }
109             elsif ($want_singular && lc($noun) eq 'statuses') {
110 1         3 return 'status';
111             }
112             elsif (lc($noun) eq 'belongs') {
113 4         10 return undef;
114             }
115             elsif ($want_plural && lc($noun) eq 'two') {
116 2         6 return 'twos';
117             }
118             elsif ($noun =~ /^[A-Z].+ity\z/) {
119 2 100       11 return $want_plural ? ucfirst(Lingua::EN::Inflect::Number::to_PL(lc($noun))) : $noun;
120             }
121             elsif ($noun =~ /^[A-Z].+ities\z/) {
122 2 100       9 return $want_plural ? $noun : ucfirst(Lingua::EN::Inflect::Number::to_S(lc($noun)));
123             }
124              
125 382 100 100     1822 if ($want_plural && (not $is_plural)) {
    100 100        
126 61         180 return Lingua::EN::Inflect::Number::to_PL($noun);
127             }
128             elsif ($want_singular && $is_plural) {
129 43         126 return Lingua::EN::Inflect::Number::to_S($noun);
130             }
131              
132 278         521 return undef;
133             }
134              
135             sub _inflect {
136 373     373   771 my ($phrase, $want_plural) = @_;
137 373         567 my $want_singular = not $want_plural;
138              
139             # 'a' inflects to 'some', special-case it here
140 373 100       814 if ($phrase eq 'a') {
141 2 100       12 return $want_singular ? $phrase : 'as';
142             }
143              
144             # Do not tag initial number, if any.
145             # Regex is from perldoc -q 'is a number'.
146 371         2317 my ($det, $number, $pad, $rest) =
147             $phrase =~ m{^(\s*\S+/DET)?(\s*(?:[+-]?)(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?)(\s*)(.*)$};
148              
149 371   100     2473 $_ ||= '' for $det, $pad, $rest;
150              
151 371         501 my $tagged;
152 371   66     687 $tagger ||= Lingua::EN::Tagger->new;
153              
154             # force plural unless number is '1'
155 371 100       525267 if ((grep { defined && length } ($number, $pad, $rest)) == 3) {
  1113 100       3212  
156 112         325 my $tagged_rest = $tagger->get_readable($rest);
157              
158 112         98654 $tagged = $det . $number . $pad . $tagged_rest;
159              
160 112 100 100     614 if ($number =~ /^\s*1(?:\.0*[Ee]?0*)?\z/
161             && $tagged_rest !~ m{^(?:\S+/CC|\d)}) {
162 32         61 $want_plural = 0;
163 32         48 $want_singular = 1;
164             }
165             else {
166 80         124 $want_plural = 1;
167 80         142 $want_singular = 0;
168             }
169             }
170             else {
171 259         766 $tagged = $tagger->get_readable($phrase);
172             }
173              
174             # check for phrases like "one something" and force singular,
175             # or "one and a half ..." and force plural
176 371 100 100     260274 if (my ($det, $number, $conj, $and_zero, $pad, $rest) = $tagged =~ m{
    100 66        
177             ^ (\s* \S+/DET)?
178             (\s* (?:one|single))/(?:JJ|NN|CD)\b
179             (\s*\S+/CC\b)?
180             (?:(\s* (?:no|zero))/(?:DET|CD))?
181             (\s*)
182             (.*)
183             }x) {
184              
185 72   100     532 $_ ||= '' for $det, $conj, $and_zero, $pad, $rest;
186              
187 72         198 $tagged = $det . $number . $conj . $and_zero . $pad . $rest;
188              
189 72 100 66     247 if (length $conj && (not $and_zero)) {
    100          
190 4         6 $want_plural = 1;
191 4         6 $want_singular = 0;
192             }
193             elsif (length $rest) {
194 64         107 $want_plural = 0;
195 64         99 $want_singular = 1;
196             }
197             }
198             # handle other numbers as words at the start of the phrase
199             # using Lingua::EN::FindNumber
200             elsif ($tagged =~ m{^\s*(?:(\S+)/DET)?}
201             && (substr $phrase, $+[1]||0) =~ /^\s*$number_re/) {
202              
203 68   100     765 $number = (sort { length $a <=> length $b } map $_||'', ($1, $2, $3, $4, $5))[-1];
  364         555  
204              
205 68 100       262 if (not is_ordinal($number)) {
206 56         2573 my $tagged_number_re;
207              
208 56         187 foreach my $num_elem (split /\s+/, $number) {
209 60         159 $tagged_number_re .= "\Q$num_elem\E/[A-Z]+\\s*";
210             }
211              
212 56         89 my $tagged_number;
213 56         515 ($tagged_number, $pad, $rest) = $tagged =~ m/($tagged_number_re)(\s*)(.*)/;
214 56         220 my @tagged_number_pos = ($-[1], $+[1]);
215              
216 56 100       182 if (length $rest) {
217 44         118 substr($tagged, $tagged_number_pos[0], ($tagged_number_pos[1] - $tagged_number_pos[0])) = $number;
218 44         72 $want_plural = 1;
219 44         93 $want_singular = 0;
220             }
221             }
222             }
223              
224 371         1356 my ($noun, $tag);
225              
226             # last noun (or verb that could be a noun) before a preposition/conjunction
227             # or last noun/verb
228 371 100 100     6246 if ( (($noun) = $tagged =~ m|${MAYBE_NOUN} (?! .* ${MAYBE_NOUN_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
      66        
      100        
229             .* ${PREPOSITION_OR_CONJUNCTION_TAG}|x)
230              
231             or (($noun) = $tagged =~ m|${MAYBE_NOUN} (?!.*${MAYBE_NOUN_TAG})|x)
232              
233             or (($noun) = $tagged =~ m|${NOUN_OR_VERB} (?!.*${NOUN_OR_VERB_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
234             .* ${PREPOSITION_OR_CONJUNCTION_TAG}|x)
235              
236             or (($noun) = $tagged =~ m|${NOUN_OR_VERB} (?! .* ${NOUN_OR_VERB_TAG})|x)) {
237              
238 365         1265 my @pos = ($-[1], $+[1]);
239 365         648 my $inflected_noun;
240              
241 365         784 $inflected_noun = _inflect_noun($noun, $want_plural);
242              
243             # check if there is a verb following the noun
244             # the verb either needs to be pluarlized or be taken as the noun,
245             # depending on the value of $prefer_nouns
246 365         38820 my ($verb) = substr($tagged, $pos[1]) =~ m|^/[A-Z]+\s+(\S+)${VERB_TAG}|;
247              
248 365         2041 my @verb_pos = map $pos[1] + $_, grep defined, ($-[1], $+[1]);
249              
250             # the verb may be tagged as a noun unless singularized (pluralized as a noun.)
251 365 100 100     1718 if ((not $verb) && (not $prefer_nouns)
      66        
252             && $tagger->get_readable(_inflect_noun($noun, 1, 0)) =~ $VERB_TAG) {
253              
254             # find the preceding noun
255 2 50       2848 if (my ($preceding_noun) = substr($tagged, 0, $pos[0]) =~ m|${MAYBE_NOUN}\s*\z|) {
256 2         8 my @preceding_noun_pos = ($-[1], $+[1]);
257              
258 2         6 $verb = $noun;
259 2         5 @verb_pos = @pos;
260 2         4 $noun = $preceding_noun;
261 2         4 @pos = @preceding_noun_pos;
262 2         5 $inflected_noun = _inflect_noun($noun, $want_plural);
263             }
264             }
265              
266 365 100       1099 if ($verb) {
267 35         95 my $plural_verb = Lingua::EN::Inflect::PL_V($verb);
268              
269 35 100       2979 if ($prefer_nouns) {
    100          
270 31 100 33     100 if ($tagger->get_readable($plural_verb) =~ $MAYBE_NOUN
      66        
      100        
271             || ( # noun singular verb plural should be handled as noun noun, unless something follows it,
272             # and only for "VB" not "VBZ" or "VBN"
273             $verb eq $plural_verb
274             && $tagger->get_readable(_inflect_noun($verb, 1)) =~ $MAYBE_NOUN
275             && substr($tagged, $verb_pos[1]) =~ m{^\s*/VB\s*$}
276             )) {
277 7         5582 $inflected_noun = _inflect_noun($verb, $want_plural);
278              
279 7         1752 @pos = @verb_pos;
280             }
281             }
282             elsif ($inflected_noun) {
283 2 100       6 if ($want_plural) {
    50          
284 1         5 substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $plural_verb;
285             }
286             elsif ($want_singular) {
287             # to singularize a verb we pluralize it as a noun
288 1         4 my $singular_verb = _inflect_noun($verb, 1, 0);
289              
290 1         730 substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $singular_verb;
291             }
292             }
293             }
294              
295 365 100       36369 substr($tagged, $pos[0], ($pos[1] - $pos[0])) = $inflected_noun if $inflected_noun;
296              
297 365         2070 ($phrase = $tagged) =~ s{/[A-Z]+}{}g;
298             }
299             # fallback
300             else {
301 6         21 my $number = Lingua::EN::Inflect::Number::number($phrase);
302              
303 6 100 100     2676 if ($want_plural && $number ne 'p') {
    100 100        
304 2         8 return Lingua::EN::Inflect::Number::to_PL($phrase);
305             }
306             elsif ($want_singular && $number ne 's') {
307 1         4 return Lingua::EN::Inflect::Number::to_S($phrase);
308             }
309             }
310              
311 368         2288 return $phrase;
312             }
313              
314             =head2 to_PL
315              
316             Attempts to pluralizes a phrase unless already plural.
317              
318             =cut
319              
320             sub to_PL {
321 187     187 1 37713 return _inflect(shift, 1);
322             }
323              
324             =head2 to_S
325              
326             Attempts to singularize a phrase unless already singular.
327              
328             =cut
329              
330             sub to_S {
331 186     186 1 460 return _inflect(shift, 0);
332             }
333              
334             =head1 BUGS
335              
336             Please report any bugs or feature requests to C
337             rt.cpan.org>, or through the web interface at
338             L. I
339             will be notified, and then you'll automatically be notified of progress on your
340             bug as I make changes.
341              
342             =head1 REPOSITORY
343              
344             git clone git://github.com/rkitover/lingua-en-inflect-phrase.git lingua-en-inflect-phrase
345              
346             =head1 SEE ALSO
347              
348             L, L, L
349              
350             =head1 AUTHOR
351              
352             rkitover: Rafael Kitover
353              
354             =head1 CONTRIBUTORS
355              
356             zakame: Zak B. Elep
357              
358             =head1 LICENSE AND COPYRIGHT
359              
360             Copyright (c) 2018 Rafael Kitover (rkitover@cpan.org).
361              
362             This program is free software; you can redistribute it and/or modify it
363             under the terms of either: the GNU General Public License as published
364             by the Free Software Foundation; or the Artistic License.
365              
366             See http://dev.perl.org/licenses/ for more information.
367              
368             =cut
369              
370             1;
371             # vim:et sts=2 sw=2 tw=0: