File Coverage

blib/lib/Lingua/EN/Inflect/Phrase.pm
Criterion Covered Total %
statement 112 112 100.0
branch 66 68 97.0
condition 56 65 86.1
subroutine 13 13 100.0
pod 2 2 100.0
total 249 260 95.7


line stmt bran cond sub pod time code
1             package Lingua::EN::Inflect::Phrase;
2             BEGIN {
3 4     4   263987 $Lingua::EN::Inflect::Phrase::AUTHORITY = 'cpan:AVAR';
4             }
5             {
6             $Lingua::EN::Inflect::Phrase::VERSION = '0.18';
7             }
8              
9 4     4   433 use strict;
  4         8  
  4         127  
10 4     4   19 use warnings;
  4         6  
  4         102  
11 4     4   17 use Exporter 'import';
  4         8  
  4         187  
12 4     4   122559 use Lingua::EN::Inflect;
  4         132337  
  4         514  
13 4     4   11407 use Lingua::EN::Inflect::Number;
  4         6743  
  4         30  
14 4     4   4234 use Lingua::EN::Tagger;
  4         437914  
  4         339  
15 4     4   4253 use Lingua::EN::FindNumber '$number_re';
  4         25368  
  4         615  
16 4     4   4590 use Lingua::EN::Number::IsOrdinal 'is_ordinal';
  4         3366  
  4         7661  
17              
18             =head1 NAME
19              
20             Lingua::EN::Inflect::Phrase - Inflect short English Phrases
21              
22             =cut
23              
24             =head1 SYNOPSIS
25              
26             use Lingua::EN::Inflect::Phrase;
27             use Test::More tests => 2;
28              
29             my $plural = Lingua::EN::Inflect::Phrase::to_PL('green egg and ham');
30              
31             is $plural, 'green eggs and ham';
32              
33             my $singular = Lingua::EN::Inflect::Phrase::to_S('green eggs and ham');
34              
35             is $singular, 'green egg and ham';
36              
37             =head1 DESCRIPTION
38              
39             Attempts to pluralize or singularize short English phrases.
40              
41             Does not throw exceptions at present, if you attempt to pluralize an already
42             pluralized phrase, it will leave it unchanged (and vice versa.)
43              
44             The behavior of this module is subject to change as I tweak the heuristics, as
45             some things get fixed others might regress. The processing of natural language
46             is a messy business.
47              
48             If it doesn't work, please email or submit to RT the example you tried, and
49             I'll try to fix it.
50              
51             =head1 OPTIONS
52              
53             By default, this module prefers to treat words as nouns (sometimes words can be
54             interpreted as a verb or a noun without context.) This is better for things
55             such as database table/column names, which is what this module is primarily
56             for.
57              
58             This behavior can be switched with the variable C<$prefer_nouns>. The default
59             is C<1>.
60              
61             For example:
62              
63             {
64             local $Lingua::EN::Inflect::Phrase::prefer_nouns = 0;
65             is Lingua::EN::Inflect::Phrase::to_S('sources split'), 'source splits';
66             }
67             {
68             local $Lingua::EN::Inflect::Phrase::prefer_nouns = 1;
69             is Lingua::EN::Inflect::Phrase::to_S('source splits'), 'source split';
70             }
71              
72             =head1 OPTIONAL EXPORTS
73              
74             L</to_PL>, L</to_S>
75              
76             =cut
77              
78             our @EXPORT_OK = qw/to_PL to_S/;
79              
80             =head1 SUBROUTINES
81              
82             =cut
83              
84             our $prefer_nouns = 1;
85              
86             my $MAYBE_NOUN = qr{(\S+)/(?:NNS?|CD|JJ)\b};
87             my $MAYBE_NOUN_TAG = qr{/(?:NNS?|CD|JJ)\b};
88             my $NOUN_OR_VERB = qr{(\S+)/(?:NNS?|CD|JJ|VB[A-Z]?)\b};
89             my $NOUN_OR_VERB_TAG = qr{/(?:NNS?|CD|JJ|VB[A-Z]?)\b};
90             my $VERB_TAG = qr{/VB[A-z]?\b};
91              
92             my $PREPOSITION_OR_CONJUNCTION_TAG = qr{/(?:CC|IN)\b};
93              
94             my $tagger;
95              
96             sub _inflect_noun {
97 394     394   24575 my ($noun, $want_plural, $is_plural) = @_;
98              
99 394         823 my $want_singular = not $want_plural;
100              
101 394 100       2657 $is_plural = Lingua::EN::Inflect::Number::number($noun) ne 's'
102             unless defined $is_plural;
103              
104             # fix "people" and "heroes" and a few others
105 394 100 100     401227 if ($noun =~ /^(?:people|person)\z/i) {
    100 100        
    100 100        
    100          
    100          
    100          
106 4 100       21 return $want_singular ? 'person' : 'people';
107             }
108             elsif ($noun =~ /^hero(?:es)?\z/i) {
109 4 100       999 return $want_singular ? 'hero' : 'heroes';
110             }
111             elsif ($want_singular && lc($noun) eq 'aliases') {
112 1         7 return 'alias';
113             }
114             elsif ($want_singular && lc($noun) eq 'statuses') {
115 1         4 return 'status';
116             }
117             elsif (lc($noun) eq 'belongs') {
118 4         67 return undef;
119             }
120             elsif ($want_plural && lc($noun) eq 'two') {
121 2         10 return 'twos';
122             }
123              
124 378 100 100     5363 if ($want_plural && (not $is_plural)) {
    100 100        
125 60         300 return Lingua::EN::Inflect::Number::to_PL($noun);
126             }
127             elsif ($want_singular && $is_plural) {
128 42         179 return Lingua::EN::Inflect::Number::to_S($noun);
129             }
130              
131 276         743 return undef;
132             }
133              
134             sub _inflect {
135 365     365   1007 my ($phrase, $want_plural) = @_;
136 365         871 my $want_singular = not $want_plural;
137              
138             # 'a' inflects to 'some', special-case it here
139 365 100       1355 if ($phrase eq 'a') {
140 2 100       18 return $want_singular ? $phrase : 'as';
141             }
142              
143             # Do not tag initial number, if any.
144             # Regex is from perldoc -q 'is a number'.
145 363         3046 my ($det, $number, $pad, $rest) =
146             $phrase =~ m{^(\s*\S+/DET)?(\s*(?:[+-]?)(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?)(\s*)(.*)$};
147              
148 363   100     4485 $_ ||= '' for $det, $pad, $rest;
149              
150 363         614 my $tagged;
151 363   66     1198 $tagger ||= Lingua::EN::Tagger->new;
152              
153             # force plural unless number is '1'
154 363 100       961256 if ((grep { defined && length } ($number, $pad, $rest)) == 3) {
  1089 100       5773  
155 112         638 my $tagged_rest = $tagger->get_readable($rest);
156              
157 112         133846 $tagged = $det . $number . $pad . $tagged_rest;
158              
159 112 100 100     1103 if ($number =~ /^\s*1(?:\.0*[Ee]?0*)?\z/
160             && $tagged_rest !~ m{^(?:\S+/CC|\d)}) {
161 32         67 $want_plural = 0;
162 32         73 $want_singular = 1;
163             }
164             else {
165 80         144 $want_plural = 1;
166 80         176 $want_singular = 0;
167             }
168             }
169             else {
170 251         1893 $tagged = $tagger->get_readable($phrase);
171             }
172              
173             # check for phrases like "one something" and force singular,
174             # or "one and a half ..." and force plural
175 363 100 100     393076 if (my ($det, $number, $conj, $and_zero, $pad, $rest) = $tagged =~ m{
    100 66        
176             ^ (\s* \S+/DET)?
177             (\s* (?:one|single))/(?:JJ|NN|CD)\b
178             (\s*\S+/CC\b)?
179             (?:(\s* (?:no|zero))/(?:DET|CD))?
180             (\s*)
181             (.*)
182             }x) {
183              
184 72   100     1067 $_ ||= '' for $det, $conj, $and_zero, $pad, $rest;
185              
186 72         295 $tagged = $det . $number . $conj . $and_zero . $pad . $rest;
187              
188 72 100 66     576 if (length $conj && (not $and_zero)) {
    100          
189 4         9 $want_plural = 1;
190 4         8 $want_singular = 0;
191             }
192             elsif (length $rest) {
193 64         178 $want_plural = 0;
194 64         128 $want_singular = 1;
195             }
196             }
197             # handle other numbers as words at the start of the phrase
198             # using Lingua::EN::FindNumber
199             elsif ($tagged =~ m{^\s*(?:(\S+)/DET)?}
200             && (substr $phrase, $+[1]||0) =~ /^\s*$number_re/) {
201              
202 68   100     2069 $number = (sort { length $a <=> length $b } map $_||'', ($1, $2, $3, $4, $5))[-1];
  364         736  
203              
204 68 100       406 if (not is_ordinal($number)) {
205 56         8428 my $tagged_number_re;
206              
207 56         961 foreach my $num_elem (split /\s+/, $number) {
208 60         271 $tagged_number_re .= "\Q$num_elem\E/[A-Z]+\\s*";
209             }
210              
211 56         117 my $tagged_number;
212 56         929 ($tagged_number, $pad, $rest) = $tagged =~ m/($tagged_number_re)(\s*)(.*)/;
213 56         324 my @tagged_number_pos = ($-[1], $+[1]);
214              
215 56 100       340 if (length $rest) {
216 44         180 substr($tagged, $tagged_number_pos[0], ($tagged_number_pos[1] - $tagged_number_pos[0])) = $number;
217 44         98 $want_plural = 1;
218 44         124 $want_singular = 0;
219             }
220             }
221             }
222              
223 363         1747 my ($noun, $tag);
224              
225             # last noun (or verb that could be a noun) before a preposition/conjunction
226             # or last noun/verb
227 363 100 100     12551 if ( (($noun) = $tagged =~ m|${MAYBE_NOUN} (?! .* ${MAYBE_NOUN_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
      66        
      100        
228             .* ${PREPOSITION_OR_CONJUNCTION_TAG}|x)
229              
230             or (($noun) = $tagged =~ m|${MAYBE_NOUN} (?!.*${MAYBE_NOUN_TAG})|x)
231              
232             or (($noun) = $tagged =~ m|${NOUN_OR_VERB} (?!.*${NOUN_OR_VERB_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
233             .* ${PREPOSITION_OR_CONJUNCTION_TAG}|x)
234              
235             or (($noun) = $tagged =~ m|${NOUN_OR_VERB} (?! .* ${NOUN_OR_VERB_TAG})|x)) {
236              
237 357         3261 my @pos = ($-[1], $+[1]);
238 357         908 my $inflected_noun;
239              
240 357         1151 $inflected_noun = _inflect_noun($noun, $want_plural);
241              
242             # check if there is a verb following the noun
243             # the verb either needs to be pluarlized or be taken as the noun,
244             # depending on the value of $prefer_nouns
245 357         66264 my ($verb) = substr($tagged, $pos[1]) =~ m|^/[A-Z]+\s+(\S+)${VERB_TAG}|;
246              
247 357         4001 my @verb_pos = map $pos[1] + $_, grep defined, ($-[1], $+[1]);
248              
249             # the verb may be tagged as a noun unless singularized (pluralized as a noun.)
250 357 100 100     4149 if ((not $verb) && (not $prefer_nouns)
      66        
251             && $tagger->get_readable(_inflect_noun($noun, 1, 0)) =~ $VERB_TAG) {
252              
253             # find the preceding noun
254 2 50       3738 if (my ($preceding_noun) = substr($tagged, 0, $pos[0]) =~ m|${MAYBE_NOUN}\s*\z|) {
255 2         10 my @preceding_noun_pos = ($-[1], $+[1]);
256              
257 2         6 $verb = $noun;
258 2         6 @verb_pos = @pos;
259 2         4 $noun = $preceding_noun;
260 2         6 @pos = @preceding_noun_pos;
261 2         6 $inflected_noun = _inflect_noun($noun, $want_plural);
262             }
263             }
264              
265 357 100       1660 if ($verb) {
266 35         159 my $plural_verb = Lingua::EN::Inflect::PL_V($verb);
267              
268 35 100       4693 if ($prefer_nouns) {
    100          
269 31 100 33     621 if ($tagger->get_readable($plural_verb) =~ $MAYBE_NOUN
      66        
      66        
270             || ( # noun singular verb plural should be handled as noun noun, unless something follows it,
271             # and only for "VB" not "VBZ" or "VBN"
272             $verb eq $plural_verb
273             && $tagger->get_readable(_inflect_noun($verb, 1)) =~ $MAYBE_NOUN
274             && substr($tagged, $verb_pos[1]) =~ m{^\s*/VB\s*$}
275             )) {
276 7         6939 $inflected_noun = _inflect_noun($verb, $want_plural);
277              
278 7         8118 @pos = @verb_pos;
279             }
280             }
281             elsif ($inflected_noun) {
282 2 100       10 if ($want_plural) {
    50          
283 1         5 substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $plural_verb;
284             }
285             elsif ($want_singular) {
286             # to singularize a verb we pluralize it as a noun
287 1         3 my $singular_verb = _inflect_noun($verb, 1, 0);
288              
289 1         931 substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $singular_verb;
290             }
291             }
292             }
293              
294 357 100       54193 substr($tagged, $pos[0], ($pos[1] - $pos[0])) = $inflected_noun if $inflected_noun;
295              
296 357         4654 ($phrase = $tagged) =~ s{/[A-Z]+}{}g;
297             }
298             # fallback
299             else {
300 6         35 my $number = Lingua::EN::Inflect::Number::number($phrase);
301              
302 6 100 100     3652 if ($want_plural && $number ne 'p') {
    100 100        
303 2         13 return Lingua::EN::Inflect::Number::to_PL($phrase);
304             }
305             elsif ($want_singular && $number ne 's') {
306 1         7 return Lingua::EN::Inflect::Number::to_S($phrase);
307             }
308             }
309              
310 360         4044 return $phrase;
311             }
312              
313             =head2 to_PL
314              
315             Attempts to pluralizes a phrase unless already plural.
316              
317             =cut
318              
319             sub to_PL {
320 183     183 1 77714 return _inflect(shift, 1);
321             }
322              
323             =head2 to_S
324              
325             Attempts to singularize a phrase unless already singular.
326              
327             =cut
328              
329             sub to_S {
330 182     182 1 827 return _inflect(shift, 0);
331             }
332              
333             =head1 BUGS
334              
335             Please report any bugs or feature requests to C<bug-lingua-en-inflect-phrase at
336             rt.cpan.org>, or through the web interface at
337             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Lingua-EN-Inflect-Phrase>. I
338             will be notified, and then you'll automatically be notified of progress on your
339             bug as I make changes.
340              
341             =head1 SUPPORT
342              
343             More information at:
344              
345             =over 4
346              
347             =item * RT: CPAN's request tracker
348              
349             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Lingua-EN-Inflect-Phrase>
350              
351             =item * AnnoCPAN: Annotated CPAN documentation
352              
353             L<http://annocpan.org/dist/Lingua-EN-Inflect-Phrase>
354              
355             =item * CPAN Ratings
356              
357             L<http://cpanratings.perl.org/d/Lingua-EN-Inflect-Phrase>
358              
359             =item * Search CPAN
360              
361             L<http://search.cpan.org/dist/Lingua-EN-Inflect-Phrase/>
362              
363             =back
364              
365             =head1 REPOSITORY
366              
367             git clone git://github.com/rkitover/lingua-en-inflect-phrase.git lingua-en-inflect-phrase
368              
369             =head1 SEE ALSO
370              
371             L<Lingua::EN::Inflect>, L<Lingua::EN::Inflect::Number>, L<Lingua::EN::Tagger>
372              
373             =head1 AUTHOR
374              
375             Rafael Kitover <rkitover@cpan.org>
376              
377             =head1 LICENSE AND COPYRIGHT
378              
379             Copyright (c) 2010 Rafael Kitover (rkitover@cpan.org).
380              
381             This program is free software; you can redistribute it and/or modify it
382             under the terms of either: the GNU General Public License as published
383             by the Free Software Foundation; or the Artistic License.
384              
385             See http://dev.perl.org/licenses/ for more information.
386              
387             =cut
388              
389             1;
390             # vim:et sts=2 sw=2 tw=0: