File Coverage

blib/lib/Lingua/EN/Inflect/Phrase.pm

Criterion	Covered	Total	%
statement	113	113	100.0
branch	74	76	97.3
condition	57	65	87.6
subroutine	12	12	100.0
pod	2	2	100.0
total	258	268	96.2

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::EN::Inflect::Phrase;
2							our $AUTHORITY = 'cpan:AVAR';
3							$Lingua::EN::Inflect::Phrase::VERSION = '0.20';
4	4			4		230370	use strict;
	4					43
	4					97
5	4			4		18	use warnings;
	4					9
	4					96
6	4			4		18	use Exporter 'import';
	4					8
	4					104
7	4			4		2634	use Lingua::EN::Inflect;
	4					109949
	4					301
8	4			4		2011	use Lingua::EN::Inflect::Number;
	4					2320
	4					27
9	4			4		1960	use Lingua::EN::Tagger;
	4					212236
	4					165
10	4			4		1388	use Lingua::EN::FindNumber '$number_re';
	4					14615
	4					403
11	4			4		1417	use Lingua::EN::Number::IsOrdinal 'is_ordinal';
	4					1889
	4					5489
12
13							=head1 NAME
14
15							Lingua::EN::Inflect::Phrase - Inflect short English Phrases
16
17							=cut
18
19							=head1 SYNOPSIS
20
21							use Lingua::EN::Inflect::Phrase;
22							use Test::More tests => 2;
23
24							my $plural = Lingua::EN::Inflect::Phrase::to_PL('green egg and ham');
25
26							is $plural, 'green eggs and ham';
27
28							my $singular = Lingua::EN::Inflect::Phrase::to_S('green eggs and ham');
29
30							is $singular, 'green egg and ham';
31
32							=head1 DESCRIPTION
33
34							Attempts to pluralize or singularize short English phrases.
35
36							Does not throw exceptions at present, if you attempt to pluralize an already
37							pluralized phrase, it will leave it unchanged (and vice versa.)
38
39							The behavior of this module is subject to change as I tweak the heuristics, as
40							some things get fixed others might regress. The processing of natural language
41							is a messy business.
42
43							If it doesn't work, please email or submit to RT the example you tried, and
44							I'll try to fix it.
45
46							=head1 OPTIONS
47
48							By default, this module prefers to treat words as nouns (sometimes words can be
49							interpreted as a verb or a noun without context.) This is better for things
50							such as database table/column names, which is what this module is primarily
51							for.
52
53							This behavior can be switched with the variable C<$prefer_nouns>. The default
54							is C<1>.
55
56							For example:
57
58							{
59							local $Lingua::EN::Inflect::Phrase::prefer_nouns = 0;
60							is Lingua::EN::Inflect::Phrase::to_S('sources split'), 'source splits';
61							}
62							{
63							local $Lingua::EN::Inflect::Phrase::prefer_nouns = 1;
64							is Lingua::EN::Inflect::Phrase::to_S('source splits'), 'source split';
65							}
66
67							=head1 OPTIONAL EXPORTS
68
69							L, L
70
71							=cut
72
73							our @EXPORT_OK = qw/to_PL to_S/;
74
75							=head1 SUBROUTINES
76
77							=cut
78
79							our $prefer_nouns = 1;
80
81							my $MAYBE_NOUN = qr{(\S+)/(?:NN[PS]?\|CD\|JJ)\b};
82							my $MAYBE_NOUN_TAG = qr{/(?:NN[PS]?\|CD\|JJ)\b};
83							my $NOUN_OR_VERB = qr{(\S+)/(?:NN[PS]?\|CD\|JJ\|VB[A-Z]?)\b};
84							my $NOUN_OR_VERB_TAG = qr{/(?:NN[PS]?\|CD\|JJ\|VB[A-Z]?)\b};
85							my $VERB_TAG = qr{/VB[A-z]?\b};
86
87							my $PREPOSITION_OR_CONJUNCTION_TAG = qr{/(?:CC\|IN)\b};
88
89							my $tagger;
90
91							sub _inflect_noun {
92	402			402		17580	my ($noun, $want_plural, $is_plural) = @_;
93
94	402					638	my $want_singular = not $want_plural;
95
96	402	100				1340	$is_plural = Lingua::EN::Inflect::Number::number($noun) ne 's'
97							unless defined $is_plural;
98
99							# fix "people" and "heroes" and a few others
100	402	100	100			190573	if ($noun =~ /^(?:people\|person)\z/i) {
		100	100
		100	100
		100
		100
		100
		100
		100
101	4	100				16	return $want_singular ? 'person' : 'people';
102							}
103							elsif ($noun =~ /^hero(?:es)?\z/i) {
104	4	100				14	return $want_singular ? 'hero' : 'heroes';
105							}
106							elsif ($want_singular && lc($noun) eq 'aliases') {
107	1					4	return 'alias';
108							}
109							elsif ($want_singular && lc($noun) eq 'statuses') {
110	1					3	return 'status';
111							}
112							elsif (lc($noun) eq 'belongs') {
113	4					10	return undef;
114							}
115							elsif ($want_plural && lc($noun) eq 'two') {
116	2					6	return 'twos';
117							}
118							elsif ($noun =~ /^[A-Z].+ity\z/) {
119	2	100				11	return $want_plural ? ucfirst(Lingua::EN::Inflect::Number::to_PL(lc($noun))) : $noun;
120							}
121							elsif ($noun =~ /^[A-Z].+ities\z/) {
122	2	100				9	return $want_plural ? $noun : ucfirst(Lingua::EN::Inflect::Number::to_S(lc($noun)));
123							}
124
125	382	100	100			1822	if ($want_plural && (not $is_plural)) {
		100	100
126	61					180	return Lingua::EN::Inflect::Number::to_PL($noun);
127							}
128							elsif ($want_singular && $is_plural) {
129	43					126	return Lingua::EN::Inflect::Number::to_S($noun);
130							}
131
132	278					521	return undef;
133							}
134
135							sub _inflect {
136	373			373		771	my ($phrase, $want_plural) = @_;
137	373					567	my $want_singular = not $want_plural;
138
139							# 'a' inflects to 'some', special-case it here
140	373	100				814	if ($phrase eq 'a') {
141	2	100				12	return $want_singular ? $phrase : 'as';
142							}
143
144							# Do not tag initial number, if any.
145							# Regex is from perldoc -q 'is a number'.
146	371					2317	my ($det, $number, $pad, $rest) =
147							$phrase =~ m{^(\s\S+/DET)?(\s(?:[+-]?)(?=\d\|\.\d)\d(?:\.\d)?(?:[Ee](?:[+-]?\d+))?)(\s)(.)$};
148
149	371		100			2473	$_ \|\|= '' for $det, $pad, $rest;
150
151	371					501	my $tagged;
152	371		66			687	$tagger \|\|= Lingua::EN::Tagger->new;
153
154							# force plural unless number is '1'
155	371	100				525267	if ((grep { defined && length } ($number, $pad, $rest)) == 3) {
	1113	100				3212
156	112					325	my $tagged_rest = $tagger->get_readable($rest);
157
158	112					98654	$tagged = $det . $number . $pad . $tagged_rest;
159
160	112	100	100			614	if ($number =~ /^\s1(?:\.0[Ee]?0*)?\z/
161							&& $tagged_rest !~ m{^(?:\S+/CC\|\d)}) {
162	32					61	$want_plural = 0;
163	32					48	$want_singular = 1;
164							}
165							else {
166	80					124	$want_plural = 1;
167	80					142	$want_singular = 0;
168							}
169							}
170							else {
171	259					766	$tagged = $tagger->get_readable($phrase);
172							}
173
174							# check for phrases like "one something" and force singular,
175							# or "one and a half ..." and force plural
176	371	100	100			260274	if (my ($det, $number, $conj, $and_zero, $pad, $rest) = $tagged =~ m{
		100	66
177							^ (\s* \S+/DET)?
178							(\s* (?:one\|single))/(?:JJ\|NN\|CD)\b
179							(\s*\S+/CC\b)?
180							(?:(\s* (?:no\|zero))/(?:DET\|CD))?
181							(\s*)
182							(.*)
183							}x) {
184
185	72		100			532	$_ \|\|= '' for $det, $conj, $and_zero, $pad, $rest;
186
187	72					198	$tagged = $det . $number . $conj . $and_zero . $pad . $rest;
188
189	72	100	66			247	if (length $conj && (not $and_zero)) {
		100
190	4					6	$want_plural = 1;
191	4					6	$want_singular = 0;
192							}
193							elsif (length $rest) {
194	64					107	$want_plural = 0;
195	64					99	$want_singular = 1;
196							}
197							}
198							# handle other numbers as words at the start of the phrase
199							# using Lingua::EN::FindNumber
200							elsif ($tagged =~ m{^\s*(?:(\S+)/DET)?}
201							&& (substr $phrase, $+[1]\|\|0) =~ /^\s*$number_re/) {
202
203	68		100			765	$number = (sort { length $a <=> length $b } map $_\|\|'', ($1, $2, $3, $4, $5))[-1];
	364					555
204
205	68	100				262	if (not is_ordinal($number)) {
206	56					2573	my $tagged_number_re;
207
208	56					187	foreach my $num_elem (split /\s+/, $number) {
209	60					159	$tagged_number_re .= "\Q$num_elem\E/[A-Z]+\\s*";
210							}
211
212	56					89	my $tagged_number;
213	56					515	($tagged_number, $pad, $rest) = $tagged =~ m/($tagged_number_re)(\s)(.)/;
214	56					220	my @tagged_number_pos = ($-[1], $+[1]);
215
216	56	100				182	if (length $rest) {
217	44					118	substr($tagged, $tagged_number_pos[0], ($tagged_number_pos[1] - $tagged_number_pos[0])) = $number;
218	44					72	$want_plural = 1;
219	44					93	$want_singular = 0;
220							}
221							}
222							}
223
224	371					1356	my ($noun, $tag);
225
226							# last noun (or verb that could be a noun) before a preposition/conjunction
227							# or last noun/verb
228	371	100	100			6246	if ( (($noun) = $tagged =~ m\|${MAYBE_NOUN} (?! .* ${MAYBE_NOUN_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
			66
			100
229							.* ${PREPOSITION_OR_CONJUNCTION_TAG}\|x)
230
231							or (($noun) = $tagged =~ m\|${MAYBE_NOUN} (?!.*${MAYBE_NOUN_TAG})\|x)
232
233							or (($noun) = $tagged =~ m\|${NOUN_OR_VERB} (?!.${NOUN_OR_VERB_TAG} . ${PREPOSITION_OR_CONJUNCTION_TAG})
234							.* ${PREPOSITION_OR_CONJUNCTION_TAG}\|x)
235
236							or (($noun) = $tagged =~ m\|${NOUN_OR_VERB} (?! .* ${NOUN_OR_VERB_TAG})\|x)) {
237
238	365					1265	my @pos = ($-[1], $+[1]);
239	365					648	my $inflected_noun;
240
241	365					784	$inflected_noun = _inflect_noun($noun, $want_plural);
242
243							# check if there is a verb following the noun
244							# the verb either needs to be pluarlized or be taken as the noun,
245							# depending on the value of $prefer_nouns
246	365					38820	my ($verb) = substr($tagged, $pos[1]) =~ m\|^/[A-Z]+\s+(\S+)${VERB_TAG}\|;
247
248	365					2041	my @verb_pos = map $pos[1] + $_, grep defined, ($-[1], $+[1]);
249
250							# the verb may be tagged as a noun unless singularized (pluralized as a noun.)
251	365	100	100			1718	if ((not $verb) && (not $prefer_nouns)
			66
252							&& $tagger->get_readable(_inflect_noun($noun, 1, 0)) =~ $VERB_TAG) {
253
254							# find the preceding noun
255	2	50				2848	if (my ($preceding_noun) = substr($tagged, 0, $pos[0]) =~ m\|${MAYBE_NOUN}\s*\z\|) {
256	2					8	my @preceding_noun_pos = ($-[1], $+[1]);
257
258	2					6	$verb = $noun;
259	2					5	@verb_pos = @pos;
260	2					4	$noun = $preceding_noun;
261	2					4	@pos = @preceding_noun_pos;
262	2					5	$inflected_noun = _inflect_noun($noun, $want_plural);
263							}
264							}
265
266	365	100				1099	if ($verb) {
267	35					95	my $plural_verb = Lingua::EN::Inflect::PL_V($verb);
268
269	35	100				2979	if ($prefer_nouns) {
		100
270	31	100	33			100	if ($tagger->get_readable($plural_verb) =~ $MAYBE_NOUN
			66
			100
271							\|\| ( # noun singular verb plural should be handled as noun noun, unless something follows it,
272							# and only for "VB" not "VBZ" or "VBN"
273							$verb eq $plural_verb
274							&& $tagger->get_readable(_inflect_noun($verb, 1)) =~ $MAYBE_NOUN
275							&& substr($tagged, $verb_pos[1]) =~ m{^\s/VB\s$}
276							)) {
277	7					5582	$inflected_noun = _inflect_noun($verb, $want_plural);
278
279	7					1752	@pos = @verb_pos;
280							}
281							}
282							elsif ($inflected_noun) {
283	2	100				6	if ($want_plural) {
		50
284	1					5	substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $plural_verb;
285							}
286							elsif ($want_singular) {
287							# to singularize a verb we pluralize it as a noun
288	1					4	my $singular_verb = _inflect_noun($verb, 1, 0);
289
290	1					730	substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $singular_verb;
291							}
292							}
293							}
294
295	365	100				36369	substr($tagged, $pos[0], ($pos[1] - $pos[0])) = $inflected_noun if $inflected_noun;
296
297	365					2070	($phrase = $tagged) =~ s{/[A-Z]+}{}g;
298							}
299							# fallback
300							else {
301	6					21	my $number = Lingua::EN::Inflect::Number::number($phrase);
302
303	6	100	100			2676	if ($want_plural && $number ne 'p') {
		100	100
304	2					8	return Lingua::EN::Inflect::Number::to_PL($phrase);
305							}
306							elsif ($want_singular && $number ne 's') {
307	1					4	return Lingua::EN::Inflect::Number::to_S($phrase);
308							}
309							}
310
311	368					2288	return $phrase;
312							}
313
314							=head2 to_PL
315
316							Attempts to pluralizes a phrase unless already plural.
317
318							=cut
319
320							sub to_PL {
321	187			187	1	37713	return _inflect(shift, 1);
322							}
323
324							=head2 to_S
325
326							Attempts to singularize a phrase unless already singular.
327
328							=cut
329
330							sub to_S {
331	186			186	1	460	return _inflect(shift, 0);
332							}
333
334							=head1 BUGS
335
336							Please report any bugs or feature requests to C
337							rt.cpan.org>, or through the web interface at
338							L. I
339							will be notified, and then you'll automatically be notified of progress on your
340							bug as I make changes.
341
342							=head1 REPOSITORY
343
344							git clone git://github.com/rkitover/lingua-en-inflect-phrase.git lingua-en-inflect-phrase
345
346							=head1 SEE ALSO
347
348							L, L, L
349
350							=head1 AUTHOR
351
352							rkitover: Rafael Kitover
353
354							=head1 CONTRIBUTORS
355
356							zakame: Zak B. Elep
357
358							=head1 LICENSE AND COPYRIGHT
359
360							Copyright (c) 2018 Rafael Kitover (rkitover@cpan.org).
361
362							This program is free software; you can redistribute it and/or modify it
363							under the terms of either: the GNU General Public License as published
364							by the Free Software Foundation; or the Artistic License.
365
366							See http://dev.perl.org/licenses/ for more information.
367
368							=cut
369
370							1;
371							# vim:et sts=2 sw=2 tw=0: