File Coverage

blib/lib/Lingua/EN/Inflect/Phrase.pm

Criterion	Covered	Total	%
statement	112	112	100.0
branch	66	68	97.0
condition	56	65	86.1
subroutine	13	13	100.0
pod	2	2	100.0
total	249	260	95.7

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::EN::Inflect::Phrase;
2							BEGIN {
3	4			4		263987	$Lingua::EN::Inflect::Phrase::AUTHORITY = 'cpan:AVAR';
4							}
5							{
6							$Lingua::EN::Inflect::Phrase::VERSION = '0.18';
7							}
8
9	4			4		433	use strict;
	4					8
	4					127
10	4			4		19	use warnings;
	4					6
	4					102
11	4			4		17	use Exporter 'import';
	4					8
	4					187
12	4			4		122559	use Lingua::EN::Inflect;
	4					132337
	4					514
13	4			4		11407	use Lingua::EN::Inflect::Number;
	4					6743
	4					30
14	4			4		4234	use Lingua::EN::Tagger;
	4					437914
	4					339
15	4			4		4253	use Lingua::EN::FindNumber '$number_re';
	4					25368
	4					615
16	4			4		4590	use Lingua::EN::Number::IsOrdinal 'is_ordinal';
	4					3366
	4					7661
17
18							=head1 NAME
19
20							Lingua::EN::Inflect::Phrase - Inflect short English Phrases
21
22							=cut
23
24							=head1 SYNOPSIS
25
26							use Lingua::EN::Inflect::Phrase;
27							use Test::More tests => 2;
28
29							my $plural = Lingua::EN::Inflect::Phrase::to_PL('green egg and ham');
30
31							is $plural, 'green eggs and ham';
32
33							my $singular = Lingua::EN::Inflect::Phrase::to_S('green eggs and ham');
34
35							is $singular, 'green egg and ham';
36
37							=head1 DESCRIPTION
38
39							Attempts to pluralize or singularize short English phrases.
40
41							Does not throw exceptions at present, if you attempt to pluralize an already
42							pluralized phrase, it will leave it unchanged (and vice versa.)
43
44							The behavior of this module is subject to change as I tweak the heuristics, as
45							some things get fixed others might regress. The processing of natural language
46							is a messy business.
47
48							If it doesn't work, please email or submit to RT the example you tried, and
49							I'll try to fix it.
50
51							=head1 OPTIONS
52
53							By default, this module prefers to treat words as nouns (sometimes words can be
54							interpreted as a verb or a noun without context.) This is better for things
55							such as database table/column names, which is what this module is primarily
56							for.
57
58							This behavior can be switched with the variable C<$prefer_nouns>. The default
59							is C<1>.
60
61							For example:
62
63							{
64							local $Lingua::EN::Inflect::Phrase::prefer_nouns = 0;
65							is Lingua::EN::Inflect::Phrase::to_S('sources split'), 'source splits';
66							}
67							{
68							local $Lingua::EN::Inflect::Phrase::prefer_nouns = 1;
69							is Lingua::EN::Inflect::Phrase::to_S('source splits'), 'source split';
70							}
71
72							=head1 OPTIONAL EXPORTS
73
74							L</to_PL>, L</to_S>
75
76							=cut
77
78							our @EXPORT_OK = qw/to_PL to_S/;
79
80							=head1 SUBROUTINES
81
82							=cut
83
84							our $prefer_nouns = 1;
85
86							my $MAYBE_NOUN = qr{(\S+)/(?:NNS?\|CD\|JJ)\b};
87							my $MAYBE_NOUN_TAG = qr{/(?:NNS?\|CD\|JJ)\b};
88							my $NOUN_OR_VERB = qr{(\S+)/(?:NNS?\|CD\|JJ\|VB[A-Z]?)\b};
89							my $NOUN_OR_VERB_TAG = qr{/(?:NNS?\|CD\|JJ\|VB[A-Z]?)\b};
90							my $VERB_TAG = qr{/VB[A-z]?\b};
91
92							my $PREPOSITION_OR_CONJUNCTION_TAG = qr{/(?:CC\|IN)\b};
93
94							my $tagger;
95
96							sub _inflect_noun {
97	394			394		24575	my ($noun, $want_plural, $is_plural) = @_;
98
99	394					823	my $want_singular = not $want_plural;
100
101	394	100				2657	$is_plural = Lingua::EN::Inflect::Number::number($noun) ne 's'
102							unless defined $is_plural;
103
104							# fix "people" and "heroes" and a few others
105	394	100	100			401227	if ($noun =~ /^(?:people\|person)\z/i) {
		100	100
		100	100
		100
		100
		100
106	4	100				21	return $want_singular ? 'person' : 'people';
107							}
108							elsif ($noun =~ /^hero(?:es)?\z/i) {
109	4	100				999	return $want_singular ? 'hero' : 'heroes';
110							}
111							elsif ($want_singular && lc($noun) eq 'aliases') {
112	1					7	return 'alias';
113							}
114							elsif ($want_singular && lc($noun) eq 'statuses') {
115	1					4	return 'status';
116							}
117							elsif (lc($noun) eq 'belongs') {
118	4					67	return undef;
119							}
120							elsif ($want_plural && lc($noun) eq 'two') {
121	2					10	return 'twos';
122							}
123
124	378	100	100			5363	if ($want_plural && (not $is_plural)) {
		100	100
125	60					300	return Lingua::EN::Inflect::Number::to_PL($noun);
126							}
127							elsif ($want_singular && $is_plural) {
128	42					179	return Lingua::EN::Inflect::Number::to_S($noun);
129							}
130
131	276					743	return undef;
132							}
133
134							sub _inflect {
135	365			365		1007	my ($phrase, $want_plural) = @_;
136	365					871	my $want_singular = not $want_plural;
137
138							# 'a' inflects to 'some', special-case it here
139	365	100				1355	if ($phrase eq 'a') {
140	2	100				18	return $want_singular ? $phrase : 'as';
141							}
142
143							# Do not tag initial number, if any.
144							# Regex is from perldoc -q 'is a number'.
145	363					3046	my ($det, $number, $pad, $rest) =
146							$phrase =~ m{^(\s\S+/DET)?(\s(?:[+-]?)(?=\d\|\.\d)\d(?:\.\d)?(?:[Ee](?:[+-]?\d+))?)(\s)(.)$};
147
148	363		100			4485	$_ \|\|= '' for $det, $pad, $rest;
149
150	363					614	my $tagged;
151	363		66			1198	$tagger \|\|= Lingua::EN::Tagger->new;
152
153							# force plural unless number is '1'
154	363	100				961256	if ((grep { defined && length } ($number, $pad, $rest)) == 3) {
	1089	100				5773
155	112					638	my $tagged_rest = $tagger->get_readable($rest);
156
157	112					133846	$tagged = $det . $number . $pad . $tagged_rest;
158
159	112	100	100			1103	if ($number =~ /^\s1(?:\.0[Ee]?0*)?\z/
160							&& $tagged_rest !~ m{^(?:\S+/CC\|\d)}) {
161	32					67	$want_plural = 0;
162	32					73	$want_singular = 1;
163							}
164							else {
165	80					144	$want_plural = 1;
166	80					176	$want_singular = 0;
167							}
168							}
169							else {
170	251					1893	$tagged = $tagger->get_readable($phrase);
171							}
172
173							# check for phrases like "one something" and force singular,
174							# or "one and a half ..." and force plural
175	363	100	100			393076	if (my ($det, $number, $conj, $and_zero, $pad, $rest) = $tagged =~ m{
		100	66
176							^ (\s* \S+/DET)?
177							(\s* (?:one\|single))/(?:JJ\|NN\|CD)\b
178							(\s*\S+/CC\b)?
179							(?:(\s* (?:no\|zero))/(?:DET\|CD))?
180							(\s*)
181							(.*)
182							}x) {
183
184	72		100			1067	$_ \|\|= '' for $det, $conj, $and_zero, $pad, $rest;
185
186	72					295	$tagged = $det . $number . $conj . $and_zero . $pad . $rest;
187
188	72	100	66			576	if (length $conj && (not $and_zero)) {
		100
189	4					9	$want_plural = 1;
190	4					8	$want_singular = 0;
191							}
192							elsif (length $rest) {
193	64					178	$want_plural = 0;
194	64					128	$want_singular = 1;
195							}
196							}
197							# handle other numbers as words at the start of the phrase
198							# using Lingua::EN::FindNumber
199							elsif ($tagged =~ m{^\s*(?:(\S+)/DET)?}
200							&& (substr $phrase, $+[1]\|\|0) =~ /^\s*$number_re/) {
201
202	68		100			2069	$number = (sort { length $a <=> length $b } map $_\|\|'', ($1, $2, $3, $4, $5))[-1];
	364					736
203
204	68	100				406	if (not is_ordinal($number)) {
205	56					8428	my $tagged_number_re;
206
207	56					961	foreach my $num_elem (split /\s+/, $number) {
208	60					271	$tagged_number_re .= "\Q$num_elem\E/[A-Z]+\\s*";
209							}
210
211	56					117	my $tagged_number;
212	56					929	($tagged_number, $pad, $rest) = $tagged =~ m/($tagged_number_re)(\s)(.)/;
213	56					324	my @tagged_number_pos = ($-[1], $+[1]);
214
215	56	100				340	if (length $rest) {
216	44					180	substr($tagged, $tagged_number_pos[0], ($tagged_number_pos[1] - $tagged_number_pos[0])) = $number;
217	44					98	$want_plural = 1;
218	44					124	$want_singular = 0;
219							}
220							}
221							}
222
223	363					1747	my ($noun, $tag);
224
225							# last noun (or verb that could be a noun) before a preposition/conjunction
226							# or last noun/verb
227	363	100	100			12551	if ( (($noun) = $tagged =~ m\|${MAYBE_NOUN} (?! .* ${MAYBE_NOUN_TAG} .* ${PREPOSITION_OR_CONJUNCTION_TAG})
			66
			100
228							.* ${PREPOSITION_OR_CONJUNCTION_TAG}\|x)
229
230							or (($noun) = $tagged =~ m\|${MAYBE_NOUN} (?!.*${MAYBE_NOUN_TAG})\|x)
231
232							or (($noun) = $tagged =~ m\|${NOUN_OR_VERB} (?!.${NOUN_OR_VERB_TAG} . ${PREPOSITION_OR_CONJUNCTION_TAG})
233							.* ${PREPOSITION_OR_CONJUNCTION_TAG}\|x)
234
235							or (($noun) = $tagged =~ m\|${NOUN_OR_VERB} (?! .* ${NOUN_OR_VERB_TAG})\|x)) {
236
237	357					3261	my @pos = ($-[1], $+[1]);
238	357					908	my $inflected_noun;
239
240	357					1151	$inflected_noun = _inflect_noun($noun, $want_plural);
241
242							# check if there is a verb following the noun
243							# the verb either needs to be pluarlized or be taken as the noun,
244							# depending on the value of $prefer_nouns
245	357					66264	my ($verb) = substr($tagged, $pos[1]) =~ m\|^/[A-Z]+\s+(\S+)${VERB_TAG}\|;
246
247	357					4001	my @verb_pos = map $pos[1] + $_, grep defined, ($-[1], $+[1]);
248
249							# the verb may be tagged as a noun unless singularized (pluralized as a noun.)
250	357	100	100			4149	if ((not $verb) && (not $prefer_nouns)
			66
251							&& $tagger->get_readable(_inflect_noun($noun, 1, 0)) =~ $VERB_TAG) {
252
253							# find the preceding noun
254	2	50				3738	if (my ($preceding_noun) = substr($tagged, 0, $pos[0]) =~ m\|${MAYBE_NOUN}\s*\z\|) {
255	2					10	my @preceding_noun_pos = ($-[1], $+[1]);
256
257	2					6	$verb = $noun;
258	2					6	@verb_pos = @pos;
259	2					4	$noun = $preceding_noun;
260	2					6	@pos = @preceding_noun_pos;
261	2					6	$inflected_noun = _inflect_noun($noun, $want_plural);
262							}
263							}
264
265	357	100				1660	if ($verb) {
266	35					159	my $plural_verb = Lingua::EN::Inflect::PL_V($verb);
267
268	35	100				4693	if ($prefer_nouns) {
		100
269	31	100	33			621	if ($tagger->get_readable($plural_verb) =~ $MAYBE_NOUN
			66
			66
270							\|\| ( # noun singular verb plural should be handled as noun noun, unless something follows it,
271							# and only for "VB" not "VBZ" or "VBN"
272							$verb eq $plural_verb
273							&& $tagger->get_readable(_inflect_noun($verb, 1)) =~ $MAYBE_NOUN
274							&& substr($tagged, $verb_pos[1]) =~ m{^\s/VB\s$}
275							)) {
276	7					6939	$inflected_noun = _inflect_noun($verb, $want_plural);
277
278	7					8118	@pos = @verb_pos;
279							}
280							}
281							elsif ($inflected_noun) {
282	2	100				10	if ($want_plural) {
		50
283	1					5	substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $plural_verb;
284							}
285							elsif ($want_singular) {
286							# to singularize a verb we pluralize it as a noun
287	1					3	my $singular_verb = _inflect_noun($verb, 1, 0);
288
289	1					931	substr($tagged, $verb_pos[0], ($verb_pos[1] - $verb_pos[0])) = $singular_verb;
290							}
291							}
292							}
293
294	357	100				54193	substr($tagged, $pos[0], ($pos[1] - $pos[0])) = $inflected_noun if $inflected_noun;
295
296	357					4654	($phrase = $tagged) =~ s{/[A-Z]+}{}g;
297							}
298							# fallback
299							else {
300	6					35	my $number = Lingua::EN::Inflect::Number::number($phrase);
301
302	6	100	100			3652	if ($want_plural && $number ne 'p') {
		100	100
303	2					13	return Lingua::EN::Inflect::Number::to_PL($phrase);
304							}
305							elsif ($want_singular && $number ne 's') {
306	1					7	return Lingua::EN::Inflect::Number::to_S($phrase);
307							}
308							}
309
310	360					4044	return $phrase;
311							}
312
313							=head2 to_PL
314
315							Attempts to pluralizes a phrase unless already plural.
316
317							=cut
318
319							sub to_PL {
320	183			183	1	77714	return _inflect(shift, 1);
321							}
322
323							=head2 to_S
324
325							Attempts to singularize a phrase unless already singular.
326
327							=cut
328
329							sub to_S {
330	182			182	1	827	return _inflect(shift, 0);
331							}
332
333							=head1 BUGS
334
335							Please report any bugs or feature requests to C<bug-lingua-en-inflect-phrase at
336							rt.cpan.org>, or through the web interface at
337							L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Lingua-EN-Inflect-Phrase>. I
338							will be notified, and then you'll automatically be notified of progress on your
339							bug as I make changes.
340
341							=head1 SUPPORT
342
343							More information at:
344
345							=over 4
346
347							=item * RT: CPAN's request tracker
348
349							L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Lingua-EN-Inflect-Phrase>
350
351							=item * AnnoCPAN: Annotated CPAN documentation
352
353							L<http://annocpan.org/dist/Lingua-EN-Inflect-Phrase>
354
355							=item * CPAN Ratings
356
357							L<http://cpanratings.perl.org/d/Lingua-EN-Inflect-Phrase>
358
359							=item * Search CPAN
360
361							L<http://search.cpan.org/dist/Lingua-EN-Inflect-Phrase/>
362
363							=back
364
365							=head1 REPOSITORY
366
367							git clone git://github.com/rkitover/lingua-en-inflect-phrase.git lingua-en-inflect-phrase
368
369							=head1 SEE ALSO
370
371							L<Lingua::EN::Inflect>, L<Lingua::EN::Inflect::Number>, L<Lingua::EN::Tagger>
372
373							=head1 AUTHOR
374
375							Rafael Kitover <rkitover@cpan.org>
376
377							=head1 LICENSE AND COPYRIGHT
378
379							Copyright (c) 2010 Rafael Kitover (rkitover@cpan.org).
380
381							This program is free software; you can redistribute it and/or modify it
382							under the terms of either: the GNU General Public License as published
383							by the Free Software Foundation; or the Artistic License.
384
385							See http://dev.perl.org/licenses/ for more information.
386
387							=cut
388
389							1;
390							# vim:et sts=2 sw=2 tw=0: