File Coverage

blib/lib/WordLists/Inflect/Simple.pm
Criterion Covered Total %
statement 144 183 78.6
branch 64 98 65.3
condition 72 140 51.4
subroutine 16 19 84.2
pod 0 15 0.0
total 296 455 65.0


line stmt bran cond sub pod time code
1             package WordLists::Inflect::Simple;
2 2     2   3028 use utf8;
  2         9  
  2         10  
3 2     2   51 use strict;
  2         5  
  2         51  
4 2     2   10 use warnings;
  2         3  
  2         50  
5 2     2   558 use WordLists::Base;
  2         4  
  2         6107  
6             our $VERSION = $WordLists::Base::VERSION;
7            
8             our %sTypes = (
9             n=>
10             [qw(
11             singular
12             plural
13             )],
14             v=>
15             [qw(
16             present_1st_person
17             present_2nd_person
18             present_3rd_person
19             present_1st_person_plural
20             present_2nd_person_plural
21             present_3rd_person_plural
22            
23             present_participle
24             past_tense
25             past_participle
26            
27             infinitive
28             )],
29             adj=>
30             [qw(
31             comparative
32             superlative
33             )],
34            
35             ); # but everything should start as infinitive!
36             our $VOWELS = "aeiou";
37             our $iDEBUG = 5;
38            
39             sub new
40             {
41 1     1 0 80 my $class = shift;
42 1         1 my $args = shift;
43 1 50       5 $args = {} unless defined $args;
44 1         4 my $self =
45             {
46             special_case => [qw(man woman person child e y s of in)], # general O' -in-law
47             irregular => {},
48 1         3 %{$args}
49             };
50 1         6 bless $self, $class;
51             }
52             sub possible_special_cases
53             {
54 0     0 0 0 return qw(man woman person child e y s of in general O' -in-law);
55             }
56             sub special_cases
57             {
58 0     0 0 0 my $self = shift;
59 0         0 my $new = shift;
60 0 0 0     0 if (defined $new and ref $new eq ref [])
61             {
62 0         0 $self->{'special_case'} = $new;
63             }
64 0         0 return @{$self->{'special_case'}}
  0         0  
65             }
66             sub is_special_cased
67             {
68 198     198 0 214 my ($self , $sCase ) = @_;
69 198         164 return grep {$_ eq $sCase} @{$self->{'special_case'}};
  1813         2811  
  198         298  
70             }
71             sub add_special_case
72             {
73 1     1 0 309 my ($self , $sCase ) = @_;
74 1         2 $self->{'special_case'} = [ $sCase, @{$self->{'special_case'}} ];
  1         6  
75             }
76            
77             sub remove_special_case
78             {
79 0     0 0 0 my ($self , $sCase ) = @_;
80 0         0 $self->{'special_case'} = [ grep {$_ ne $sCase} @{$self->{'special_case'}} ];
  0         0  
  0         0  
81             }
82            
83             sub pos_from_type
84             {
85 1     1 0 2 my ($self, $sType) = @_;
86 1         3 foreach my $sPos (keys %sTypes)
87             {
88 1 50       2 if (grep {$_ eq $sType} @{$sTypes{$sPos}})
  10         13  
  1         2  
89             {
90 1         4 return $sPos;
91             }
92             }
93 0         0 return '';
94             }
95             sub add_irregular_word
96             {
97 2     2 0 4 my ($self, $args) = @_;
98 2         3 my $sW = $args->{'w'};
99 2         2 foreach my $key (keys %{$args})
  2         6  
100             {
101 4 100       6 if (grep {$_ eq $key} keys %sTypes) # key is a pos
  12 100       23  
102 42         54 {
103 1         1 foreach my $sType (keys %{$args->{$key}})
  1         3  
104             {
105 1         6 $self->add_irregular_inflection({w=>$sW, 'pos'=>$key, type=>$sType, inflection=>$args->{$key}{$sType}});
106             }
107             }
108 9         9 elsif (grep { grep{$_ eq $key} @{$sTypes{$_}} } keys %sTypes)
  9         15  
109             {
110 1         7 $self->add_irregular_inflection({w=>$sW, type=>$key, inflection=>$args->{$key}});
111             }
112             }
113 2         7 return all_irregular_inflections({w=>$sW});
114             }
115            
116             sub add_irregular_inflection
117             {
118 2     2 0 3 my ($self, $args) = @_;
119 2         3 my $sW = $args->{'w'};
120 2         4 my $sPos = $args->{'pos'};
121 2         4 my $sType = $args->{'type'};
122 2         26 my $sInf = $args->{'inflection'};
123 2   66     8 $sPos ||= $self->pos_from_type($sType);
124 2 50       5 if (!$sPos)
125             {
126 0         0 warn "Pos required! ($sW, ?, $sType)";
127 0         0 return undef;
128             }
129 2         3 push (@{$self->{irregular}{$sW}{$sPos}{$sType}}, $sInf);
  2         11  
130             }
131            
132             sub regular_inflection
133             {
134 50     50 0 9404 my ($self, $args) = @_;
135 50         62 my $sW = $args->{'w'};
136 50         62 my $sPos = $args->{'pos'};
137 50         47 my $sType = $args->{'type'};
138 50         58 my $sInf = $sW;
139 50         320 my $three_syllables = qr/[$VOWELS]+[^$VOWELS]+[$VOWELS]+[^$VOWELS]+(?:y|[$VOWELS]+[^$VOWELS]+|[$VOWELS])$/;
140 50 100 100     566 if (
    100 100        
    100 66        
    100 100        
    100 100        
      66        
      100        
      100        
      66        
141             ($sPos eq 'n' and $sType eq 'plural')
142             or
143             ($sPos eq 'v' and $sType eq 'present_3rd_person')
144             )
145             {
146 14         18 $sInf = $sW.'s';
147 14 50       23 $sInf =~ s/siss$/eses/ if $self->is_special_cased('sis'); #thesis => theses
148 14 50       24 $sInf =~ s/(s|x|sh|ch|z)s$/$1es/ if $self->is_special_cased('s');
149 14 50       28 $sInf =~ s/([^$VOWELS])ys$/$1ies/ if $self->is_special_cased('y');
150 14 50 33     54 $sInf =~ s/womans$/women/ if $self->is_special_cased('woman') and $sInf !~ /^[[:upper:]]/;
151 14 50       25 $sInf =~ s/Womans$/Women/ if $self->is_special_cased('woman');
152 14 50 33     27 $sInf =~ s/mans$/men/ if $self->is_special_cased('man') and $sInf !~ /^[[:upper:]]/; # German Germans
153 14 50       26 $sInf =~ s/Mans$/Men/ if $self->is_special_cased('man'); # Man O'War
154 14 50 33     26 $sInf =~ s/persons$/people/ if $self->is_special_cased('person') and $sInf !~ /^[[:upper:]]/;
155 14 50       25 $sInf =~ s/^Persons$/People/ if $self->is_special_cased('person'); # Person of Colour
156 14 50 33     26 $sInf =~ s/childs$/children/ if $self->is_special_cased('child') and $sInf !~ /^[[:upper:]]/; # Rothschild Rothschilds
157 14 50       25 $sInf =~ s/^Childs$/Children/ if $self->is_special_cased('child'); # Child of the 60s
158             }
159             elsif (
160             ($sPos eq 'v' and $sType eq 'past_tense')
161             or
162             ($sPos eq 'v' and $sType eq 'past_participle')
163             )
164             {
165 10         16 $sInf = $sW.'ed';
166 10 50       17 $sInf =~ s/eed$/ed/ if $self->is_special_cased('e');
167 10 50       22 $sInf =~ s/([^$VOWELS])yed$/$1ied/ if $self->is_special_cased('y');
168             }
169             elsif (
170             ($sPos eq 'v' and $sType eq 'present_participle')
171             )
172             {
173 11         15 $sInf = $sW.'ing';
174             #$sInf =~ s/([^$VOWELS])eing$/$1ing/ if $self->is_special_cased('e');
175 11 50       22 $sInf =~ s/([^aeio])eing$/$1ing/ if $self->is_special_cased('e');
176            
177             }
178             elsif (
179             ($sPos eq 'adj' and $sType eq 'comparative')
180             )
181             {
182            
183 3 50       13 if ($sW =~ /$three_syllables/)
184             {
185 0         0 return "more $sW";
186             }
187             else
188             {
189 3         5 $sInf = $sW.'er';
190 3 50       9 $sInf =~ s/eer$/er/ if $self->is_special_cased('e');
191 3 50       8 $sInf =~ s/([^$VOWELS])yer$/$1ier/ if $self->is_special_cased('y');
192             }
193             }
194             elsif (
195             ($sPos eq 'adj' and $sType eq 'superlative')
196             )
197             {
198 4 100       35 if ($sW =~ /$three_syllables/)
199             {
200 1         8 return "most $sW";
201             }
202             else
203             {
204 3         6 $sInf = $sW.'est';
205 3 50       6 $sInf =~ s/([^$VOWELS])yest$/$1iest/ if $self->is_special_cased('y');
206 3 50       8 $sInf =~ s/eest$/est/ if $self->is_special_cased('e');
207             }
208             }
209 49         299 return $sInf;
210             }
211            
212             sub get_irregular_inflections
213             {
214 22     22 0 22 my ($self, $args) = @_;
215 22         26 my $sW = $args->{'w'};
216 22         26 my $sPos = $args->{'pos'};
217 22         24 my $sType = $args->{'type'};
218 22         24 my $sInf = $args->{'inflection'};
219 22   33     35 $sPos ||= pos_from_type($sType);
220 22 50       33 if (!$sPos)
221             {
222 0         0 warn "Pos required! ($sW, ?, $sType)";
223 0         0 return undef;
224             }
225 22 100       62 return @{$self->{'irregular'}{$sW}{$sPos}{$sType}} if defined $self->{'irregular'}{$sW}{$sPos}{$sType};
  2         8  
226 20         48 return undef;
227             }
228            
229             sub irregular_inflection
230             {
231 22     22 0 24 my ($self, $args) = @_;
232 22         18 my $sInf = ${[$self->get_irregular_inflections($args)]}[0];
  22         36  
233 22 100       46 unless (defined $sInf)
234             {
235 20         34 $sInf = $self->regular_inflection($args);
236             }
237 22         46 return $sInf;
238             }
239            
240             sub phrase_inflection
241             {
242 24     24 0 2498 my ($self, $args) = @_;
243 24         29 my $sPhrase = $args->{'w'};
244 24         28 my $sPos = $args->{'pos'};
245            
246 24   100     71 $args->{'inflect'} ||= \&WordLists::Inflect::Simple::irregular_inflection;
247            
248 24         71 my @sTokens = split(/\s/, $sPhrase); # Even in "top-up card" and "Man O'War" we never want to split by /-/ or /'/. This is only an issue in irregulars anyway.
249            
250 24 100 66     328 if ($sPos eq 'v')
    50 33        
    50 33        
    50 66        
    50 33        
    100 33        
    50 66        
    50 33        
    50 66        
    100 33        
    100 33        
    100 0        
    50 0        
      100        
      66        
      66        
      66        
      33        
      33        
      0        
      66        
      33        
      33        
      66        
      33        
      33        
      0        
      66        
      66        
      33        
251             {
252 14         19 $args->{'w'} = $sTokens[0];
253 14         14 $sTokens[0] = &{$args->{'inflect'}}($self, $args);
  14         23  
254             }
255             elsif ($sPos eq 'n' and $sTokens[-1]=~/^O['’]/ and defined $sTokens[-2] and $self->is_special_cased("O'"))
256             {
257 0         0 $args->{'w'} = $sTokens[-2];
258 0         0 $sTokens[-2] = &{$args->{'inflect'}}($self, $args);
  0         0  
259             }
260             elsif ($sPos eq 'n' and $sTokens[-1]=~/^[Gg]eneral/ and defined $sTokens[-2] and $self->is_special_cased('general'))
261             {
262 0         0 $args->{'w'} = $sTokens[-2];
263 0         0 $sTokens[-2] = &{$args->{'inflect'}}($self, $args);
  0         0  
264             }
265             elsif ($sPos eq 'n' and $sTokens[-1]=~/(.*)-in-law/ and $self->is_special_cased('-in-law'))
266             {
267 0         0 $args->{'w'} = $1;
268 0         0 $sTokens[-1] = &{$args->{'inflect'}}($self, $args).'-in-law';
  0         0  
269             }
270             elsif ($sPos eq 'n' and $sTokens[-1] eq 'law' and defined $sTokens[-2] and $sTokens[-2] eq 'in' and defined $sTokens[-3] and $self->is_special_cased('-in-law'))
271             {
272 0         0 $args->{'w'} = $sTokens[-3];
273 0         0 $sTokens[-3] = &{$args->{'inflect'}}($self, $args);
  0         0  
274             }
275             elsif ($sPos eq 'n' and defined $sTokens[-3] and $sTokens[-2]=~/\bof\b/ and $self->is_special_cased('of'))
276             {
277 1         2 $args->{'w'} = $sTokens[-3];
278 1         2 $sTokens[-3] = &{$args->{'inflect'}}($self, $args);
  1         2  
279             }
280             elsif ($sPos eq 'n' and defined $sTokens[-4] and $sTokens[-3]=~/\bof\b/ and $sTokens[-2]=~/\bthe\b/ and $self->is_special_cased('of'))
281             {
282 0         0 $args->{'w'} = $sTokens[-4];
283 0         0 $sTokens[-4] = &{$args->{'inflect'}}($self, $args);
  0         0  
284             }
285             elsif ($sPos eq 'n' and defined $sTokens[-3] and $sTokens[-2]=~/\bin\b/ and $self->is_special_cased('in'))
286             {
287 0         0 $args->{'w'} = $sTokens[-3];
288 0         0 $sTokens[-3] = &{$args->{'inflect'}}($self, $args);
  0         0  
289             }
290             elsif ($sPos eq 'n' and defined $sTokens[-4] and $sTokens[-3]=~/\bin\b/ and $sTokens[-2]=~/\bthe\b/ and $self->is_special_cased('in'))
291             {
292 0         0 $args->{'w'} = $sTokens[-4];
293 0         0 $sTokens[-4] = &{$args->{'inflect'}}($self, $args);
  0         0  
294             }
295             elsif ($sPos eq 'n')
296             {
297 5         7 $args->{'w'} = $sTokens[-1];
298 5         7 $sTokens[-1] = &{$args->{'inflect'}}($self, $args);
  5         12  
299             }
300             elsif ($sPos eq 'adj' and $#sTokens==0)
301             {
302 2         4 $args->{'w'} = $sTokens[0];
303 2         3 $sTokens[0] = &{$args->{'inflect'}}($self, $args);
  2         4  
304             }
305             elsif ($sPos eq 'adj' and $args->{'type'} eq 'comparative')
306             {
307 1         3 unshift @sTokens, 'more';
308 1         9 return join (' ', @sTokens);
309             }
310             elsif ($sPos eq 'adj' and $args->{'type'} eq 'superlative')
311             {
312 1         3 unshift @sTokens, 'most';
313 1         7 return join (' ', @sTokens);
314             }
315 22         34 $args->{'w'} = $sPhrase;
316 22         115 return join (' ', @sTokens);
317             }
318            
319             sub all_inflections
320             {
321 4     4 0 11 my ($self, $args) = @_;
322 4         5 my $result ={};
323 4         10 my $sPos = $args->{'pos'};
324 4 100       7 if (defined $sPos)
325             {
326            
327 3         3 foreach my $sType (@{$sTypes{$sPos}})
  3         8  
328             {
329 14         20 $args->{'type'} = $sType;
330 14         25 $result->{$sPos}{$sType} = $self->phrase_inflection($args);
331             }
332             }
333             else
334             {
335 1         5 foreach (keys %sTypes)
336             {
337 3         8 $args->{'pos'} = $_;
338 3         4 $result->{$_}=${$self->all_inflections($args)}{$_};
  3         9  
339             }
340             }
341 4         28 return $result;
342             }
343             sub all_irregular_inflections
344             {
345 2     2 0 3 my ($self, $args) = @_;
346 2         2 my $result ={};
347 2 50       8 return $result unless defined $args->{'w'};
348 0           return $result = $self->{'irregular'}{$args->{'w'}};
349             }
350             1;
351            
352             =pod
353            
354             =head1 NAME
355            
356             WordLists::Inflect::Simple
357            
358             =head1 SYNOPSIS
359            
360             $inflector = WordLists::Inflect::Simple->new;
361             $sPlural = $inflector->regular_inflection({w=>'sky', pos=>'n', type=>'plural'});
362             $inflector->add_special_case('general');
363             $sPlural = $inflector->phrase_inflection({w=>'Director General', pos=>'n', type=>'plural'});
364            
365             =head1 DESCRIPTION
366            
367             This module provides an object which can be used to generate regular and semi-regular English inflections.
368            
369             By default, it comes with several defaults for semi-regular special cases - dealing with word-final 'e', 'y', and sibilants, dealing with words ending 'man'. This behaviour can be turned on and off.
370            
371             It deliberately does not deal with irregular forms, even important ones like the verb 'be'. However, it does provide an interface for user-specified irregular inflections, and it is trivial to write a wrapper module (subclass) which loads a pre-written set of inflections.
372            
373             It does not deal with semi-regular patterns which require knowledge of the behaviour of individual words - for example, there is no reliable way of inspecting 'abet' and discerning that the 't' must be doubled in the present participle. Similarly, there is no attempt made to identify Latin '-us/i' plurals, as this would require making exceptions for words like 'minibus', 'omnibus', and 'octopus'. These must be entered as irregular inflections.
374            
375             =head2 Special Cases
376            
377             =head3 e
378            
379             Words ending in e, when given inflections like 'ed', 'er', 'est' do not get a second 'e', e.g. blue => bluer, not blue => blueer. Verbs ending in e also lose the e in the present participle, unless the e is preceded by [aeio] (e.g. argue => arguing but see => seeing).
380            
381             =head3 y
382            
383             Words ending in y, when given 's' inflections or 'e' inflections are subject to a conversion of the 'y' to 'i'/'ie', unless the 'y' is preceded by a vowel, e.g. sky => skies, but day => days.
384            
385             =head3 s
386            
387             Words ending in sibilants (s, x, sh, ch, z), when given 's' inflections gain an 'e', e.g. 'sash'=>'sashes'.
388            
389             =head3 of
390            
391             When a phrase of the form X of Y is inflected as a noun, it is X rather than Y which is inflected. This also applies to the pattern X of the Y.
392            
393             =head3 in
394            
395             When a phrase of the form X in Y is inflected as a noun, it is X rather than Y which is inflected. This also applies to the pattern X in the Y.
396            
397             =head3 general
398            
399             When a phrase of the form X General is inflected as a noun, it is X rather than General which is inflected.
400            
401             =head3 O'
402            
403             When a phrase of the form X O'Y is inflected as a noun, it is X rather than Y which is inflected.
404            
405             =head3 man
406            
407             Nouns ending in 'man' which do not begin with a capital are pluralised 'men' (postman => postmen but German => Germans).
408            
409             =head3 woman
410            
411             Nouns ending in 'woman' which do not begin with a capital are pluralised 'women'.
412            
413             =head3 person
414            
415             Nouns ending in 'person' which do not begin with a capital are pluralised 'people'.
416            
417             =head3 child
418            
419             Nouns ending in 'child' which do not begin with a capital are pluralised 'children'.
420            
421             =head3 -in-law
422            
423             When a phrase of the form X-in-law is inflected as a noun, it is X rather than -in-law which is inflected.
424            
425             =head2 Miscellaneous notes
426            
427             There are three parts of speech which can be inflected with this, C, C, and C.
428            
429             =head1 TODO
430            
431             Improve the accessors for the special cases so a user can query the object for useful special cases to add, specify a fixed list of special cases so new cases don't affect functionality, etc.
432            
433             Add pos normalisation and an interface for customising the pos normalisation routine.
434            
435             Document all methods.
436            
437             =head1 BUGS
438            
439             English is buggy. Newspeak is doubleplusgoodlier; consider upgrading.
440            
441             Some potentially unexpected results may arise, e.g. with 'man' special cased, human is incorrectly pluralised as humen, not the 'more regular' (and correct) humans.
442            
443             Please use the Github issues tracker for other bugs.
444            
445             =head1 LICENSE
446            
447             Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
448            
449             =cut