File Coverage

blib/lib/Lingua/EN/Inflect.pm
Criterion Covered Total %
statement 338 420 80.4
branch 358 470 76.1
condition 144 285 50.5
subroutine 34 47 72.3
pod 9 33 27.2
total 883 1255 70.3


line stmt bran cond sub pod time code
1             package Lingua::EN::Inflect;
2              
3 15     15   82917 use strict;
  15         115  
  15         684  
4 15     15   93 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
  15         28  
  15         208724  
5              
6             require Exporter;
7             @ISA = qw(Exporter);
8              
9             our $VERSION = '1.905';
10              
11             %EXPORT_TAGS =
12             (
13             ALL => [ qw( classical inflect
14             PL PL_N PL_V PL_ADJ NO NUM A AN
15             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
16             PART_PRES
17             ORD
18             NUMWORDS
19             WORDLIST
20             def_noun def_verb def_adj def_a def_an )],
21              
22             INFLECTIONS => [ qw( classical inflect
23             PL PL_N PL_V PL_ADJ PL_eq
24             NO NUM A AN PART_PRES )],
25              
26             PLURALS => [ qw( classical inflect
27             PL PL_N PL_V PL_ADJ NO NUM
28             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
29              
30             COMPARISONS => [ qw( classical
31             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
32              
33             ARTICLES => [ qw( classical inflect NUM A AN )],
34              
35             NUMERICAL => [ qw( ORD NUMWORDS )],
36              
37             USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )],
38             );
39              
40             Exporter::export_ok_tags(qw( ALL ));
41              
42             # SUPPORT CLASSICAL PLURALIZATIONS
43              
44             my %def_classical = (
45             all => 0,
46             zero => 0,
47             herd => 0,
48             names => 1,
49             persons => 0,
50             ancient => 0,
51             );
52              
53             my %all_classical = (
54             all => 1,
55             zero => 1,
56             herd => 1,
57             names => 1,
58             persons => 1,
59             ancient => 1,
60             noble_and_ancient => 10,
61             );
62              
63             my %classical = %def_classical;
64              
65             my $classical_mode = join '|', keys %all_classical;
66             $classical_mode = qr/^(?:$classical_mode)$/;
67              
68             sub classical
69             {
70 1802 100   1802 0 85161 if (!@_) {
71 1         6 %classical = %all_classical;
72 1         3 return;
73             }
74 1801 100 100     4627 if (@_==1 && $_[0] !~ $classical_mode) {
75 5 100       29 %classical = $_[0] ? %all_classical : ();
76 5         11 return;
77             }
78 1796         4126 while (@_) {
79 2687         4235 my $arg = shift;
80 2687 50       13957 if ($arg !~ $classical_mode) {
81 0         0 die "Unknown classical mode ($arg)\n";
82             }
83 2687 100 66     12121 if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; }
  2681         6243  
84 6         19 else { $classical{$arg} = 1; }
85              
86 2687 100       6842 if ($arg eq 'all') {
87 1785 100       9972 %classical = $classical{all} ? %all_classical : ();
88             }
89             }
90             }
91              
92             my $persistent_count;
93              
94             sub NUM # (;$count,$show)
95             {
96 0 0   0 0 0 if (defined $_[0])
97             {
98 0         0 $persistent_count = $_[0];
99 0 0 0     0 return $_[0] if !defined($_[1]) || $_[1];
100             }
101             else
102             {
103 0         0 $persistent_count = undef;
104             }
105 0         0 return '';
106             }
107              
108              
109             # 0. PERFORM GENERAL INFLECTIONS IN A STRING
110              
111 735     735 0 1819 sub enclose { "(?:$_[0])" }
112              
113             sub inflect
114             {
115 0     0 0 0 my $save_persistent_count = $persistent_count;
116 0         0 my @sections = split /(NUM\([^)]*\))/, $_[0];
117 0         0 my $inflection = "";
118              
119 0         0 foreach ( @sections )
120             {
121 0 0       0 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe)
  0         0  
122             {
123 0   0     0 1 while
      0        
      0        
      0        
      0        
      0        
      0        
      0        
124 0         0 s/\bPL \( ([^),]*) (, ([^)]*) )? \) / PL($1,$3) /xeg
125 0         0 || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \) / PL_N($1,$3) /xeg
126 0         0 || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \) / PL_V($1,$3) /xeg
127 0         0 || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \) / PL_ADJ($1,$3) /xeg
128 0         0 || s/\bAN? \( ([^),]*) (, ([^)]*) )? \) / A($1,$3) /xeg
129 0         0 || s/\bNO \( ([^),]*) (, ([^)]*) )? \) / NO($1,$3) /xeg
130 0         0 || s/\bORD \( ([^)]*) \) / ORD($1) /xeg
131 0         0 || s/\bNUMWORDS \( ([^)]*) \) / NUMWORDS($1) /xeg
132 0         0 || s/\bPART_PRES \( ([^)]*) \) / PART_PRES($1) /xeg
133             }
134              
135 0         0 $inflection .= $_;
136             }
137              
138 0         0 $persistent_count = $save_persistent_count;
139 0         0 return $inflection;
140             }
141              
142              
143             # 1. PLURALS
144              
145             my %PL_sb_irregular_s =
146             (
147             "corpus" => "corpuses|corpora",
148             "opus" => "opuses|opera",
149             "magnum opus" => "magnum opuses|magna opera",
150             "genus" => "genera",
151             "mythos" => "mythoi",
152             "penis" => "penises|penes",
153             "testis" => "testes",
154             "atlas" => "atlases|atlantes",
155             "yes" => "yeses",
156             'editio princeps' => 'editiones principes',
157             'starets' => 'startsy',
158             'staretz' => 'startzy',
159             'cyclops' => 'cyclopses',
160             'tursiops' => 'tursiopses',
161             'triceratops' => 'triceratopses',
162             'protoceratops' => 'protoceratopses',
163             );
164              
165             my %PL_sb_irregular =
166             (
167             "child" => "children",
168             "brother" => "brothers|brethren",
169             "loaf" => "loaves",
170             "hoof" => "hoofs|hooves",
171             "beef" => "beefs|beeves",
172             "thief" => "thiefs|thieves",
173             "money" => "monies",
174             "mongoose" => "mongooses",
175             "ox" => "oxen",
176             "cow" => "cows|kine",
177             "graffito" => "graffiti",
178             "prima donna" => "prima donnas|prime donne",
179             "octopus" => "octopuses|octopodes",
180             "genie" => "genies|genii",
181             "ganglion" => "ganglions|ganglia",
182             "trilby" => "trilbys",
183             "turf" => "turfs|turves",
184             "numen" => "numina",
185             "atman" => "atmas",
186             "occiput" => "occiputs|occipita",
187             'sabretooth' => 'sabretooths',
188             'sabertooth' => 'sabertooths',
189             'lowlife' => 'lowlifes',
190             'flatfoot' => 'flatfoots',
191             'tenderfoot' => 'tenderfoots',
192             'Romany' => 'Romanies',
193             'romany' => 'romanies',
194             'Tornese' => 'Tornesi',
195             'Jerry' => 'Jerrys',
196             'jerry' => 'jerries',
197             'Mary' => 'Marys',
198             'mary' => 'maries',
199             'talouse' => 'talouses',
200             'blouse' => 'blouses',
201             'Rom' => 'Roma',
202             'rom' => 'roma',
203             'carmen' => 'carmina',
204             'cheval' => 'chevaux',
205             'chervonetz' => 'chervontzi',
206             'kuvasz' => 'kuvaszok',
207             'felo' => 'felones',
208             'put-off' => 'put-offs',
209             'set-off' => 'set-offs',
210             'set-out' => 'set-outs',
211             'set-to' => 'set-tos',
212             'brother-german' => 'brothers-german|brethren-german',
213             'studium generale' => 'studia generali',
214              
215             %PL_sb_irregular_s,
216             );
217              
218             my $PL_sb_irregular = enclose join '|', reverse sort keys %PL_sb_irregular;
219              
220             # Z's that don't double
221              
222             my @PL_sb_z_zes =
223             (
224             "batz", "quartz", "topaz", "snooz(?=e)", "kibbutz",
225             );
226             my $PL_sb_z_zes = enclose join '|', @PL_sb_z_zes;
227              
228             # UNCONDITIONAL "..is" -> "..ides"
229              
230             my @PL_sb_U_is_ides =
231             (
232             "aphis",
233             );
234              
235             my $PL_sb_U_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_U_is_ides;
236              
237             # CLASSICAL "..is" -> "..ides"
238              
239             my @PL_sb_C_is_ides =
240             (
241             # GENERAL WORDS...
242              
243             "ephemeris", "iris", "clitoris",
244             "chrysalis", "epididymis",
245              
246             # INFLAMATIONS...
247              
248             ".*itis",
249              
250             );
251              
252             my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides;
253              
254             # UNCONDITIONAL "..a" -> "..ata"
255              
256             my @PL_sb_U_a_ata =
257             (
258             "plasmalemma", "pseudostoma",
259             );
260              
261             my $PL_sb_U_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_U_a_ata;
262              
263             # CLASSICAL "..a" -> "..ata"
264              
265             my @PL_sb_C_a_ata =
266             (
267             "anathema", "bema", "carcinoma", "charisma", "diploma",
268             "dogma", "drama", "edema", "enema", "enigma", "lemma",
269             "lymphoma", "magma", "melisma", "miasma", "oedema",
270             "sarcoma", "schema", "soma", "stigma", "stoma", "trauma",
271             "gumma", "pragma", "bema",
272             );
273              
274             my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata;
275              
276             # UNCONDITIONAL "..a" -> "..ae"
277              
278             my $PL_sb_U_a_ae = enclose join "|",
279             (
280             "alumna", "alga", "vertebra", "persona"
281             );
282              
283             # CLASSICAL "..a" -> "..ae"
284              
285             my $PL_sb_C_a_ae = enclose join "|",
286             (
287             "amoeba", "antenna", "formula", "hyperbola",
288             "medusa", "nebula", "parabola", "abscissa",
289             "hydra", "nova", "lacuna", "aurora", ".*umbra",
290             "flora", "fauna",
291             );
292              
293             # CLASSICAL "..en" -> "..ina"
294              
295             my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) }
296             (
297             "stamen", "foramen", "lumen"
298             );
299              
300             # UNCONDITIONAL "..um" -> "..a"
301              
302             my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) }
303             (
304             "bacterium", "agendum", "desideratum", "erratum",
305             "stratum", "datum", "ovum", "extremum",
306             "candelabrum", "intermedium", "malum", "Progymnasium",
307             );
308              
309             # CLASSICAL "..um" -> "..a"
310              
311             my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) }
312             (
313             "maximum", "minimum", "momentum", "optimum",
314             "quantum", "cranium", "curriculum", "dictum",
315             "phylum", "aquarium", "compendium", "emporium",
316             "enconium", "gymnasium", "honorarium", "interregnum",
317             "lustrum", "memorandum", "millennium", "rostrum",
318             "spectrum", "speculum", "stadium", "trapezium",
319             "ultimatum", "medium", "vacuum", "velum",
320             "consortium",
321             );
322              
323             # UNCONDITIONAL "..us" -> "i"
324              
325             my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) }
326             (
327             "alumnus", "alveolus", "bacillus", "bronchus",
328             "locus", "nucleus", "stimulus", "meniscus",
329             "sarcophagus", "interradius", "perradius", "triradius",
330             );
331              
332             # CLASSICAL "..us" -> "..i"
333              
334             my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) }
335             (
336             "focus", "radius", "genius",
337             "incubus", "succubus", "nimbus",
338             "fungus", "nucleolus", "stylus",
339             "torus", "umbilicus", "uterus",
340             "hippopotamus", "cactus",
341             );
342              
343             # CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS)
344              
345             my $PL_sb_C_us_us = enclose join "|",
346             (
347             "status", "apparatus", "prospectus", "sinus",
348             "hiatus", "impetus", "plexus",
349             );
350              
351             # UNCONDITIONAL "..on" -> "a"
352              
353             my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) }
354             (
355             "criterion", "perihelion", "aphelion",
356             "phenomenon", "prolegomenon", "noumenon",
357             "organon", "asyndeton", "hyperbaton",
358             "legomenon",
359             );
360              
361             # CLASSICAL "..on" -> "..a"
362              
363             my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) }
364             (
365             "oxymoron",
366             );
367              
368             # CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os")
369              
370             my @PL_sb_C_o_i =
371             (
372             "solo", "soprano", "basso", "alto",
373             "contralto", "tempo", "piano", "virtuoso",
374             );
375             my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i;
376              
377             # ALWAYS "..o" -> "..os"
378              
379             my $PL_sb_U_o_os = enclose join "|",
380             (
381             "^ado", "aficionado", "aggro",
382             "albino", "allegro", "ammo",
383             "Antananarivo", "archipelago", "armadillo",
384             "auto", "avocado", "Bamako",
385             "Barquisimeto", "bimbo", "bingo",
386             "Biro", "bolero", "Bolzano",
387             "bongo", "Boto", "burro",
388             "Cairo", "canto", "cappuccino",
389             "casino", "cello", "Chicago",
390             "Chimango", "cilantro", "cochito",
391             "coco", "Colombo", "Colorado",
392             "commando", "concertino", "contango",
393             "credo", "crescendo", "cyano",
394             "demo", "ditto", "Draco",
395             "dynamo", "embryo", "Esperanto",
396             "espresso", "euro", "falsetto",
397             "Faro", "fiasco", "Filipino",
398             "flamenco", "furioso", "generalissimo",
399             "Gestapo", "ghetto", "gigolo",
400             "gizmo", "Greensboro", "gringo",
401             "Guaiabero", "guano", "gumbo",
402             "gyro", "hairdo", "hippo",
403             "Idaho", "impetigo", "inferno",
404             "info", "intermezzo", "intertrigo",
405             "Iquico", "^ISO", "jumbo",
406             "junto", "Kakapo", "kilo",
407             "Kinkimavo", "Kokako", "Kosovo",
408             "Lesotho", "libero", "libido",
409             "libretto", "lido", "Lilo",
410             "limbo", "limo", "lineno",
411             "lingo", "lino", "livedo",
412             "loco", "logo", "lumbago",
413             "macho", "macro", "mafioso",
414             "magneto", "magnifico", "Majuro",
415             "Malabo", "manifesto", "Maputo",
416             "Maracaibo", "medico", "memo",
417             "metro", "Mexico", "micro",
418             "Milano", "Monaco", "mono",
419             "Montenegro", "Morocco", "Muqdisho",
420             "myo", "^NATO", "^NCO",
421             "neutrino", "^NGO", "Ningbo",
422             "octavo", "oregano", "Orinoco",
423             "Orlando", "Oslo", "^oto",
424             "panto", "Paramaribo", "Pardusco",
425             "pedalo", "photo", "pimento",
426             "pinto", "pleco", "Pluto",
427             "pogo", "polo", "poncho",
428             "Porto-Novo", "Porto", "pro",
429             "psycho", "pueblo", "quarto",
430             "Quito", "rhino", "risotto",
431             "rococo", "rondo", "Sacramento",
432             "saddo", "sago", "salvo",
433             "Santiago", "Sapporo", "Sarajevo",
434             "scherzando", "scherzo", "silo",
435             "sirocco", "sombrero", "staccato",
436             "sterno", "stucco", "stylo",
437             "sumo", "Taiko", "techno",
438             "terrazzo", "testudo", "timpano",
439             "tiro", "tobacco", "Togo",
440             "Tokyo", "torero", "Torino",
441             "Toronto", "torso", "tremolo",
442             "typo", "tyro", "ufo",
443             "UNESCO", "vaquero", "vermicello",
444             "verso", "vibrato", "violoncello",
445             "Virgo", "weirdo", "WHO",
446             "WTO", "Yamoussoukro", "yo-yo",
447             "zero", "Zibo",
448              
449             @PL_sb_C_o_i,
450             );
451              
452              
453             # UNCONDITIONAL "..ch" -> "..chs"
454              
455             my $PL_sb_U_ch_chs = enclose join "|", map { substr($_,0,-2) }
456             qw(
457             czech eunuch stomach
458             );
459              
460             # UNCONDITIONAL "..[ei]x" -> "..ices"
461              
462             my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) }
463             (
464             "codex", "murex", "silex",
465             );
466              
467             my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) }
468             (
469             "radix", "helix",
470             );
471              
472             # CLASSICAL "..[ei]x" -> "..ices"
473              
474             my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) }
475             (
476             "vortex", "vertex", "cortex", "latex",
477             "pontifex", "apex", "index", "simplex",
478             );
479              
480             my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) }
481             (
482             "appendix",
483             );
484              
485             # ARABIC: ".." -> "..i"
486              
487             my $PL_sb_C_i = enclose join "|",
488             (
489             "afrit", "afreet", "efreet",
490             );
491              
492             # HEBREW: ".." -> "..im"
493              
494             my $PL_sb_C_im = enclose join "|",
495             (
496             "goy", "seraph", "cherub", "zuz", "kibbutz",
497             );
498              
499             # UNCONDITIONAL "..man" -> "..mans"
500              
501             my $PL_sb_U_man_mans = enclose join "|",
502             qw(
503             \bataman caiman cayman ceriman
504             \bdesman \bdolman \bfarman \bharman \bhetman
505             human \bleman ottoman shaman talisman
506             Alabaman Bahaman Burman German
507             Hiroshiman Liman Nakayaman Norman Oklahoman
508             Panaman Roman Selman Sonaman Tacoman Yakiman
509             Yokohaman Yuman
510             );
511              
512             my @PL_sb_uninflected_s =
513             (
514             # PAIRS OR GROUPS SUBSUMED TO A SINGULAR...
515             "breeches", "britches", "pajamas", "pyjamas", "clippers", "gallows",
516             "hijinks", "headquarters", "pliers", "scissors", "testes", "herpes",
517             "pincers", "shears", "proceedings", "trousers",
518              
519             # UNASSIMILATED LATIN 4th DECLENSION
520              
521             "cantus", "coitus", "nexus",
522              
523             # RECENT IMPORTS...
524             "contretemps", "corps", "debris",
525             ".*ois", "siemens",
526              
527             # DISEASES
528             ".*measles", "mumps",
529              
530             # MISCELLANEOUS OTHERS...
531             "diabetes", "jackanapes", ".*series", "species", "rabies",
532             "chassis", "innings", "news", "mews", "haggis",
533             );
534              
535             my $PL_sb_uninflected_herd = enclose join "|",
536             # DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION
537             (
538             "wildebeest", "swine", "eland", "bison", "buffalo",
539             "elk", "rhinoceros", 'zucchini',
540             'caribou', 'dace', 'grouse', 'guinea[- ]fowl',
541             'haddock', 'hake', 'halibut', 'herring', 'mackerel',
542             'pickerel', 'pike', 'roe', 'seed', 'shad',
543             'snipe', 'teal', 'turbot', 'water[- ]fowl',
544             );
545              
546             my $PL_sb_lese_lesen = enclose join "|",
547             (
548             'Auslese',
549             'beerenauslese',
550             'Spaetlese',
551             'trockenbeerenauslese',
552             );
553              
554             my $PL_sb_uninflected = enclose join "|",
555             (
556             # SOME FISH AND HERD ANIMALS
557             ".*fish", "tuna", "salmon", "mackerel", "trout",
558             "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
559              
560             ".*deer", ".*sheep", "moose",
561              
562             # ALL NATIONALS ENDING IN -ese
563             "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
564             "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
565             "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
566             "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
567             "Shavese", "Vermontese", "Wenchowese", "Yengeese",
568             ".*[nrlm]ese",
569              
570             # SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE)
571              
572             @PL_sb_uninflected_s,
573              
574             # DISEASES
575             ".*pox",
576              
577              
578             # OTHER ODDITIES
579             "graffiti", "djinn", 'samuri',
580             '.*craft$', 'offspring', 'pence', 'quid', 'hertz',
581             );
582              
583             # SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es)
584              
585             my $PL_sb_singular_s = enclose join '|',
586             (
587             ".*ss",
588             "acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
589             "bronchitis", "bursitis", "caddis", "cannabis",
590             "canvas", "chaos", "cosmos", "dais", "digitalis",
591             "epidermis", "ethos", "eyas", "gas", "glottis",
592             "hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
593             "pathos", "pelvis", "polis", "rhinoceros",
594             "sassafras", "trellis", ".*us", "[A-Z].*es",
595              
596             @PL_sb_C_is_ides,
597             @PL_sb_U_is_ides,
598             );
599              
600             my $PL_v_special_s = enclose join '|',
601             (
602             $PL_sb_singular_s,
603             @PL_sb_uninflected_s,
604             keys %PL_sb_irregular_s,
605             '(.*[csx])is',
606             '(.*)ceps',
607             '[A-Z].*s',
608             );
609              
610             my %PL_sb_postfix_adj = (
611             'general' => ['(?!major|lieutenant|brigadier|adjutant|.*star)\S+'],
612             'martial' => [qw(court)],
613             );
614              
615             foreach (keys %PL_sb_postfix_adj) {
616             $PL_sb_postfix_adj{$_} = enclose
617             enclose(join('|', @{$PL_sb_postfix_adj{$_}}))
618             . "(?=(?:-|\\s+)$_)";
619             }
620              
621             my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)';
622              
623             my $PL_prep = enclose join '|', qw (
624             about above across after against amid amidst among around as at athwart atop
625             barring before behind below beneath beside besides between betwixt beyond but by
626             circa
627             despite down during
628             except
629             failing for from
630             given
631             in inside into
632             like
633             minus
634             near next
635             of off on onto out outside over
636             pace past per plus pro
637             qua
638             round
639             sans save since
640             than through throughout thru thruout till times to toward towards
641             under underneath unlike until unto up upon
642             versus via vs
643             with within without worth
644             );
645              
646             my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eua])(?:-|\s+))a(?:-|\s+)(.*)';
647              
648             my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eua])((-|\s+)(.*))?)';
649              
650              
651             my %PL_pron_nom =
652             (
653             # NOMINATIVE REFLEXIVE
654              
655             "i" => "we", "myself" => "ourselves",
656             "you" => "you", "yourself" => "yourselves",
657             "she" => "they", "herself" => "themselves",
658             "he" => "they", "himself" => "themselves",
659             "it" => "they", "itself" => "themselves",
660             "they" => "they", "themself" => "themselves",
661              
662             # POSSESSIVE
663              
664             "mine" => "ours",
665             "yours" => "yours",
666             "hers" => "theirs",
667             "his" => "theirs",
668             "its" => "theirs",
669             "theirs" => "theirs",
670             );
671              
672             my %PL_pron_acc =
673             (
674             # ACCUSATIVE REFLEXIVE
675              
676             "me" => "us", "myself" => "ourselves",
677             "you" => "you", "yourself" => "yourselves",
678             "her" => "them", "herself" => "themselves",
679             "him" => "them", "himself" => "themselves",
680             "it" => "them", "itself" => "themselves",
681             "them" => "them", "themself" => "themselves",
682             );
683              
684             my $PL_pron_acc = enclose join '|', keys %PL_pron_acc;
685              
686             my %PL_v_irregular_pres =
687             (
688             # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
689             # 3RD PERS. (INDET.)
690              
691             "am" => "are", "are" => "are", "is" => "are",
692             "was" => "were", "were" => "were", "was" => "were",
693             "have" => "have", "have" => "have", "has" => "have",
694             "do" => "do", "do" => "do", "does" => "do",
695             );
696              
697             my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres;
698              
699             my %PL_v_ambiguous_pres =
700             (
701             # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
702             # 3RD PERS. (INDET.)
703              
704             "act" => "act", "act" => "act", "acts" => "act",
705             "blame" => "blame", "blame" => "blame", "blames" => "blame",
706             "can" => "can", "can" => "can", "can" => "can",
707             "must" => "must", "must" => "must", "must" => "must",
708             "fly" => "fly", "fly" => "fly", "flies" => "fly",
709             "copy" => "copy", "copy" => "copy", "copies" => "copy",
710             "drink" => "drink", "drink" => "drink", "drinks" => "drink",
711             "fight" => "fight", "fight" => "fight", "fights" => "fight",
712             "fire" => "fire", "fire" => "fire", "fires" => "fire",
713             "like" => "like", "like" => "like", "likes" => "like",
714             "look" => "look", "look" => "look", "looks" => "look",
715             "make" => "make", "make" => "make", "makes" => "make",
716             "reach" => "reach", "reach" => "reach", "reaches" => "reach",
717             "run" => "run", "run" => "run", "runs" => "run",
718             "sink" => "sink", "sink" => "sink", "sinks" => "sink",
719             "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
720             "view" => "view", "view" => "view", "views" => "view",
721             );
722              
723             my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres;
724              
725              
726             my $PL_v_irregular_non_pres = enclose join '|',
727             (
728             "did", "had", "ate", "made", "put",
729             "spent", "fought", "sank", "gave", "sought",
730             "shall", "could", "ought", "should",
731             );
732              
733             my $PL_v_ambiguous_non_pres = enclose join '|',
734             (
735             "thought", "saw", "bent", "will", "might", "cut",
736             );
737              
738             # "..oes" -> "..oe" (the rest are "..oes" -> "o")
739              
740             my $PL_v_oes_oe = enclose join "|",
741             qw(
742             .*shoes .*hoes .*toes
743             canoes floes oboes roes throes woes
744             );
745              
746             my $PL_count_zero = enclose join '|',
747             (
748             0, "no", "zero", "nil"
749             );
750              
751             my $PL_count_one = enclose join '|',
752             (
753             1, "a", "an", "one", "each", "every", "this", "that",
754             );
755              
756             my %PL_adj_special =
757             (
758             "a" => "some", "an" => "some",
759             "this" => "these", "that" => "those",
760             );
761             my $PL_adj_special = enclose join '|', keys %PL_adj_special;
762              
763             my %PL_adj_poss =
764             (
765             "my" => "our",
766             "your" => "your",
767             "its" => "their",
768             "her" => "their",
769             "his" => "their",
770             "their" => "their",
771             );
772             my $PL_adj_poss = enclose join '|', keys %PL_adj_poss;
773              
774              
775             sub checkpat
776             {
777 0     0 0 0 local $SIG{__WARN__} = sub {0};
  6     6   38  
778 6 50 33     445 do {$@ =~ s/at.*?$//;
  0         0  
779 0         0 die "\nBad user-defined singular pattern:\n\t$@\n"}
780             if (!eval "'' =~ m/$_[0]/; 1;" or $@);
781 6         62 return @_;
782             }
783              
784             sub checkpatsubs
785             {
786 6     6 0 20 checkpat($_[0]);
787 6 50       16 if (defined $_[1])
788             {
789 6     0   34 local $SIG{__WARN__} = sub {0};
  0         0  
790 6 50 33     316 do {$@ =~ s/at.*?$//;
  0         0  
791 0         0 die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"}
792             if (!eval "qq{$_[1]}; 1;" or $@);
793             }
794 6         29 return @_;
795             }
796              
797             my @PL_sb_user_defined = ();
798             my @PL_v_user_defined = ();
799             my @PL_adj_user_defined = ();
800             my @A_a_user_defined = ();
801              
802             sub def_noun
803             {
804 2     2 1 14 unshift @PL_sb_user_defined, checkpatsubs(@_);
805 2         7 return 1;
806             }
807              
808             sub def_verb
809             {
810 1     1 1 9 unshift @PL_v_user_defined, checkpatsubs(@_[4,5]);
811 1         4 unshift @PL_v_user_defined, checkpatsubs(@_[2,3]);
812 1         5 unshift @PL_v_user_defined, checkpatsubs(@_[0,1]);
813 1         4 return 1;
814             }
815              
816             sub def_adj
817             {
818 1     1 1 7 unshift @PL_adj_user_defined, checkpatsubs(@_);
819 1         4 return 1;
820             }
821              
822             sub def_a
823             {
824 0     0 1 0 unshift @A_a_user_defined, checkpat(@_,'a');
825 0         0 return 1;
826             }
827              
828             sub def_an
829             {
830 0     0 1 0 unshift @A_a_user_defined, checkpat(@_,'an');
831 0         0 return 1;
832             }
833              
834             sub ud_match
835             {
836 14976     14976 0 23796 my $word = shift;
837 14976         34364 for (my $i=0; $i < @_; $i+=2)
838             {
839 44 100       367 if ($word =~ /^(?:$_[$i])$/i)
840             {
841 8 50       27 last unless defined $_[$i+1];
842 8         359 return eval '"'.$_[$i+1].'"';
843             }
844             }
845 14968         33064 return undef;
846             }
847              
848             do
849             {
850             local $SIG{__WARN__} = sub {0};
851             my $rcfile;
852              
853             $rcfile = $INC{'Lingua//EN/Inflect.pm'} || '';
854             $rcfile =~ s/Inflect.pm$/.inflectrc/;
855             do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
856             if $rcfile && -r $rcfile && -s $rcfile;
857              
858             $rcfile = "$ENV{HOME}/.inflectrc" || '';
859             do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
860             if $rcfile && -r $rcfile && -s $rcfile;
861             };
862              
863             sub postprocess # FIX PEDANTRY AND CAPITALIZATION :-)
864             {
865 11179     11179 0 25279 my ($orig, $inflected) = @_;
866 11179 100       20487 $inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e;
  199         753  
867 11179 100       46794 return $orig =~ /^I$/ ? $inflected
    100          
    100          
868             : $orig =~ /^[A-Z]+$/ ? uc $inflected
869             : $orig =~ /^[A-Z]/ ? ucfirst $inflected
870             : $inflected;
871             }
872              
873             sub PL
874             # PL($word,$number)
875             {
876 1914     1914 1 6889 my ($str, $count) = @_;
877 1914         9344 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
878 1914 50       4429 return $str unless $word;
879 1914   66     3606 my $plural = postprocess $word, _PL_special_adjective($word,$count)
880             || _PL_special_verb($word,$count)
881             || _PL_noun($word,$count);
882 1914         6994 return $pre.$plural.$post;
883             }
884              
885             sub PL_N
886             # PL_N($word,$number)
887             {
888 7041     7041 1 16751 my ($str, $count) = @_;
889 7041         36844 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
890 7041 50       15235 return $str unless $word;
891 7041         12861 my $plural = postprocess $word, _PL_noun($word,$count);
892 7041         29252 return $pre.$plural.$post;
893             }
894              
895             sub PL_V
896             # PL_V($word,$number)
897             {
898 2118     2118 1 7145 my ($str, $count) = @_;
899 2118         11499 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
900 2118 50       5112 return $str unless $word;
901 2118   66     4664 my $plural = postprocess $word, _PL_special_verb($word,$count)
902             || _PL_general_verb($word,$count);
903 2118         6987 return $pre.$plural.$post;
904             }
905              
906             sub PL_ADJ
907             # PL_ADJ($word,$number)
908             {
909 106     106 1 211 my ($str, $count) = @_;
910 106         538 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
911 106 50       258 return $str unless $word;
912 106   66     205 my $plural = postprocess $word, _PL_special_adjective($word,$count)
913             || $word;
914 106         447 return $pre.$plural.$post;
915             }
916              
917 3564 100 100 3564 0 20631 sub PL_eq { _PL_eq(@_, \&PL_N) || _PL_eq(@_, \&PL_V) || _PL_eq(@_, \&PL_ADJ); }
918 0     0 0 0 sub PL_N_eq { _PL_eq(@_, \&PL_N); }
919 0     0 0 0 sub PL_V_eq { _PL_eq(@_, \&PL_V); }
920 0     0 0 0 sub PL_ADJ_eq { _PL_eq(@_, \&PL_ADJ); }
921              
922             sub _PL_eq
923             {
924 3744     3744   7371 my ( $word1, $word2, $PL ) = @_;
925 3744         12245 my %classval = %classical;
926 3744         13690 %classical = %all_classical;
927 3744         6926 my $result = "";
928 3744 100 66     11964 $result = "eq" if !$result && $word1 eq $word2;
929 3744 100 100     8497 $result = "p:s" if !$result && $word1 eq &$PL($word2);
930 3744 100 100     9029 $result = "s:p" if !$result && &$PL($word1) eq $word2;
931 3744         8266 %classical = ();
932 3744 100 100     7874 $result = "p:s" if !$result && $word1 eq &$PL($word2);
933 3744 100 100     8038 $result = "s:p" if !$result && &$PL($word1) eq $word2;
934 3744         13302 %classical = %classval;
935              
936 3744 100 66     15561 if ($PL == \&PL || $PL == \&PL_N)
937             {
938 3564 50 66     7770 $result = "p:p"
939             if !$result && _PL_check_plurals_N($word1,$word2);
940 3564 50 66     7042 $result = "p:p"
941             if !$result && _PL_check_plurals_N($word2,$word1);
942             }
943 3744 100 66     11863 if ($PL == \&PL || $PL == \&PL_ADJ)
944             {
945 60 50 33     131 $result = "p:p"
946             if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL);
947             }
948              
949 3744         14702 return $result;
950             }
951              
952             sub _PL_reg_plurals
953             {
954 4320     4320   168643 $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/
955             }
956              
957             sub _PL_check_plurals_N
958             {
959 240     240   592 my $pair = "$_[0]|$_[1]";
960 240 50       767 foreach ( values %PL_sb_irregular_s ) { return 1 if $_ eq $pair; }
  3840         6878  
961 240 50       929 foreach ( values %PL_sb_irregular ) { return 1 if $_ eq $pair; }
  15120         24638  
962              
963 240 50 33     456 return 1 if
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
964             _PL_reg_plurals($pair, $PL_sb_U_a_ata, "as","ata")
965             || _PL_reg_plurals($pair, $PL_sb_C_a_ata, "as","ata")
966             || _PL_reg_plurals($pair, $PL_sb_U_is_ides, "is","ides")
967             || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides")
968             || _PL_reg_plurals($pair, $PL_sb_C_a_ae, "s","e")
969             || _PL_reg_plurals($pair, $PL_sb_C_en_ina, "ens","ina")
970             || _PL_reg_plurals($pair, $PL_sb_C_um_a, "ums","a")
971             || _PL_reg_plurals($pair, $PL_sb_C_us_i, "uses","i")
972             || _PL_reg_plurals($pair, $PL_sb_C_on_a, "ons","a")
973             || _PL_reg_plurals($pair, $PL_sb_C_o_i, "os","i")
974             || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices")
975             || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices")
976             || _PL_reg_plurals($pair, $PL_sb_C_i, "s","i")
977             || _PL_reg_plurals($pair, $PL_sb_C_im, "s","im")
978              
979             || _PL_reg_plurals($pair, '.*eau', "s","x")
980             || _PL_reg_plurals($pair, '.*ieu', "s","x")
981             || _PL_reg_plurals($pair, '.*tri', "xes","ces")
982             || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges");
983              
984              
985 240         1199 return 0;
986             }
987              
988             sub _PL_check_plurals_ADJ
989             {
990 0     0   0 my ( $word1a, $word2a ) = @_;
991 0         0 my ( $word1b, $word2b ) = @_;
992              
993 0 0       0 $word1a = '' unless $word1a =~ s/'s?$//;
994 0 0       0 $word2a = '' unless $word2a =~ s/'s?$//;
995 0 0       0 $word1b = '' unless $word1b =~ s/s'$//;
996 0 0       0 $word2b = '' unless $word2b =~ s/s'$//;
997              
998 0 0       0 if ($word1a)
999             {
1000 0 0 0     0 return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a)
      0        
1001             || _PL_check_plurals_N($word2a, $word1a) );
1002 0 0 0     0 return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b)
      0        
1003             || _PL_check_plurals_N($word2b, $word1a) );
1004             }
1005 0 0       0 if ($word1b)
1006             {
1007 0 0 0     0 return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a)
      0        
1008             || _PL_check_plurals_N($word2a, $word1b) );
1009 0 0 0     0 return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b)
      0        
1010             || _PL_check_plurals_N($word2b, $word1b) );
1011             }
1012              
1013              
1014 0         0 return "";
1015             }
1016              
1017             sub _PL_noun
1018             {
1019 8983     8983   15609 my ( $word, $count ) = @_;
1020 8983         12189 my $value; # UTILITY VARIABLE
1021              
1022             # DEFAULT TO PLURAL
1023              
1024 8983 50 66     26860 $count = $persistent_count
1025             if !defined($count) && defined($persistent_count);
1026              
1027             $count = (defined $count and $count=~/^($PL_count_one)$/io
1028             or defined $count and $classical{zero}
1029 8983 100 66     36541 and $count=~/^($PL_count_zero)$/io)
1030             ? 1
1031             : 2;
1032              
1033 8983 100       16884 return $word if $count==1;
1034              
1035             # HANDLE USER-DEFINED NOUNS
1036              
1037 8977 100       15700 return $value if defined($value = ud_match($word, @PL_sb_user_defined));
1038              
1039              
1040             # HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS
1041              
1042 8974 50       17893 $word eq '' and return $word;
1043              
1044 8974 100 100     83680 $word =~ /^($PL_sb_uninflected)$/i && !exists $PL_sb_irregular{$word} && $word !~ /^($PL_sb_lese_lesen)$/i
      100        
1045             and return $word;
1046              
1047 8280 100 100     37560 $classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i
1048             and return $word;
1049              
1050              
1051             # HANDLE ISOLATED IRREGULAR PLURALS
1052              
1053             $word =~ /^($PL_sb_irregular)$/i
1054 8238 100 33     36557 and return ( $PL_sb_irregular{$1} || $PL_sb_irregular{lc $1} );
1055             $word =~ /(.*)\b($PL_sb_irregular)$/i
1056 7732 50 0     36242 and return $1 . ( $PL_sb_irregular{$2} || $PL_sb_irregular{lc $2} );
1057              
1058              
1059             # HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.)
1060              
1061 7732 100 66     43386 $word =~ /^(?:$PL_sb_postfix_adj)$/i
1062             and $value = $2
1063             and return _PL_noun($1,2)
1064             . $value;
1065              
1066 7672 100 100     40583 $word =~ /^(?:$PL_sb_prep_dual_compound)$/i
1067             and $value = [$2,$3]
1068             and return _PL_noun($1,2)
1069             . $value->[0]
1070             . _PL_noun($value->[1]);
1071              
1072 7664 100 66     45097 $word =~ /^(?:$PL_sb_prep_compound)$/i
1073             and $value = $2
1074             and return _PL_noun($1,2)
1075             . $value;
1076              
1077             # HANDLE PRONOUNS
1078              
1079             $word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i
1080 7512 100       36679 and return $1.$PL_pron_acc{lc($2)};
1081              
1082 7328 100       19725 $value = $PL_pron_nom{lc($word)}
1083             and return $value;
1084              
1085             $word =~ /^($PL_pron_acc)$/i
1086 7198 100       23931 and return $PL_pron_acc{lc($1)};
1087              
1088              
1089             # HANDLE FAMILIES OF IRREGULAR PLURALS
1090              
1091 7170 100       48875 $word =~ /(.*$PL_sb_U_man_mans)$/i
1092             and return "$1s";
1093 6926 100       22819 $word =~ /(\S*)quy$/i
1094             and return "$1quies";
1095 6918 100       17446 $word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people";
    100          
1096 6878 100       15618 $word =~ /(.*)man$/i and return "$1men";
1097 6712 100       15137 $word =~ /(.*[ml])ouse$/i and return "$1ice";
1098 6664 100       13568 $word =~ /(.*)goose$/i and return "$1geese";
1099 6656 100       14235 $word =~ /(.*)tooth$/i and return "$1teeth";
1100 6648 100       13583 $word =~ /(.*)foot$/i and return "$1feet";
1101              
1102             # HANDLE UNASSIMILATED IMPORTS
1103              
1104 6634 100       13013 $word =~ /(.*)ceps$/i and return $word;
1105 6630 100       13170 $word =~ /(.*)zoon$/i and return "$1zoa";
1106 6606 100       14640 $word =~ /(.*[csx])is$/i and return "$1es";
1107 6574 100       20022 $word =~ /(.*$PL_sb_U_a_ata)a$/i and return "$1ata";
1108 6558 100       19721 $word =~ /(.*$PL_sb_U_is_ides)is$/i and return "$1ides";
1109 6550 100       21488 $word =~ /(.*$PL_sb_U_ch_chs)ch$/i and return "$1chs";
1110 6526 100       21265 $word =~ /(.*$PL_sb_U_ex_ices)ex$/i and return "$1ices";
1111 6502 100       21059 $word =~ /(.*$PL_sb_U_ix_ices)ix$/i and return "$1ices";
1112 6478 100       24225 $word =~ /(.*$PL_sb_U_um_a)um$/i and return "$1a";
1113 6358 100       21877 $word =~ /(.*$PL_sb_U_us_i)us$/i and return "$1i";
1114 6246 100       21983 $word =~ /(.*$PL_sb_U_on_a)on$/i and return "$1a";
1115 6150 100       19857 $word =~ /(.*$PL_sb_U_a_ae)$/i and return "$1e";
1116 6118 100       17641 $word =~ /(.*$PL_sb_lese_lesen)$/i and return "$1n";
1117              
1118             # HANDLE INCOMPLETELY ASSIMILATED IMPORTS
1119              
1120 6086 100       13689 if ($classical{ancient})
1121             {
1122 4262 100       9649 $word =~ /(.*)trix$/i and return "$1trices";
1123 4227 100       9598 $word =~ /(.*)eau$/i and return "$1eaux";
1124 4173 100       8702 $word =~ /(.*)ieu$/i and return "$1ieux";
1125 4161 100       12818 $word =~ /(.{2,}[yia])nx$/i and return "$1nges";
1126 4137 100       14114 $word =~ /(.*$PL_sb_C_en_ina)en$/i and return "$1ina";
1127 4119 100       14135 $word =~ /(.*$PL_sb_C_ex_ices)ex$/i and return "$1ices";
1128 4066 100       10353 $word =~ /(.*$PL_sb_C_ix_ices)ix$/i and return "$1ices";
1129 4060 100       15293 $word =~ /(.*$PL_sb_C_um_a)um$/i and return "$1a";
1130 3887 100       13706 $word =~ /(.*$PL_sb_C_us_i)us$/i and return "$1i";
1131 3809 100       12004 $word =~ /(.*$PL_sb_C_us_us)$/i and return "$1";
1132 3781 100       25142 $word =~ /(.*$PL_sb_C_a_ae)$/i and return "$1e";
1133 3673 100       14935 $word =~ /(.*$PL_sb_C_a_ata)a$/i and return "$1ata";
1134 3505 100       17130 $word =~ /(.*$PL_sb_C_is_ides)is$/i and return "$1ides";
1135 3439 100       15807 $word =~ /(.*$PL_sb_C_o_i)o$/i and return "$1i";
1136 3391 100       8681 $word =~ /(.*$PL_sb_C_on_a)on$/i and return "$1a";
1137 3385 100       14537 $word =~ /$PL_sb_C_im$/i and return "${word}im";
1138 3355 100       15197 $word =~ /$PL_sb_C_i$/i and return "${word}i";
1139             }
1140              
1141              
1142             # HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS
1143              
1144 5167 100       34768 $word =~ /^($PL_sb_singular_s)$/i and return "$1es";
1145 3796 50 66     9360 $word =~ /^([A-Z].*s)$/ and $classical{names} and return "$1es";
1146 3737 100       11546 $word =~ /^($PL_sb_z_zes)$/i and return "$1es";
1147 3725 100       11459 $word =~ /^(.*[^z])(z)$/i and return "$1zzes";
1148 3701 100       14618 $word =~ /^(.*)([cs]h|x|zz|ss)$/i and return "$1$2es";
1149             # $word =~ /(.*)(us)$/i and return "$1$2es";
1150              
1151             # HANDLE ...f -> ...ves
1152              
1153 3480 100       8987 $word =~ /(.*[eao])lf$/i and return "$1lves";
1154 3440 100       9086 $word =~ /(.*[^d])eaf$/i and return "$1eaves";
1155 3412 100       7555 $word =~ /(.*[nlw])ife$/i and return "$1ives";
1156 3368 100       7475 $word =~ /(.*)arf$/i and return "$1arves";
1157              
1158             # HANDLE ...y
1159              
1160 3344 100       8008 $word =~ /(.*[aeiou])y$/i and return "$1ys";
1161 3284 100 100     6851 $word =~ /([A-Z].*y)$/ and $classical{names} and return "$1s";
1162 3261 100       7991 $word =~ /(.*)y$/i and return "$1ies";
1163              
1164             # HANDLE ...o
1165              
1166 3159 100       46908 $word =~ /$PL_sb_U_o_os$/i and return "${word}s";
1167 2887 100       7785 $word =~ /[aeiou]o$/i and return "${word}s";
1168 2815 100       6266 $word =~ /o$/i and return "${word}es";
1169              
1170              
1171             # OTHERWISE JUST ADD ...s
1172              
1173 2727         14652 return "${word}s";
1174             }
1175              
1176              
1177             sub _PL_special_verb
1178             {
1179 3984     3984   7511 my ( $word, $count ) = @_;
1180 3984 50 66     12552 $count = $persistent_count
1181             if !defined($count) && defined($persistent_count);
1182             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1183 3984 50 33     15575 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1184             : 2;
1185              
1186 3984 50       12043 return undef if $count=~/^($PL_count_one)$/io;
1187              
1188 3984         5529 my $value; # UTILITY VARIABLE
1189              
1190             # HANDLE USER-DEFINED VERBS
1191              
1192 3984 100       7618 return $value if defined($value = ud_match($word, @PL_v_user_defined));
1193              
1194             # HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND)
1195              
1196             $word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i
1197 3982 100       17744 and return $PL_v_irregular_pres{lc $1}.$2;
1198              
1199             # HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES
1200              
1201 3854 100       14294 $word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i
1202             and return $word;
1203              
1204             # HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND)
1205              
1206             $word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i
1207 3794 100       11788 and return $PL_v_irregular_pres{lc $1}.$2;
1208              
1209 3764 100       9860 $word =~ /^\S+n't\b/i
1210             and return $word;
1211              
1212             # HANDLE SPECIAL CASES
1213              
1214 3756 100       25428 $word =~ /^($PL_v_special_s)$/ and return undef;
1215 3096 100       7672 $word =~ /\s/ and return undef;
1216 2924 100       5525 $word =~ /^quizzes$/i and return "quiz";
1217              
1218             # HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS)
1219              
1220 2920 100       11580 $word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i and return "$1$2";
1221              
1222 2904 100       6777 $word =~ /^(..+)ies$/i and return "$1y";
1223              
1224 2895 100       15065 $word =~ /($PL_v_oes_oe)$/ and return substr($1,0,-1);
1225 2799 50       7036 $word =~ /^(.+)oes$/i and return "$1o";
1226              
1227 2799 100       8530 $word =~ /^(.*[^s])s$/i and return $1;
1228              
1229             # OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE)
1230              
1231 2624         10233 return undef;
1232             }
1233              
1234             sub _PL_general_verb
1235             {
1236 1742     1742   3664 my ( $word, $count ) = @_;
1237 1742 50 33     5561 $count = $persistent_count
1238             if !defined($count) && defined($persistent_count);
1239             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1240 1742 50 33     6786 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1241             : 2;
1242              
1243 1742 50       4775 return $word if $count=~/^($PL_count_one)$/io;
1244              
1245             # HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND)
1246              
1247             $word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i
1248 1742 100       7392 and return $PL_v_ambiguous_pres{lc $1}.$2;
1249              
1250             # HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES
1251              
1252 1738 100       5546 $word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i
1253             and return $word;
1254              
1255             # OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED
1256              
1257 1720         5785 return $word;
1258              
1259             }
1260              
1261             sub _PL_special_adjective
1262             {
1263 2020     2020   4048 my ( $word, $count ) = @_;
1264 2020 50 66     6822 $count = $persistent_count
1265             if !defined($count) && defined($persistent_count);
1266             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1267 2020 100 100     8564 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1268             : 2;
1269              
1270 2020 100       5931 return $word if $count=~/^($PL_count_one)$/io;
1271              
1272              
1273             # HANDLE USER-DEFINED ADJECTIVES
1274              
1275 2015         3002 my $value;
1276 2015 100       3761 return $value if defined($value = ud_match($word, @PL_adj_user_defined));
1277              
1278             # HANDLE KNOWN CASES
1279              
1280             $word =~ /^($PL_adj_special)$/i
1281 2012 100       8690 and return $PL_adj_special{lc $1};
1282              
1283             # HANDLE POSSESSIVES
1284              
1285             $word =~ /^($PL_adj_poss)$/i
1286 1986 100       6577 and return $PL_adj_poss{lc $1};
1287              
1288 1948 100       4317 $word =~ /^(.*)'s?$/ and do { my $pl = PL_N($1);
  72         148  
1289 72 100       397 return "$pl'" . ($pl =~ m/s$/ ? "" : "s");
1290             };
1291              
1292             # OTHERWISE, NO IDEA
1293              
1294 1876         5830 return undef;
1295              
1296             }
1297              
1298              
1299             # 2. INDEFINITE ARTICLES
1300              
1301             # THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
1302             # CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
1303             # TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
1304              
1305             my $A_abbrev = q{
1306             (?! FJO | [HLMNS]Y. | RY[EO] | SQU
1307             | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
1308             [FHLMNRSX][A-Z]
1309             };
1310              
1311             # THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A
1312             # 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
1313             # IMPLIES AN ABBREVIATION.
1314              
1315             my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';
1316              
1317             # EXCEPTIONS TO EXCEPTIONS
1318              
1319             my $A_explicit_an = enclose join '|',
1320             (
1321             "euler",
1322             "hour(?!i)", "heir", "honest", "hono",
1323             );
1324              
1325             my $A_ordinal_an = enclose join '|',
1326             (
1327             "[aefhilmnorsx]-?th",
1328             );
1329              
1330             my $A_ordinal_a = enclose join '|',
1331             (
1332             "[bcdgjkpqtuvwyz]-?th",
1333             );
1334              
1335             sub A {
1336 0     0 0 0 my ($str, $count) = @_;
1337 0         0 my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
1338 0 0       0 return $str unless $word;
1339 0         0 my $result = _indef_article($word,$count);
1340 0         0 return $pre.$result.$post;
1341             }
1342              
1343 0     0 0 0 sub AN { goto &A }
1344              
1345             sub _indef_article {
1346 0     0   0 my ( $word, $count ) = @_;
1347              
1348 0 0 0     0 $count = $persistent_count
1349             if !defined($count) && defined($persistent_count);
1350              
1351 0 0 0     0 return "$count $word"
1352             if defined $count && $count!~/^($PL_count_one)$/io;
1353              
1354             # HANDLE USER-DEFINED VARIANTS
1355              
1356 0         0 my $value;
1357 0 0       0 return "$value $word"
1358             if defined($value = ud_match($word, @A_a_user_defined));
1359              
1360             # HANDLE ORDINAL FORMS
1361              
1362 0 0       0 $word =~ /^($A_ordinal_a)/i and return "a $word";
1363 0 0       0 $word =~ /^($A_ordinal_an)/i and return "an $word";
1364              
1365             # HANDLE SPECIAL CASES
1366              
1367 0 0       0 $word =~ /^($A_explicit_an)/i and return "an $word";
1368 0 0       0 $word =~ /^[aefhilmnorsx]$/i and return "an $word";
1369 0 0       0 $word =~ /^[bcdgjkpqtuvwyz]$/i and return "a $word";
1370              
1371              
1372             # HANDLE ABBREVIATIONS
1373              
1374 0 0       0 $word =~ /^($A_abbrev)/ox and return "an $word";
1375 0 0       0 $word =~ /^[aefhilmnorsx][.-]/i and return "an $word";
1376 0 0       0 $word =~ /^[a-z][.-]/i and return "a $word";
1377              
1378             # HANDLE CONSONANTS
1379              
1380 0 0       0 $word =~ /^[^aeiouy]/i and return "a $word";
1381              
1382             # HANDLE SPECIAL VOWEL-FORMS
1383              
1384 0 0       0 $word =~ /^e[uw]/i and return "a $word";
1385 0 0       0 $word =~ /^onc?e\b/i and return "a $word";
1386 0 0       0 $word =~ /^uni([^nmd]|mo)/i and return "a $word";
1387 0 0       0 $word =~ /^ut[th]/i and return "an $word";
1388 0 0       0 $word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word";
1389              
1390             # HANDLE SPECIAL CAPITALS
1391              
1392 0 0       0 $word =~ /^U[NK][AIEO]?/ and return "a $word";
1393              
1394             # HANDLE VOWELS
1395              
1396 0 0       0 $word =~ /^[aeiou]/i and return "an $word";
1397              
1398             # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)
1399              
1400 0 0       0 $word =~ /^($A_y_cons)/io and return "an $word";
1401              
1402             # OTHERWISE, GUESS "a"
1403 0         0 return "a $word";
1404             }
1405              
1406             # 2. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)"
1407              
1408             sub NO
1409             {
1410 124     124 0 2596 my ($str, $count, $opt_ref) = @_;
1411 124         895 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
1412              
1413 124 50 33     421 $count = $persistent_count
1414             if !defined($count) && defined($persistent_count);
1415 124 100       252 $count = 0 unless $count;
1416              
1417 124 100       684 if ($count =~ /^$PL_count_zero$/) {
1418 3         16 return "${pre}no ". PL($word,0) . $post ;
1419             }
1420              
1421 121         281 $count =~ s{[^\d.e+-]}{}gi;
1422 121         302 1 while $count =~ s{([.].*)[.]}{$1};
1423             $count = defined $opt_ref->{words_below} && $count < $opt_ref->{words_below}
1424 121 100 100     570 ? NUMWORDS($count)
1425             : $count;
1426              
1427 121 100 100     423 if (defined $opt_ref->{comma} || defined $opt_ref->{comma_every}) {
1428 56 100       149 $opt_ref->{comma_every} = 3 if !defined $opt_ref->{comma_every};
1429             $opt_ref->{comma} = ',' if !defined $opt_ref->{comma}
1430 56 100 100     255 || $opt_ref->{comma} =~ /^\d+$/;
1431              
1432 56         91 $count = _commify($count, @{$opt_ref}{'comma','comma_every'});
  56         150  
1433             }
1434              
1435 121         344 return "$pre$count " . PL($word,$count) . $post
1436             }
1437              
1438             sub _commify {
1439 56     56   119 my ($number, $comma, $every) = @_;
1440 56 50       112 return if !defined $comma;
1441 56 50       109 return if !defined $comma;
1442 56         349 $number =~ s{(?:(?<=^)|(?<=^-))(\d\d{$every,})}
  42         112  
1443 42         393 { my $n = $1;
1444 42         133 $n=~s/(?<=.)(?=(?:.{$every})+$)/$comma/g;
1445             $n;
1446 56         153 }e;
1447             return $number;
1448             }
1449              
1450             # PARTICIPLES
1451              
1452             sub PART_PRES
1453 6     6 0 13 {
1454 6 100 33     108 local $_ = PL_V(shift,2);
      33        
      66        
      66        
      66        
      33        
      33        
      66        
      66        
1455             s/ie$/y/
1456             or s/ue$/u/
1457             or s/([auy])e$/$1/
1458             or s/ski$/ski/
1459             or s/[^b]i$//
1460             or s/^(are|were)$/be/
1461             or s/^(had)$/hav/
1462             or s/(hoe)$/$1/
1463             or s/([^e])e$/$1/
1464             or m/er$/
1465 6         33 or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
1466             return "${_}ing";
1467             }
1468              
1469              
1470              
1471             # NUMERICAL INFLECTIONS
1472              
1473             my %nth =
1474             (
1475             0 => 'th',
1476             1 => 'st',
1477             2 => 'nd',
1478             3 => 'rd',
1479             4 => 'th',
1480             5 => 'th',
1481             6 => 'th',
1482             7 => 'th',
1483             8 => 'th',
1484             9 => 'th',
1485             11 => 'th',
1486             12 => 'th',
1487             13 => 'th',
1488             );
1489              
1490              
1491             my %ordinal;
1492             @ordinal{qw(ty one two three five eight nine twelve )}=
1493             qw(tieth first second third fifth eighth ninth twelfth);
1494              
1495             my $ordinal_suff = join '|', keys %ordinal, "";
1496              
1497             $ordinal{""} = 'th';
1498              
1499             sub ORD($)
1500 166     166 0 307 {
1501 166 100       747 my $num = shift;
1502 83         192 if ($num =~ /\d/) {
1503 83   66     607 my $tail = substr($num, -3);
1504             return $num . ($nth{$tail%100} || $nth{$tail%10});
1505             }
1506 83         686 else {
1507 83         451 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/;
1508             return $num;
1509             }
1510             }
1511              
1512              
1513             my %default_args =
1514             (
1515             'group' => 0,
1516             'comma' => ',',
1517             'and' => 'and',
1518             'zero' => 'zero',
1519             'one' => 'one',
1520             'decimal' => 'point',
1521             );
1522              
1523             my @unit = ('',qw(one two three four five six seven eight nine));
1524             my @teen = qw(ten eleven twelve thirteen fourteen
1525             fifteen sixteen seventeen eighteen nineteen);
1526             my @ten = ('','',qw(twenty thirty forty fifty sixty seventy eighty ninety));
1527              
1528             ## From http://en.wikipedia.org/wiki/Names_of_large_numbers
1529             our %millmap = (
1530             #num zeros
1531             '0' => { US => q(), UK => q(), EU => q()},
1532             '3' => { US => q(thousand), UK => q(thousand), EU => q(thousand)},
1533             '6' => { US => q(million), UK => q(million), EU => q(million)},
1534             '9' => { US => q(billion), UK => q(thousand million), EU => q(milliard)},
1535             '12' => { US => q(trillion), UK => q(billion), EU => q(billion)},
1536             '15' => { US => q(quadrillion), UK => q(thousand billion), EU => q(billiard)},
1537             '18' => { US => q(quintillion), UK => q(trillion), EU => q(trillion)},
1538             '21' => { US => q(sextillion), UK => q(thousand trillion), EU => q(trilliard)},
1539             '24' => { US => q(septillion), UK => q(quadrillion), EU => q(quadrillion)},
1540             '27' => { US => q(octillion), UK => q(thousand quadrillion), EU => q(quadrilliard)},
1541             '30' => { US => q(nonillion), UK => q(quintillion), EU => q(quintillion)},
1542             '33' => { US => q(decillion), UK => q(thousand quintillion), EU => q(quintilliard)},
1543             '36' => { US => q(undecillion), UK => q(sextillion), EU => q(sextillion)},
1544             '39' => { US => q(duodecillion), UK => q(thousand sextillion), EU => q(sextilliard)},
1545             '42' => { US => q(tredecillion), UK => q(septillion), EU => q(septillion)},
1546             '45' => { US => q(quattuordecillion), UK => q(thousand septillion), EU => q(septilliard)},
1547             '48' => { US => q(quindecillion), UK => q(octillion), EU => q(octillion)},
1548             '51' => { US => q(sedecillion), UK => q(thousand octillion), EU => q(octilliard)},
1549             '54' => { US => q(septendecillion), UK => q(nonillion), EU => q(nonillion)},
1550             '57' => { US => q(octodecillion), UK => q(thousand nonillion), EU => q(nonilliard)},
1551             '60' => { US => q(novendecillion), UK => q(decillion), EU => q(decillion)},
1552             '63' => { US => q(vigintillion), UK => q(thousand decillion), EU => q(decilliard)},
1553             '66' => { US => q(unvigintillion), UK => q(undecillion), EU => q(undecillion)},
1554             '69' => { US => q(duovigintillion), UK => q(thousand undecillion), EU => q(undecilliard)},
1555             '72' => { US => q(tresvigintillion), UK => q(duodecillion), EU => q(duodecillion)},
1556             '75' => { US => q(quattuorvigintillion), UK => q(thousand duodecillion), EU => q(duodecilliard)},
1557             '78' => { US => q(quinquavigintillion), UK => q(tredecillion), EU => q(tredecillion)},
1558             '81' => { US => q(sesvigintillion), UK => q(thousand tredecillion), EU => q(tredecilliard)},
1559             '84' => { US => q(septemvigintillion), UK => q(quattuordecillion), EU => q(quattuordecillion)},
1560             '87' => { US => q(octovigintillion), UK => q(thousand quattuordecillion), EU => q(quattuordecilliard)},
1561             '90' => { US => q(novemvigintillion), UK => q(quindecillion), EU => q(quindecillion)},
1562             '93' => { US => q(trigintillion), UK => q(thousand quindecillion), EU => q(quindecilliard)},
1563             '96' => { US => q(untrigintillion), UK => q(sedecillion), EU => q(sedecillion)},
1564             '99' => { US => q(duotrigintillion), UK => q(thousand sedecillion), EU => q(sedecilliard)},
1565             '102' => { US => q(trestrigintillion), UK => q(septendecillion), EU => q(septendecillion)},
1566             '105' => { US => q(quattuortrigintillion), UK => q(thousand septendecillion), EU => q(septendecilliard)},
1567             '108' => { US => q(quinquatrigintillion), UK => q(octodecillion), EU => q(octodecillion)},
1568             '111' => { US => q(sestrigintillion), UK => q(thousand octodecillion), EU => q(octodecilliard)},
1569             '114' => { US => q(septentrigintillion), UK => q(novendecillion), EU => q(novendecillion)},
1570             '117' => { US => q(octotrigintillion), UK => q(thousand novendecillion), EU => q(novendecilliard)},
1571             '120' => { US => q(noventrigintillion), UK => q(vigintillion), EU => q(vigintillion)},
1572             '123' => { US => q(quadragintillion), UK => q(thousand vigintillion), EU => q(vigintilliard)},
1573             '153' => { US => q(quinquagintillion), UK => q(thousand quinquavigintillion), EU => q(quinquavigintilliard) },
1574             '183' => { US => q(sexagintillion), UK => q(thousand trigintillion), EU => q(trigintilliard) },
1575             '213' => { US => q(septuagintillion), UK => q(thousand quinquatrigintillion), EU => q(quinquatrigintilliard) },
1576             '243' => { US => q(octogintillion), UK => q(thousand quadragintillion), EU => q(quadragintilliard) },
1577             '273' => { US => q(nonagintillion), UK => q(thousand quinquaquadragintillion), EU => q(quinquaquadragintilliard) },
1578             '303' => { US => q(centillion), UK => q(thousand quinquagintillion), EU => q(quinquagintilliard) },
1579             '306' => { US => q(uncentillion), UK => q(unquinquagintillion), EU => q(unquinquagintillion) },
1580             '309' => { US => q(duocentillion), UK => q(thousand unquinquagintillion), EU => q(unquinquagintilliard) },
1581             '312' => { US => q(trescentillion), UK => q(duoquinquagintillion), EU => q(duoquinquagintillion) },
1582             '333' => { US => q(decicentillion), UK => q(thousand quinquaquinquagintillion), EU => q(quinquaquinquagintilliard) },
1583             '336' => { US => q(undecicentillion), UK => q(sesquinquagintillion), EU => q(sesquinquagintillion) },
1584             '363' => { US => q(viginticentillion), UK => q(thousand sexagintillion), EU => q(sexagintilliard) },
1585             '366' => { US => q(unviginticentillion), UK => q(unsexagintillion), EU => q(unsexagintillion) },
1586             '393' => { US => q(trigintacentillion), UK => q(thousand quinquasexagintillion), EU => q(quinquasexagintilliard) },
1587             '423' => { US => q(quadragintacentillion), UK => q(thousand septuagintillion), EU => q(septuagintilliard) },
1588             '453' => { US => q(quinquagintacentillion), UK => q(thousand quinquaseptuagintillion), EU => q(quinquaseptuagintilliard) },
1589             '483' => { US => q(sexagintacentillion), UK => q(thousand octogintillion), EU => q(octogintilliard) },
1590             '513' => { US => q(septuagintacentillion), UK => q(thousand quinquaoctogintillion), EU => q(quinquaoctogintilliard) },
1591             '543' => { US => q(octogintacentillion), UK => q(thousand nonagintillion), EU => q(nonagintilliard) },
1592             '573' => { US => q(nonagintacentillion), UK => q(thousand quinquanonagintillion), EU => q(quinquanonagintilliard) },
1593             '603' => { US => q(ducentillion), UK => q(thousand centillion), EU => q(centilliard) },
1594             '903' => { US => q(trecentillion), UK => q(thousand quinquagintacentillion), EU => q(quinquagintacentilliard) },
1595             '1203' => { US => q(quadringentillion), UK => q(thousand ducentillion), EU => q(ducentilliard) },
1596             '1503' => { US => q(quingentillion), UK => q(thousand quinquagintaducentillion), EU => q(quinquagintaducentilliard) },
1597             '1803' => { US => q(sescentillion), UK => q(thousand trecentillion), EU => q(trecentilliard) },
1598             '2103' => { US => q(septingentillion), UK => q(thousand quinquagintatrecentillion), EU => q(quinquagintatrecentilliard) },
1599             '2403' => { US => q(octingentillion), UK => q(thousand quadringentillion), EU => q(quadringentilliard) },
1600             '2703' => { US => q(nongentillion), UK => q(thousand quinquagintaquadringentillion), EU => q(quinquagintaquadringentilliard) },
1601             '3003' => { US => q(millinillion), UK => q(thousand quingentillion), EU => q(quingentilliard) },
1602             );
1603              
1604             my $millchoice = 'US';
1605             my @millkeys = sort {$a <=> $b } keys %millmap;
1606             my $maxmill = (sort {$a <=> $b } keys %millmap)[-1];
1607             our $millast = undef;
1608             our @mill;
1609             for my $numZeros ( 0..$maxmill ) {
1610             next if $numZeros%3 != 0;
1611             my $arraySlot = $numZeros/3;
1612             if( defined( $millmap{$numZeros} ) ) {
1613             $mill[$arraySlot] = " $millmap{$numZeros}->{$millchoice}";
1614             $millast = $arraySlot;
1615             } else {
1616             my $missing = $arraySlot - $millast;
1617             $mill[$arraySlot] = $mill[$missing] . $mill[$millast];
1618             }
1619             }
1620 14855   100 14855 0 38539  
1621 14855 50       27428 sub mill { my $ind = $_[0]||0;
1622 14855 50       1256259 die "Number out of range\n" if $ind > $#mill;
1623             return $ind<@mill ? $mill[$ind] : ' ???illion'; }
1624 6785     6785 0 15209  
1625             sub unit { return $unit[$_[0]]. mill($_[1]); }
1626              
1627             sub ten
1628 6041 100 100 6041 0 28721 {
    100          
1629             return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2])
1630 1087   100     5401 if $_[0] ne '1';
1631             return $teen[$_[1]]. $mill[$_[2]||0];
1632             }
1633              
1634             sub hund
1635 3128 100 100 3128 0 8745 {
    100          
1636             return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '')
1637 324 100 100     1032 . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0];
1638 12         55 return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2];
1639             return '';
1640             }
1641              
1642              
1643             sub enword
1644 1075     1075 0 2383 {
1645             my ($num,$group,$zero,$one,$comma,$and) = @_;
1646 1075 100       3552  
    100          
    100          
    100          
    100          
1647             if ($group==1)
1648 90 100       360 {
  3342 100       9527  
1649             $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg;
1650             }
1651             elsif ($group==2)
1652 64 100       214 {
  1602 100       3851  
1653 64 100       228 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg;
  30         88  
1654             $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e;
1655             }
1656             elsif ($group==3)
1657 64 100       176 {
  1055 100       3233  
    100          
    100          
1658 64 50       213 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg;
  23 100       76  
1659 64 100       212 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e;
  23 100       93  
1660             $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e;
1661             }
1662 49         74 elsif ($num+0==0) {
1663             $num = $zero;
1664             }
1665 45         112 elsif ($num+0==1) {
1666             $num = $one;
1667             }
1668 763         1429 else {
1669 763         1059 $num =~ s/\A\s*0+//;
1670 763         3783 my $mill = 0;
  3128         8571  
1671 763         2675 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e;
  371         781  
1672 763         2862 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e;
  352         701  
1673             $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e;
1674 1075         3021 }
1675             return $num;
1676             }
1677              
1678             sub NUMWORDS
1679 1315     1315 0 50300 {
1680             my $num = shift;
1681 1315 100 66     3857  
1682 56         672 if (@_ % 2 and require Carp) {
1683             die "Missing value in option list (odd number of option args) at"
1684             . join ' line ', (caller)[1,2];
1685             }
1686 1259         6215  
1687 1259         2642 my %arg = ( %default_args, @_ );
1688             my $group = $arg{group};
1689              
1690 1259 100 100     3530 # Handle "stylistic" conversions (up to a given threshold)...
1691 230         798 if (exists $arg{threshold} && $num > $arg{threshold}) {
1692 230         543 my ($whole, $frac) = split /[.]/, $num;
1693 230 100       602 while ($arg{comma}) {
1694             $whole =~ s{ (\d) ( \d{3}(?:,|\z) ) }{$1,$2}xms
1695             or last;
1696 230 100       1026 }
1697             return $frac ? "$whole.$frac" : $whole;
1698             }
1699 1029 50       3987  
1700 1029 50       3018 die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/;
    50          
1701             my $sign = ($num =~ /\A\s*\+/) ? "plus"
1702             : ($num =~ /\A\s*\-/) ? "minus"
1703             : '';
1704 1029         2206  
1705 1029         1898 my ($zero, $one) = @arg{'zero','one'};
1706 1029         1441 my $comma = $arg{comma};
1707             my $and = $arg{'and'};
1708 1029         2235  
1709             my $ord = $num =~ s/(st|nd|rd|th)\Z//;
1710 1029 100       3349 my @chunks = ($arg{decimal})
    50          
1711             ? $group ? split(/\./, $num) : split(/\./, $num, 2)
1712             : ($num);
1713 1029         1613  
1714             my $first = 1;
1715 1029 100       2039  
  7         10  
  7         13  
1716             if ($chunks[0] eq '') { $first=0; shift @chunks; }
1717 1029         1729  
1718             foreach ( @chunks )
1719 1075         2034 {
1720 1075 100       1905 s/\D//g;
1721             $_ = '0' unless $_;
1722 1075 100 100     3155  
  26         54  
1723 1049         2260 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) }
1724             else { $_ = enword($_,$group,$zero,$one,$comma,$and) }
1725 1075         3705  
1726 1075         7330 s/, \Z//;
1727 1075 100 100     4669 s/\s+,/,/g;
1728 1075         11766 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first;
1729 1075         27740 s/\s+/ /g;
1730 1075 100       2621 s/(\A\s|\s\Z)//g;
1731             $first = '' if $first;
1732             }
1733 1029         1643  
1734 1029 100       1814 my @numchunks = ();
1735             if ($first =~ /0/)
1736 7         15 {
1737             unshift @chunks, '';
1738             }
1739             else
1740 1022         5033 {
1741             @numchunks = split /\Q$comma /, $chunks[0];
1742             }
1743 1029 100 100     2632  
1744             $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/
1745             if $ord and @numchunks;
1746 1029         2335  
1747             foreach (@chunks[1..$#chunks])
1748 53         102 {
1749 53         200 push @numchunks, $arg{decimal};
1750             push @numchunks, split /\Q$comma /;
1751             }
1752 1029 50       2379  
    100          
1753             if (wantarray)
1754 0 0       0 {
1755             unshift @numchunks, $sign if $sign;
1756 0         0 return @numchunks
1757             }
1758             elsif ($group)
1759 168 50       1657 {
1760             return ($sign?"$sign ":'') . join ", ", @numchunks;
1761             }
1762             else
1763 861 50       1635 {
1764 861         2316 $num = ($sign?"$sign ":'') . shift @numchunks;
1765 861         1502 $first = ($num !~ /$arg{decimal}\Z/);
1766             foreach ( @numchunks )
1767 3197 100       7525 {
    100          
1768             if (/\A$arg{decimal}\Z/)
1769 22         41 {
1770 22         42 $num .= " $_";
1771             $first = 0;
1772             }
1773             elsif ($first)
1774 3067         5808 {
1775             $num .= "$comma $_";
1776             }
1777             else
1778 108         199 {
1779             $num .= " $_";
1780             }
1781 861         3920 }
1782             return $num;
1783             }
1784             }
1785              
1786             # Join words with commas and a trailing 'and' (when appropriate)...
1787              
1788 29     29 0 155 sub WORDLIST {
1789             my %opt;
1790             my @words;
1791 29         58  
1792 101 100       197 for my $arg (@_) {
1793 24         45 if (ref $arg eq 'HASH' ) {
  24         95  
1794             %opt = (%opt, %{$arg});
1795             }
1796 77         132 else {
1797             push @words, $arg;
1798             }
1799             }
1800 29 50       109  
1801 29 100       75 return "" if @words == 0;
1802             return "$words[0]" if @words == 1;
1803 24 100       54  
1804 24 100       51 my $conj = exists($opt{conj}) ? $opt{conj} : 'and';
1805 6         42 if (@words == 2) {
1806 6         37 $conj =~ s/^ (?=\S) | (?<=\S) $/ /gxms;
1807             return "$words[0]$conj$words[1]";
1808             }
1809              
1810 18 100       101 my $sep = exists $opt{sep} ? $opt{sep}
    50          
1811             : grep(/,/, @words) ? q{; }
1812             : q{, }
1813             ;
1814              
1815 18 100       68 my $final_sep = !exists $opt{final_sep} ? "$sep $conj"
    100          
1816             : length($opt{final_sep}) == 0 ? $conj
1817             : "$opt{final_sep} $conj"
1818 18         85 ;
1819 18         103 $final_sep =~ s/\s+/ /gmxs;
1820             $final_sep =~ s/^ (?=[^\W\d_]) | (?<=\S) $/ /gxms;
1821 18         142  
1822             return join($sep, @words[0..@words-2]) . "$final_sep$words[-1]";
1823             }
1824              
1825              
1826              
1827             1;
1828              
1829             __END__