File Coverage

blib/lib/Lingua/EN/Inflect.pm
Criterion Covered Total %
statement 341 423 80.6
branch 358 470 76.1
condition 144 285 50.5
subroutine 35 48 72.9
pod 9 33 27.2
total 887 1259 70.4


line stmt bran cond sub pod time code
1             package Lingua::EN::Inflect;
2              
3 15     15   55859 use strict;
  15         100  
  15         503  
4 15     15   77 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
  15         25  
  15         976  
5 15     15   9627 use Env;
  15         30902  
  15         73  
6              
7             require Exporter;
8             @ISA = qw(Exporter);
9              
10             our $VERSION = '1.903';
11              
12             %EXPORT_TAGS =
13             (
14             ALL => [ qw( classical inflect
15             PL PL_N PL_V PL_ADJ NO NUM A AN
16             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
17             PART_PRES
18             ORD
19             NUMWORDS
20             WORDLIST
21             def_noun def_verb def_adj def_a def_an )],
22              
23             INFLECTIONS => [ qw( classical inflect
24             PL PL_N PL_V PL_ADJ PL_eq
25             NO NUM A AN PART_PRES )],
26              
27             PLURALS => [ qw( classical inflect
28             PL PL_N PL_V PL_ADJ NO NUM
29             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
30              
31             COMPARISONS => [ qw( classical
32             PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
33              
34             ARTICLES => [ qw( classical inflect NUM A AN )],
35              
36             NUMERICAL => [ qw( ORD NUMWORDS )],
37              
38             USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )],
39             );
40              
41             Exporter::export_ok_tags(qw( ALL ));
42              
43             # SUPPORT CLASSICAL PLURALIZATIONS
44              
45             my %def_classical = (
46             all => 0,
47             zero => 0,
48             herd => 0,
49             names => 1,
50             persons => 0,
51             ancient => 0,
52             );
53              
54             my %all_classical = (
55             all => 1,
56             zero => 1,
57             herd => 1,
58             names => 1,
59             persons => 1,
60             ancient => 1,
61             noble_and_ancient => 10,
62             );
63              
64             my %classical = %def_classical;
65              
66             my $classical_mode = join '|', keys %all_classical;
67             $classical_mode = qr/^(?:$classical_mode)$/;
68              
69             sub classical
70             {
71 1786 100   1786 0 97037 if (!@_) {
72 1         5 %classical = %all_classical;
73 1         3 return;
74             }
75 1785 100 100     4419 if (@_==1 && $_[0] !~ $classical_mode) {
76 5 100       26 %classical = $_[0] ? %all_classical : ();
77 5         14 return;
78             }
79 1780         3758 while (@_) {
80 2663         4132 my $arg = shift;
81 2663 50       12348 if ($arg !~ $classical_mode) {
82 0         0 die "Unknown classical mode ($arg)\n";
83             }
84 2663 100 66     11062 if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; }
  2657         6347  
85 6         15 else { $classical{$arg} = 1; }
86              
87 2663 100       6649 if ($arg eq 'all') {
88 1769 100       8870 %classical = $classical{all} ? %all_classical : ();
89             }
90             }
91             }
92              
93             my $persistent_count;
94              
95             sub NUM # (;$count,$show)
96             {
97 0 0   0 0 0 if (defined $_[0])
98             {
99 0         0 $persistent_count = $_[0];
100 0 0 0     0 return $_[0] if !defined($_[1]) || $_[1];
101             }
102             else
103             {
104 0         0 $persistent_count = undef;
105             }
106 0         0 return '';
107             }
108              
109              
110             # 0. PERFORM GENERAL INFLECTIONS IN A STRING
111              
112 735     735 0 1707 sub enclose { "(?:$_[0])" }
113              
114             sub inflect
115             {
116 0     0 0 0 my $save_persistent_count = $persistent_count;
117 0         0 my @sections = split /(NUM\([^)]*\))/, $_[0];
118 0         0 my $inflection = "";
119              
120 0         0 foreach ( @sections )
121             {
122 0 0       0 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe)
  0         0  
123             {
124 0   0     0 1 while
      0        
      0        
      0        
      0        
      0        
      0        
      0        
125 0         0 s/\bPL \( ([^),]*) (, ([^)]*) )? \) / PL($1,$3) /xeg
126 0         0 || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \) / PL_N($1,$3) /xeg
127 0         0 || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \) / PL_V($1,$3) /xeg
128 0         0 || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \) / PL_ADJ($1,$3) /xeg
129 0         0 || s/\bAN? \( ([^),]*) (, ([^)]*) )? \) / A($1,$3) /xeg
130 0         0 || s/\bNO \( ([^),]*) (, ([^)]*) )? \) / NO($1,$3) /xeg
131 0         0 || s/\bORD \( ([^)]*) \) / ORD($1) /xeg
132 0         0 || s/\bNUMWORDS \( ([^)]*) \) / NUMWORDS($1) /xeg
133 0         0 || s/\bPART_PRES \( ([^)]*) \) / PART_PRES($1) /xeg
134             }
135              
136 0         0 $inflection .= $_;
137             }
138              
139 0         0 $persistent_count = $save_persistent_count;
140 0         0 return $inflection;
141             }
142              
143              
144             # 1. PLURALS
145              
146             my %PL_sb_irregular_s =
147             (
148             "corpus" => "corpuses|corpora",
149             "opus" => "opuses|opera",
150             "magnum opus" => "magnum opuses|magna opera",
151             "genus" => "genera",
152             "mythos" => "mythoi",
153             "penis" => "penises|penes",
154             "testis" => "testes",
155             "atlas" => "atlases|atlantes",
156             "yes" => "yeses",
157             'editio princeps' => 'editiones principes',
158             'starets' => 'startsy',
159             'staretz' => 'startzy',
160             );
161              
162             my %PL_sb_irregular =
163             (
164             "child" => "children",
165             "brother" => "brothers|brethren",
166             "loaf" => "loaves",
167             "hoof" => "hoofs|hooves",
168             "beef" => "beefs|beeves",
169             "thief" => "thiefs|thieves",
170             "money" => "monies",
171             "mongoose" => "mongooses",
172             "ox" => "oxen",
173             "cow" => "cows|kine",
174             "graffito" => "graffiti",
175             "prima donna" => "prima donnas|prime donne",
176             "octopus" => "octopuses|octopodes",
177             "genie" => "genies|genii",
178             "ganglion" => "ganglions|ganglia",
179             "trilby" => "trilbys",
180             "turf" => "turfs|turves",
181             "numen" => "numina",
182             "atman" => "atmas",
183             "occiput" => "occiputs|occipita",
184             'sabretooth' => 'sabretooths',
185             'sabertooth' => 'sabertooths',
186             'lowlife' => 'lowlifes',
187             'flatfoot' => 'flatfoots',
188             'tenderfoot' => 'tenderfoots',
189             'Romany' => 'Romanies',
190             'romany' => 'romanies',
191             'Tornese' => 'Tornesi',
192             'Jerry' => 'Jerrys',
193             'jerry' => 'jerries',
194             'Mary' => 'Marys',
195             'mary' => 'maries',
196             'talouse' => 'talouses',
197             'blouse' => 'blouses',
198             'Rom' => 'Roma',
199             'rom' => 'roma',
200             'carmen' => 'carmina',
201             'cheval' => 'chevaux',
202             'chervonetz' => 'chervontzi',
203             'kuvasz' => 'kuvaszok',
204             'felo' => 'felones',
205             'put-off' => 'put-offs',
206             'set-off' => 'set-offs',
207             'set-out' => 'set-outs',
208             'set-to' => 'set-tos',
209             'brother-german' => 'brothers-german|brethren-german',
210             'studium generale' => 'studia generali',
211              
212             %PL_sb_irregular_s,
213             );
214              
215             my $PL_sb_irregular = enclose join '|', reverse sort keys %PL_sb_irregular;
216              
217             # Z's that don't double
218              
219             my @PL_sb_z_zes =
220             (
221             "batz", "quartz", "topaz", "snooz(?=e)", "kibbutz",
222             );
223             my $PL_sb_z_zes = enclose join '|', @PL_sb_z_zes;
224              
225             # UNCONDITIONAL "..is" -> "..ides"
226              
227             my @PL_sb_U_is_ides =
228             (
229             "aphis",
230             );
231              
232             my $PL_sb_U_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_U_is_ides;
233              
234             # CLASSICAL "..is" -> "..ides"
235              
236             my @PL_sb_C_is_ides =
237             (
238             # GENERAL WORDS...
239              
240             "ephemeris", "iris", "clitoris",
241             "chrysalis", "epididymis",
242              
243             # INFLAMATIONS...
244              
245             ".*itis",
246              
247             );
248              
249             my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides;
250              
251             # UNCONDITIONAL "..a" -> "..ata"
252              
253             my @PL_sb_U_a_ata =
254             (
255             "plasmalemma", "pseudostoma",
256             );
257              
258             my $PL_sb_U_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_U_a_ata;
259              
260             # CLASSICAL "..a" -> "..ata"
261              
262             my @PL_sb_C_a_ata =
263             (
264             "anathema", "bema", "carcinoma", "charisma", "diploma",
265             "dogma", "drama", "edema", "enema", "enigma", "lemma",
266             "lymphoma", "magma", "melisma", "miasma", "oedema",
267             "sarcoma", "schema", "soma", "stigma", "stoma", "trauma",
268             "gumma", "pragma", "bema",
269             );
270              
271             my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata;
272              
273             # UNCONDITIONAL "..a" -> "..ae"
274              
275             my $PL_sb_U_a_ae = enclose join "|",
276             (
277             "alumna", "alga", "vertebra", "persona"
278             );
279              
280             # CLASSICAL "..a" -> "..ae"
281              
282             my $PL_sb_C_a_ae = enclose join "|",
283             (
284             "amoeba", "antenna", "formula", "hyperbola",
285             "medusa", "nebula", "parabola", "abscissa",
286             "hydra", "nova", "lacuna", "aurora", ".*umbra",
287             "flora", "fauna",
288             );
289              
290             # CLASSICAL "..en" -> "..ina"
291              
292             my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) }
293             (
294             "stamen", "foramen", "lumen"
295             );
296              
297             # UNCONDITIONAL "..um" -> "..a"
298              
299             my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) }
300             (
301             "bacterium", "agendum", "desideratum", "erratum",
302             "stratum", "datum", "ovum", "extremum",
303             "candelabrum", "intermedium", "malum", "Progymnasium",
304             );
305              
306             # CLASSICAL "..um" -> "..a"
307              
308             my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) }
309             (
310             "maximum", "minimum", "momentum", "optimum",
311             "quantum", "cranium", "curriculum", "dictum",
312             "phylum", "aquarium", "compendium", "emporium",
313             "enconium", "gymnasium", "honorarium", "interregnum",
314             "lustrum", "memorandum", "millennium", "rostrum",
315             "spectrum", "speculum", "stadium", "trapezium",
316             "ultimatum", "medium", "vacuum", "velum",
317             "consortium",
318             );
319              
320             # UNCONDITIONAL "..us" -> "i"
321              
322             my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) }
323             (
324             "alumnus", "alveolus", "bacillus", "bronchus",
325             "locus", "nucleus", "stimulus", "meniscus",
326             "sarcophagus", "interradius", "perradius", "triradius",
327             );
328              
329             # CLASSICAL "..us" -> "..i"
330              
331             my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) }
332             (
333             "focus", "radius", "genius",
334             "incubus", "succubus", "nimbus",
335             "fungus", "nucleolus", "stylus",
336             "torus", "umbilicus", "uterus",
337             "hippopotamus", "cactus",
338             );
339              
340             # CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS)
341              
342             my $PL_sb_C_us_us = enclose join "|",
343             (
344             "status", "apparatus", "prospectus", "sinus",
345             "hiatus", "impetus", "plexus",
346             );
347              
348             # UNCONDITIONAL "..on" -> "a"
349              
350             my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) }
351             (
352             "criterion", "perihelion", "aphelion",
353             "phenomenon", "prolegomenon", "noumenon",
354             "organon", "asyndeton", "hyperbaton",
355             "legomenon",
356             );
357              
358             # CLASSICAL "..on" -> "..a"
359              
360             my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) }
361             (
362             "oxymoron",
363             );
364              
365             # CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os")
366              
367             my @PL_sb_C_o_i =
368             (
369             "solo", "soprano", "basso", "alto",
370             "contralto", "tempo", "piano", "virtuoso",
371             );
372             my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i;
373              
374             # ALWAYS "..o" -> "..os"
375              
376             my $PL_sb_U_o_os = enclose join "|",
377             (
378             "^ado", "aficionado", "aggro",
379             "albino", "allegro", "ammo",
380             "Antananarivo", "archipelago", "armadillo",
381             "auto", "avocado", "Bamako",
382             "Barquisimeto", "bimbo", "bingo",
383             "Biro", "bolero", "Bolzano",
384             "bongo", "Boto", "burro",
385             "Cairo", "canto", "cappuccino",
386             "casino", "cello", "Chicago",
387             "Chimango", "cilantro", "cochito",
388             "coco", "Colombo", "Colorado",
389             "commando", "concertino", "contango",
390             "credo", "crescendo", "cyano",
391             "demo", "ditto", "Draco",
392             "dynamo", "embryo", "Esperanto",
393             "espresso", "euro", "falsetto",
394             "Faro", "fiasco", "Filipino",
395             "flamenco", "furioso", "generalissimo",
396             "Gestapo", "ghetto", "gigolo",
397             "gizmo", "Greensboro", "gringo",
398             "Guaiabero", "guano", "gumbo",
399             "gyro", "hairdo", "hippo",
400             "Idaho", "impetigo", "inferno",
401             "info", "intermezzo", "intertrigo",
402             "Iquico", "^ISO", "jumbo",
403             "junto", "Kakapo", "kilo",
404             "Kinkimavo", "Kokako", "Kosovo",
405             "Lesotho", "libero", "libido",
406             "libretto", "lido", "Lilo",
407             "limbo", "limo", "lineno",
408             "lingo", "lino", "livedo",
409             "loco", "logo", "lumbago",
410             "macho", "macro", "mafioso",
411             "magneto", "magnifico", "Majuro",
412             "Malabo", "manifesto", "Maputo",
413             "Maracaibo", "medico", "memo",
414             "metro", "Mexico", "micro",
415             "Milano", "Monaco", "mono",
416             "Montenegro", "Morocco", "Muqdisho",
417             "myo", "^NATO", "^NCO",
418             "neutrino", "^NGO", "Ningbo",
419             "octavo", "oregano", "Orinoco",
420             "Orlando", "Oslo", "^oto",
421             "panto", "Paramaribo", "Pardusco",
422             "pedalo", "photo", "pimento",
423             "pinto", "pleco", "Pluto",
424             "pogo", "polo", "poncho",
425             "Porto-Novo", "Porto", "pro",
426             "psycho", "pueblo", "quarto",
427             "Quito", "rhino", "risotto",
428             "rococo", "rondo", "Sacramento",
429             "saddo", "sago", "salvo",
430             "Santiago", "Sapporo", "Sarajevo",
431             "scherzando", "scherzo", "silo",
432             "sirocco", "sombrero", "staccato",
433             "sterno", "stucco", "stylo",
434             "sumo", "Taiko", "techno",
435             "terrazzo", "testudo", "timpano",
436             "tiro", "tobacco", "Togo",
437             "Tokyo", "torero", "Torino",
438             "Toronto", "torso", "tremolo",
439             "typo", "tyro", "ufo",
440             "UNESCO", "vaquero", "vermicello",
441             "verso", "vibrato", "violoncello",
442             "Virgo", "weirdo", "WHO",
443             "WTO", "Yamoussoukro", "yo-yo",
444             "zero", "Zibo",
445              
446             @PL_sb_C_o_i,
447             );
448              
449              
450             # UNCONDITIONAL "..ch" -> "..chs"
451              
452             my $PL_sb_U_ch_chs = enclose join "|", map { substr($_,0,-2) }
453             qw(
454             czech eunuch stomach
455             );
456              
457             # UNCONDITIONAL "..[ei]x" -> "..ices"
458              
459             my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) }
460             (
461             "codex", "murex", "silex",
462             );
463              
464             my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) }
465             (
466             "radix", "helix",
467             );
468              
469             # CLASSICAL "..[ei]x" -> "..ices"
470              
471             my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) }
472             (
473             "vortex", "vertex", "cortex", "latex",
474             "pontifex", "apex", "index", "simplex",
475             );
476              
477             my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) }
478             (
479             "appendix",
480             );
481              
482             # ARABIC: ".." -> "..i"
483              
484             my $PL_sb_C_i = enclose join "|",
485             (
486             "afrit", "afreet", "efreet",
487             );
488              
489             # HEBREW: ".." -> "..im"
490              
491             my $PL_sb_C_im = enclose join "|",
492             (
493             "goy", "seraph", "cherub", "zuz", "kibbutz",
494             );
495              
496             # UNCONDITIONAL "..man" -> "..mans"
497              
498             my $PL_sb_U_man_mans = enclose join "|",
499             qw(
500             \bataman caiman cayman ceriman
501             \bdesman \bdolman \bfarman \bharman \bhetman
502             human \bleman ottoman shaman talisman
503             Alabaman Bahaman Burman German
504             Hiroshiman Liman Nakayaman Norman Oklahoman
505             Panaman Roman Selman Sonaman Tacoman Yakiman
506             Yokohaman Yuman
507             );
508              
509             my @PL_sb_uninflected_s =
510             (
511             # PAIRS OR GROUPS SUBSUMED TO A SINGULAR...
512             "breeches", "britches", "pajamas", "pyjamas", "clippers", "gallows",
513             "hijinks", "headquarters", "pliers", "scissors", "testes", "herpes",
514             "pincers", "shears", "proceedings", "trousers",
515              
516             # UNASSIMILATED LATIN 4th DECLENSION
517              
518             "cantus", "coitus", "nexus",
519              
520             # RECENT IMPORTS...
521             "contretemps", "corps", "debris",
522             ".*ois", "siemens",
523              
524             # DISEASES
525             ".*measles", "mumps",
526              
527             # MISCELLANEOUS OTHERS...
528             "diabetes", "jackanapes", ".*series", "species", "rabies",
529             "chassis", "innings", "news", "mews", "haggis",
530             );
531              
532             my $PL_sb_uninflected_herd = enclose join "|",
533             # DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION
534             (
535             "wildebeest", "swine", "eland", "bison", "buffalo",
536             "elk", "rhinoceros", 'zucchini',
537             'caribou', 'dace', 'grouse', 'guinea[- ]fowl',
538             'haddock', 'hake', 'halibut', 'herring', 'mackerel',
539             'pickerel', 'pike', 'roe', 'seed', 'shad',
540             'snipe', 'teal', 'turbot', 'water[- ]fowl',
541             );
542              
543             my $PL_sb_lese_lesen = enclose join "|",
544             (
545             'Auslese',
546             'beerenauslese',
547             'Spaetlese',
548             'trockenbeerenauslese',
549             );
550              
551             my $PL_sb_uninflected = enclose join "|",
552             (
553             # SOME FISH AND HERD ANIMALS
554             ".*fish", "tuna", "salmon", "mackerel", "trout",
555             "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
556              
557             ".*deer", ".*sheep", "moose",
558              
559             # ALL NATIONALS ENDING IN -ese
560             "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
561             "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
562             "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
563             "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
564             "Shavese", "Vermontese", "Wenchowese", "Yengeese",
565             ".*[nrlm]ese",
566              
567             # SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE)
568              
569             @PL_sb_uninflected_s,
570              
571             # DISEASES
572             ".*pox",
573              
574              
575             # OTHER ODDITIES
576             "graffiti", "djinn", 'samuri',
577             '.*craft$', 'offspring', 'pence', 'quid', 'hertz',
578             );
579              
580             # SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es)
581              
582             my $PL_sb_singular_s = enclose join '|',
583             (
584             ".*ss",
585             "acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
586             "bronchitis", "bursitis", "caddis", "cannabis",
587             "canvas", "chaos", "cosmos", "dais", "digitalis",
588             "epidermis", "ethos", "eyas", "gas", "glottis",
589             "hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
590             "pathos", "pelvis", "polis", "rhinoceros",
591             "sassafras", "trellis", ".*us", "[A-Z].*es",
592              
593             @PL_sb_C_is_ides,
594             @PL_sb_U_is_ides,
595             );
596              
597             my $PL_v_special_s = enclose join '|',
598             (
599             $PL_sb_singular_s,
600             @PL_sb_uninflected_s,
601             keys %PL_sb_irregular_s,
602             '(.*[csx])is',
603             '(.*)ceps',
604             '[A-Z].*s',
605             );
606              
607             my %PL_sb_postfix_adj = (
608             'general' => ['(?!major|lieutenant|brigadier|adjutant|.*star)\S+'],
609             'martial' => [qw(court)],
610             );
611              
612             foreach (keys %PL_sb_postfix_adj) {
613             $PL_sb_postfix_adj{$_} = enclose
614             enclose(join('|', @{$PL_sb_postfix_adj{$_}}))
615             . "(?=(?:-|\\s+)$_)";
616             }
617              
618             my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)';
619              
620             my $PL_prep = enclose join '|', qw (
621             about above across after against amid amidst among around as at athwart atop
622             barring before behind below beneath beside besides between betwixt beyond but by
623             circa
624             despite down during
625             except
626             failing for from
627             given
628             in inside into
629             like
630             minus
631             near next
632             of off on onto out outside over
633             pace past per plus pro
634             qua
635             round
636             sans save since
637             than through throughout thru thruout till times to toward towards
638             under underneath unlike until unto up upon
639             versus via vs
640             with within without worth
641             );
642              
643             my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eua])(?:-|\s+))a(?:-|\s+)(.*)';
644              
645             my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eua])((-|\s+)(.*))?)';
646              
647              
648             my %PL_pron_nom =
649             (
650             # NOMINATIVE REFLEXIVE
651              
652             "i" => "we", "myself" => "ourselves",
653             "you" => "you", "yourself" => "yourselves",
654             "she" => "they", "herself" => "themselves",
655             "he" => "they", "himself" => "themselves",
656             "it" => "they", "itself" => "themselves",
657             "they" => "they", "themself" => "themselves",
658              
659             # POSSESSIVE
660              
661             "mine" => "ours",
662             "yours" => "yours",
663             "hers" => "theirs",
664             "his" => "theirs",
665             "its" => "theirs",
666             "theirs" => "theirs",
667             );
668              
669             my %PL_pron_acc =
670             (
671             # ACCUSATIVE REFLEXIVE
672              
673             "me" => "us", "myself" => "ourselves",
674             "you" => "you", "yourself" => "yourselves",
675             "her" => "them", "herself" => "themselves",
676             "him" => "them", "himself" => "themselves",
677             "it" => "them", "itself" => "themselves",
678             "them" => "them", "themself" => "themselves",
679             );
680              
681             my $PL_pron_acc = enclose join '|', keys %PL_pron_acc;
682              
683             my %PL_v_irregular_pres =
684             (
685             # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
686             # 3RD PERS. (INDET.)
687              
688             "am" => "are", "are" => "are", "is" => "are",
689             "was" => "were", "were" => "were", "was" => "were",
690             "have" => "have", "have" => "have", "has" => "have",
691             "do" => "do", "do" => "do", "does" => "do",
692             );
693              
694             my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres;
695              
696             my %PL_v_ambiguous_pres =
697             (
698             # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
699             # 3RD PERS. (INDET.)
700              
701             "act" => "act", "act" => "act", "acts" => "act",
702             "blame" => "blame", "blame" => "blame", "blames" => "blame",
703             "can" => "can", "can" => "can", "can" => "can",
704             "must" => "must", "must" => "must", "must" => "must",
705             "fly" => "fly", "fly" => "fly", "flies" => "fly",
706             "copy" => "copy", "copy" => "copy", "copies" => "copy",
707             "drink" => "drink", "drink" => "drink", "drinks" => "drink",
708             "fight" => "fight", "fight" => "fight", "fights" => "fight",
709             "fire" => "fire", "fire" => "fire", "fires" => "fire",
710             "like" => "like", "like" => "like", "likes" => "like",
711             "look" => "look", "look" => "look", "looks" => "look",
712             "make" => "make", "make" => "make", "makes" => "make",
713             "reach" => "reach", "reach" => "reach", "reaches" => "reach",
714             "run" => "run", "run" => "run", "runs" => "run",
715             "sink" => "sink", "sink" => "sink", "sinks" => "sink",
716             "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
717             "view" => "view", "view" => "view", "views" => "view",
718             );
719              
720             my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres;
721              
722              
723             my $PL_v_irregular_non_pres = enclose join '|',
724             (
725             "did", "had", "ate", "made", "put",
726             "spent", "fought", "sank", "gave", "sought",
727             "shall", "could", "ought", "should",
728             );
729              
730             my $PL_v_ambiguous_non_pres = enclose join '|',
731             (
732             "thought", "saw", "bent", "will", "might", "cut",
733             );
734              
735             # "..oes" -> "..oe" (the rest are "..oes" -> "o")
736              
737             my $PL_v_oes_oe = enclose join "|",
738             qw(
739             .*shoes .*hoes .*toes
740             canoes floes oboes roes throes woes
741             );
742              
743             my $PL_count_zero = enclose join '|',
744             (
745             0, "no", "zero", "nil"
746             );
747              
748             my $PL_count_one = enclose join '|',
749             (
750             1, "a", "an", "one", "each", "every", "this", "that",
751             );
752              
753             my %PL_adj_special =
754             (
755             "a" => "some", "an" => "some",
756             "this" => "these", "that" => "those",
757             );
758             my $PL_adj_special = enclose join '|', keys %PL_adj_special;
759              
760             my %PL_adj_poss =
761             (
762             "my" => "our",
763             "your" => "your",
764             "its" => "their",
765             "her" => "their",
766             "his" => "their",
767             "their" => "their",
768             );
769             my $PL_adj_poss = enclose join '|', keys %PL_adj_poss;
770              
771              
772             sub checkpat
773             {
774 0     0 0 0 local $SIG{__WARN__} = sub {0};
  6     6   33  
775 6 50 33     363 do {$@ =~ s/at.*?$//;
  0         0  
776 0         0 die "\nBad user-defined singular pattern:\n\t$@\n"}
777             if (!eval "'' =~ m/$_[0]/; 1;" or $@);
778 6         27 return @_;
779             }
780              
781             sub checkpatsubs
782             {
783 6     6 0 17 checkpat($_[0]);
784 6 50       14 if (defined $_[1])
785             {
786 6     0   26 local $SIG{__WARN__} = sub {0};
  0         0  
787 6 50 33     256 do {$@ =~ s/at.*?$//;
  0         0  
788 0         0 die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"}
789             if (!eval "qq{$_[1]}; 1;" or $@);
790             }
791 6         25 return @_;
792             }
793              
794             my @PL_sb_user_defined = ();
795             my @PL_v_user_defined = ();
796             my @PL_adj_user_defined = ();
797             my @A_a_user_defined = ();
798              
799             sub def_noun
800             {
801 2     2 1 12 unshift @PL_sb_user_defined, checkpatsubs(@_);
802 2         5 return 1;
803             }
804              
805             sub def_verb
806             {
807 1     1 1 7 unshift @PL_v_user_defined, checkpatsubs(@_[4,5]);
808 1         4 unshift @PL_v_user_defined, checkpatsubs(@_[2,3]);
809 1         5 unshift @PL_v_user_defined, checkpatsubs(@_[0,1]);
810 1         3 return 1;
811             }
812              
813             sub def_adj
814             {
815 1     1 1 7 unshift @PL_adj_user_defined, checkpatsubs(@_);
816 1         4 return 1;
817             }
818              
819             sub def_a
820             {
821 0     0 1 0 unshift @A_a_user_defined, checkpat(@_,'a');
822 0         0 return 1;
823             }
824              
825             sub def_an
826             {
827 0     0 1 0 unshift @A_a_user_defined, checkpat(@_,'an');
828 0         0 return 1;
829             }
830              
831             sub ud_match
832             {
833 14844     14844 0 21822 my $word = shift;
834 14844         30626 for (my $i=0; $i < @_; $i+=2)
835             {
836 44 100       317 if ($word =~ /^(?:$_[$i])$/i)
837             {
838 8 50       26 last unless defined $_[$i+1];
839 8         305 return eval '"'.$_[$i+1].'"';
840             }
841             }
842 14836         29737 return undef;
843             }
844              
845             do
846             {
847             local $SIG{__WARN__} = sub {0};
848             my $rcfile;
849              
850             $rcfile = $INC{'Lingua//EN/Inflect.pm'} || '';
851             $rcfile =~ s/Inflect.pm$/.inflectrc/;
852             do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
853             if $rcfile && -r $rcfile && -s $rcfile;
854              
855             $rcfile = "$ENV{HOME}/.inflectrc" || '';
856             do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
857             if $rcfile && -r $rcfile && -s $rcfile;
858             };
859              
860             sub postprocess # FIX PEDANTRY AND CAPITALIZATION :-)
861             {
862 11079     11079 0 23060 my ($orig, $inflected) = @_;
863 11079 100       19074 $inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e;
  199         719  
864 11079 100       39792 return $orig =~ /^I$/ ? $inflected
    100          
    100          
865             : $orig =~ /^[A-Z]+$/ ? uc $inflected
866             : $orig =~ /^[A-Z]/ ? ucfirst $inflected
867             : $inflected;
868             }
869              
870             sub PL
871             # PL($word,$number)
872             {
873 1898     1898 1 6059 my ($str, $count) = @_;
874 1898         8008 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
875 1898 50       4037 return $str unless $word;
876 1898   66     3519 my $plural = postprocess $word, _PL_special_adjective($word,$count)
877             || _PL_special_verb($word,$count)
878             || _PL_noun($word,$count);
879 1898         6115 return $pre.$plural.$post;
880             }
881              
882             sub PL_N
883             # PL_N($word,$number)
884             {
885 6973     6973 1 15654 my ($str, $count) = @_;
886 6973         32014 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
887 6973 50       14841 return $str unless $word;
888 6973         12203 my $plural = postprocess $word, _PL_noun($word,$count);
889 6973         26275 return $pre.$plural.$post;
890             }
891              
892             sub PL_V
893             # PL_V($word,$number)
894             {
895 2102     2102 1 6731 my ($str, $count) = @_;
896 2102         10021 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
897 2102 50       4490 return $str unless $word;
898 2102   66     4258 my $plural = postprocess $word, _PL_special_verb($word,$count)
899             || _PL_general_verb($word,$count);
900 2102         6544 return $pre.$plural.$post;
901             }
902              
903             sub PL_ADJ
904             # PL_ADJ($word,$number)
905             {
906 106     106 1 222 my ($str, $count) = @_;
907 106         497 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
908 106 50       224 return $str unless $word;
909 106   66     237 my $plural = postprocess $word, _PL_special_adjective($word,$count)
910             || $word;
911 106         426 return $pre.$plural.$post;
912             }
913              
914 3532 100 100 3532 0 19645 sub PL_eq { _PL_eq(@_, \&PL_N) || _PL_eq(@_, \&PL_V) || _PL_eq(@_, \&PL_ADJ); }
915 0     0 0 0 sub PL_N_eq { _PL_eq(@_, \&PL_N); }
916 0     0 0 0 sub PL_V_eq { _PL_eq(@_, \&PL_V); }
917 0     0 0 0 sub PL_ADJ_eq { _PL_eq(@_, \&PL_ADJ); }
918              
919             sub _PL_eq
920             {
921 3712     3712   6899 my ( $word1, $word2, $PL ) = @_;
922 3712         11640 my %classval = %classical;
923 3712         12739 %classical = %all_classical;
924 3712         6461 my $result = "";
925 3712 100 66     10834 $result = "eq" if !$result && $word1 eq $word2;
926 3712 100 100     7776 $result = "p:s" if !$result && $word1 eq &$PL($word2);
927 3712 100 100     8723 $result = "s:p" if !$result && &$PL($word1) eq $word2;
928 3712         7839 %classical = ();
929 3712 100 100     7278 $result = "p:s" if !$result && $word1 eq &$PL($word2);
930 3712 100 100     7053 $result = "s:p" if !$result && &$PL($word1) eq $word2;
931 3712         11994 %classical = %classval;
932              
933 3712 100 66     14203 if ($PL == \&PL || $PL == \&PL_N)
934             {
935 3532 50 66     7031 $result = "p:p"
936             if !$result && _PL_check_plurals_N($word1,$word2);
937 3532 50 66     6853 $result = "p:p"
938             if !$result && _PL_check_plurals_N($word2,$word1);
939             }
940 3712 100 66     11711 if ($PL == \&PL || $PL == \&PL_ADJ)
941             {
942 60 50 33     133 $result = "p:p"
943             if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL);
944             }
945              
946 3712         13357 return $result;
947             }
948              
949             sub _PL_reg_plurals
950             {
951 4320     4320   143706 $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/
952             }
953              
954             sub _PL_check_plurals_N
955             {
956 240     240   488 my $pair = "$_[0]|$_[1]";
957 240 50       686 foreach ( values %PL_sb_irregular_s ) { return 1 if $_ eq $pair; }
  2880         4559  
958 240 50       858 foreach ( values %PL_sb_irregular ) { return 1 if $_ eq $pair; }
  14160         21850  
959              
960 240 50 33     455 return 1 if
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
961             _PL_reg_plurals($pair, $PL_sb_U_a_ata, "as","ata")
962             || _PL_reg_plurals($pair, $PL_sb_C_a_ata, "as","ata")
963             || _PL_reg_plurals($pair, $PL_sb_U_is_ides, "is","ides")
964             || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides")
965             || _PL_reg_plurals($pair, $PL_sb_C_a_ae, "s","e")
966             || _PL_reg_plurals($pair, $PL_sb_C_en_ina, "ens","ina")
967             || _PL_reg_plurals($pair, $PL_sb_C_um_a, "ums","a")
968             || _PL_reg_plurals($pair, $PL_sb_C_us_i, "uses","i")
969             || _PL_reg_plurals($pair, $PL_sb_C_on_a, "ons","a")
970             || _PL_reg_plurals($pair, $PL_sb_C_o_i, "os","i")
971             || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices")
972             || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices")
973             || _PL_reg_plurals($pair, $PL_sb_C_i, "s","i")
974             || _PL_reg_plurals($pair, $PL_sb_C_im, "s","im")
975              
976             || _PL_reg_plurals($pair, '.*eau', "s","x")
977             || _PL_reg_plurals($pair, '.*ieu', "s","x")
978             || _PL_reg_plurals($pair, '.*tri', "xes","ces")
979             || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges");
980              
981              
982 240         1089 return 0;
983             }
984              
985             sub _PL_check_plurals_ADJ
986             {
987 0     0   0 my ( $word1a, $word2a ) = @_;
988 0         0 my ( $word1b, $word2b ) = @_;
989              
990 0 0       0 $word1a = '' unless $word1a =~ s/'s?$//;
991 0 0       0 $word2a = '' unless $word2a =~ s/'s?$//;
992 0 0       0 $word1b = '' unless $word1b =~ s/s'$//;
993 0 0       0 $word2b = '' unless $word2b =~ s/s'$//;
994              
995 0 0       0 if ($word1a)
996             {
997 0 0 0     0 return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a)
      0        
998             || _PL_check_plurals_N($word2a, $word1a) );
999 0 0 0     0 return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b)
      0        
1000             || _PL_check_plurals_N($word2b, $word1a) );
1001             }
1002 0 0       0 if ($word1b)
1003             {
1004 0 0 0     0 return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a)
      0        
1005             || _PL_check_plurals_N($word2a, $word1b) );
1006 0 0 0     0 return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b)
      0        
1007             || _PL_check_plurals_N($word2b, $word1b) );
1008             }
1009              
1010              
1011 0         0 return "";
1012             }
1013              
1014             sub _PL_noun
1015             {
1016 8899     8899   14817 my ( $word, $count ) = @_;
1017 8899         11428 my $value; # UTILITY VARIABLE
1018              
1019             # DEFAULT TO PLURAL
1020              
1021 8899 50 66     23974 $count = $persistent_count
1022             if !defined($count) && defined($persistent_count);
1023              
1024             $count = (defined $count and $count=~/^($PL_count_one)$/io
1025             or defined $count and $classical{zero}
1026 8899 100 66     32471 and $count=~/^($PL_count_zero)$/io)
1027             ? 1
1028             : 2;
1029              
1030 8899 100       15311 return $word if $count==1;
1031              
1032             # HANDLE USER-DEFINED NOUNS
1033              
1034 8893 100       14593 return $value if defined($value = ud_match($word, @PL_sb_user_defined));
1035              
1036              
1037             # HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS
1038              
1039 8890 50       16797 $word eq '' and return $word;
1040              
1041 8890 100 100     71000 $word =~ /^($PL_sb_uninflected)$/i && !exists $PL_sb_irregular{$word} && $word !~ /^($PL_sb_lese_lesen)$/i
      100        
1042             and return $word;
1043              
1044 8196 100 100     34879 $classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i
1045             and return $word;
1046              
1047              
1048             # HANDLE ISOLATED IRREGULAR PLURALS
1049              
1050             $word =~ /^($PL_sb_irregular)$/i
1051 8154 100 33     31841 and return ( $PL_sb_irregular{$1} || $PL_sb_irregular{lc $1} );
1052             $word =~ /(.*)\b($PL_sb_irregular)$/i
1053 7672 50 0     32666 and return $1 . ( $PL_sb_irregular{$2} || $PL_sb_irregular{lc $2} );
1054              
1055              
1056             # HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.)
1057              
1058 7672 100 66     39132 $word =~ /^(?:$PL_sb_postfix_adj)$/i
1059             and $value = $2
1060             and return _PL_noun($1,2)
1061             . $value;
1062              
1063 7612 100 100     34688 $word =~ /^(?:$PL_sb_prep_dual_compound)$/i
1064             and $value = [$2,$3]
1065             and return _PL_noun($1,2)
1066             . $value->[0]
1067             . _PL_noun($value->[1]);
1068              
1069 7604 100 66     38921 $word =~ /^(?:$PL_sb_prep_compound)$/i
1070             and $value = $2
1071             and return _PL_noun($1,2)
1072             . $value;
1073              
1074             # HANDLE PRONOUNS
1075              
1076             $word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i
1077 7452 100       32793 and return $1.$PL_pron_acc{lc($2)};
1078              
1079 7268 100       18146 $value = $PL_pron_nom{lc($word)}
1080             and return $value;
1081              
1082             $word =~ /^($PL_pron_acc)$/i
1083 7138 100       21794 and return $PL_pron_acc{lc($1)};
1084              
1085              
1086             # HANDLE FAMILIES OF IRREGULAR PLURALS
1087              
1088 7110 100       43823 $word =~ /(.*$PL_sb_U_man_mans)$/i
1089             and return "$1s";
1090 6866 100       20822 $word =~ /(\S*)quy$/i
1091             and return "$1quies";
1092 6858 100       16415 $word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people";
    100          
1093 6818 100       14958 $word =~ /(.*)man$/i and return "$1men";
1094 6652 100       13975 $word =~ /(.*[ml])ouse$/i and return "$1ice";
1095 6604 100       11960 $word =~ /(.*)goose$/i and return "$1geese";
1096 6596 100       12241 $word =~ /(.*)tooth$/i and return "$1teeth";
1097 6588 100       12147 $word =~ /(.*)foot$/i and return "$1feet";
1098              
1099             # HANDLE UNASSIMILATED IMPORTS
1100              
1101 6574 100       12467 $word =~ /(.*)ceps$/i and return $word;
1102 6570 100       12568 $word =~ /(.*)zoon$/i and return "$1zoa";
1103 6546 100       13304 $word =~ /(.*[csx])is$/i and return "$1es";
1104 6514 100       17907 $word =~ /(.*$PL_sb_U_a_ata)a$/i and return "$1ata";
1105 6498 100       17003 $word =~ /(.*$PL_sb_U_is_ides)is$/i and return "$1ides";
1106 6490 100       20823 $word =~ /(.*$PL_sb_U_ch_chs)ch$/i and return "$1chs";
1107 6466 100       19918 $word =~ /(.*$PL_sb_U_ex_ices)ex$/i and return "$1ices";
1108 6442 100       18636 $word =~ /(.*$PL_sb_U_ix_ices)ix$/i and return "$1ices";
1109 6418 100       22852 $word =~ /(.*$PL_sb_U_um_a)um$/i and return "$1a";
1110 6298 100       19909 $word =~ /(.*$PL_sb_U_us_i)us$/i and return "$1i";
1111 6186 100       18332 $word =~ /(.*$PL_sb_U_on_a)on$/i and return "$1a";
1112 6090 100       18040 $word =~ /(.*$PL_sb_U_a_ae)$/i and return "$1e";
1113 6058 100       17783 $word =~ /(.*$PL_sb_lese_lesen)$/i and return "$1n";
1114              
1115             # HANDLE INCOMPLETELY ASSIMILATED IMPORTS
1116              
1117 6026 100       11922 if ($classical{ancient})
1118             {
1119 4215 100       9101 $word =~ /(.*)trix$/i and return "$1trices";
1120 4180 100       8936 $word =~ /(.*)eau$/i and return "$1eaux";
1121 4126 100       8506 $word =~ /(.*)ieu$/i and return "$1ieux";
1122 4114 100       10680 $word =~ /(.{2,}[yia])nx$/i and return "$1nges";
1123 4090 100       12807 $word =~ /(.*$PL_sb_C_en_ina)en$/i and return "$1ina";
1124 4072 100       12838 $word =~ /(.*$PL_sb_C_ex_ices)ex$/i and return "$1ices";
1125 4019 100       9960 $word =~ /(.*$PL_sb_C_ix_ices)ix$/i and return "$1ices";
1126 4013 100       14476 $word =~ /(.*$PL_sb_C_um_a)um$/i and return "$1a";
1127 3840 100       12416 $word =~ /(.*$PL_sb_C_us_i)us$/i and return "$1i";
1128 3762 100       11218 $word =~ /(.*$PL_sb_C_us_us)$/i and return "$1";
1129 3734 100       22956 $word =~ /(.*$PL_sb_C_a_ae)$/i and return "$1e";
1130 3626 100       14451 $word =~ /(.*$PL_sb_C_a_ata)a$/i and return "$1ata";
1131 3458 100       15829 $word =~ /(.*$PL_sb_C_is_ides)is$/i and return "$1ides";
1132 3392 100       14640 $word =~ /(.*$PL_sb_C_o_i)o$/i and return "$1i";
1133 3344 100       8994 $word =~ /(.*$PL_sb_C_on_a)on$/i and return "$1a";
1134 3338 100       13610 $word =~ /$PL_sb_C_im$/i and return "${word}im";
1135 3308 100       13976 $word =~ /$PL_sb_C_i$/i and return "${word}i";
1136             }
1137              
1138              
1139             # HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS
1140              
1141 5107 100       31302 $word =~ /^($PL_sb_singular_s)$/i and return "$1es";
1142 3753 50 66     8513 $word =~ /^([A-Z].*s)$/ and $classical{names} and return "$1es";
1143 3695 100       11192 $word =~ /^($PL_sb_z_zes)$/i and return "$1es";
1144 3683 100       10699 $word =~ /^(.*[^z])(z)$/i and return "$1zzes";
1145 3659 100       13074 $word =~ /^(.*)([cs]h|x|zz|ss)$/i and return "$1$2es";
1146             # $word =~ /(.*)(us)$/i and return "$1$2es";
1147              
1148             # HANDLE ...f -> ...ves
1149              
1150 3438 100       7991 $word =~ /(.*[eao])lf$/i and return "$1lves";
1151 3398 100       7852 $word =~ /(.*[^d])eaf$/i and return "$1eaves";
1152 3370 100       6814 $word =~ /(.*[nlw])ife$/i and return "$1ives";
1153 3326 100       6709 $word =~ /(.*)arf$/i and return "$1arves";
1154              
1155             # HANDLE ...y
1156              
1157 3302 100       7104 $word =~ /(.*[aeiou])y$/i and return "$1ys";
1158 3242 100 100     6064 $word =~ /([A-Z].*y)$/ and $classical{names} and return "$1s";
1159 3225 100       6815 $word =~ /(.*)y$/i and return "$1ies";
1160              
1161             # HANDLE ...o
1162              
1163 3159 100       38813 $word =~ /$PL_sb_U_o_os$/i and return "${word}s";
1164 2887 100       7181 $word =~ /[aeiou]o$/i and return "${word}s";
1165 2815 100       5777 $word =~ /o$/i and return "${word}es";
1166              
1167              
1168             # OTHERWISE JUST ADD ...s
1169              
1170 2727         13776 return "${word}s";
1171             }
1172              
1173              
1174             sub _PL_special_verb
1175             {
1176 3952     3952   6897 my ( $word, $count ) = @_;
1177 3952 50 66     12163 $count = $persistent_count
1178             if !defined($count) && defined($persistent_count);
1179             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1180 3952 50 33     14612 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1181             : 2;
1182              
1183 3952 50       10150 return undef if $count=~/^($PL_count_one)$/io;
1184              
1185 3952         5508 my $value; # UTILITY VARIABLE
1186              
1187             # HANDLE USER-DEFINED VERBS
1188              
1189 3952 100       6480 return $value if defined($value = ud_match($word, @PL_v_user_defined));
1190              
1191             # HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND)
1192              
1193             $word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i
1194 3950 100       16340 and return $PL_v_irregular_pres{lc $1}.$2;
1195              
1196             # HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES
1197              
1198 3822 100       13191 $word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i
1199             and return $word;
1200              
1201             # HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND)
1202              
1203             $word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i
1204 3762 100       11359 and return $PL_v_irregular_pres{lc $1}.$2;
1205              
1206 3732 100       8815 $word =~ /^\S+n't\b/i
1207             and return $word;
1208              
1209             # HANDLE SPECIAL CASES
1210              
1211 3724 100       22272 $word =~ /^($PL_v_special_s)$/ and return undef;
1212 3076 100       7569 $word =~ /\s/ and return undef;
1213 2904 100       5426 $word =~ /^quizzes$/i and return "quiz";
1214              
1215             # HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS)
1216              
1217 2900 100       10523 $word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i and return "$1$2";
1218              
1219 2884 100       5859 $word =~ /^(..+)ies$/i and return "$1y";
1220              
1221 2875 100       13879 $word =~ /($PL_v_oes_oe)$/ and return substr($1,0,-1);
1222 2779 50       6150 $word =~ /^(.+)oes$/i and return "$1o";
1223              
1224 2779 100       7516 $word =~ /^(.*[^s])s$/i and return $1;
1225              
1226             # OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE)
1227              
1228 2604         9553 return undef;
1229             }
1230              
1231             sub _PL_general_verb
1232             {
1233 1726     1726   3259 my ( $word, $count ) = @_;
1234 1726 50 33     4988 $count = $persistent_count
1235             if !defined($count) && defined($persistent_count);
1236             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1237 1726 50 33     5896 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1238             : 2;
1239              
1240 1726 50       4302 return $word if $count=~/^($PL_count_one)$/io;
1241              
1242             # HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND)
1243              
1244             $word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i
1245 1726 100       6731 and return $PL_v_ambiguous_pres{lc $1}.$2;
1246              
1247             # HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES
1248              
1249 1722 100       5453 $word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i
1250             and return $word;
1251              
1252             # OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED
1253              
1254 1704         5305 return $word;
1255              
1256             }
1257              
1258             sub _PL_special_adjective
1259             {
1260 2004     2004   3412 my ( $word, $count ) = @_;
1261 2004 50 66     6152 $count = $persistent_count
1262             if !defined($count) && defined($persistent_count);
1263             $count = (defined $count and $count=~/^($PL_count_one)$/io or
1264 2004 100 100     7968 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1265             : 2;
1266              
1267 2004 100       5785 return $word if $count=~/^($PL_count_one)$/io;
1268              
1269              
1270             # HANDLE USER-DEFINED ADJECTIVES
1271              
1272 1999         2853 my $value;
1273 1999 100       3611 return $value if defined($value = ud_match($word, @PL_adj_user_defined));
1274              
1275             # HANDLE KNOWN CASES
1276              
1277             $word =~ /^($PL_adj_special)$/i
1278 1996 100       7745 and return $PL_adj_special{lc $1};
1279              
1280             # HANDLE POSSESSIVES
1281              
1282             $word =~ /^($PL_adj_poss)$/i
1283 1970 100       6519 and return $PL_adj_poss{lc $1};
1284              
1285 1932 100       4372 $word =~ /^(.*)'s?$/ and do { my $pl = PL_N($1);
  72         152  
1286 72 100       378 return "$pl'" . ($pl =~ m/s$/ ? "" : "s");
1287             };
1288              
1289             # OTHERWISE, NO IDEA
1290              
1291 1860         5020 return undef;
1292              
1293             }
1294              
1295              
1296             # 2. INDEFINITE ARTICLES
1297              
1298             # THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
1299             # CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
1300             # TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
1301              
1302             my $A_abbrev = q{
1303             (?! FJO | [HLMNS]Y. | RY[EO] | SQU
1304             | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
1305             [FHLMNRSX][A-Z]
1306             };
1307              
1308             # THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A
1309             # 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
1310             # IMPLIES AN ABBREVIATION.
1311              
1312             my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';
1313              
1314             # EXCEPTIONS TO EXCEPTIONS
1315              
1316             my $A_explicit_an = enclose join '|',
1317             (
1318             "euler",
1319             "hour(?!i)", "heir", "honest", "hono",
1320             );
1321              
1322             my $A_ordinal_an = enclose join '|',
1323             (
1324             "[aefhilmnorsx]-?th",
1325             );
1326              
1327             my $A_ordinal_a = enclose join '|',
1328             (
1329             "[bcdgjkpqtuvwyz]-?th",
1330             );
1331              
1332             sub A {
1333 0     0 0 0 my ($str, $count) = @_;
1334 0         0 my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
1335 0 0       0 return $str unless $word;
1336 0         0 my $result = _indef_article($word,$count);
1337 0         0 return $pre.$result.$post;
1338             }
1339              
1340 0     0 0 0 sub AN { goto &A }
1341              
1342             sub _indef_article {
1343 0     0   0 my ( $word, $count ) = @_;
1344              
1345 0 0 0     0 $count = $persistent_count
1346             if !defined($count) && defined($persistent_count);
1347              
1348 0 0 0     0 return "$count $word"
1349             if defined $count && $count!~/^($PL_count_one)$/io;
1350              
1351             # HANDLE USER-DEFINED VARIANTS
1352              
1353 0         0 my $value;
1354 0 0       0 return "$value $word"
1355             if defined($value = ud_match($word, @A_a_user_defined));
1356              
1357             # HANDLE ORDINAL FORMS
1358              
1359 0 0       0 $word =~ /^($A_ordinal_a)/i and return "a $word";
1360 0 0       0 $word =~ /^($A_ordinal_an)/i and return "an $word";
1361              
1362             # HANDLE SPECIAL CASES
1363              
1364 0 0       0 $word =~ /^($A_explicit_an)/i and return "an $word";
1365 0 0       0 $word =~ /^[aefhilmnorsx]$/i and return "an $word";
1366 0 0       0 $word =~ /^[bcdgjkpqtuvwyz]$/i and return "a $word";
1367              
1368              
1369             # HANDLE ABBREVIATIONS
1370              
1371 0 0       0 $word =~ /^($A_abbrev)/ox and return "an $word";
1372 0 0       0 $word =~ /^[aefhilmnorsx][.-]/i and return "an $word";
1373 0 0       0 $word =~ /^[a-z][.-]/i and return "a $word";
1374              
1375             # HANDLE CONSONANTS
1376              
1377 0 0       0 $word =~ /^[^aeiouy]/i and return "a $word";
1378              
1379             # HANDLE SPECIAL VOWEL-FORMS
1380              
1381 0 0       0 $word =~ /^e[uw]/i and return "a $word";
1382 0 0       0 $word =~ /^onc?e\b/i and return "a $word";
1383 0 0       0 $word =~ /^uni([^nmd]|mo)/i and return "a $word";
1384 0 0       0 $word =~ /^ut[th]/i and return "an $word";
1385 0 0       0 $word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word";
1386              
1387             # HANDLE SPECIAL CAPITALS
1388              
1389 0 0       0 $word =~ /^U[NK][AIEO]?/ and return "a $word";
1390              
1391             # HANDLE VOWELS
1392              
1393 0 0       0 $word =~ /^[aeiou]/i and return "an $word";
1394              
1395             # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)
1396              
1397 0 0       0 $word =~ /^($A_y_cons)/io and return "an $word";
1398              
1399             # OTHERWISE, GUESS "a"
1400 0         0 return "a $word";
1401             }
1402              
1403             # 2. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)"
1404              
1405             sub NO
1406             {
1407 124     124 0 2101 my ($str, $count, $opt_ref) = @_;
1408 124         669 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
1409              
1410 124 50 33     337 $count = $persistent_count
1411             if !defined($count) && defined($persistent_count);
1412 124 100       214 $count = 0 unless $count;
1413              
1414 124 100       522 if ($count =~ /^$PL_count_zero$/) {
1415 3         20 return "${pre}no ". PL($word,0) . $post ;
1416             }
1417              
1418 121         225 $count =~ s{[^\d.e+-]}{}gi;
1419 121         257 1 while $count =~ s{([.].*)[.]}{$1};
1420             $count = defined $opt_ref->{words_below} && $count < $opt_ref->{words_below}
1421 121 100 100     432 ? NUMWORDS($count)
1422             : $count;
1423              
1424 121 100 100     338 if (defined $opt_ref->{comma} || defined $opt_ref->{comma_every}) {
1425 56 100       117 $opt_ref->{comma_every} = 3 if !defined $opt_ref->{comma_every};
1426             $opt_ref->{comma} = ',' if !defined $opt_ref->{comma}
1427 56 100 100     212 || $opt_ref->{comma} =~ /^\d+$/;
1428              
1429 56         88 $count = _commify($count, @{$opt_ref}{'comma','comma_every'});
  56         106  
1430             }
1431              
1432 121         315 return "$pre$count " . PL($word,$count) . $post
1433             }
1434              
1435             sub _commify {
1436 56     56   111 my ($number, $comma, $every) = @_;
1437 56 50       101 return if !defined $comma;
1438 56 50       83 return if !defined $comma;
1439 56         309 $number =~ s{(?:(?<=^)|(?<=^-))(\d\d{$every,})}
  42         98  
1440 42         352 { my $n = $1;
1441 42         129 $n=~s/(?<=.)(?=(?:.{$every})+$)/$comma/g;
1442             $n;
1443 56         127 }e;
1444             return $number;
1445             }
1446              
1447             # PARTICIPLES
1448              
1449             sub PART_PRES
1450 6     6 0 11 {
1451 6 100 33     95 local $_ = PL_V(shift,2);
      33        
      66        
      66        
      66        
      33        
      33        
      66        
      66        
1452             s/ie$/y/
1453             or s/ue$/u/
1454             or s/([auy])e$/$1/
1455             or s/ski$/ski/
1456             or s/[^b]i$//
1457             or s/^(are|were)$/be/
1458             or s/^(had)$/hav/
1459             or s/(hoe)$/$1/
1460             or s/([^e])e$/$1/
1461             or m/er$/
1462 6         28 or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
1463             return "${_}ing";
1464             }
1465              
1466              
1467              
1468             # NUMERICAL INFLECTIONS
1469              
1470             my %nth =
1471             (
1472             0 => 'th',
1473             1 => 'st',
1474             2 => 'nd',
1475             3 => 'rd',
1476             4 => 'th',
1477             5 => 'th',
1478             6 => 'th',
1479             7 => 'th',
1480             8 => 'th',
1481             9 => 'th',
1482             11 => 'th',
1483             12 => 'th',
1484             13 => 'th',
1485             );
1486              
1487              
1488             my %ordinal;
1489             @ordinal{qw(ty one two three five eight nine twelve )}=
1490             qw(tieth first second third fifth eighth ninth twelfth);
1491              
1492             my $ordinal_suff = join '|', keys %ordinal, "";
1493              
1494             $ordinal{""} = 'th';
1495              
1496             sub ORD($)
1497 166     166 0 246 {
1498 166 100       538 my $num = shift;
1499 83         140 if ($num =~ /\d/) {
1500 83   66     425 my $tail = substr($num, -3);
1501             return $num . ($nth{$tail%100} || $nth{$tail%10});
1502             }
1503 83         523 else {
1504 83         328 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/;
1505             return $num;
1506             }
1507             }
1508              
1509              
1510             my %default_args =
1511             (
1512             'group' => 0,
1513             'comma' => ',',
1514             'and' => 'and',
1515             'zero' => 'zero',
1516             'one' => 'one',
1517             'decimal' => 'point',
1518             );
1519              
1520             my @unit = ('',qw(one two three four five six seven eight nine));
1521             my @teen = qw(ten eleven twelve thirteen fourteen
1522             fifteen sixteen seventeen eighteen nineteen);
1523             my @ten = ('','',qw(twenty thirty forty fifty sixty seventy eighty ninety));
1524              
1525             ## From http://en.wikipedia.org/wiki/Names_of_large_numbers
1526             our %millmap = (
1527             #num zeros
1528             '0' => { US => q(), UK => q(), EU => q()},
1529             '3' => { US => q(thousand), UK => q(thousand), EU => q(thousand)},
1530             '6' => { US => q(million), UK => q(million), EU => q(million)},
1531             '9' => { US => q(billion), UK => q(thousand million), EU => q(milliard)},
1532             '12' => { US => q(trillion), UK => q(billion), EU => q(billion)},
1533             '15' => { US => q(quadrillion), UK => q(thousand billion), EU => q(billiard)},
1534             '18' => { US => q(quintillion), UK => q(trillion), EU => q(trillion)},
1535             '21' => { US => q(sextillion), UK => q(thousand trillion), EU => q(trilliard)},
1536             '24' => { US => q(septillion), UK => q(quadrillion), EU => q(quadrillion)},
1537             '27' => { US => q(octillion), UK => q(thousand quadrillion), EU => q(quadrilliard)},
1538             '30' => { US => q(nonillion), UK => q(quintillion), EU => q(quintillion)},
1539             '33' => { US => q(decillion), UK => q(thousand quintillion), EU => q(quintilliard)},
1540             '36' => { US => q(undecillion), UK => q(sextillion), EU => q(sextillion)},
1541             '39' => { US => q(duodecillion), UK => q(thousand sextillion), EU => q(sextilliard)},
1542             '42' => { US => q(tredecillion), UK => q(septillion), EU => q(septillion)},
1543             '45' => { US => q(quattuordecillion), UK => q(thousand septillion), EU => q(septilliard)},
1544             '48' => { US => q(quindecillion), UK => q(octillion), EU => q(octillion)},
1545             '51' => { US => q(sedecillion), UK => q(thousand octillion), EU => q(octilliard)},
1546             '54' => { US => q(septendecillion), UK => q(nonillion), EU => q(nonillion)},
1547             '57' => { US => q(octodecillion), UK => q(thousand nonillion), EU => q(nonilliard)},
1548             '60' => { US => q(novendecillion), UK => q(decillion), EU => q(decillion)},
1549             '63' => { US => q(vigintillion), UK => q(thousand decillion), EU => q(decilliard)},
1550             '66' => { US => q(unvigintillion), UK => q(undecillion), EU => q(undecillion)},
1551             '69' => { US => q(duovigintillion), UK => q(thousand undecillion), EU => q(undecilliard)},
1552             '72' => { US => q(tresvigintillion), UK => q(duodecillion), EU => q(duodecillion)},
1553             '75' => { US => q(quattuorvigintillion), UK => q(thousand duodecillion), EU => q(duodecilliard)},
1554             '78' => { US => q(quinquavigintillion), UK => q(tredecillion), EU => q(tredecillion)},
1555             '81' => { US => q(sesvigintillion), UK => q(thousand tredecillion), EU => q(tredecilliard)},
1556             '84' => { US => q(septemvigintillion), UK => q(quattuordecillion), EU => q(quattuordecillion)},
1557             '87' => { US => q(octovigintillion), UK => q(thousand quattuordecillion), EU => q(quattuordecilliard)},
1558             '90' => { US => q(novemvigintillion), UK => q(quindecillion), EU => q(quindecillion)},
1559             '93' => { US => q(trigintillion), UK => q(thousand quindecillion), EU => q(quindecilliard)},
1560             '96' => { US => q(untrigintillion), UK => q(sedecillion), EU => q(sedecillion)},
1561             '99' => { US => q(duotrigintillion), UK => q(thousand sedecillion), EU => q(sedecilliard)},
1562             '102' => { US => q(trestrigintillion), UK => q(septendecillion), EU => q(septendecillion)},
1563             '105' => { US => q(quattuortrigintillion), UK => q(thousand septendecillion), EU => q(septendecilliard)},
1564             '108' => { US => q(quinquatrigintillion), UK => q(octodecillion), EU => q(octodecillion)},
1565             '111' => { US => q(sestrigintillion), UK => q(thousand octodecillion), EU => q(octodecilliard)},
1566             '114' => { US => q(septentrigintillion), UK => q(novendecillion), EU => q(novendecillion)},
1567             '117' => { US => q(octotrigintillion), UK => q(thousand novendecillion), EU => q(novendecilliard)},
1568             '120' => { US => q(noventrigintillion), UK => q(vigintillion), EU => q(vigintillion)},
1569             '123' => { US => q(quadragintillion), UK => q(thousand vigintillion), EU => q(vigintilliard)},
1570             '153' => { US => q(quinquagintillion), UK => q(thousand quinquavigintillion), EU => q(quinquavigintilliard) },
1571             '183' => { US => q(sexagintillion), UK => q(thousand trigintillion), EU => q(trigintilliard) },
1572             '213' => { US => q(septuagintillion), UK => q(thousand quinquatrigintillion), EU => q(quinquatrigintilliard) },
1573             '243' => { US => q(octogintillion), UK => q(thousand quadragintillion), EU => q(quadragintilliard) },
1574             '273' => { US => q(nonagintillion), UK => q(thousand quinquaquadragintillion), EU => q(quinquaquadragintilliard) },
1575             '303' => { US => q(centillion), UK => q(thousand quinquagintillion), EU => q(quinquagintilliard) },
1576             '306' => { US => q(uncentillion), UK => q(unquinquagintillion), EU => q(unquinquagintillion) },
1577             '309' => { US => q(duocentillion), UK => q(thousand unquinquagintillion), EU => q(unquinquagintilliard) },
1578             '312' => { US => q(trescentillion), UK => q(duoquinquagintillion), EU => q(duoquinquagintillion) },
1579             '333' => { US => q(decicentillion), UK => q(thousand quinquaquinquagintillion), EU => q(quinquaquinquagintilliard) },
1580             '336' => { US => q(undecicentillion), UK => q(sesquinquagintillion), EU => q(sesquinquagintillion) },
1581             '363' => { US => q(viginticentillion), UK => q(thousand sexagintillion), EU => q(sexagintilliard) },
1582             '366' => { US => q(unviginticentillion), UK => q(unsexagintillion), EU => q(unsexagintillion) },
1583             '393' => { US => q(trigintacentillion), UK => q(thousand quinquasexagintillion), EU => q(quinquasexagintilliard) },
1584             '423' => { US => q(quadragintacentillion), UK => q(thousand septuagintillion), EU => q(septuagintilliard) },
1585             '453' => { US => q(quinquagintacentillion), UK => q(thousand quinquaseptuagintillion), EU => q(quinquaseptuagintilliard) },
1586             '483' => { US => q(sexagintacentillion), UK => q(thousand octogintillion), EU => q(octogintilliard) },
1587             '513' => { US => q(septuagintacentillion), UK => q(thousand quinquaoctogintillion), EU => q(quinquaoctogintilliard) },
1588             '543' => { US => q(octogintacentillion), UK => q(thousand nonagintillion), EU => q(nonagintilliard) },
1589             '573' => { US => q(nonagintacentillion), UK => q(thousand quinquanonagintillion), EU => q(quinquanonagintilliard) },
1590             '603' => { US => q(ducentillion), UK => q(thousand centillion), EU => q(centilliard) },
1591             '903' => { US => q(trecentillion), UK => q(thousand quinquagintacentillion), EU => q(quinquagintacentilliard) },
1592             '1203' => { US => q(quadringentillion), UK => q(thousand ducentillion), EU => q(ducentilliard) },
1593             '1503' => { US => q(quingentillion), UK => q(thousand quinquagintaducentillion), EU => q(quinquagintaducentilliard) },
1594             '1803' => { US => q(sescentillion), UK => q(thousand trecentillion), EU => q(trecentilliard) },
1595             '2103' => { US => q(septingentillion), UK => q(thousand quinquagintatrecentillion), EU => q(quinquagintatrecentilliard) },
1596             '2403' => { US => q(octingentillion), UK => q(thousand quadringentillion), EU => q(quadringentilliard) },
1597             '2703' => { US => q(nongentillion), UK => q(thousand quinquagintaquadringentillion), EU => q(quinquagintaquadringentilliard) },
1598             '3003' => { US => q(millinillion), UK => q(thousand quingentillion), EU => q(quingentilliard) },
1599             );
1600              
1601             my $millchoice = 'US';
1602             my @millkeys = sort {$a <=> $b } keys %millmap;
1603             my $maxmill = (sort {$a <=> $b } keys %millmap)[-1];
1604             our $millast = undef;
1605             our @mill;
1606             for my $numZeros ( 0..$maxmill ) {
1607             next if $numZeros%3 != 0;
1608             my $arraySlot = $numZeros/3;
1609             if( defined( $millmap{$numZeros} ) ) {
1610             $mill[$arraySlot] = " $millmap{$numZeros}->{$millchoice}";
1611             $millast = $arraySlot;
1612             } else {
1613             my $missing = $arraySlot - $millast;
1614             $mill[$arraySlot] = $mill[$missing] . $mill[$millast];
1615             }
1616             }
1617 14855   100 14855 0 33079  
1618 14855 50       22969 sub mill { my $ind = $_[0]||0;
1619 14855 50       1050536 die "Number out of range\n" if $ind > $#mill;
1620             return $ind<@mill ? $mill[$ind] : ' ???illion'; }
1621 6785     6785 0 12948  
1622             sub unit { return $unit[$_[0]]. mill($_[1]); }
1623              
1624             sub ten
1625 6041 100 100 6041 0 24166 {
    100          
1626             return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2])
1627 1087   100     4489 if $_[0] ne '1';
1628             return $teen[$_[1]]. $mill[$_[2]||0];
1629             }
1630              
1631             sub hund
1632 3128 100 100 3128 0 7603 {
    100          
1633             return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '')
1634 324 100 100     892 . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0];
1635 12         45 return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2];
1636             return '';
1637             }
1638              
1639              
1640             sub enword
1641 1075     1075 0 1901 {
1642             my ($num,$group,$zero,$one,$comma,$and) = @_;
1643 1075 100       2891  
    100          
    100          
    100          
    100          
1644             if ($group==1)
1645 90 100       259 {
  3342 100       7944  
1646             $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg;
1647             }
1648             elsif ($group==2)
1649 64 100       161 {
  1602 100       3171  
1650 64 100       183 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg;
  30         77  
1651             $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e;
1652             }
1653             elsif ($group==3)
1654 64 100       142 {
  1055 100       2748  
    100          
    100          
1655 64 50       179 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg;
  23 100       64  
1656 64 100       167 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e;
  23 100       80  
1657             $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e;
1658             }
1659 49         67 elsif ($num+0==0) {
1660             $num = $zero;
1661             }
1662 45         54 elsif ($num+0==1) {
1663             $num = $one;
1664             }
1665 763         1138 else {
1666 763         856 $num =~ s/\A\s*0+//;
1667 763         3135 my $mill = 0;
  3128         7246  
1668 763         2075 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e;
  371         623  
1669 763         2248 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e;
  352         578  
1670             $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e;
1671 1075         2257 }
1672             return $num;
1673             }
1674              
1675             sub NUMWORDS
1676 1315     1315 0 17238 {
1677             my $num = shift;
1678 1315 100 66     2793  
1679 56         515 if (@_ % 2 and require Carp) {
1680             die "Missing value in option list (odd number of option args) at"
1681             . join ' line ', (caller)[1,2];
1682             }
1683 1259         4585  
1684 1259         2199 my %arg = ( %default_args, @_ );
1685             my $group = $arg{group};
1686              
1687 1259 100 100     2675 # Handle "stylistic" conversions (up to a given threshold)...
1688 230         545 if (exists $arg{threshold} && $num > $arg{threshold}) {
1689 230         409 my ($whole, $frac) = split /[.]/, $num;
1690 230 100       456 while ($arg{comma}) {
1691             $whole =~ s{ (\d) ( \d{3}(?:,|\z) ) }{$1,$2}xms
1692             or last;
1693 230 100       767 }
1694             return $frac ? "$whole.$frac" : $whole;
1695             }
1696 1029 50       2950  
1697 1029 50       2376 die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/;
    50          
1698             my $sign = ($num =~ /\A\s*\+/) ? "plus"
1699             : ($num =~ /\A\s*\-/) ? "minus"
1700             : '';
1701 1029         1803  
1702 1029         1285 my ($zero, $one) = @arg{'zero','one'};
1703 1029         1182 my $comma = $arg{comma};
1704             my $and = $arg{'and'};
1705 1029         1741  
1706             my $ord = $num =~ s/(st|nd|rd|th)\Z//;
1707 1029 100       2642 my @chunks = ($arg{decimal})
    50          
1708             ? $group ? split(/\./, $num) : split(/\./, $num, 2)
1709             : ($num);
1710 1029         1270  
1711             my $first = 1;
1712 1029 100       1643  
  7         8  
  7         9  
1713             if ($chunks[0] eq '') { $first=0; shift @chunks; }
1714 1029         1498  
1715             foreach ( @chunks )
1716 1075         1596 {
1717 1075 100       1584 s/\D//g;
1718             $_ = '0' unless $_;
1719 1075 100 100     2418  
  26         44  
1720 1049         1656 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) }
1721             else { $_ = enword($_,$group,$zero,$one,$comma,$and) }
1722 1075         2843  
1723 1075         6055 s/, \Z//;
1724 1075 100 100     3867 s/\s+,/,/g;
1725 1075         9797 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first;
1726 1075         22673 s/\s+/ /g;
1727 1075 100       2256 s/(\A\s|\s\Z)//g;
1728             $first = '' if $first;
1729             }
1730 1029         1321  
1731 1029 100       1446 my @numchunks = ();
1732             if ($first =~ /0/)
1733 7         14 {
1734             unshift @chunks, '';
1735             }
1736             else
1737 1022         4077 {
1738             @numchunks = split /\Q$comma /, $chunks[0];
1739             }
1740 1029 100 100     2150  
1741             $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/
1742             if $ord and @numchunks;
1743 1029         1879  
1744             foreach (@chunks[1..$#chunks])
1745 53         81 {
1746 53         176 push @numchunks, $arg{decimal};
1747             push @numchunks, split /\Q$comma /;
1748             }
1749 1029 50       1864  
    100          
1750             if (wantarray)
1751 0 0       0 {
1752             unshift @numchunks, $sign if $sign;
1753 0         0 return @numchunks
1754             }
1755             elsif ($group)
1756 168 50       1358 {
1757             return ($sign?"$sign ":'') . join ", ", @numchunks;
1758             }
1759             else
1760 861 50       1313 {
1761 861         1959 $num = ($sign?"$sign ":'') . shift @numchunks;
1762 861         1258 $first = ($num !~ /$arg{decimal}\Z/);
1763             foreach ( @numchunks )
1764 3197 100       6040 {
    100          
1765             if (/\A$arg{decimal}\Z/)
1766 22         40 {
1767 22         37 $num .= " $_";
1768             $first = 0;
1769             }
1770             elsif ($first)
1771 3067         4924 {
1772             $num .= "$comma $_";
1773             }
1774             else
1775 108         167 {
1776             $num .= " $_";
1777             }
1778 861         3210 }
1779             return $num;
1780             }
1781             }
1782              
1783             # Join words with commas and a trailing 'and' (when appropriate)...
1784              
1785 29     29 0 158 sub WORDLIST {
1786             my %opt;
1787             my @words;
1788 29         84  
1789 101 100       271 for my $arg (@_) {
1790 24         61 if (ref $arg eq 'HASH' ) {
  24         134  
1791             %opt = (%opt, %{$arg});
1792             }
1793 77         196 else {
1794             push @words, $arg;
1795             }
1796             }
1797 29 50       102  
1798 29 100       118 return "" if @words == 0;
1799             return "$words[0]" if @words == 1;
1800 24 100       76  
1801 24 100       65 my $conj = exists($opt{conj}) ? $opt{conj} : 'and';
1802 6         52 if (@words == 2) {
1803 6         63 $conj =~ s/^ (?=\S) | (?<=\S) $/ /gxms;
1804             return "$words[0]$conj$words[1]";
1805             }
1806              
1807 18 100       120 my $sep = exists $opt{sep} ? $opt{sep}
    50          
1808             : grep(/,/, @words) ? q{; }
1809             : q{, }
1810             ;
1811              
1812 18 100       83 my $final_sep = !exists $opt{final_sep} ? "$sep $conj"
    100          
1813             : length($opt{final_sep}) == 0 ? $conj
1814             : "$opt{final_sep} $conj"
1815 18         110 ;
1816 18         121 $final_sep =~ s/\s+/ /gmxs;
1817             $final_sep =~ s/^ (?=[^\W\d_]) | (?<=\S) $/ /gxms;
1818 18         187  
1819             return join($sep, @words[0..@words-2]) . "$final_sep$words[-1]";
1820             }
1821              
1822              
1823              
1824             1;
1825              
1826             __END__