File Coverage

blib/lib/Text/GenderFromName.pm
Criterion Covered Total %
statement 150 170 88.2
branch 93 164 56.7
condition 15 35 42.8
subroutine 14 14 100.0
pod 8 8 100.0
total 280 391 71.6


line stmt bran cond sub pod time code
1             package Text::GenderFromName;
2              
3             # Text::GenderFromName.pm
4             #
5             # Originally by Jon Orwant,
6             # Created 10 Mar 97
7             #
8             # Version 0.30 - Jul 29 2003 by
9             # Eamon Daly,
10              
11 1     1   29587 use Carp;
  1         3  
  1         112  
12 1     1   6 use strict;
  1         1  
  1         35  
13 1     1   5 use warnings;
  1         6  
  1         41  
14             require Exporter;
15 1     1   5 use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
  1         1  
  1         701  
16              
17             @ISA = qw(Exporter);
18             @EXPORT = qw(&gender);
19             @EXPORT_OK = qw(&gender_init);
20             $VERSION = '0.33';
21              
22             =head1 NAME
23              
24             Text::GenderFromName - Guess the gender of an American first name.
25              
26             =head1 SYNOPSIS
27              
28             use Text::GenderFromName;
29              
30             print gender("Jon"); # prints 'm'
31              
32             See EXAMPLES for additional uses.
33              
34             =head1 DESCRIPTION
35              
36             This module provides C, which takes a name and returns one
37             of three values: 'm' for male, 'f' for female, or undef for unknown.
38              
39             =head1 CHANGES
40              
41             Version 0.30 is a significant departure from previous versions. By
42             default, version 0.30 uses the U.S. Social Security Administration's
43             "Most Popular Names of the 1980's" list of 1001 male first names and
44             1013 female first names. See CAVEATS below for details on this list.
45              
46             Version 0.30 also allows for arbitrary female and male hashed lists to
47             be provided at run-time, and includes several built-ins to provide
48             matches based on exclusivity, weight, metaphones, and both version
49             0.20 and version 0.10 regexp-style matching. The user can also specify
50             additional match subroutines and change the match order at run-time.
51              
52             =head1 EXPORT
53              
54             The single exported function is:
55              
56             =over 4
57              
58             =item gender ($name [, $looseness])
59              
60             Returns one of three values: 'm' for male, 'f' for female, or undef
61             for unknown. C also accepts a "looseness" level: the higher
62             the looseness value, the broader the match. See THE MATCH LIST below
63             for details.
64              
65             =back
66              
67             =head1 NON-EXPORT
68              
69             The non-exported matching subs are:
70              
71             =over 4
72              
73             =item one_only ($name)
74              
75             Returns 'm' or 'f' if and only if $name is found in only one of the
76             two lists.
77              
78             =item either_weight ($name)
79              
80             Returns 'm' or 'f' if $name is found in either list. If $name is in
81             both lists, it returns the more heavily weighted of the two.
82              
83             =item one_only_metaphone ($name)
84              
85             Uses Text::DoubleMetaphone for comparison. Returns 'm' or 'f' if and
86             only if the metaphone for $name is found in only one of the two lists.
87              
88             Note that this function builds a copy of the female/male name lists to
89             speed up the metaphone lookup.
90              
91             =item either_weight_metaphone ($name)
92              
93             Uses Text::DoubleMetaphone for comparison. Returns 'm' or 'f' if $name
94             is found in either list. If $name is in both lists, it sums the
95             weights of all matching metaphones and returns the larger of the two.
96              
97             Note that this function builds a copy of the female/male name lists to
98             speed up the metaphone lookup.
99              
100             =item v2_rules ($name)
101              
102             Uses Jon Orwant's v0.20 rules for matching.
103              
104             =item v1_rules ($name)
105              
106             Uses Jon Orwant's adaptation of Scott Pakin's awk script from v0.10
107             for matching.
108              
109             =back
110              
111             If you wish to use your own hash refs containing names and weights,
112             you should explicitly import:
113              
114             =over 4
115              
116             =item gender_init ($female_names_ref, $male_names_ref)
117              
118             Initializes the male and female hashes. This package calls
119             C internally: without arguments it uses the table
120             provided by the U.S. Social Security Administration. Don't call this
121             function unless you want to override the supplied lists. See THE
122             FEMALE/MALE HASHES below for details.
123              
124             =back
125              
126             =head1 THE MATCH LIST
127              
128             C<@MATCH_LIST> contains the list of subs C will use to
129             determine the gender of a given name.
130              
131             By default, there are 6 items in @MATCH_LIST, corresponding to the
132             non-exported functions above. Strictly matching subs should go first,
133             loosely matching subs should go last, as C will iterate over
134             the list from 0 to the specified looseness value or the number of subs
135             in C<@MATCH_LIST>, whichever comes first.
136              
137             You may override this like so:
138              
139             @Text::GenderFromName::MATCH_LIST = ('main::my_matching_routine');
140              
141             =head1 THE FEMALE/MALE HASHES
142              
143             By default, these hashes are built using data from the U.S. SSA. You
144             may override them by calling C with your own female and male
145             hash refs, like so:
146              
147             use Text::GenderFromName qw( :DEFAULT &gender_init );
148              
149             my %females = ('barbly' => 4.1, 'bar' => 2.3, ...);
150             my %males = ('foobly' => 4.5, 'foo' => 1.3, ...);
151              
152             &gender_init(\%females, \%males);
153              
154             The hash keys are lowercase names, and their values are their relative
155             weights. This allows for names that could be male or female, but are
156             more often one or the other.
157              
158             =head1 EXAMPLES
159              
160             Very strict usage:
161              
162             use Text::GenderFromName;
163              
164             my @names = ('Josephine', 'Michael', 'Dondi', 'Jonny',
165             'Pascal', 'Velvet', 'Eamon', 'FLKMLKSJN');
166              
167             for (@names) {
168             # Use strict matching
169             my $gender = &gender($_) || '';
170              
171             if ($gender eq 'f') { print "$_: Female\n" }
172             elsif ($gender eq 'm') { print "$_: Male\n" }
173             else { print "$_: UNSURE\n" }
174             }
175              
176             returns:
177              
178             Josephine: Female
179             Michael: UNSURE
180             Dondi: UNSURE
181             Jonny: UNSURE
182             Pascal: UNSURE
183             Velvet: UNSURE
184             Eamon: UNSURE
185             FLKMLKSJN: UNSURE
186              
187             Loose matching:
188              
189             for (@names) {
190             # Use loose matching
191             my $gender = &gender($_, 9) || '';
192             ...
193              
194             returns:
195              
196             Josephine: Female
197             Michael: Male
198             Dondi: Male
199             Jonny: Male
200             Pascal: Male
201             Velvet: Female
202             Eamon: UNSURE
203             FLKMLKSJN: UNSURE
204              
205             Turn on debugging:
206              
207             $Text::GenderFromName::DEBUG = 1;
208              
209             returns:
210              
211             Matching "josephine":
212             one_only...
213             ==> HIT (f)
214              
215             Matching "michael":
216             one_only...
217             either_weight...
218             F: 0.0271266376105491, M: 3.4091409099979
219             ==> HIT (m)
220              
221             Matching "dondi":
222             one_only...
223             either_weight...
224             one_only_metaphone...
225             M: dondi => dante => TNT: 0.020568
226             ==> HIT (m)
227              
228             Matching "jonny":
229             one_only...
230             either_weight...
231             one_only_metaphone...
232             F: jonny => jenna => JN: 0.193945
233             M: jonny => john => JN: 1.629871
234             either_weight_metaphone...
235             F: jonny => jenna => JN: 0.193945
236             F: jonny => joanna => JN: 0.118652
237             F: jonny => jenny => JN: 0.104875
238             ...
239             M: jonny => john => JN: 1.629871
240             M: jonny => juan => JN: 0.309234
241             M: jonny => johnny => JN: 0.127193
242             ...
243             ==> HIT (m)
244              
245             Matching "pascal":
246             one_only...
247             either_weight...
248             one_only_metaphone...
249             either_weight_metaphone...
250             v2_rules...
251             ==> HIT (m)
252              
253             Matching "velvet":
254             one_only...
255             either_weight...
256             one_only_metaphone...
257             either_weight_metaphone...
258             v2_rules...
259             v1_rules...
260             ==> HIT (f)
261              
262             Matching "eamon":
263             one_only...
264             either_weight...
265             one_only_metaphone...
266             either_weight_metaphone...
267             v2_rules...
268             v1_rules...
269              
270             Matching "flkmlksjn":
271             one_only...
272             either_weight...
273             one_only_metaphone...
274             either_weight_metaphone...
275             v2_rules...
276             v1_rules...
277              
278             Josephine: Female
279             Michael: Male
280             Dondi: Male
281             Jonny: Male
282             Pascal: Male
283             Velvet: Female
284             Eamon: UNSURE
285             FLKMLKSJN: UNSURE
286              
287             Add your own match sub:
288              
289             push @Text::GenderFromName::MATCH_LIST, 'main::eamon_hack';
290              
291             sub eamon_hack {
292             my $name = shift;
293             return 'm' if $name =~ /^eamon/;
294             }
295              
296             returns:
297              
298             ...
299             Matching "eamon":
300             one_only...
301             either_weight...
302             one_only_metaphone...
303             either_weight_metaphone...
304             v2_rules...
305             v1_rules...
306             main::eamon_hack...
307             ==> HIT (m)
308              
309             Eamon: Male
310              
311             Don't use metaphones:
312              
313             @Text::GenderFromName::MATCH_LIST =
314             grep !/metaphone/, @Text::GenderFromName::MATCH_LIST;
315              
316             Use your own female/male hash lists:
317              
318             use Text::GenderFromName qw( :DEFAULT &gender_init );
319              
320             my %females = ('josephine' => 2.1);
321             my %males = ('dondi' => 4.5);
322             &gender_init(\%females, \%males);
323              
324             Use female/male hash lists from a database:
325              
326             use Text::GenderFromName qw( :DEFAULT &gender_init );
327              
328             use Tie::RDBM;
329             tie my %females, 'Tie::RDBM', {db => 'mysql:common',
330             table => 'females',
331             key => 'name',
332             value => 'weight'};
333             tie my %males, 'Tie::RDBM', {db => 'mysql:common',
334             table => 'males',
335             key => 'name',
336             value => 'weight'};
337             &gender_init(\%females, \%males);
338              
339             =head1 COMPATIBILITY
340              
341             To run v0.30 in a (mostly) backward compatible mode, override the
342             MATCH_LIST like so:
343              
344             @Text::GenderFromName::MATCH_LIST = ('v2_rules', 'v1_rules');
345              
346             and set the looseness to any value greater than 1:
347              
348             &gender($_, 9);
349              
350             Note that v0.30 uses significantly different lists than before. If
351             you'd like to use the v0.20 name lists, you may download a previous
352             version of C, cut out the hashes, and use the
353             &gender_init() function to use those lists instead. To minimize the
354             size of this module, they are not included in this module.
355              
356             =head1 CAVEATS
357              
358             =head2 REGARDING THIS MODULE
359              
360             Rules are now case-insensitive, which is a departure from earlier
361             versions of this module. Also, Orwant's v0.20 rules no longer fall
362             through, though v0.10's do.
363              
364             Version 0.30 was a complete overhaul by someone who's never submitted a
365             module to CPAN before. Please consider this fact when using
366             C module in a production environment.
367              
368             Also note that the matching routines in this module are strongly
369             biased toward American first names. None of the methods included in
370             this module correctly identify the v0.30 author's gender (m) from his
371             first name (Eamon).
372              
373             =head2 REGARDING THE DEFAULT LIST
374              
375             From http://www.ssa.gov/OACT/babynames/1999/top1000of80s.html:
376              
377             "The data comes from a 5% sampling of Social Security card
378             applications with dates of birth from January 1980 through December
379             1989."
380              
381             "All names which occurred at least five times in the sample are
382             included in the table below. The total number of males in the sample
383             is 977,255 and the total number of females is 936,349. Criteria to be
384             included in the sample is simply that a Social Security card
385             application was filed, that the year of birth was between 1980 and
386             1989, and that the birth was on US soil. As always each unique
387             spelling is considered a unique name. It may be appropriate for
388             purposes of ranking popularity of names to combine similar spellings
389             of the same name. This kind of grouping, however, is subjective and
390             time consuming, and is beyond the scope of this document. The 2000
391             edition of the World Almanac lists the top 10 names of each decade
392             based on this data after combining different spellings of the same
393             name."
394              
395             "No effort has been made to edit the data and as a result some coding
396             errors are obvious. For example initials like "A" are included in the
397             lists. Another common problem, especially for the earlier decades is
398             females coded as being male. For example Jessica is the ranked 647
399             among male names. Finally entries like "Unknown" and "Baby" are not
400             removed from the lists."
401              
402             =head2 REGARDING HENRY
403              
404             m (0.111843889261247)
405              
406             =head1 BUGS
407              
408             Did I mention this module doesn't match the v0.30 author's name?
409              
410             =head1 AUTHOR
411              
412             Originally by Jon Orwant , v0.30 by Eamon Daly
413             .
414              
415             This is an adaptation of an 8/91 awk script by Scott Pakin in the
416             December 91 issue of Computer Language Monthly.
417              
418             Small contributions by Andrew Langmead and John Strickler. Thanks to
419             Bob Baldwin, Matt Bishop, Daniel Klein, and the U.S. SSA for their
420             lists of names.
421              
422             =head1 SEE ALSO
423              
424             L
425              
426             =cut
427              
428             our ($Males, $Females);
429              
430             our $DEBUG = 0;
431              
432             our @MATCH_LIST = ('one_only',
433             'either_weight',
434             'one_only_metaphone',
435             'either_weight_metaphone',
436             'v2_rules',
437             'v1_rules');
438              
439 1     1   1682 eval "use Text::DoubleMetaphone qw(double_metaphone)";
  1         5611  
  1         65  
440              
441             if ($@) {
442             @MATCH_LIST = grep !/metaphone/, @MATCH_LIST;
443             }
444              
445             my $DEBUG_MSG = '';
446              
447             &gender_init();
448              
449             sub gender_init {
450 1     1 1 3 my ($females_ref, $males_ref) = @_;
451              
452 1 50 33     7 if (!$females_ref || !$males_ref) {
    0 0        
    0 0        
453 1         1805 my $eval = join '', ();
454 1         8342 eval $eval;
455             }
456             elsif ($males_ref && !$females_ref) {
457 0         0 carp "Male hash supplied, but not female!";
458             }
459             elsif ($females_ref && !$males_ref) {
460 0         0 carp "Female hash supplied, but not male!";
461             }
462             else {
463 0         0 $Males = $males_ref;
464 0         0 $Females = $females_ref;
465             }
466             }
467              
468             sub gender {
469 14     14 1 2547 my $name = lc(shift);
470 14   50     60 my $looseness = shift || 1;
471 14         17 my $gender = undef;
472              
473 14 50       27 if (!$name) {
474 0         0 carp "No name specified";
475 0         0 return undef;
476             }
477              
478 14 50       26 $DEBUG_MSG = qq{Matching "$name":\n} if $DEBUG;
479              
480 1     1   7 no strict 'refs';
  1         1  
  1         2971  
481              
482 14         33 for (my $i = 0; $i < $looseness; $i++) {
483 14 50       35 last if !$MATCH_LIST[$i];
484              
485 14 50       24 $DEBUG_MSG .= "\t$MATCH_LIST[$i]...\n" if $DEBUG;
486              
487 14         18 $gender = &{ $MATCH_LIST[$i] }($name);
  14         55  
488              
489 14 50 33     58 $DEBUG_MSG .= "\t==> HIT ($gender)\n" if $DEBUG && $gender;
490              
491 14 100       47 last if $gender;
492             }
493              
494 14 50       26 print STDERR "$DEBUG_MSG\n" if $DEBUG;
495              
496 14         42 return $gender;
497             }
498              
499             sub one_only {
500 2     2 1 2 my $name = shift;
501 2         3 my $gender = undef;
502              
503             # Match one list only
504              
505 2         8 my $male_hit = $Males->{$name};
506 2         6 my $female_hit = $Females->{$name};
507              
508 2 100 66     13 if ($female_hit && !$male_hit) {
    50 33        
509 1         1 $gender = 'f';
510             }
511             elsif ($male_hit && !$female_hit) {
512 0         0 $gender = 'm';
513             }
514              
515 2         4 return $gender;
516             }
517              
518             sub either_weight {
519 2     2 1 4 my $name = shift;
520 2         3 my $gender = undef;
521              
522             # Match either, weight
523              
524 2         24 my $male_hit = $Males->{$name};
525 2         9 my $female_hit = $Females->{$name};
526              
527 2 100 66     12 if ($female_hit || $male_hit) {
528 1 50       7 $gender = ($female_hit > $male_hit) ? 'f' : 'm';
529             }
530              
531 2 50 33     5 $DEBUG_MSG .= "\tF: $female_hit, M: $male_hit\n" if $DEBUG && $gender;
532              
533 2         5 return $gender;
534             }
535              
536             sub one_only_metaphone {
537 2     2 1 5 my $name = shift;
538 2         12 my $gender = undef;
539              
540             # Match one list only, use DoubleMetaphone
541              
542 2         24 my $meta_name = &double_metaphone($name);
543 2         4 my $metaphone_hit = '';
544              
545             # Copy $Females and $Males to speed sorting.
546              
547 2         23 my %females_copy = %{ $Females };
  2         1153  
548 2         67 my %males_copy = %{ $Males };
  2         977  
549              
550 2         68 my $male_hit = 0;
551 2         2 my $female_hit = 0;
552              
553 2         176 foreach my $list_name (sort
  17683         19691  
554             { $females_copy{$b} <=> $females_copy{$a} }
555             keys %females_copy) {
556 1095 100       1516 last if $female_hit;
557              
558 1094         3783 my $meta_list_name = double_metaphone($list_name);
559              
560 1094 100       2085 if ($meta_name eq $meta_list_name) {
561 1         6 $female_hit = $females_copy{$list_name};
562              
563 1 50       8 $DEBUG_MSG .= sprintf "\tF: %s => %s => %s: %f\n",
564             $name, $list_name, $meta_list_name, $females_copy{$list_name}
565             if $DEBUG;
566             }
567             }
568              
569 2         375 foreach my $list_name (sort
  17485         20200  
570             { $males_copy{$b} <=> $males_copy{$a} }
571             keys %males_copy) {
572 448 100       660 last if $male_hit;
573              
574 446         1580 my $meta_list_name = double_metaphone($list_name);
575              
576 446 100       838 if ($meta_name eq $meta_list_name) {
577 2         6 $male_hit = $males_copy{$list_name};
578              
579 2 50       17 $DEBUG_MSG .= sprintf "\tM: %s => %s => %s: %f\n",
580             $name, $list_name, $meta_list_name, $males_copy{$list_name}
581             if $DEBUG;
582             }
583             }
584              
585 2 50 66     117 if ($female_hit && !$male_hit) {
    100 66        
586 0         0 $gender = 'f';
587             }
588             elsif ($male_hit && !$female_hit) {
589 1         3 $gender = 'm';
590             }
591              
592 2         990 return $gender;
593             }
594              
595             sub either_weight_metaphone {
596 2     2 1 5 my $name = shift;
597 2         4 my $gender = undef;
598              
599             # Match either, weight, use DoubleMetaphone
600              
601 2         11 my $meta_name = &double_metaphone($name);
602              
603             # Copy $Females and $Males to speed sorting.
604              
605 2         4 my %females_copy = %{ $Females };
  2         848  
606 2         67 my %males_copy = %{ $Males };
  2         780  
607              
608 2         74 my $male_hit = 0;
609 2         4 my $female_hit = 0;
610              
611 2         228 foreach my $list_name (sort
  17710         19913  
612             { $females_copy{$b} <=> $females_copy{$a} }
613             keys %females_copy) {
614 2026         8187 my $meta_list_name = double_metaphone($list_name);
615              
616 2026 100       4537 if ($meta_name eq $meta_list_name) {
617 23         84 $female_hit += $females_copy{$list_name};
618              
619 23 50       56 $DEBUG_MSG .= sprintf "\tF: %s => %s => %s: %f\n",
620             $name, $list_name, $meta_list_name, $females_copy{$list_name}
621             if $DEBUG;
622             }
623             }
624              
625 2         472 foreach my $list_name (sort
  17477         19452  
626             { $males_copy{$b} <=> $males_copy{$a} }
627             keys %males_copy) {
628 2002         6742 my $meta_list_name = double_metaphone($list_name);
629              
630 2002 100       3411 if ($meta_name eq $meta_list_name) {
631 9         13 $male_hit += $males_copy{$list_name};
632              
633 9 50       21 $DEBUG_MSG .= sprintf "\tM: %s => %s => %s: %f\n",
634             $name, $list_name, $meta_list_name, $males_copy{$list_name}
635             if $DEBUG;
636             }
637             }
638              
639 2 100 66     109 if ($female_hit || $male_hit) {
640 1 50       9 $gender = ($female_hit > $male_hit) ? 'f' : 'm';
641             }
642              
643 2         892 return $gender;
644             }
645              
646             sub v2_rules {
647 2     2 1 4 my $name = shift;
648 2         4 my $gender = undef;
649              
650             # Match using Orwant's rules from v0.20 of Text::GenderFromName
651              
652             # Note that this no longer 'falls through' as in v0.20. Jon makes
653             # mention of the fact that the v0.10 rules are ordered, but Jon's
654             # additions appear to be exclusive.
655              
656             # jon and john
657 2 50       40 if ($name =~ /^joh?n/) { $gender = 'm' }
  0 50       0  
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    100          
    50          
    50          
658             # tom and thomas and tomas and toby
659 0         0 elsif ($name =~ /^th?o(m|b)/) { $gender = 'm' }
660 0         0 elsif ($name =~ /^frank/) { $gender = 'm' }
661 0         0 elsif ($name =~ /^bil/) { $gender = 'm' }
662 0         0 elsif ($name =~ /^hans/) { $gender = 'm' }
663 0         0 elsif ($name =~ /^ron/) { $gender = 'm' }
664 0         0 elsif ($name =~ /^ro(z|s)/) { $gender = 'f' }
665 0         0 elsif ($name =~ /^walt/) { $gender = 'm' }
666 0         0 elsif ($name =~ /^krishna/) { $gender = 'm' }
667 0         0 elsif ($name =~ /^tri(c|sh)/) { $gender = 'f' }
668             # pascal and pasqual
669 1         2 elsif ($name =~ /^pas(c|qu)al$/) { $gender = 'm' }
670 0         0 elsif ($name =~ /^ellie/) { $gender = 'f' }
671 0         0 elsif ($name =~ /^anfernee/) { $gender = 'm' }
672              
673 2         4 return $gender;
674             }
675              
676             sub v1_rules {
677 2     2 1 4 my $name = shift;
678 2         3 my $gender = undef;
679              
680             # Match using rules from v0.10 of Text::GenderFromName
681              
682             # most names ending in a/e/i/y are female
683 2 50       11 $gender = 'f' if $name =~ /^.*[aeiy]$/;
684             # allison and variations
685 2 50       6 $gender = 'f' if $name =~ /^all?[iy]((ss?)|z)on$/;
686             # cathleen, eileen, maureen
687 2 50       6 $gender = 'f' if $name =~ /een$/;
688             # barry, larry, perry
689 2 50       6 $gender = 'm' if $name =~ /^[^s].*r[rv]e?y?$/;
690             # clive, dave, steve
691 2 50       8 $gender = 'm' if $name =~ /^[^g].*v[ei]$/;
692             # carolyn, gwendolyn, vivian
693 2 50       7 $gender = 'f' if $name =~ /^[^bd].*(b[iy]|y|via)nn?$/;
694             # dewey, stanley, wesley
695 2 50       10 $gender = 'm' if $name =~ /^[^ajklmnp][^o][^eit]*([glrsw]ey|lie)$/;
696             # heather, ruth, velvet
697 2 100       12 $gender = 'f' if $name =~ /^[^gksw].*(th|lv)(e[rt])?$/;
698             # gregory, jeremy, zachary
699 2 50       6 $gender = 'm' if $name =~ /^[cgjwz][^o][^dnt]*y$/;
700             # leroy, murray, roy
701 2 50       7 $gender = 'm' if $name =~ /^.*[rlr][abo]y$/;
702             # abigail, jill, lillian
703 2 50       5 $gender = 'f' if $name =~ /^[aehjl].*il.*$/;
704             # janet, jennifer, joan
705 2 50       4 $gender = 'f' if $name =~ /^.*[jj](o|o?[ae]a?n.*)$/;
706             # duane, eugene, rene
707 2 50       6 $gender = 'm' if $name =~ /^.*[grguw][ae]y?ne$/;
708             # fleur, lauren, muriel
709 2 50       5 $gender = 'f' if $name =~ /^[flm].*ur(.*[^eotuy])?$/;
710             # lance, quincy, vince
711 2 50       6 $gender = 'm' if $name =~ /^[clmqtv].*[^dl][in]c.*[ey]$/;
712             # margaret, marylou, miri;
713 2 50       5 $gender = 'f' if $name =~ /^m[aei]r[^tv].*([^cklnos]|([^o]n))$/;
714             # clyde, kyle, pascale
715 2 50       4 $gender = 'm' if $name =~ /^.*[ay][dl]e$/;
716             # blake, luke, mi;
717 2 50       5 $gender = 'm' if $name =~ /^[^o]*ke$/;
718             # carol, karen, shar;
719 2 50       4 $gender = 'f' if $name =~ /^[cks]h?(ar[^lst]|ry).+$/;
720             # pam, pearl, rachel
721 2 50       5 $gender = 'f' if $name =~ /^[pr]e?a([^dfju]|qu)*[lm]$/;
722             # annacarol, leann, ruthann
723 2 50       10 $gender = 'f' if $name =~ /^.*[aa]nn.*$/;
724             # deborah, leah, sarah
725 2 50       5 $gender = 'f' if $name =~ /^.*[^cio]ag?h$/;
726             # frances, megan, susan
727 2 50       4 $gender = 'f' if $name =~ /^[^ek].*[grsz]h?an(ces)?$/;
728             # ethel, helen, gretchen
729 2 50       11 $gender = 'f' if $name =~ /^[^p]*([hh]e|[ee][lt])[^s]*[ey].*[^t]$/;
730             # george, joshua, theodore
731 2 50       6 $gender = 'm' if $name =~ /^[^el].*o(rg?|sh?)?(e|ua)$/;
732             # delores, doris, precious
733 2 50       4 $gender = 'f' if $name =~ /^[dp][eo]?[lr].*s$/;
734             # anthony, henry, rodney
735 2 50       5 $gender = 'm' if $name =~ /^[^jpswz].*[denor]n.*y$/;
736             # karin, kim, kristin
737 2 50       3 $gender = 'f' if $name =~ /^k[^v]*i.*[mns]$/;
738             # bradley, brady, bruce
739 2 50       4 $gender = 'm' if $name =~ /^br[aou][cd].*[ey]$/;
740             # agnes, alexis, glynis
741 2 50       5 $gender = 'f' if $name =~ /^[acgk].*[deinx][^aor]s$/;
742             # ignace, lee, wallace
743 2 50       4 $gender = 'm' if $name =~ /^[ilw][aeg][^ir]*e$/;
744             # juliet, mildred, millicent
745 2 50       7 $gender = 'f' if $name =~ /^[^agw][iu][gl].*[drt]$/;
746             # ari, bela, ira
747 2 50       7 $gender = 'm' if $name =~ /^[abeiuy][euz]?[blr][aeiy]$/;
748             # iris, lois, phyllis
749 2 50       4 $gender = 'f' if $name =~ /^[egilp][^eu]*i[ds]$/;
750             # randy, timothy, tony
751 2 50       4 $gender = 'm' if $name =~ /^[art][^r]*[dhn]e?y$/;
752             # beatriz, bridget, harriet
753 2 50       19 $gender = 'f' if $name =~ /^[bhl].*i.*[rtxz]$/;
754             # antoine, jerome, tyrone
755 2 50       5 $gender = 'm' if $name =~ /^.*oi?[mn]e$/;
756             # danny, demetri, dondi
757 2 50       3 $gender = 'm' if $name =~ /^d.*[mnw].*[iy]$/;
758             # pete, serge, shane
759 2 50       5 $gender = 'm' if $name =~ /^[^bg](e[rst]|ha)[^il]*e$/;
760             # angel, gail, isabel
761 2 50       6 $gender = 'f' if $name =~ /^[adfgim][^r]*([bg]e[lr]|il|wn)$/;
762              
763 2         3 return $gender;
764             }
765              
766             1;
767              
768             __DATA__