File Coverage

blib/lib/Lingua/EN/NameCase.pm
Criterion Covered Total %
statement 87 87 100.0
branch 34 34 100.0
condition 21 21 100.0
subroutine 8 8 100.0
pod 2 2 100.0
total 152 152 100.0


line stmt bran cond sub pod time code
1             package Lingua::EN::NameCase;
2              
3             =head1 NAME
4              
5             Lingua::EN::NameCase - Correctly case a person's name from UPERCASE or lowcase
6              
7             =cut
8              
9 7     7   344444 use warnings;
  7         63  
  7         249  
10 7     7   40 use strict;
  7         16  
  7         187  
11 7     7   4015 use locale;
  7         4406  
  7         38  
12              
13 7     7   342 use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK $HEBREW $SPANISH $ROMAN $POSTNOMINAL );
  7         17  
  7         921  
14              
15             =head1 VERSION
16              
17             Version 1.21
18              
19             =cut
20              
21             our $VERSION = '1.21';
22              
23             =head1 SYNOPSIS
24              
25             # Working with scalars; complementing lc and uc.
26              
27             use Lingua::EN::NameCase qw( nc );
28              
29             $FixedCasedName = nc( $OriginalName );
30              
31             $FixedCasedName = nc( \$OriginalName );
32              
33             # Working with arrays or array references.
34              
35             use Lingua::EN::NameCase 'NameCase';
36              
37             $FixedCasedName = NameCase( $OriginalName );
38             @FixedCasedNames = NameCase( @OriginalNames );
39              
40             $FixedCasedName = NameCase( \$OriginalName );
41             @FixedCasedNames = NameCase( \@OriginalNames );
42              
43             NameCase( \@OriginalNames ) ; # In-place.
44              
45             # NameCase will not change a scalar in-place, i.e.
46             NameCase( \$OriginalName ) ; # WRONG: null operation.
47              
48             $Lingua::EN::NameCase::SPANISH = 1;
49             # Now 'El' => 'El' instead of (default) Greek 'El' => 'el'.
50             # Now 'La' => 'La' instead of (default) French 'La' => 'la'.
51              
52             $Lingua::EN::NameCase::HEBREW = 0;
53             # Now 'Aharon BEN Amram Ha-Kohein' => 'Aharon Ben Amram Ha-Kohein'
54             # instead of (default) => 'Aharon ben Amram Ha-Kohein'.
55              
56             $Lingua::EN::NameCase::ROMAN = 0;
57             # Now 'Li' => 'Li' instead of (default) 'Li' => 'LI'.
58              
59             $Lingua::EN::NameCase::POSTNOMINAL = 0;
60             # Now 'PHD' => 'PhD' instead of (default) 'PHD' => 'Phd'.
61              
62             =head1 DESCRIPTION
63              
64             Forenames and surnames are often stored either wholly in UPPERCASE
65             or wholly in lowercase. This module allows you to convert names into
66             the correct case where possible.
67              
68             Although forenames and surnames are normally stored separately if they
69             do appear in a single string, whitespace separated, NameCase and nc deal
70             correctly with them.
71              
72             NameCase currently correctly name cases names which include any of the
73             following:
74              
75             Mc, Mac, al, el, ap, da, de, delle, della, di, du, del, der,
76             la, le, lo, van and von.
77              
78             It correctly deals with names which contain apostrophes and hyphens too.
79              
80             =head2 EXAMPLE FIXES
81              
82             Original Name Case
83             -------- ---------
84             KEITH Keith
85             LEIGH-WILLIAMS Leigh-Williams
86             MCCARTHY McCarthy
87             O'CALLAGHAN O'Callaghan
88             ST. JOHN St. John
89              
90             plus "son (daughter) of" etc. in various languages, e.g.:
91              
92             VON STREIT von Streit
93             VAN DYKE van Dyke
94             AP LLWYD DAFYDD ap Llwyd Dafydd
95             etc.
96              
97             plus names with roman numerals (up to 89, LXXXIX), e.g.:
98              
99             henry viii Henry VIII
100             louis xiv Louis XIV
101              
102             =cut
103              
104             #--------------------------------------------------------------------------
105             # Modules
106              
107 7     7   50 use Carp;
  7         14  
  7         504  
108 7     7   44 use Exporter();
  7         17  
  7         10622  
109              
110             @ISA = qw( Exporter );
111             @EXPORT = qw( nc );
112             @EXPORT_OK = qw( NameCase nc );
113              
114             #--------------------------------------------------------------------------
115             # Variables
116              
117             $HEBREW = 1;
118             $SPANISH = 0;
119             $ROMAN = 1;
120             $POSTNOMINAL = 1;
121              
122             my @POST_NOMINAL_INITIALS = qw(
123             VC GC KG LG KT LT KP GCB OM GCSI GCMG GCIE GCVO GBE CH KCB DCB KCSI KCMG
124             DCMG KCIE KCVO DCVO KBE DBE CB CSI CMG CIE CVO CBE DSO LVO OBE ISO MVO MBE
125             IOM CGC RRC DSC MC DFC AFC ARRC OBI DCM CGM GM IDSM DSM MM DFM AFM SGM IOM
126             CPM QGM RVM BEM QPM QFSM QAM CPM MSM ERD VD TD UD ED RD VRD AE
127              
128             PC ADC QHP QHS QHDS QHNS QHC SCJ J LJ QS SL QC KC JP DL MP MSP MSYP AM AM
129             MLA MEP DBEnv DConstMgt DREst EdD DPhil PhD DLitt DSocSci MD EngD DD LLD
130             DProf MA MArch MAnth MSc MMORSE MMath MMathStat MPharm MPhil MSc MSci MSt
131             MRes MEng MChem MBiochem MSocSc MMus LLM BCL MPhys MComp MAcc MFin MBA MPA
132             MEd MEP MEnt MCGI MGeol MLitt MEarthSc MClinRes BA BSc LLB BEng MBChB FdA
133             FdSc FdEng PgDip PgD PgCert PgC PgCLTHE AUH AKC AUS HNC HNCert HND HNDip
134             DipHE Dip OND CertHE ACSM MCSM DIC AICSM ARSM ARCS LLB LLM BCL MJur DPhil
135             PhD LLD DipLP FCILEx GCILEx ACILEx CQSW DipSW BSW MSW FCILT CMILT MILT CPL
136             CTP CML PLS CTL DLP PLog EJLog ESLog EMLog JrLog Log SrLog BArch MArch ARB
137             RIBA RIAS RIAI RSAW MB BM BS BCh BChir MRCS FRCS MS MCh. MRCP FRCP MRCPCH
138             FRCPCH MRCPath MFPM FFPM BDS MRCPsych FRCPsych MRCOG FRCOG MCEM FCEM FRCA
139             FFPMRCA MRCGP FRCGP BSc MScChiro MChiro MSc DC LFHOM MFHOM FFHOM FADO FBDO
140             FCOptom MCOptom MOst DPT MCSP FCSP. SROT MSCR FSCR. CPhT RN VN RVN BVSc
141             BVetMed VetMB BVM&S MRCVS FRCVS FAWM PGCAP PGCHE PGCE PGDE BEd NPQH QTS
142             CSci CSciTeach RSci RSciTech CEng IEng EngTech ICTTech DEM MM CMarEng
143             CMarSci CMarTech IMarEng MarEngTech RGN SRN RMN RSCN SEN EN RNMH RN RM RN1
144             RNA RN2 RN3 RNMH RN4 RN5 RNLD RN6 RN8 RNC RN7 RN9 RHV RSN ROH RFHN SPAN
145             SPMH SPCN SPLD SPHP SCHM SCLD SPCC SPDN V100 V200 V300 LPE MSc
146             );
147              
148             #--------------------------------------------------------------------------
149             # Functions
150              
151             =head2 NameCase
152              
153             Takes a scalar, scalarref, array or arrayref, and changes the case of the
154             contents, as appropriate. Essentially a wrapper around nc().
155              
156             =cut
157              
158             sub NameCase {
159 12 100 100 12 1 11468 croak "Usage: \$SCALAR|\@ARRAY = NameCase [\\]\$SCALAR|\@ARRAY"
      100        
160             if ref $_[0] and ( ref $_[0] ne 'ARRAY' and ref $_[0] ne 'SCALAR' );
161              
162 11         24 local( $_ );
163              
164 11 100 100     87 if( wantarray and ( scalar @_ > 1 or ref $_[0] eq 'ARRAY' ) ) {
    100 100        
    100          
    100          
165             # We have received an array or array reference in a list context
166             # so we will return an array.
167 3 100       6 map { nc( $_ ) } @{ ref( $_[0] ) ? $_[0] : \@_ };
  94         223  
  3         15  
168              
169             } elsif( ref $_[0] eq 'ARRAY' ) {
170             # We have received an array reference in a scalar or void context
171             # so we will work on the array in-place.
172 1         3 foreach ( @{ $_[0] } ) {
  1         5  
173 46         102 $_ = nc( $_ );
174             }
175              
176             } elsif( ref $_[0] eq 'SCALAR' ) {
177             # We don't work on scalar references in-place; we take the value
178             # and return a name-cased copy.
179 3         4 nc( ${ $_[0] } );
  3         12  
180              
181             } elsif( scalar @_ == 1 ) {
182             # We've received a scalar: we return a name-cased copy.
183 3         8 nc( $_[0] );
184              
185             } else {
186 1         187 croak "NameCase only accepts a single scalar, array or array ref";
187             }
188             }
189              
190             =head2 nc
191              
192             Takes a scalar or scalarref, and change the case of the name in the
193             corresponding string appropriately.
194              
195             =cut
196              
197             sub nc {
198 177 100 100 177 1 14611 croak "Usage: nc [[\\]\$SCALAR]"
      100        
199             if scalar @_ > 1 or ( ref $_[0] and ref $_[0] ne 'SCALAR' );
200              
201 174 100       508 local( $_ ) = @_ if @_;
202 174 100       345 $_ = ${$_} if ref( $_ ) ; # Replace reference with value.
  2         5  
203              
204 174 100       340 return $_ unless($_);
205              
206 173         378 $_ = lc ; # Lowercase the lot.
207 173         1413 s{ \b (\w) }{\u$1}gx; # Uppercase first letter of every word.
208 173         433 s{ (\'\w) \b }{\L$1}gx; # Lowercase 's.
209              
210             # Name case Mcs and Macs - taken straight from NameParse.pm incl. comments.
211             # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace
212             # Exclude names ending in a,c,i,o, or j are typically Polish or Italian
213              
214 173 100 100     785 if ( /\bMac[A-Za-z]{2,}[^aciozj]\b/ or /\bMc/ ) {
215 48         293 s/\b(Ma?c)([A-Za-z]+)/$1\u$2/g;
216              
217             # Now correct for "Mac" exceptions
218 48         101 s/\bMacEvicius/Macevicius/g; # Lithuanian
219 48         76 s/\bMacHado/Machado/g; # Portuguese
220 48         78 s/\bMacHar/Machar/g;
221 48         77 s/\bMacHin/Machin/g;
222 48         77 s/\bMacHlin/Machlin/g;
223 48         81 s/\bMacIas/Macias/g;
224 48         67 s/\bMacIulis/Maciulis/g;
225 48         71 s/\bMacKie/Mackie/g;
226 48         81 s/\bMacKle/Mackle/g;
227 48         69 s/\bMacKlin/Macklin/g;
228 48         79 s/\bMacQuarie/Macquarie/g;
229 48         66 s/\bMacOmber/Macomber/g;
230 48         71 s/\bMacIn/Macin/g;
231 48         59 s/\bMacKintosh/Mackintosh/g;
232 48         93 s/\bMacKen/Macken/g;
233 48         72 s/\bMacHen/Machen/g;
234 48         128 s/\bMacisaac/MacIsaac/g;
235 48         76 s/\bMacHiel/Machiel/g;
236 48         75 s/\bMacIol/Maciol/g;
237 48         70 s/\bMacKell/Mackell/g;
238 48         64 s/\bMacKlem/Macklem/g;
239 48         77 s/\bMacKrell/Mackrell/g;
240 48         77 s/\bMacLin/Maclin/g;
241 48         66 s/\bMacKey/Mackey/g;
242 48         92 s/\bMacKley/Mackley/g;
243 48         66 s/\bMacHell/Machell/g;
244 48         70 s/\bMacHon/Machon/g;
245             }
246 173         313 s/Macmurdo/MacMurdo/g;
247              
248             # Fixes for "son (daughter) of" etc. in various languages.
249 173         281 s{ \b Al(?=\s+\w) }{al}gx; # al Arabic or forename Al.
250 173         263 s{ \b Ap \b }{ap}gx; # ap Welsh.
251             # search for: followed by ben
252             # without first (?<=\S\s), first name of 'ben jones' remains lowercase
253 173 100       361 s{ (?<=\S\s)\bBen(?=\s+\w) }{ben}gx if $HEBREW; # ben Hebrew or forename Ben.
254 173 100       333 s{ (?<=\S\s)\bBat(?=\s+\w) }{bat}gx if $HEBREW; # bat Hebrew or forename Bat.
255 173         268 s{ \b Dell([ae])\b }{dell$1}gx; # della and delle Italian.
256 173         306 s{ \b D([aeiu]) \b }{d$1}gx; # da, de, di Italian; du French.
257 173         312 s{ \b De([lr]) \b }{de$1}gx; # del Italian; der Dutch/Flemish.
258 173 100       336 s{ \b El \b }{el}gx unless $SPANISH; # el Greek or El Spanish.
259 173 100       327 s{ \b La \b }{la}gx unless $SPANISH; # la French or La Spanish.
260 173         287 s{ \b L([eo]) \b }{l$1}gx; # lo Italian; le French.
261 173         277 s{ \b Van(?=\s+\w) }{van}gx; # van German or forename Van.
262 173         270 s{ \b Von \b }{von}gx; # von Dutch/Flemish
263              
264 173 100       301 if($ROMAN) {
265             # Fixes for roman numeral names, e.g. Henry VIII, up to 89, LXXXIX
266 168         2015 s{ \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )?
267             (?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b }{\U$1}gx;
268             }
269              
270 173 100       407 if($POSTNOMINAL) {
271             # post-nominal initials
272 170         287 for my $pni (@POST_NOMINAL_INITIALS) {
273 56100         366155 s{ \b ($pni) $}{$pni}ix;
274             }
275             }
276              
277 173         712 $_;
278             }
279              
280             =head1 BUGS
281              
282             The module covers the rules that I know of. There are probably a lot
283             more rules, exceptions etc. for "Western"-style languages which could be
284             incorporated.
285              
286             There are probably lots of exceptions and problems - but as a general
287             data 'cleaner' it may be all you need.
288              
289             Use Kim Ryan's L for any really sophisticated name parsing.
290              
291             =head1 SUPPORT
292              
293             You can find documentation for this module with the perldoc command.
294              
295             perldoc Lingua::EN::NameCase
296              
297             You can also look for information at:
298              
299             =over 4
300              
301             =item * MetaCPAN
302              
303             L
304              
305             =item * RT: CPAN's request tracker
306              
307             L
308              
309             =item * CPANTS
310              
311             L
312              
313             =item * CPAN Testers' Matrix
314              
315             L
316              
317             =item * CPAN Ratings
318              
319             L
320              
321             =item * CPAN Testers Dependencies
322              
323             L
324              
325             =back
326              
327             =head1 AUTHOR
328              
329             1998-2014 Mark Summerfield
330             2014-present Barbie
331              
332             2020- Maintained by Nigel Horne, C<< >>
333              
334             =head1 ACKNOWLEDGEMENTS
335              
336             Thanks to Kim Ryan for his Mc/Mac solution.
337              
338             =head1 COPYRIGHT
339              
340             Copyright (c) Mark Summerfield 1998-2014. All Rights Reserved.
341             Copyright (c) Barbie 2014-2015. All Rights Reserved.
342              
343             This distribution is free software; you can redistribute it and/or
344             modify it under the Artistic Licence v2.
345              
346             =cut
347              
348             1;
349