File Coverage

blib/lib/Geo/PostalAddress.pm
Criterion Covered Total %
statement 82 94 87.2
branch 26 36 72.2
condition 7 9 77.7
subroutine 11 13 84.6
pod 7 7 100.0
total 133 159 83.6


line stmt bran cond sub pod time code
1             #
2             # $Id: PostalAddress.pm,v 1.4 2005/04/30 18:39:28 michel Exp $
3             #
4              
5             package Geo::PostalAddress;
6 2     2   7182 use strict;
  2         4  
  2         88  
7             require 5.00503;
8              
9 2     2   10 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  2         5  
  2         273  
10             require Exporter;
11             @ISA = qw(Exporter);
12             @EXPORT = ();
13             @EXPORT_OK = ();
14             %EXPORT_TAGS = ();
15             $VERSION = 0.04; # ExtUtils::MakeMaker will use this.
16             my $save_version = $VERSION; # Save so I can clean up after Locale::SubCountry
17              
18 2     2   2441 use UNIVERSAL;
  2         30  
  2         11  
19 2     2   1856 use Locale::Country;
  2         90297  
  2         216  
20 2     2   2185 use Locale::SubCountry;
  2         478945  
  2         50  
21 2     2   152 use Carp;
  2         4  
  2         24329  
22              
23             if ($save_version ne $VERSION) { # Workaround for Locale::SubCountry lossage
24             $Locale::SubCountry::VERSION = $VERSION;
25             $VERSION = $save_version;
26             }
27              
28             my (%per_country_data, %default_per_country_data);
29              
30             =head1 NAME
31              
32             Geo::PostalAddress - Country-specific postal address parsing/formatting
33              
34             =head1 DESCRIPTION
35              
36             This module converts postal (snail mail) addresses between an
37             unstructured country-neutral format (an array of character strings)
38             and a country-specific format that's hopefully meaningful to postal
39             authorities, courier/delivery services, residents, ... of that
40             country for postal address entry. It should handle most countries
41             out of the box with only minor or technical divergences from
42             approved bulk-mailing formats; if needed, country-specific code can
43             be added to make it fully conformant to those formats.
44              
45             The intended audience for this module is anyone needing to handle
46             most addresses in a recognizable country-specific format, without
47             going into the full generality and complexity that UPU standards
48             would appear to require.
49              
50             =head1 SYNOPSIS
51              
52             use Geo::PostalAddress;
53              
54             my $AU_parser = Geo::PostalAddress->new('AU');
55             my $format = $AU_parser->format();
56             # $format now contains:
57             # [['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 3,
58             # ['City', 40],
59             # ['State', {NSW => "New South Wales", TAS => "Tasmania",
60             # QLD => "Queensland", SA => "South Australia",
61             # WA => "Western Australia", VIC => "Victoria",
62             # ACT => "Australian Capital Territory",
63             # NT => "Northern Territory"}], ['Postcode', 4, qr/^\d\d\d\d$/]]
64             # 40 in ['Addr1', 40] is the suggested displayed field width (not the maximum
65             # length). 3 means that the next 3 fields should/could be on the same row.
66             # ['State', {...}] means an enumerated list is used for this field, with keys
67             # being the stored values and values being the labels used for display or
68             # selection.
69             my $display = $AU_parser->display(["4360 DUKES RD", "KALGOORLIE WA 6430"]);
70             # $display now contains:
71             # {Addr1 => "4360 DUKES RD", City => "KALGOORLIE",
72             # State => "WA", Postcode => "6430"}
73              
74             my $US_parser = Geo::PostalAddress->new('US');
75             my $address = {Addr1 => "123 MAGNOLIA ST", City => "HEMPSTEAD",
76             State => "NY", ZIP => "11550-1234"};
77             my $result = $US_parser->storage($address);
78             unless (ref $result) { carp "Bad postal address: $result.\n"; }
79              
80             my $AU_to_US_address_label = $US_parser->label("AU", "MR JOHN DOE", $result);
81             # What to print on an address label or on an envelope, if mailing from
82             # Australia to the United States.
83              
84             =head1 METHODS
85              
86             =head2 new()
87              
88             C<Geo::PostalAddress-E<gt>new($country)> returns undef, or a blessed
89             reference to a parser suitable for handling the most common postal address
90             formats for that country. Depending on the country, this reference may be
91             blessed into Geo::PostalAddress or into a country-specific subclass.
92              
93             =cut
94              
95             sub new {
96 21     21 1 10083 my ($class, $code) = @_;
97 21         39 my $country_class = "Geo::PostalAddress::$code";
98 21         28 my ($object, $country_new);
99 19         125 my $instance_data = (exists $per_country_data{$code})
100 21 100       73 ? {_country_code => $code, %{$per_country_data{$code}}}
101             : {_country_code => $code, %default_per_country_data};
102              
103 21 100       74 unless (defined code2country($code, LOCALE_CODE_ALPHA_2)) { return undef; }
  1         73  
104              
105 20 50       826 if (exists($Geo::PostalAddress::{"${code}::"})) { # Country class exists
106 0 0       0 if (($country_new = $country_class->can("new")) != \&new) { # Has own new()
107             # XXX fails if &$country_new calls Geo::PostalAddress->new. MUST FIX.
108 0         0 $object = $country_new->($country_class, $code);
109             } else { # Country class, inherits our new()
110 0         0 $object = bless $instance_data, $country_class;
111             }
112             } else { # No country class.
113 20         48 $object = bless $instance_data, $class;
114             }
115              
116 20         51 return $object;
117             }
118              
119             =head2 format
120              
121             C<$parser-E<gt>format()> returns a reference to an array describing the
122             (display/input) fields that make a postal address, and gives some hints
123             about on-screen layout. Each element of the array can be an integer n > 0,
124             meaning the next n fields should be on the same line if window/screen width
125             allows it, or a reference to an array describing a field. Each field
126             description contains the field name and either a maximum length for a text
127             field or a hash of {stored => display} values for an enumerated field. An
128             optional regex can also be specified. If present, it should be compatible with
129             both perl and javascript, so it can be used in both client-side and server-side
130             programs or modules.
131              
132             An example for Australia may be:
133              
134             [["Addr1", 40], ["Addr2", 40], ["Addr3", 40], ["Addr4", 40], 3, ["City", 40],
135             ["State", {NSW => "New South Wales", TAS => "Tasmania", QLD => "Queensland",
136             SA => "South Australia", WA => "Western Australia",
137             VIC => "Victoria", ACT => "Australian Capital Territory",
138             NT => "Northern Territory"}], ["Postcode", 4, qr/^\d\d\d\d$/]]
139              
140             =cut
141              
142             sub format {
143 0     0 1 0 my ($self) = @_;
144              
145 0         0 return $self->{_format};
146             }
147              
148             =head2 display
149              
150             C<$parser-E<gt>display($stored)> converts the postal address in @$stored to a
151             format suitable for data input and returns a reference to a hash. The keys of
152             the hash appear as fieldnames in the return value of C<$parser-E<gt>format()>.
153              
154             If @$stored doesn't contain an address in the country $parser is an instance
155             of, weird results are nearly certain.
156              
157             =cut
158              
159             sub display {
160 20     20 1 7366 my ($self, $stored) = @_;
161 20         27 my %display;
162 20         31 my $limit = 0;
163 20         26 my @regex_results; # Cache, 1 per regex (*not* per stored address field)
164              
165 20         22 foreach my $segment (@{$self->{_s2d_map}}) {
  20         60  
166 122 100       319 if ($segment->{StoredRownum} < $limit) {
167 19         39 $limit = $segment->{StoredRownum};
168             }
169             }
170              
171 20         34 $limit += @$stored; # Map positive indexes >= this to empty lines.
172              
173 20         21 foreach my $segment (@{$self->{_s2d_map}}) {
  20         84  
174 122 100       275 my $line
175             = ($segment->{StoredRownum} >= $limit)
176             ? ""
177             : $stored->[$segment->{StoredRownum}];
178              
179 122 100       238 if (exists($segment->{StoredColnum})) {
180 8 100       27 $line = exists($segment->{StoredCollen})
181             ? substr($line, $segment->{StoredColnum}, $segment->{StoredCollen})
182             : substr($line, $segment->{StoredColnum});
183             }
184              
185 122 100       236 if (exists($segment->{StoredRegexnum})) {
186 17         26 my $renum = $segment->{StoredRegexnum};
187              
188 17 100 66     79 if ($renum > $#regex_results or !defined($regex_results[$renum])) {
189             # First time for this regex; cache results.
190 8         19 my $regex = $self->{_regexes}->[$renum];
191 8         93 my @fields = $line =~ /$regex/;
192              
193 8         21 $regex_results[$renum] = \@fields;
194             }
195              
196             # XXX Complain if not present?
197 17         40 $line = $regex_results[$renum]->[$segment->{StoredFieldnum}];
198             }
199              
200 122         320 $display{$segment->{DisplayName}} = $line;
201             }
202              
203 20         63 $self->normalize(\%display); # XXX Do something with return value?
204 20         66 return \%display;
205             }
206              
207             =head2 storage
208              
209             C<$parser-E<gt>storage($display)> makes country-dependent checks against the
210             postal address in %$display. If it passes all the checks,
211             C<$parser-E<gt>storage($display)> converts it to a format suitable for storage
212             and returns a reference to an array. Otherwise,
213             C<$parser-E<gt>storage($display)> returns a string representing an error
214             message.
215              
216             If %$display doesn't contain an address in the country $parser is an instance
217             of, weird results are nearly certain.
218              
219             =cut
220              
221             sub storage {
222 20     20 1 3950 my ($self, $display) = @_;
223 20         24 my (@storage, @storage_bottom);
224              
225 20         26 foreach my $field (@{$self->{_format}}) {
  20         51  
226 132 50 100     728 if (ref($field) && (@$field >= 3)
      66        
227             && ($display->{$field->[0]} !~ $field->[2])) {
228 0         0 return "$field->[0]: missing or incorrect value"; # XXX be more specific?
229             }
230             }
231              
232 20 50       51 if (my $errmsg = $self->normalize($display)) { return $errmsg; }
  0         0  
233              
234 20         24 foreach my $segment (@{$self->{_d2s_map}}) {
  20         51  
235 111         467 my $line = $segment->{StoredTemplate};
236 111         142 my $rownum = $segment->{StoredRownum};
237              
238 111         487 $line =~ s/\${([^{}]+)}/$display->{$1}/eg;
  122         395  
239              
240 111 100       214 if ($rownum < 0) {
241 29         77 $storage_bottom[1 - $rownum] = $line;
242             } else {
243 82         199 $storage[$rownum] = $line;
244             }
245             }
246              
247 20 50       45 @storage = grep { defined($_) && $_ } @storage;
  82         285  
248 20 100       32 @storage_bottom = grep { defined($_) && $_ } @storage_bottom;
  67         192  
249 20         36 push @storage, reverse @storage_bottom;
250 20         69 return \@storage;
251             }
252              
253             =head2 label
254              
255             C<$parser-E<gt>label($origin_country, $recipient, $address)> returns a
256             reference to an array containing an address label suitable for correspondence
257             from a sender in $origin_country (2-letter ISO 3166 code) to $recipient (can be
258             a string or an array reference, eg ["Aby's Auto Repair", "Kell Dewclaw"]) at
259             $address (as returned from C<$parser-E<gt>storage()>) in the country for
260             $parser.
261              
262             The default version just tacks on the name of the destination country, if not
263             the same as the origin country.
264              
265             =cut
266              
267             sub label {
268 20     20 1 8041 my ($self, $origin_country, $recipient, $address) = @_;
269 20         29 my @label;
270              
271 20 50       38 if (ref $recipient) { @label = @$recipient; } else { @label = ($recipient); }
  0         0  
  20         44  
272 20         50 push @label, @$address;
273 20 50       56 if ($origin_country ne $self->{_country_code}) {
274 20         58 push @label, code2country($self->{_country_code});
275             }
276              
277 20         804 return \@label;
278             }
279              
280             =head2 option
281              
282             C<$parser-E<gt>option($name [ , $value] )> returns the setting of option $name
283             for parser $parser, after changing it to $value if specified.
284              
285             Available options and meaningful values for each option depend on the country
286             $parser is for.
287              
288             =cut
289              
290             sub option {
291 0     0 1 0 my ($self, $name, $value) = @_;
292              
293 0 0       0 if (defined $value) { $self->{_options}->{$name} = $value; }
  0         0  
294 0         0 return $self->{_options}->{$name};
295             }
296              
297             =head2 normalize
298              
299             C<$parser-E<gt>normalize($display)> normalizes the address in %$display by
300             tweaking unambiguous but technically incorrect elements. It can also, if
301             needed, check it for validity and return an error message. If no problems were
302             found, it should return "".
303              
304             This method is called from within C<display()> and C<storage()>, and users of
305             this module shouldn't normally need to call it directly. It exists so it can be
306             overridden in subclasses. The default version does nothing.
307              
308             =cut
309              
310             sub normalize {
311 40     40 1 78 return "";
312             }
313              
314             =head1 INTERNALS
315              
316             Unless you plan to add a country or change the format information for a
317             country, either directly in the base class (this) or as a subclass, you can
318             safely skip this. (But if you're curious, feel free to read on.)
319              
320             %per_country_data is a hash using the 2-letter ISO 3166-1 country code as the
321             key. The value is a hash reference ($hr in the following description) with the
322             following fields:
323              
324             =over 4
325              
326             =item _format
327              
328             This array reference is actually what C<$parser-E<gt>format()> returns.
329              
330             Each element can be a number n > 0, hinting that the next n fields should be on
331             the same line, if the terminal or window width allows it, but otherwise
332             ignored. Otherwise, it is an array reference describing a single field of the
333             address, and has the following elements:
334              
335             =over 4
336              
337             =item 0
338              
339             The name of a field. For maximum compatibility with form description languages
340             (including the forms part of HTML), this should match /^\w+$/ in the C locale,
341             but this module only requires that it not contain {}. The name should be
342             present in C<map { $_-E<gt>{DisplayName} } @{$hr-E<gt>{_s2d_map}}> (see
343             _s2d_map below).
344              
345             =item 1
346              
347             Can be a number E<gt> 0, indicating the maximum length of a text field, or a
348             hash of { stored =E<gt> displayed } mappings, indicating an enumerated field.
349             (Note that in the latter case, the order and layout of the values are left to
350             the discretion of the user of this module.)
351              
352             =item 2
353              
354             An optional validation regex can also be specified. If present, it should be
355             compatible with both perl and javascript, so it can be used in both client-side
356             and server-side programs or modules. Note that although most regexes would be
357             anchored at both ends, this isn't required or enforced.
358              
359             =back
360              
361             =item _s2d_map
362              
363             (storage-to-display map) This is an array of hash references, each describing
364             how to retrieve the value of one display field from the stored unstructured
365             text strings. Each element has the following fields:
366              
367             =over 4
368              
369             =item StoredRownum
370              
371             (stored row number) The row in the array of text lines where the field is. That
372             number is used as a perl-style array index (E<gt>=0 from the start, E<lt> 0
373             back from the end), except that on any given unstructured address, if there
374             aren't enough rows to map to both positive and negative indices without
375             overlap, the positive indices that would actually map to a row overlapping the
376             region starting with the negative index having the largest absolute value and
377             going to the end of the array are considered to return "" instead of the actual
378             row. In other words, using the array of lines qw(eenie meenie minie moe),
379             indexes -2 0 1 2 3 would return "minie", "eenie", "meenie", "", "" (even though
380             there is no -1 that would return "moe").
381              
382             =item StoredColnum
383              
384             (stored column number) The optional column in the line where the field (or
385             regex input) starts, from 0 for the first column. If absent, the field (or
386             regex input) is the whole line, even if StoredCollen is present. Note that
387             StoredColnum can be negative (with the expected result for the second argument
388             to L<perlfunc/substr>), but if so, there's no special handling, unlike
389             for StoredRownum.
390              
391             =item StoredCollen
392              
393             (stored column length) The optional length of the field (or regex input). If
394             absent or if StoredColnum is absent, the field (or regex input) extends to the
395             end of the line. Note that StoredCollen can be negative (with the expected
396             result for the third argument to L<perlfunc/substr>), but if so, there's
397             no special handling, unlike for StoredRownum.
398              
399             =item StoredRegexnum
400              
401             (stored regex number) The optional index of a regular expression in
402             C<@{$hr-E<gt>{_regexes}}> to be matched against the line (or the substring
403             selected by StoredColnum and StoredCollen if applicable) to extract the field
404             value from it. See the description of _regexes below for important restrictions
405             on regex use.
406              
407             =item StoredFieldnum
408              
409             (stored field number) The optional index into the array returned by the regex
410             matching mentioned above of the data to be returned as the field value. Note
411             that if StoredRegexnum is present, StoredFieldnum must be present too.
412              
413             =item DisplayName
414              
415             (display (field) name) The name of a field in C<@{$hr-E<gt>{_format}}>. This is
416             also the key used in the record hash returned by C<$parser-E<gt>display()>.
417              
418             =back
419              
420             Note that although StoredColnum, StoredCollen, StoredRegexnum, and
421             StoredFieldnum are all optional, not all combinations make sense. Specifically:
422              
423             =over 4
424              
425             =item *
426              
427             At least one of StoredColnum and StoredRegexnum must be present; if both are,
428             StoredColnum (and StoredCollen if also present) are used before StoredRegexnum
429             and StoredFieldnum.
430              
431             =item *
432              
433             If StoredCollen is present without StoredColnum, it is ignored.
434              
435             =item *
436              
437             If StoredRegexnum is present, StoredFieldnum must be present too; if
438             StoredFieldnum is present without StoredRegexnum, it is ignored.
439              
440             =back
441              
442             =item _d2s_map
443              
444             (display-to-storage map) This is an array of hash references, each describing
445             how to generate one line of the unstructured string array used for storage from
446             the parsed fields used for display. Each element has the following fields:
447              
448             =over 4
449              
450             =item StoredTemplate
451              
452             (stored template) A string containing boilerplate text and field references of
453             the form ${foo} for field foo (using the field names in _format and _s2d_map).
454             Currently, there is no way to escape $, {, or } if they're part of a sequence
455             that could be interpreted as a field reference.
456              
457             =item StoredRownum
458              
459             (stored row number) A number that indicates in which row of the unstructured
460             storage string array this should go. This can be positive, 0, or negative, with
461             the same intended meaning as for _s2d_map, except that while putting the array
462             together, it grows in the middle as necessary to accommodate positive indexes.
463              
464             =back
465              
466             =item _regexes
467              
468             (regular expressions) A reference to an array of strings representing regexes,
469             in any form perl will accept (single-quoted, double-quoted, qr//, etc...) for
470             use in parsing unstructured storage strings into structured display fields.
471             Note that each regex is matched at most once in the course of a single
472             invocation to C<$hr-E<gt>display()>, and its results cached for reuse. This is
473             true even if a subsequent match would use another string than the first. In
474             practice, this isn't a problem, as a given regex would normally be applied to
475             one storage line only. However, if this isn't the case, that regex must be
476             repeated, each line pointing (through StoredRegexnum) to its own copy.
477              
478             =back
479              
480             %default_per_country_data is similar, but for countries with unspecified
481             address formats. It's a single hash with the same structure as %$hr above.
482              
483             C<Geo::PostalAddress-E<gt>new()> initializes the object hash with those
484             fields, and adds a _country_code field that holds the 2-letter code, in case we
485             need to retrieve other info later.
486              
487             Note that the above applies to the base class only. Subclasses may use other or
488             different data, instead of or in addition to this.
489              
490             =cut
491              
492             %default_per_country_data = (
493             _format => [
494             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], ['Addr5', 40]
495             ],
496             _s2d_map => [
497             {StoredRownum => 0, DisplayName => 'Addr1'},
498             {StoredRownum => 1, DisplayName => 'Addr2'},
499             {StoredRownum => 2, DisplayName => 'Addr3'},
500             {StoredRownum => 3, DisplayName => 'Addr4'},
501             {StoredRownum => 4, DisplayName => 'Addr5'}
502             ],
503             _d2s_map => [
504             {StoredTemplate => '${Addr1}', StoredRownum => 0},
505             {StoredTemplate => '${Addr2}', StoredRownum => 1},
506             {StoredTemplate => '${Addr3}', StoredRownum => 2},
507             {StoredTemplate => '${Addr4}', StoredRownum => 3},
508             {StoredTemplate => '${Addr5}', StoredRownum => 4},
509             ]
510             );
511              
512             # District name, no city or postcode: Albania, Angola, Bahamas, United Arab
513             # Emirates
514             # XXX What are districts called in AL & AO? (eg, state/province/county...)
515             # XXX Andorra subcountries missing from Locale::SubCountry
516             # XXX Aruba subcountries missing from Locale::SubCountry
517             # XXX Bhutan subcountries missing from Locale::SubCountry or not the right ones
518             # XXX Grenada districts missing from Locale::SubCountry (also, West Indies?)
519             # XXX Nauru (NR) missing from Locale::SubCountry
520             foreach my $spec ((['AE', 'Emirate'], ['AL', 'District'], ['AO', 'District'],
521             ['BS', 'Island'])) {
522             my ($code, $district) = @$spec;
523             if (exists($per_country_data{$code})) {
524             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
525             }
526             my $subctry = Locale::SubCountry->new($code);
527             $per_country_data{$code} = {
528             _format => [
529             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40],
530             [$district, { map { $_ => $_ } $subctry->all_full_names() } ]
531             ],
532             _s2d_map => [
533             {StoredRownum => 0, DisplayName => 'Addr1'},
534             {StoredRownum => 1, DisplayName => 'Addr2'},
535             {StoredRownum => 2, DisplayName => 'Addr3'},
536             {StoredRownum => 3, DisplayName => 'Addr4'},
537             {StoredRownum => -1, DisplayName => $district}
538             ],
539             _d2s_map => [
540             {StoredTemplate => '${Addr1}', StoredRownum => 0},
541             {StoredTemplate => '${Addr2}', StoredRownum => 1},
542             {StoredTemplate => '${Addr3}', StoredRownum => 2},
543             {StoredTemplate => '${Addr4}', StoredRownum => 3},
544             {StoredTemplate => "\$\{$district\}", StoredRownum => -1}
545             ]
546             };
547             }
548              
549             # Postcode (and postcode prefix) left of city, no district: Algeria, Andorra,
550             # Argentina, Armenia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and
551             # Herzegovina, Bulgaria, China, Costa Rica, Croatia, Cuba, Cyprus, Czech
552             # Republic, Denmark, Estonia, Ethiopia, Faroe, Finland, France, Gabon, Georgia,
553             # Germany, Guatemala, Guinea Bissau, Haiti, Iceland, Iran, Israel, Kuwait,
554             # Kyrgyzstan, Lao, Liberia, Liechtenstein, Lithuania, Luxembourg, Madagascar,
555             # Moldova, Monaco, Morocco, New Caledonia, Niger, Norway, Paraguay,
556             # Philippines, Romania, Russian Federation, San Marino, Senegal, Serbia and
557             # Montenegro, Slovenia, Spain, Tajikistan, Former Yugoslav Republic of
558             # Macedonia, Tunisia, Turkey, Turkmenistan, Vatican, Zambia.
559             # XXX Aruba may be here as part of the Netherlands.
560             # XXX I require Andorra postcodes to start with AD (uppercase). A better
561             # approach may be to make them optional and have normalize add them if needed.
562             # XXX Armenia may need 6-digit postcodes, not 4.
563             # XXX Austria may not require, or forbid, the A- postcode prefix.
564             # XXX Azerbaijan may need 6-digit postcodes, not 4.
565             # XXX Belarus addresses used to be upside down, but no longer. Thanks to
566             # LeiaCat for the information.
567             # XXX Belgium may not require, or forbid, the B- postcode prefix.
568             # XXX China postcodes may be after city, not before.
569             # XXX Croatia may use county code as first 2 digits of postcode.
570             # XXX Cuba may use county/district code as first 2 digits of postcode, but the
571             # district codes Locale::SubCountry gives are inconsistent with the sample
572             # addresses.
573             # XXX Cyprus may need a CY- postcode prefix.
574             # XXX Denmark may want a DK- postcode prefix.
575             # XXX Estonia may want EE- postcode prefix. Also, after postcode is city or
576             # region, but Locale::SubCountry only has regions, no cities.
577             # XXX Finland wants a different postcode prefix and an extra line for addresses
578             # in the Åland Islands. Forget about it for now?
579             # XXX I require Faroe postcodes to start with AD (uppercase). A better approach
580             # may be to make them optional and have normalize add them if needed.
581             # XXX France apparently no longer uses numeric codes for its outlying bits, but
582             # I'm not sure which (if any) alpha codes are valid. Note that I don't
583             # uppercase the 2A... and 2B... postcodes used for Corsica. Also, may want a F-
584             # or FR- postcode prefix.
585             # XXX French Guiana is here, as part of France.
586             # XXX Ditto for French Polynesia, except that it may want its own country name.
587             # If it does, handle with normalize(). (Can't make it its own country, even
588             # though it has an ISO 3166-1 entry, as it's missing from Locale::Country and
589             # Locale::SubCountry.)
590             # XXX Georgia may want 6-digit postcode, not 4.
591             # XXX Germany doesn't want a postcode prefix anymore, apparently.
592             # XXX Greenland may be part of Denmark, or it may use the same format but with
593             # its own country name. Go for the former.
594             # XXX Guadeloupe is here, as part of France.
595             # XXX I require Haiti postcodes to start with HT (uppercase). A better approach
596             # may be to make them optional and have normalize() add them if needed. Also,
597             # the numeric part may be further constrained, but I don't have a complete list
598             # of postcodes.
599             # XXX Iceland postcodes may be further constrained (first digit 0-8) and may
600             # need an IS- postcode prefix.
601             # XXX Israel may use a IL- postcode prefix.
602             # XXX Kyrgyzstan seems to want addresses upside down, with recipient just above
603             # country. If it does, handle it with label() for now. Not ideal.
604             # XXX Liechtenstein may have the postcode right of city, not left. Also, using
605             # the Switzerland/CH format, with Liechtenstein/FL (not LI?) instead.
606             # XXX Luxembourg may use a L- or LU- postcode prefix.
607             # XXX Mayotte is here, as part of France.
608             # XXX Macedonia postcodes may be 5 digits with a MK- prefix, not 4 digits and
609             # prefixless.
610             # XXX Monaco postcodes may be more constrained than 5 digits. Also, it may use
611             # a MC- postcode prefix.
612             # XXX Martinique is here, as part of France.
613             # XXX New Caledonia is just like France, except with its own country name and
614             # postcodes starting in 988.
615             # XXX Norway may use a N- or NO- postcode prefix.
616             # XXX Philippines may be using district/province instead of city, or either
617             # district/province or city, but I'm not sure which, and neither Manila nor
618             # Metro Manila are in the Locale::SubCountry.pm list, so pretend it's a city.
619             # XXX Réunion is here, as part of France.
620             # XXX Russian Federation prefers addresses upside down for local use, but can
621             # handle the more common format in international mail. Also, it seems to want
622             # postcodes under the country name, but we don't do that. Put it left of the
623             # city for now, although below city (and above country) may be a better place.
624             # Saint Pierre and Miquelon is (are?) here, as part of France.
625             # XXX San Marino may have a single city, as Singapore. Also, all postcodes
626             # start with 4789.
627             # XXX Spain may want the province name between () after the city name for some
628             # cities. Leave it here until I know more.
629             # XXX Svalbard and Jan Mayen Island may be here, as part of Norway.
630             # XXX Switzerland may want a CH postcode prefix. Also, ambiguous cities may
631             # need district (canton) code postfixed. Let users enter it as part of city if
632             # needed (same as post office number).
633             # XXX Tajikistan may have a district code before the postcode, but the list in
634             # Locale::SubCountry looks incomplete.
635             # XXX Tunisia may use a TN- postcode prefix.
636             # XXX Turkmenistan wants addresses in the postcode+city, country, name, street
637             # order?
638             # XXX Vatican actually has a single postcode. (It's part of the Italian postal
639             # system, but doesn't have a province appended, and has its own country name.)
640             # Wallis and Futuna is here as part of France.
641             foreach my $spec # each spec: [country code, postcode length, postcode regex, stored postcode prefix]
642             ((['DZ', 5, qr/^(0[1-9]|[1-3][0-9]|4[0-8])\d{3}$/, ''],
643             ['FR', 5, qr/^([02][1-9]|[13-8][0-9]|2[AB]|9[0-578])\d{3}$/, ''],
644             ['AD', 5, qr/^AD\d{3}$/, ''], ['AM', 4, qr/^\d{4}$/, ''],
645             ['AR', 8, qr/^\w\d{4}\w{3}$/, ''], ['AT', 4, qr/^\d{4}$/, 'A-'],
646             ['AZ', 4, qr/^\d{4}$/, 'AZ'], ['BY', 6, qr/^\d{6}$/, ''],
647             ['BE', 4, qr/^\d{4}$/, 'B-'], ['BA', 5, qr/^\d{5}$/, ''],
648             ['BG', 4, qr/^\d{4}$/, 'BG-'], ['CN', 6, qr/^\d{6}$/, ''],
649             ['CR', 4, qr/^\d{4}$/, ''], ['HR', 5, qr/^\d{5}$/, 'HR-'],
650             ['CU', 5, qr/^\d{5}$/, 'CP '], ['CY', 4, qr/^\d{4}$/, ''],
651             ['DK', 4, qr/^\d{4}$/, ''], ['EE', 5, qr/^\d{5}$/, ''],
652             ['ET', 4, qr/^\d{4}$/, ''], ['FI', 5, qr/^\d{5}$/, 'FI-'],
653             ['FO', 5, qr/^FO\d{3}$/, ''], ['GA', 2, qr/^\d\d$/, ''],
654             ['GE', 4, qr/^\d{4}$/, ''], ['DE', 5, qr/^\d{5}$/, ''],
655             ['GT', 5, qr/^\d{5}$/, ''], ['GW', 4, qr/^\d{4}$/, ''],
656             ['HT', 6, qr/^HT\d{4}$/, ''], ['IS', 3, qr/^\d{3}$/, ''],
657             ['IR', 10, qr/^\d{10}$/, ''], ['IL', 5, qr/^\d{5}$/, ''],
658             ['KW', 5, qr/^\d{5}$/, ''], ['KG', 6, qr/^\d{6}$/, ''],
659             ['LA', 5, qr/^\d{5}$/, ''], ['LI', 4, qr/^\d{4}$/, 'FL-'],
660             ['LR', 4, qr/^\d{4}$/, ''], ['LT', 5, qr/^\d{5}$/, 'LT-'],
661             ['LU', 4, qr/^\d{4}$/, 'L-'], ['MG', 3, qr/^\d{3}$/, ''],
662             ['MD', 5, qr/^\d{5}$/, 'MD-'], ['MC', 5, qr/^\d{5}$/, ''],
663             ['MA', 5, qr/^\d{5}$/, ''], ['NC', 5, qr/^988\d\d$/, ''],
664             ['NE', 4, qr/^\d{4}$/, ''], ['NO', 4, qr/^\d{4}$/, ''],
665             ['PH', 4, qr/^\d{4}$/, ''], ['PY', 4, qr/^[1-9]\d{3}$/, ''],
666             ['RO', 6, qr/^\d{6}$/, ''], ['RU', 6, qr/^\d{6}$/, ''],
667             ['SM', 5, qr/^4789\d$/, ''], ['SN', 5, qr/^\d{5}$/, ''],
668             ['CS', 5, qr/^\d{5}$/, ''], ['SI', 4, qr/^\d{4}$/, ''],
669             ['ES', 5, qr/^\d{5}$/, ''], ['CH', 4, qr/^\d{4}$/, ''],
670             ['TJ', 6, qr/^\d{6}$/, ''], ['MK', 4, qr/^\d{4}$/, ''],
671             ['TN', 4, qr/^\d{4}$/, ''], ['TR', 5, qr/^\d{5}$/, ''],
672             ['TM', 6, qr/^\d{6}$/, ''], ['VA', 5, qr/^00120$/, ''],
673             ['ZM', 5, qr/^\d{5}$/, ''])) {
674             my ($code, $postcode_len, $postcode_re, $postcode_pfx) = @$spec;
675             if (exists($per_country_data{$code})) { # a country may be defined by one table only
676             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
677             }
678             $per_country_data{$code} = {
679             _format => [ # display fields; NOTE(review): entries look like [name, max length, optional regex] - confirm
680             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2, # NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
681             ['Postcode', $postcode_len, $postcode_re], ['City', 40],
682             ],
683             _s2d_map => [ # stored -> display: where each display field sits in the stored rows
684             {StoredRownum => 0, DisplayName => 'Addr1'},
685             {StoredRownum => 1, DisplayName => 'Addr2'},
686             {StoredRownum => 2, DisplayName => 'Addr3'},
687             {StoredRownum => 3, DisplayName => 'Addr4'},
688             {StoredRownum => -1, DisplayName => 'Postcode', # postcode starts right after the fixed prefix
689             StoredColnum => length($postcode_pfx), StoredCollen => $postcode_len},
690             {StoredRownum => -1, DisplayName => 'City', # city starts after prefix + postcode + the 1 space in the template below
691             StoredColnum => length($postcode_pfx) + $postcode_len + 1}
692             ],
693             _d2s_map => [ # display -> stored: one template per stored row
694             {StoredTemplate => '${Addr1}', StoredRownum => 0},
695             {StoredTemplate => '${Addr2}', StoredRownum => 1},
696             {StoredTemplate => '${Addr3}', StoredRownum => 2},
697             {StoredTemplate => '${Addr4}', StoredRownum => 3},
698             {StoredTemplate => "$postcode_pfx\$\{Postcode\} \$\{City\}", # prefix is interpolated now; ${Postcode}/${City} stay literal placeholders
699             StoredRownum => -1}
700             ]
701             };
702             }
703              
704             # Variable length postcode (and postcode prefix) left of city, no district:
705             # Chile, Czech Republic, Dominican Republic, Greece, Guinea, Netherlands,
706             # Poland, Portugal, Slovakia, Sweden
707             # XXX Czech Republic wants a space after the 3rd digit of the postcode. Also,
708             # it may want CZ- as a postcode prefix.
709             # XXX Dominican Republic wants a - after the 5th digit of the postcode.
710             # XXX Greece doesn't want a postcode prefix anymore, apparently.
711             # XXX Guinea postcodes include a PO box. 12 leaves room for 5-digits PO box #s.
712             # Also, I don't try to normalize postcodes.
713             # XXX Netherlands may want a NL- postcode prefix.
714             # XXX Netherlands Antilles are here, as part of the Netherlands.
715             foreach my $spec # each spec: [country code, max postcode length, postcode field regex, stored-line template, stored-line regex]
716             ((['CL', 8, qr/^\d{3}[-\s]*\d{4}$/, '${Postcode} ${City}',
717             qr/^(\d{3}[-\s]?\d{4})\s+(.+)$/],
718             ['CZ', 6, qr/^\d{3}\s?\d\d$/, '${Postcode} ${City}', # optional space goes after the 3rd digit, matching the stored-line regex
719             qr/^(\d{3}\s*\d\d)\s+(.+)$/],
720             ['DO', 10, qr/^\d{5}[-\s]*\d{4}$/, '${Postcode} ${City}',
721             qr/^(\d{5}[-\s]*\d{4})\s+(.+)$/],
722             ['GR', 6, qr/^\d{3}\s*\d\d$/, '${Postcode} ${City}',
723             qr/^(\d{3}\s*\d\d)\s+(.+)$/],
724             ['GN', 12, qr/^[0-4]\d\d\s*BP\s*\d+$/i, '${Postcode} ${City}',
725             qr/^([0-4]\d\d\s*BP\s*\d+)\s+(.+)$/i],
726             ['NL', 7, qr/^\d{4}\s?\w\w$/, '${Postcode} ${City}', # 4 digits + 2 word characters (not a literal "w")
727             qr/^(\d{4}\s?\w\w)\s+(.+)$/],
728             ['PL', 6, qr/^\d{3}-?\d\d$/, '${Postcode} ${City}',
729             qr/^(\d{3}-?\d\d)\s+(.+)$/],
730             ['PT', 8, qr/^\d{4}-?\d{3}$/, '${Postcode} ${City}',
731             qr/^(\d{4}-?\d{3})\s+(.+)$/],
732             ['SK', 6, qr/^\d{3}\s*\d\d$/, '${Postcode} ${City}',
733             qr/^(\d{3}\s*\d\d)\s+(.+)$/], # whitespace separator, same as the field regex, so validated postcodes parse back
734             ['SE', 6, qr/^\d{3}\s*\d\d$/, 'SE-${Postcode} ${City}',
735             qr/^(?i:SE-)?(\d{3}[\s-]?\d\d)\s+(.+)$/])) {
736             my ($code, $postcode_len, $postcode_re, $pc_layout, $pc_re) = @$spec;
737             if (exists($per_country_data{$code})) { # a country may be defined by one table only
738             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
739             }
740             $per_country_data{$code} = {
741             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
742             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
743             ['Postcode', $postcode_len, $postcode_re], ['City', 40]
744             ],
745             _s2d_map => [ # stored -> display: postcode and city are captures 0 and 1 of _regexes[0]
746             {StoredRownum => 0, DisplayName => 'Addr1'},
747             {StoredRownum => 1, DisplayName => 'Addr2'},
748             {StoredRownum => 2, DisplayName => 'Addr3'},
749             {StoredRownum => 3, DisplayName => 'Addr4'},
750             {StoredRownum => -1, DisplayName => 'Postcode',
751             StoredRegexnum => 0, StoredFieldnum => 0},
752             {StoredRownum => -1, DisplayName => 'City',
753             StoredRegexnum => 0, StoredFieldnum => 1}
754             ],
755             _d2s_map => [ # display -> stored: one template per stored row
756             {StoredTemplate => '${Addr1}', StoredRownum => 0},
757             {StoredTemplate => '${Addr2}', StoredRownum => 1},
758             {StoredTemplate => '${Addr3}', StoredRownum => 2},
759             {StoredTemplate => '${Addr4}', StoredRownum => 3},
760             {StoredTemplate => $pc_layout, StoredRownum => -1}
761             ],
762             _regexes => [ $pc_re ] # splits the stored postcode/city line back into its fields
763             };
764             }
765              
766             # Postcode right of city, no district: Bermuda, Bahrain, Cambodia, India,
767             # Indonesia, Jordan, Republic of Korea (aka South Korea), Latvia, Lebanon,
768             # Lesotho, Maldives, Malta, Mongolia, Myanmar, Nepal, New Zealand, Pakistan,
769             # Saudi Arabia, Taiwan, Viet Nam.
770             # XXX Cook Islands may be here too as part of New Zealand.
771             # XXX Note that state (code or name) is optional (and almost never used) in
772             # India addresses provided that postcode is present. (thanks to Martin DeMello
773             # for the information.)
774             # XXX Republic of Korea may need region/city (in list) instead of city name,
775             # and its postal authority suggests adding "Seoul" to the city name on
776             # international mail, no matter what the destination, to avoid misrouting to
777             # North Korea. For now, treat it all like a big unstructured city field.
778             # XXX Mongolia wants postcodes right of country, not city, and some cities at
779             # least have a delivery/route code after the city. OTOH, anything next to the
780             # country name is a bad idea. Leave the postcode right of the city for now.
781             # XXX New Zealand postcodes are optional, except for bulk mailers. This means
782             # that many people probably don't know their postcode. Asking for it anyway
783             # doesn't hurt.
784             # XXX Niue may be here as part of New Zealand.
785             # XXX Saudi Arabia addresses use separate formats for Latin and Arabic scripts.
786             # XXX Tokelau may be here as part of New Zealand.
787             # XXX Not sure of the format for Taiwan postcodes: may be 3 digits, 5 digits,
788             # or 5 digits with a - after the 3rd.
789             foreach my $spec # each spec: [country code, max postcode length, postcode field regex, stored-line regex (city, postcode), text between city and postcode]
790             ((['BH', 4, qr/^([2-9]|1[0-2]?)\d\d$/,
791             qr/^(.+)\s+((?:[2-9]|1[0-2]?)\d\d)$/, ' '],
792             ['BM', 5, qr/^\w\w\s*(\d\d|\w\w)$/,
793             qr/^(.+)\s+(\w\w\s*(?:\d\d|\w\w))$/, ' '],
794             ['KH', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
795             ['IN', 6,
796             qr/^(1[1-9]|2[0-8]|[35][0-36-9]|34|[47][0-9]|6[0-47-9]|8[0-5])\d{4}$/,
797             qr/^(.+)(?:\s+|\s*-\s*)(\d{6})$/, '-'],
798             ['ID', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
799             ['JO', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
800             ['KR', 7, qr/^\d{3}-?\d{3}$/, qr/^(.+)\s+(\d{3}-?\d{3})$/, ' '],
801             ['LV', 4, qr/^\d{4}$/, qr/^(.+?),?\s*(?:LV\s*-\s*)?(\d{4})$/, ', LV-'],
802             ['LB', 9, qr/^\d{4}(?:\s*\d{4})?$/, qr/^(.+?)\s+(\d{4}(?:\s*\d{4})?)$/, ' '], # 4 or 8 digits: "\d{4}?" was a non-greedy exact count, not an optional group
803             ['LS', 3, qr/^\d{3}$/, qr/^(.+)\s+(\d{3})$/, ' '],
804             ['MV', 5, qr/^\d\d-?\d\d$/, qr/^(.+)\s+(\d\d-?\d\d)$/, ' '],
805             ['MT', 7, qr/^\w{3}\s*\d{2,3}$/, qr/^(.+)\s+(\w{3}\s*\d{2,3})$/, ' '],
806             ['MN', 6, qr/^\d{6}$/, qr/^(.+)\s+(\d{6})$/, ' '],
807             ['MM', 5, qr/^(0[1-9]|1[0-4])\d{3}$/, qr/^(.+)(?:,\s*|\s+)(\d{5})$/, ' '],
808             ['NP', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
809             ['NZ', 4, qr/^(\d{4})?$/, qr/^(.+?)(?:\s+(\d{4}))?$/, ' '],
810             ['PK', 5, qr/^\d{5}$/, qr/^(.+)(?:\s*-\s*|\s+)(\d{5})$/, ' '],
811             ['SA', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
812             ['VN', 6, qr/^\d{6}$/, qr/^(.+)\s+(\d{6})$/, ' '],
813             ['TW', 6, qr/^\d{3}(-?\d{2})?$/,
814             qr/^(.+)(?:,\s*|\s+)(\d{3}(?:-?\d{2})?)$/, ' '])) { # postcode must be captured: the map below reads it as capture 1
815             my ($code, $postcode_len, $postcode_re, $cp_re, $postcode_prefix) = @$spec;
816             if (exists($per_country_data{$code})) { # a country may be defined by one table only
817             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
818             }
819             $per_country_data{$code} = {
820             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
821             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
822             ['City', 40], ['Postcode', $postcode_len, $postcode_re]
823             ],
824             _s2d_map => [ # stored -> display: city and postcode are captures 0 and 1 of _regexes[0]
825             {StoredRownum => 0, DisplayName => 'Addr1'},
826             {StoredRownum => 1, DisplayName => 'Addr2'},
827             {StoredRownum => 2, DisplayName => 'Addr3'},
828             {StoredRownum => 3, DisplayName => 'Addr4'},
829             {StoredRownum => -1, DisplayName => 'City',
830             StoredRegexnum => 0, StoredFieldnum => 0},
831             {StoredRownum => -1, DisplayName => 'Postcode',
832             StoredRegexnum => 0, StoredFieldnum => 1}
833             ],
834             _d2s_map => [ # display -> stored: one template per stored row
835             {StoredTemplate => '${Addr1}', StoredRownum => 0},
836             {StoredTemplate => '${Addr2}', StoredRownum => 1},
837             {StoredTemplate => '${Addr3}', StoredRownum => 2},
838             {StoredTemplate => '${Addr4}', StoredRownum => 3},
839             {StoredTemplate => "\$\{City\}$postcode_prefix\$\{Postcode\}", # separator is interpolated now; ${City}/${Postcode} stay literal placeholders
840             StoredRownum => -1}
841             ],
842             _regexes => [ $cp_re ] # splits the stored city/postcode line back into its fields
843             };
844             }
845              
846             # Postcode left of city, district name below: Cape Verde, El Salvador,
847             # Mozambique
848             # XXX Cape Verde seems to be missing an island in Locale::SubCountry.pm.
849             foreach my $spec # each spec: [country code, display name of the district field]
850             ((['CV', 'Island'], ['SV', 'Department'], ['MZ', 'Province'])) {
851             my ($code, $dname) = @$spec;
852             if (exists($per_country_data{$code})) { # a country may be defined by one table only
853             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
854             }
855             my $subctry = Locale::SubCountry->new($code); # source of the district names for this country
856             $per_country_data{$code} = {
857             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
858             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
859             ['Postcode', 4, qr/^\d{4}$/], ['City', 40],
860             [$dname, { map { $_ => $_ } $subctry->all_full_names() } ] # district choices: each full name mapped to itself
861             ],
862             _s2d_map => [ # stored -> display: postcode + city on the second-to-last row, district on the last
863             {StoredRownum => 0, DisplayName => 'Addr1'},
864             {StoredRownum => 1, DisplayName => 'Addr2'},
865             {StoredRownum => 2, DisplayName => 'Addr3'},
866             {StoredRownum => 3, DisplayName => 'Addr4'},
867             {StoredRownum => -2, DisplayName => 'Postcode',
868             StoredColnum => 0, StoredCollen => 4},
869             {StoredRownum => -2, DisplayName => 'City', StoredColnum => 5}, # 4-character postcode + the 1 space in the template below
870             {StoredRownum => -1, DisplayName => $dname}
871             ],
872             _d2s_map => [ # display -> stored: one template per stored row
873             {StoredTemplate => '${Addr1}', StoredRownum => 0},
874             {StoredTemplate => '${Addr2}', StoredRownum => 1},
875             {StoredTemplate => '${Addr3}', StoredRownum => 2},
876             {StoredTemplate => '${Addr4}', StoredRownum => 3},
877             {StoredTemplate => '${Postcode} ${City}', StoredRownum => -2},
878             {StoredTemplate => "\$\{$dname\}", StoredRownum => -1} # yields a literal ${<district field name>} placeholder
879             ]
880             };
881             }
882              
883             # Postcode right of city, district name below: Nigeria
884             foreach my $code (qw(NG)) { # single-country table, kept as a loop like the other tables
885             if (exists($per_country_data{$code})) { # a country may be defined by one table only
886             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
887             }
888             my $subctry = Locale::SubCountry->new($code); # source of the state names
889             $per_country_data{$code} = {
890             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
891             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
892             ['City', 40], ['Postcode', 6, qr/^\d{6}$/],
893             ['State', { map { $_ => $_ } $subctry->all_full_names() } ] # state choices: each full name mapped to itself
894             ],
895             _s2d_map => [ # stored -> display: city and postcode are captures 0 and 1 of _regexes[0]; state is the whole last row
896             {StoredRownum => 0, DisplayName => 'Addr1'},
897             {StoredRownum => 1, DisplayName => 'Addr2'},
898             {StoredRownum => 2, DisplayName => 'Addr3'},
899             {StoredRownum => 3, DisplayName => 'Addr4'},
900             {StoredRownum => -2, DisplayName => 'City',
901             StoredRegexnum => 0, StoredFieldnum => 0},
902             {StoredRownum => -2, DisplayName => 'Postcode',
903             StoredRegexnum => 0, StoredFieldnum => 1},
904             {StoredRownum => -1, DisplayName => 'State'}
905             ],
906             _d2s_map => [ # display -> stored: one template per stored row
907             {StoredTemplate => '${Addr1}', StoredRownum => 0},
908             {StoredTemplate => '${Addr2}', StoredRownum => 1},
909             {StoredTemplate => '${Addr3}', StoredRownum => 2},
910             {StoredTemplate => '${Addr4}', StoredRownum => 3},
911             {StoredTemplate => '${City} ${Postcode}', StoredRownum => -2},
912             {StoredTemplate => '${State}', StoredRownum => -1}
913             ],
914             _regexes => [ qr/^(.+)\s+(\d{6})$/ ] # splits "city postcode": everything before the final 6 digits is the city
915             };
916             }
917              
918             # City and district name each on a line by itself, no postcode: Ireland,
919             # Kiribati, Panama, Solomon Islands
920             # XXX Ireland district (county) is optional (forbidden?) if same as city, and
921             # prefixed with 'CO ' if present. If it's forbidden, not just optional, let
922             # normalize() handle it. Also, Dublin needs a numeric suffix.
923             # XXX Kiribati district (island) list may be incomplete or incorrect.
924             # XXX Panama may use district only, not city.
925             # XXX Seychelles may be here too, but they're missing from Locale::SubCountry.
926             # XXX Solomon Islands district (province) list may be incomplete or incorrect.
927             foreach my $spec # each spec: [country code, display name of the district field, fixed text stored before the district]
928             ((['IE', 'County', 'CO '], ['KI', 'Island', ''], ['PA', 'Province', ''],
929             ['SB', 'Province', ''])) {
930             my ($code, $dname, $dpfx) = @$spec;
931             if (exists($per_country_data{$code})) { # a country may be defined by one table only
932             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
933             }
934             my $subctry = Locale::SubCountry->new($code); # source of the district names for this country
935             $per_country_data{$code} = {
936             _format => [ # display fields; no grouping number here, city and district each have their own row
937             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], ['City', 40],
938             [$dname, { map { $_ => $_ } $subctry->all_full_names() } ] # district choices: each full name mapped to itself
939             ],
940             _s2d_map => [ # stored -> display: city on the second-to-last row, district on the last
941             {StoredRownum => 0, DisplayName => 'Addr1'},
942             {StoredRownum => 1, DisplayName => 'Addr2'},
943             {StoredRownum => 2, DisplayName => 'Addr3'},
944             {StoredRownum => 3, DisplayName => 'Addr4'},
945             {StoredRownum => -2, DisplayName => 'City'},
946             {StoredRownum => -1, DisplayName => $dname,
947             StoredColnum => length($dpfx)} # district text starts after the fixed prefix (e.g. 'CO ')
948             ],
949             _d2s_map => [ # display -> stored: one template per stored row
950             {StoredTemplate => '${Addr1}', StoredRownum => 0},
951             {StoredTemplate => '${Addr2}', StoredRownum => 1},
952             {StoredTemplate => '${Addr3}', StoredRownum => 2},
953             {StoredTemplate => '${Addr4}', StoredRownum => 3},
954             {StoredTemplate => '${City}', StoredRownum => -2},
955             {StoredTemplate => "$dpfx\$\{$dname\}", StoredRownum => -1} # prefix is interpolated now; the ${...} placeholder stays literal
956             ]
957             };
958             }
959              
960             # City, district code/name, postcode in some order all on same line: Australia,
961             # Canada, Italy, Japan, Malaysia, Mexico, Papua New Guinea, Somalia, United
962             # States, Venezuela
963             # DamienPS explained something (I forgot what) about Australian postcodes.
964             # XXX American Samoa may be here as part of the US.
965             # XXX Cocos (Keeling) Islands may be here, as part of Australia.
966             # XXX Christmas Island may be here, as part of Australia.
967             # XXX Canadian postcodes want uppercase letters and 1 space after 3rd position.
968             # XXX Federated States of Micronesia may be here as part of the US.
969             # XXX Guam may be here as part of the US.
970             # XXX Heard Island and McDonald Islands may be here as part of Australia.
971             # XXX Honduras districts are missing from Locale::SubCountry
972             # XXX Thanks to Renée for splainin what Japan addresses are like. Also, this
973             # assumes city and prefecture names don't contain spaces. Also, there may be a
974             # trend toward moving the postcode to its own line below city and prefecture,
975             # and we leave the - insertion after the 3rd postcode digit to normalize(), if
976             # it's really necessary. (Since addresses in Roman script are still sorted by
977             # hand according to the UPU page about Japan, I doubt it is.)
978             # XXX Malaysia doesn't use the names of the federal territories for routing,
979             # and they appear optional.
980             # XXX Marshall Islands may be here as part of the US.
981             # XXX Mariana Islands may be here as part of the US.
982             # XXX Mexico addresses may use state name, not code. Also, on-screen field
983             # order of addresses doesn't match natural/stored.
984             # XXX Norfolk Island may be here as part of Australia.
985             # XXX Puerto Rico may be here as part of the US.
986             # XXX United States definition in Locale::SubCountry.pm is missing AA/AE/AP
987             # entries for APO/FPO.
988             # XXX Venezuela definition in Locale::SubCountry.pm may be missing some states.
989             # XXX US Virgin Islands may be here as part of the US.
990             foreach my $spec # each spec is unpacked below; the four small integers are: use district names (1) or codes (0), then capture index of city, district, postcode
991             ((['AU', 'State', 'Postcode', 4, qr/^\d{4}$/,
992             '${City} ${State} ${Postcode}',
993             0, 0, 1, 2, qr/^(.+?),?\s+(\w{2,3})\s+(\d{4})$/], # non-greedy city so an optional comma is not captured as part of it
994             ['CA', 'Province', 'Postcode', 7, qr/^\w\d\w\s*\d\w\d$/,
995             '${City} ${Province} ${Postcode}', 0, 0, 1, 2,
996             qr/^(.+)\s+(\w\w)\s+(\w\d\w\s*\d\w\d)$/],
997             ['IT', 'Province', 'Postcode', 5, qr/^\d{5}$/,
998             '${Postcode}-${City} ${Province}',
999             0, 1, 2, 0, qr/^(\d{5})(?:\s*-\s*|\s+)(.+?)(?:\s+(\w\w))?$/],
1000             ['JP', 'Prefecture', 'Postcode', 8, qr/^\d{3}-?\d{4}$/,
1001             '${City} ${Prefecture} ${Postcode}', 1, 0, 1, 2,
1002             qr/^(\S+)\s+(\S+)\s+(\d\d\d-?\d\d\d\d)$/],
1003             ['MY', 'State', 'Postcode', 5, qr/^\d{5}$/,
1004             '${Postcode} ${City}, ${State}',
1005             1, 1, 2, 0, qr/^(\d{5})\s+(.+),\s*(\w+)$/],
1006             ['MX', 'State', 'Postcode', 5, qr/^\d{5}$/,
1007             '${Postcode} ${City}, ${State}',
1008             0, 1, 2, 0, qr/^(\d{5})\s+(.+),\s*(\w+)$/],
1009             ['PG', 'Province', 'Postcode', 3, qr/^\d{3}$/,
1010             '${City} ${Postcode} ${Province}',
1011             0, 0, 2, 1, qr/^(.+)\s+(\d{3})\s+(\w{3})$/],
1012             ['SO', 'Region', 'Postcode', 5, qr/^\d{5}$/,
1013             '${City}, ${Region} ${Postcode}', # placeholder must match the district field name ('Region'), not 'State'
1014             0, 0, 1, 2, qr/^(.+?),?\s+(\w{2})\s+(\d{5})$/], # non-greedy city keeps the template's comma out of the City capture
1015             ['US', 'State', 'ZIP', 10, qr/^\d{5}(-\d{4})?$/,
1016             '${City}, ${State} ${ZIP}', 0, 0, 1, 2,
1017             qr/^(.+?),?\s+(\w{2})\s+(\d{5}(?:-\d{4})?)$/], # non-greedy city keeps the template's comma out of the City capture
1018             ['VE', 'State', 'Postcode', 4, qr/^\d{4}$/,
1019             '${City}, ${Postcode} ${State}',
1020             1, 0, 2, 1, qr/^(.+?),?\s+(\d{4})\s*,?\s+(.+)$/])) { # accept the comma the template puts after the city, as well as one after the postcode
1021             my ($code, $district, $postcode, $pc_length, $pc_re, $cdp_layout, $use_dname,
1022             $city_fn, $district_fn, $pc_fn, $cdp_re) = @$spec;
1023             if (exists($per_country_data{$code})) { # a country may be defined by one table only
1024             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
1025             }
1026             my $subctry = Locale::SubCountry->new($code); # source of the district names/codes for this country
1027             $per_country_data{$code} = {
1028             _format => [ # display fields; NOTE(review): the bare 3 appears to mean the next 3 fields share a line - confirm
1029             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 3,
1030             ['City', 40],
1031             [$district, ($use_dname ? { map { $_ => $_ } $subctry->all_full_names() } # district choices: full name => full name ...
1032             : {$subctry->code_full_name_hash})], # ... or whatever code_full_name_hash returns (presumably code => full name)
1033             [$postcode, $pc_length, $pc_re]
1034             ],
1035             _s2d_map => [ # stored -> display: all three fields are captures of _regexes[0] on the last row
1036             {StoredRownum => 0, DisplayName => 'Addr1'},
1037             {StoredRownum => 1, DisplayName => 'Addr2'},
1038             {StoredRownum => 2, DisplayName => 'Addr3'},
1039             {StoredRownum => 3, DisplayName => 'Addr4'},
1040             {StoredRownum => -1, DisplayName => 'City',
1041             StoredRegexnum => 0, StoredFieldnum => $city_fn},
1042             {StoredRownum => -1, DisplayName => $district,
1043             StoredRegexnum => 0, StoredFieldnum => $district_fn},
1044             {StoredRownum => -1, DisplayName => $postcode,
1045             StoredRegexnum => 0, StoredFieldnum => $pc_fn},
1046             ],
1047             _d2s_map => [ # display -> stored: one template per stored row
1048             {StoredTemplate => '${Addr1}', StoredRownum => 0},
1049             {StoredTemplate => '${Addr2}', StoredRownum => 1},
1050             {StoredTemplate => '${Addr3}', StoredRownum => 2},
1051             {StoredTemplate => '${Addr4}', StoredRownum => 3},
1052             {StoredTemplate => $cdp_layout, StoredRownum => -1}
1053             ],
1054             _regexes => [ $cdp_re ] # splits the stored city/district/postcode line back into its fields
1055             };
1056             }
1057              
1058             # City and district code (with postfix) on 1 line, then postcode alone below:
1059             # Brazil
1060             # XXX Brazil may need a "Brazil" suffix after some or all states.
1061             foreach my $code (qw(BR)) { # single-country table, kept as a loop like the other tables
1062             if (exists($per_country_data{$code})) { # a country may be defined by one table only
1063             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
1064             }
1065             my $subctry = Locale::SubCountry->new($code); # source of the state list
1066             $per_country_data{$code} = {
1067             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
1068             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
1069             ['City', 40], ['State', {$subctry->code_full_name_hash}], # state choices come from code_full_name_hash (presumably code => full name)
1070             ['Postcode', 9, qr/^\d\d\d\d\d-?\d\d\d$/]
1071             ],
1072             _s2d_map => [ # stored -> display: city and state are captures 0 and 1 of _regexes[0]; postcode is the whole last row
1073             {StoredRownum => 0, DisplayName => 'Addr1'},
1074             {StoredRownum => 1, DisplayName => 'Addr2'},
1075             {StoredRownum => 2, DisplayName => 'Addr3'},
1076             {StoredRownum => 3, DisplayName => 'Addr4'},
1077             {StoredRownum => -2, DisplayName => 'City',
1078             StoredRegexnum => 0, StoredFieldnum => 0},
1079             {StoredRownum => -2, DisplayName => 'State',
1080             StoredRegexnum => 0, StoredFieldnum => 1},
1081             {StoredRownum => -1, DisplayName => 'Postcode'}
1082             ],
1083             _d2s_map => [ # display -> stored: one template per stored row
1084             {StoredTemplate => '${Addr1}', StoredRownum => 0},
1085             {StoredTemplate => '${Addr2}', StoredRownum => 1},
1086             {StoredTemplate => '${Addr3}', StoredRownum => 2},
1087             {StoredTemplate => '${Addr4}', StoredRownum => 3},
1088             {StoredTemplate => '${City}-${State}', StoredRownum => -2},
1089             {StoredTemplate => '${Postcode}', StoredRownum => -1}
1090             ],
1091             _regexes => [ qr/^(.+?)(?:\s*-\s*|\s+)(\w\w)$/ ] # city, then a dash or whitespace, then a 2-character state code
1092             };
1093             }
1094              
1095             # Postcode alone, then city and district below: Nicaragua
1096             # XXX Nicaragua postcodes may be extended from 7 to 11 digits in the future.
1097             foreach my $code (qw(NI)) { # single-country table, kept as a loop like the other tables
1098             if (exists($per_country_data{$code})) { # a country may be defined by one table only
1099             die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
1100             }
1101             my $subctry = Locale::SubCountry->new($code); # source of the department names
1102             $per_country_data{$code} = {
1103             _format => [ # display fields; NOTE(review): the bare 2 appears to mean the next 2 fields share a line - confirm
1104             ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40],
1105             ['Postcode', 9, qr/^\d{3}-?\d{3}-?\d$/], 2, ['City', 40],
1106             ['Department', { map { $_ => $_ } $subctry->all_full_names() } ] # department choices: each full name mapped to itself
1107             ],
1108             _s2d_map => [ # stored -> display: postcode is the whole second-to-last row; city and department are captures 0 and 1 of _regexes[0]
1109             {StoredRownum => 0, DisplayName => 'Addr1'},
1110             {StoredRownum => 1, DisplayName => 'Addr2'},
1111             {StoredRownum => 2, DisplayName => 'Addr3'},
1112             {StoredRownum => 3, DisplayName => 'Addr4'},
1113             {StoredRownum => -2, DisplayName => 'Postcode'},
1114             {StoredRownum => -1, DisplayName => 'City',
1115             StoredRegexnum => 0, StoredFieldnum => 0},
1116             {StoredRownum => -1, DisplayName => 'Department', # must match the field name declared in _format (was 'State')
1117             StoredRegexnum => 0, StoredFieldnum => 1}
1118             ],
1119             _d2s_map => [ # display -> stored: one template per stored row
1120             {StoredTemplate => '${Addr1}', StoredRownum => 0},
1121             {StoredTemplate => '${Addr2}', StoredRownum => 1},
1122             {StoredTemplate => '${Addr3}', StoredRownum => 2},
1123             {StoredTemplate => '${Addr4}', StoredRownum => 3},
1124             {StoredTemplate => '${Postcode}', StoredRownum => -2},
1125             {StoredTemplate => '${City}, ${Department}', StoredRownum => -1} # placeholder must match the field name declared in _format (was ${State})
1126             ],
1127             _regexes => [ qr/^(.+),\s*(.+)$/ ] # splits "city, department" at the last comma
1128             };
1129             }
1130              
# City and district name on same line, no postcode: Colombia
# XXX Information on Colombia is inconsistent: does it use city+district, or
# city only?
#
# Registers the Colombian layout: four free-form address lines, then a last
# stored line holding "City-Department". The single entry in _regexes splits
# that line again; the department part is optional and may follow either a
# hyphen or a comma.
for my $iso ('CO') {
    die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n"
        if exists $per_country_data{$iso};

    # Department names known to Locale::SubCountry, each mapped to itself.
    my @dept_names = Locale::SubCountry->new($iso)->all_full_names();
    my %departments;
    @departments{@dept_names} = @dept_names;

    # Addr1..Addr4 occupy stored rows 0..3.
    my @address_lines = 1 .. 4;

    $per_country_data{$iso} = {
        _format => [
            (map { [ "Addr$_", 40 ] } @address_lines),
            2,
            [ 'City', 40 ],
            [ 'Department', \%departments ],
        ],
        _s2d_map => [
            (map { +{ StoredRownum => $_ - 1, DisplayName => "Addr$_" } }
                 @address_lines),
            { StoredRownum   => -1, DisplayName    => 'City',
              StoredRegexnum => 0,  StoredFieldnum => 0 },
            { StoredRownum   => -1, DisplayName    => 'Department',
              StoredRegexnum => 0,  StoredFieldnum => 1 },
        ],
        _d2s_map => [
            (map { +{ StoredTemplate => '${Addr' . $_ . '}',
                      StoredRownum   => $_ - 1 } } @address_lines),
            { StoredTemplate => '${City}-${Department}', StoredRownum => -1 },
        ],
        _regexes => [ qr/^(.+?)(?:\s*[-,]\s*(.*))?$/ ],
    };
}
1165              
# District name and postal code each on a line by itself: Egypt
# XXX Kazakhstan would sort of be here too, but Locale::SubCountry seems to be
# missing stuff.
#
# Registers the Egyptian layout: four free-form address lines, then the
# governorate on the second-to-last stored line and a 5-digit postcode on the
# last one.
foreach my $iso ('EG') {
    if (exists $per_country_data{$iso}) {
        die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n";
    }

    # Governorate names known to Locale::SubCountry, each mapped to itself.
    my $regions = Locale::SubCountry->new($iso);
    my %governorates;
    $governorates{$_} = $_ foreach $regions->all_full_names();

    my (@fields, @stored_to_display, @display_to_stored);

    # Addr1..Addr4 live on stored rows 0..3.
    for my $row (0 .. 3) {
        my $label = 'Addr' . ($row + 1);
        push @fields, [ $label, 40 ];
        push @stored_to_display,
            { StoredRownum => $row, DisplayName => $label };
        push @display_to_stored,
            { StoredTemplate => '${' . $label . '}', StoredRownum => $row };
    }

    # The last two stored rows: governorate, then postcode.
    push @fields,
        [ 'Governorate', \%governorates ],
        [ 'Postcode', 5, qr/^\d\d\d\d\d$/ ];
    push @stored_to_display,
        { StoredRownum => -2, DisplayName => 'Governorate' },
        { StoredRownum => -1, DisplayName => 'Postcode' };
    push @display_to_stored,
        { StoredTemplate => '${Governorate}', StoredRownum => -2 },
        { StoredTemplate => '${Postcode}',    StoredRownum => -1 };

    $per_country_data{$iso} = {
        _format  => \@fields,
        _s2d_map => \@stored_to_display,
        _d2s_map => \@display_to_stored,
    };
}
1198              
# City, name of district, and postcode each on a line by itself: Ukraine,
# United Kingdom (not Great Britain, dammit!), Uzbekistan
# XXX Anguilla may be here as part of the UK.
# XXX Antigua and Barbuda may be here as part of the UK.
# XXX British Virgin Islands may be here as part of the UK.
# XXX British Indian Ocean Territory would be here as part of UK, but the
# postcode given for it (BBND 1ZZ) doesn't match the pattern.
# XXX Ditto for British Antarctic territory and BIQQ 1ZZ.
# XXX Ditto for Falkland Islands and FIQQ 1ZZ.
# XXX Ditto for Gibraltar and (I think) GIR 0AA.
# XXX Guernsey may be here as part of the UK.
# XXX Isle of Man may be here as part of the UK, or it may need its own country
# name. Go with the former.
# XXX Jersey may be here as part of the UK.
# XXX Montserrat may be here as part of the UK.
# XXX Pitcairn, Henderson, Ducie, and Oeno Island would be here as part of UK,
# but the postcode given for it (PCRN 1ZZ) doesn't match the pattern.
# XXX South Georgia and the South Sandwich Island: ditto (SIQQ 1ZZ)
# XXX Tristan Da Cunha: ditto (TDCU 1ZZ), and the rest of the example address
# format is weird: "Via Capetown"?? Also, are Saint Helena and Tristan Da Cunha
# the same?
# XXX Turks and Caicos: ditto (TECA 1ZZ).
# XXX Ukraine addresses may not need districts in some cases (large cities?).
# XXX UK addresses come in 2 formats: postcode below city/county, and postcode
# on the right. Usually, postcode on the right is for storage/reference, and
# postcode below for mailing. However, I use postcode below exclusively. If
# you're curious why, just look at the postcode regexp. (thanks to Ailbhe for
# the clarification.) Also: as always, I don't enforce separators or upper case
# in postal codes. Plus, it's not obvious that the county is optional unless
# the user groks UK addresses.
# XXX Uzbekistan addresses may not need districts in some cases (large
# cities?). Also, it may want postcode below country. Pretend it doesn't.
#
# Each spec is [country code, label of the district field, postcode length,
# postcode validation regex].
foreach my $spec (
    ['GB', 'County', 8, qr/^\w\w?\d[\w\d]?\s*\d\w\w$/],
    ['UA', 'Region', 5, qr/^\d{5}$/],
    ['UZ', 'Region', 6, qr/^\d{6}$/],
) {
    my ($iso, $district_label, $pc_width, $pc_pattern) = @{$spec};
    die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n"
        if exists $per_country_data{$iso};

    # District names known to Locale::SubCountry, each mapped to itself.
    my %district_names =
        map { ($_ => $_) } Locale::SubCountry->new($iso)->all_full_names();

    # One row per field: name, stored row number, then the arguments that
    # follow the name in the field's _format entry.
    my @layout = (
        ['Addr1',          0, 40],
        ['Addr2',          1, 40],
        ['Addr3',          2, 40],
        ['Addr4',          3, 40],
        ['City',          -3, 40],
        [$district_label, -2, \%district_names],
        ['Postcode',      -1, $pc_width, $pc_pattern],
    );

    $per_country_data{$iso} = {
        # Drop the row number (index 1) to get [name, args...].
        _format  => [ map { [ @{$_}[0, 2 .. $#{$_}] ] } @layout ],
        _s2d_map => [
            map { +{ StoredRownum => $_->[1], DisplayName => $_->[0] } }
                @layout
        ],
        _d2s_map => [
            map { +{ StoredTemplate => '${' . $_->[0] . '}',
                     StoredRownum   => $_->[1] } } @layout
        ],
    };
}
1265              
# Postcode on a line by itself, then city: Ecuador, Sudan, Uruguay
# XXX Note that I don't uppercase the letters in Ecuador postcodes
# XXX Uruguay may want the district name (and country) next to the city.
#
# Each spec is [country code, postcode length, postcode validation regex].
foreach my $entry (['EC', 6, qr/^\w\d{4}\w$/],
                   ['SD', 5, qr/^\d{5}$/],
                   ['UY', 5, qr/^\d{5}$/]) {
    my ($iso, $width, $pattern) = @{$entry};
    die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n"
        if exists $per_country_data{$iso};

    # One row per field: name, stored row number, then the arguments that
    # follow the name in the field's _format entry.
    my @layout = (
        ['Addr1',     0, 40],
        ['Addr2',     1, 40],
        ['Addr3',     2, 40],
        ['Addr4',     3, 40],
        ['Postcode', -2, $width, $pattern],
        ['City',     -1, 40],
    );

    $per_country_data{$iso} = {
        # Drop the row number (index 1) to get [name, args...].
        _format  => [ map { [ @{$_}[0, 2 .. $#{$_}] ] } @layout ],
        _s2d_map => [
            map { +{ StoredRownum => $_->[1], DisplayName => $_->[0] } }
                @layout
        ],
        _d2s_map => [
            map { +{ StoredTemplate => '${' . $_->[0] . '}',
                     StoredRownum   => $_->[1] } } @layout
        ],
    };
}
1298              
# Postcode and prefix on a line by itself under city: Iraq, Kenya, Oman, South
# Africa, Sri Lanka, Swaziland
# XXX Ascension would go there too, but it's not in ISO 3166.
# XXX Iraq may want city, district instead.
# XXX Oman examples show postcode above city, not below.
# XXX Sri Lanka examples show postcode below city, but text says it should be
# above. Assume below.
#
# Each spec is [country code, postcode length, postcode validation regex,
# literal text written in front of the postcode]. Every prefix is currently
# the empty string.
foreach my $spec (
    ['IQ', 5, qr/^\d{5}$/, ''],
    ['KE', 5, qr/^\d{5}$/, ''],
    ['OM', 3, qr/^\d{3}$/, ''],
    ['ZA', 4, qr/^\d{4}$/, ''],
    ['LK', 5, qr/^\d{5}$/, ''],
    ['SZ', 4, qr/^[HhLlMmSs]\d{3}$/, ''],
) {
    my ($iso, $pc_width, $pc_pattern, $prefix) = @{$spec};
    die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n"
        if exists $per_country_data{$iso};

    # Addr1..Addr4 occupy stored rows 0..3.
    my @address_lines = 1 .. 4;

    $per_country_data{$iso} = {
        _format => [
            (map { [ "Addr$_", 40 ] } @address_lines),
            [ 'City', 40 ],
            [ 'Postcode', $pc_width, $pc_pattern ],
        ],
        _s2d_map => [
            (map { +{ StoredRownum => $_ - 1, DisplayName => "Addr$_" } }
                 @address_lines),
            { StoredRownum => -2, DisplayName => 'City' },
            # StoredColnum is the length of the prefix, i.e. the offset at
            # which the postcode itself starts on the stored line.
            { StoredRownum => -1, DisplayName => 'Postcode',
              StoredColnum => length($prefix) },
        ],
        _d2s_map => [
            (map { +{ StoredTemplate => '${Addr' . $_ . '}',
                      StoredRownum   => $_ - 1 } } @address_lines),
            { StoredTemplate => '${City}', StoredRownum => -2 },
            { StoredTemplate => $prefix . '${Postcode}', StoredRownum => -1 },
        ],
    };
}
1339              
# City on first line of address, postcode by itself on last line: Hungary
#
# The city is stored on row 0, so the free-form address lines shift down to
# rows 1..4; the 4-digit postcode sits alone on the last stored line.
for my $iso ('HU') {
    die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n"
        if exists $per_country_data{$iso};

    # Field names in stored-row order (rows 0..4).
    my @lines = ('City', map('Addr' . $_, 1 .. 4));

    $per_country_data{$iso} = {
        _format => [
            (map { [ $_, 40 ] } @lines),
            [ 'Postcode', 4, qr/^\d{4}$/ ],
        ],
        _s2d_map => [
            (map { +{ StoredRownum => $_, DisplayName => $lines[$_] } }
                 0 .. $#lines),
            { StoredRownum => -1, DisplayName => 'Postcode',
              StoredColnum => 0 },
        ],
        _d2s_map => [
            (map { +{ StoredTemplate => '${' . $lines[$_] . '}',
                      StoredRownum   => $_ } } 0 .. $#lines),
            { StoredTemplate => '${Postcode}', StoredRownum => -1 },
        ],
    };
}
1368              
# No city (or rather, only one): Singapore
#
# The last stored line is the fixed text "SINGAPORE " followed by the 6-digit
# postcode; StoredColnum is the length of that fixed text, i.e. where the
# postcode starts on the line.
foreach my $iso ('SG') {
    if (exists $per_country_data{$iso}) {
        die __PACKAGE__ . ": Attempted to initialize country code $iso twice.\n";
    }

    my $city_prefix = 'SINGAPORE ';
    my (@fields, @stored_to_display, @display_to_stored);

    # Addr1..Addr4 live on stored rows 0..3.
    for my $row (0 .. 3) {
        my $label = 'Addr' . ($row + 1);
        push @fields, [ $label, 40 ];
        push @stored_to_display,
            { StoredRownum => $row, DisplayName => $label };
        push @display_to_stored,
            { StoredTemplate => '${' . $label . '}', StoredRownum => $row };
    }

    push @fields, [ 'Postcode', 6, qr/^\d{6}$/ ];
    push @stored_to_display,
        { StoredRownum => -1, DisplayName => 'Postcode',
          StoredColnum => length($city_prefix) };
    push @display_to_stored,
        { StoredTemplate => $city_prefix . '${Postcode}', StoredRownum => -1 };

    $per_country_data{$iso} = {
        _format  => \@fields,
        _s2d_map => \@stored_to_display,
        _d2s_map => \@display_to_stored,
    };
}
1395              
# District name, postcode (no city?): Bangladesh, Brunei Darussalam, Thailand
# XXX Bangladesh needs preprocessing of district names?
# XXX Brunei Darussalam is missing districts in Locale::SubCountry?
# XXX Thailand may use 9-digit postcodes in some cases.
#
# Each spec is [country code, postcode length, postcode validation regex,
# regex splitting the last stored line into (district, postcode), separator
# written between district and postcode when the line is rebuilt].
#
# The split regex and the separator must agree, or an address written by
# _d2s_map cannot be read back by _s2d_map:
#   - The district capture is non-greedy so that it never keeps the
#     whitespace in front of the separator ("Dhaka - 1000" gives "Dhaka",
#     not "Dhaka ").
#   - BN: the postcode is 2 characters plus 4 digits, so its length is 6 (it
#     was 4, shorter than anything its own validation regex accepts).
#   - BN: the line is now written with a plain space, matching what the split
#     regex always expected; the regex additionally accepts the " - " form
#     that earlier versions wrote, so previously stored addresses still parse.
foreach my $spec
    ((['BD', 4, qr/^\d{4}$/, qr/^(.+?)\s*-\s*(\d{4})$/, ' - '],
      ['BN', 6, qr/^[bBkKtTpP]\w\d{4}$/,
       qr/^(.+?)(?:\s*-\s*|\s+)([bBkKtTpP]\w\d{4})$/, ' '],
      ['TH', 5, qr/^\d{5}$/, qr/^(.+?)\s+(\d{5})$/, ' '])) {
    my ($code, $pc_len, $pc_re, $dp_re, $sep) = @$spec;
    # Each country code may be set up only once.
    if (exists($per_country_data{$code})) {
        die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
    }
    my $subctry = Locale::SubCountry->new($code);
    $per_country_data{$code} = {
        # NOTE(review): the bare number 2 presumably means that the next 2
        # fields share one display row; confirm against the code that
        # consumes _format.
        _format => [
            ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
            ['District', { map { $_ => $_ } $subctry->all_full_names() } ],
            ['Postcode', $pc_len, $pc_re]
        ],
        # District and postcode are capture groups 0 and 1 of _regexes[0],
        # applied to the last stored line.
        _s2d_map => [
            {StoredRownum => 0, DisplayName => 'Addr1'},
            {StoredRownum => 1, DisplayName => 'Addr2'},
            {StoredRownum => 2, DisplayName => 'Addr3'},
            {StoredRownum => 3, DisplayName => 'Addr4'},
            {StoredRownum => -1, DisplayName => 'District',
             StoredRegexnum => 0, StoredFieldnum => 0},
            {StoredRownum => -1, DisplayName => 'Postcode',
             StoredRegexnum => 0, StoredFieldnum => 1}
        ],
        _d2s_map => [
            {StoredTemplate => '${Addr1}', StoredRownum => 0},
            {StoredTemplate => '${Addr2}', StoredRownum => 1},
            {StoredTemplate => '${Addr3}', StoredRownum => 2},
            {StoredTemplate => '${Addr4}', StoredRownum => 3},
            {StoredTemplate => '${District}' . $sep . '${Postcode}',
             StoredRownum => -1}
        ],
        _regexes => [ $dp_re ]
    };
}
1437              
# City, no postcode or district: Barbados, Belize, Benin, Bolivia, Botswana,
# Burkina Faso, Burundi, Cameroon, Cayman Islands, Central African Republic,
# Chad, Comoros, Congo (Brazzaville), Congo (Kinshasa), Cote d'Ivoire,
# Democratic People's Republic of Korea (aka North Korea), Djibouti, Dominica,
# Equatorial Guinea, Eritrea, Fiji, Gambia, Ghana, Guyana, Hong Kong, Jamaica,
# Libya, Macao, Malawi, Mali, Mauritania, Mauritius, Namibia, Nigeria, Peru,
# Qatar, Rwanda, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the
# Grenadines, São Tomé and Principe, Sierra Leone, Suriname, Syrian Arab
# Republic, United Republic of Tanzania, Timor Leste, Togo, Tonga, Trinidad and
# Tobago, Tuvalu, Uganda, (Western) Samoa, Yemen, Zimbabwe
# XXX Not sure about Belize - it could be it has a district name, but if so,
# the districts Locale::SubCountry knows about aren't the right ones.
# XXX Bolivia could use district instead of city.
# XXX Not sure about Botswana - it could be it has a district name, but if so,
# the districts Locale::SubCountry knows about aren't the right ones.
# XXX Burkina Faso has 2 digits right of the city that may be (part of) a
# postcode. Also, all addresses are PO boxes and need the postcode-ish number
# prefixed?
# XXX Burundi could use district instead of city.
# XXX Central African Republic could use district instead of city.
# XXX Congo (Kinshasa) has a number after the city (at least in some cases),
# but I don't know whether that's a postcode. Until I do, leave it here.
# XXX Cote d'Ivoire actually has a 2-3 digit postcode duplicated on both ends
# of the "City" line, with a PO box just left of the city. Pretend it's all one
# big "City" field. May be better to leave it completely unstructured instead.
# XXX Djibouti could use district instead of city.
# XXX Gambia could use district instead of city.
# XXX Hong Kong postal info may be out of date, and I'm not sure about how
# Kowloon or New Territories fit in.
# XXX Jamaica needs a 1-2 digit suffix for Kingston.
# XXX Malawi needs a 1-digit suffix for Lilongwe.
# XXX Mauritania could use district instead of city.
# XXX Mauritius has an optional experimental postcode for 1 city only. Treat it
# as an unstructured address line for now.
# XXX Peru wants route numbers for some cities, and may introduce a postcode
# system eventually.
# XXX Qatar city may be optional in some cases.
# XXX Rwanda could use district instead of city.
# XXX Saint Kitts and Nevis could use district, island instead of city, but if
# so, Locale::SubCountry is missing both district and island names.
# XXX Saint Vincent and the Grenadines may want an extra address line below
# country. Ignore.
# XXX São Tomé and Principe may need district and/or island specified for some
# addresses.
# XXX Suriname could use district instead of city.
# XXX Syrian Arab Republic could use district instead of city, and is currently
# developing a postcode system.
# XXX United Republic of Tanzania could use district instead of city.
# XXX Yemen could use district instead of city.
# XXX Zimbabwe could use district instead of city.
for my $iso (qw(
    BB BZ BJ BO BW BF BI CM KY CF TD KM CG CI KP DJ DM GQ ER
    FJ GM GH GY HK JM LY MO MW ML MR NA PE QA RW KN LC VC ST
    SL SR SY TZ TL TG TT TV UG WS YE ZW
)) {
    exists $per_country_data{$iso}
        and die __PACKAGE__
            . ": Attempted to initialize country code $iso twice.\n";

    # One row per field: name and stored row number. Every field is a
    # 40-character text field; the city is the last stored line.
    my @layout = (
        ['Addr1',  0],
        ['Addr2',  1],
        ['Addr3',  2],
        ['Addr4',  3],
        ['City',  -1],
    );

    $per_country_data{$iso} = {
        _format  => [ map { [ $_->[0], 40 ] } @layout ],
        _s2d_map => [
            map { +{ StoredRownum => $_->[1], DisplayName => $_->[0] } }
                @layout
        ],
        _d2s_map => [
            map { +{ StoredTemplate => '${' . $_->[0] . '}',
                     StoredRownum   => $_->[1] } } @layout
        ],
    };
}

# A module must end with a true value.
1;
1516              
1517             =head1 BUGS
1518              
1519             Only 2-letter country codes are supported.
1520              
1521             A knob to carp on some errors would be nice.
1522              
1523             Objects returned by the new method can be actually blessed into a
1524             country-specific subclass. This makes it impossible to have other
1525             derived classes than the country-specific ones.
1526              
1527             40 is used as the suggested length for all text fields. This is probably too
1528             long for some and too short for others.
1529              
1530             Support for most countries ranges from non-existent to sketchy.
1531              
1532             The method name "display" is arguably a poor choice.
1533              
1534             Some messages should go through a translation table.
1535              
1536             Data validation should probably be a method of its own.
1537              
1538             This module doesn't yet deal well with countries that want the recipient name
1539             in another position than 1st line, or the country name in another position than
1540             last line. Examples of such countries are: Ukraine (wants country,
1541             city+postcode, street address, recipient name from top down instead of the more
1542             widespread bottom up), Turkmenistan (wants city+postcode, country, recipient
1543             name, street address, from top down), Grenada (wants a supranational line -
1544             West Indies - below the country name). The interface to do that exists, but is
1545             do-nothing until I figure out how to deal with address formats for use between
1546             countries with conflicting requirements.
1547              
1548             This module doesn't deal well with countries where the address format depends
1549             on the script used, such as Saudi Arabia.
1550              
1551             This module doesn't yet support entities with their own ISO 3166-1 code that
1552             use another country's address format, including the country name.
1553              
1554             This module assumes "no locale", and blissfully mixes character classes that
1555             could conceivably match in the locale with classes that have to match according
1556             to the Roman alphabet (eg, US ZIP codes and Canadian postal codes). This is
1557             probably nearly impossible to fix, as the relevant locale isn't well-defined
1558             anyway. (The locale for the machine running the application? The locale for the
1559             user? Or the locale for the country the address is in?)
1560              
1561             This module assumes that the privileged order for entering address components
1562             is top-down, left-to-right, according to the standard or most common address
1563             format. This may not be true of countries where the dominant language is
1564             written right-to-left.
1565              
1566             This module doesn't use the PATDL
1567             (Postal Address Template Description Language) in the address parsing
1568             rules.
1569              
1570             =head1 HISTORY
1571              
1572             =head1 SEE ALSO
1573              
1574             L
1575              
1576             L
1577              
1578             F
1579              
1580             F
1581              
1582             F
1583              
1584             F
1585              
1586             F (previous version of the first
1587             URL, incorrect in spots, and to be used only if no other info is available)
1588              
1589             F
1590              
1591             =head1 CONTRIBUTORS
1592              
1593             Ailbhe, DamienPS, LeiaCat, Renée, and Martin DeMello clarified, corrected, or
1594             explained standards or usage for specific countries. See acknowledgements in
1595             comments throughout the source code.
1596              
1597             Bill Holbrook draws (and holds the copyright to) comic strip Kevin and Kell,
1598             from which I got the names used in the description for C<$parser-E<gt>label()>.
1599              
1600             =head1 AUTHOR AND LICENSE
1601              
1602             Copyright (c) 2004, Michel Lavondès. All rights reserved.
1603              
1604             Redistribution and use in source and binary forms, with or without
1605             modification, are permitted provided that the following conditions are met:
1606              
1607             =over 4
1608              
1609             =item *
1610              
1611             Redistributions of source code must retain the above copyright notice, this
1612             list of conditions and the following disclaimer.
1613              
1614             =item *
1615              
1616             Redistributions in binary form must reproduce the above copyright notice, this
1617             list of conditions and the following disclaimer in the documentation and/or
1618             other materials provided with the distribution.
1619              
1620             =item *
1621              
1622             Neither the name of the Copyright holder nor the names of any contributors may
1623             be used to endorse or promote products derived from this software without
1624             specific prior written permission.
1625              
1626             =back
1627              
1628             This software is provided by the copyright holder and contributors "as is" and
1629             any express or implied warranties, including, but not limited to, the implied
1630             warranties of merchantability and fitness for a particular purpose are
1631             disclaimed. In no event shall the copyright holder or contributors be liable
1632             for any direct, indirect, incidental, special, exemplary, or consequential
1633             damages (including, but not limited to, procurement of substitute goods or
1634             services; loss of use, data, or profits; or business interruption) however
1635             caused and on any theory of liability, whether in contract, strict liability,
1636             or tort (including negligence or otherwise) arising in any way out of the use
1637             of this software, even if advised of the possibility of such damage.