File Coverage

blib/lib/Time/OlsonTZ/Download.pm
Criterion Covered Total %
statement 41 316 12.9
branch 0 142 0.0
condition 0 24 0.0
subroutine 14 46 30.4
pod 17 17 100.0
total 72 545 13.2


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Time::OlsonTZ::Download - Olson timezone database from source
4              
5             =head1 SYNOPSIS
6              
7             use Time::OlsonTZ::Download;
8              
9             $version = Time::OlsonTZ::Download->latest_version;
10              
11             $download = Time::OlsonTZ::Download->new;
12              
13             $version = $download->version;
14             $version = $download->code_version;
15             $version = $download->data_version;
16             $dir = $download->dir;
17             $dir = $download->unpacked_dir;
18              
19             $names = $download->canonical_names;
20             $names = $download->link_names;
21             $names = $download->all_names;
22             $links = $download->raw_links;
23             $links = $download->threaded_links;
24             $countries = $download->country_selection;
25              
26             $files = $download->data_files;
27             $zic = $download->zic_exe;
28             $dir = $download->zoneinfo_dir;
29              
30             =head1 DESCRIPTION
31              
32             An object of this class represents a local copy of the source of
33             the Olson timezone database, possibly used to build binary tzfiles.
34             The source copy always begins by being downloaded from the canonical
35             repository of the Olson database. This class provides methods to help
36             with extracting useful information from the source.
37              
38             =cut
39              
40             package Time::OlsonTZ::Download;
41              
42 1     1   543 { use 5.008; }
  1         3  
43 1     1   4 use warnings;
  1         2  
  1         27  
44 1     1   3 use strict;
  1         2  
  1         18  
45              
46 1     1   5 use Carp qw(croak);
  1         1  
  1         55  
47 1     1   355 use Encode 1.75 qw(decode FB_CROAK);
  1         7301  
  1         73  
48 1     1   8 use File::Path 2.07 qw(rmtree);
  1         17  
  1         46  
49 1     1   421 use File::Temp 0.22 qw(tempdir);
  1         11785  
  1         59  
50 1     1   228 use IO::Dir 1.03 ();
  1         8264  
  1         29  
51 1     1   5 use IO::File 1.03 ();
  1         16  
  1         22  
52 1     1   285 use IPC::Filter 0.002 qw(filter);
  1         10121  
  1         55  
53 1     1   477 use Net::FTP 3.07 ();
  1         69354  
  1         37  
54 1     1   403 use Params::Classify 0.000 qw(is_undef is_string);
  1         1608  
  1         82  
55 1     1   246 use String::ShellQuote 1.01 qw(shell_quote);
  1         622  
  1         44  
56 1     1   345 use utf8 ();
  1         12  
  1         3149  
57              
58             our $VERSION = "0.007";
59              
60             sub _init_ftp($$) {
61 0     0     my($self, $hostname) = @_;
62 0           $self->{ftp_hostname} = $hostname;
63 0 0         $self->{ftp} = Net::FTP->new($hostname)
64             or die "FTP error on $hostname: $@\n";
65             }
66              
67             sub _ftp_op($$@) {
68 0     0     my($self, $method, @args) = @_;
69             $self->{ftp}->$method(@args)
70 0           or die "FTP error on @{[$self->{ftp_hostname}]}: ".
71 0 0         $self->{ftp}->message;
72             }
73              
74             sub _ftp_login($$$) {
75 0     0     my($self, $hostname, $dirarray) = @_;
76 0           _init_ftp($self, $hostname);
77 0           _ftp_op($self, "login", "anonymous","-anonymous\@");
78 0           _ftp_op($self, "binary");
79 0           _ftp_op($self, "cwd", $_) foreach @$dirarray;
80             }
81              
82             sub _ensure_ftp($) {
83 0     0     my($self) = @_;
84 0 0         unless($self->{ftp}) {
85             # Always use IANA master. Could possibly look at mirrors,
86             # but the IANA site is probably reliable enough.
87 0           _ftp_login($self, "ftp.iana.org", ["tz", "releases"]);
88             }
89             }
90              
91             sub _ftp_versions_in_dir($$) {
92 0     0     my($self, $subdir) = @_;
93 0           _ensure_ftp($self);
94 0 0         my $filenames = _ftp_op($self, "ls", defined($subdir) ? ($subdir) : ());
95 0           my(%cversions, %dversions);
96 0           foreach(@$filenames) {
97 0 0         if(m#(?:\A|/)tzcode([0-9]{2}(?:[0-9]{2})?[a-z])
98             \.tar\.(?:gz|Z)\z#x) {
99 0           $cversions{$1} = $_;
100             }
101 0 0         if(m#(?:\A|/)tzdata([0-9]{2}(?:[0-9]{2})?[a-z])
102             \.tar\.(?:gz|Z)\z#x) {
103 0           $dversions{$1} = $_;
104             }
105             }
106 0           return { code => \%cversions, data => \%dversions };
107             }
108              
109             sub _all_versions($) {
110 0     0     my($self) = @_;
111 0   0       return $self->{all_versions} ||= _ftp_versions_in_dir($self, undef);
112             }
113              
114             sub _cmp_version($$) {
115 0     0     my($a, $b) = @_;
116 0 0         $a = "19".$a if $a =~ /\A[0-9][0-9][a-z]\z/;
117 0 0         $b = "19".$b if $b =~ /\A[0-9][0-9][a-z]\z/;
118 0           return $a cmp $b;
119             }
120              
121             sub _latest_version($) {
122 0     0     my($self) = @_;
123 0           my $latest;
124 0           my $curv = _all_versions($self);
125 0           foreach(keys %{$curv->{data}}) {
  0            
126 0 0 0       $latest = $_
127             if !defined($latest) || _cmp_version($_, $latest) > 0;
128             }
129 0 0         unless(defined $latest) {
130 0           die "no current timezone database found on ".
131 0           "@{[$self->{ftp_hostname}]}\n";
132             }
133 0           return $latest;
134             }
135              
136             =head1 CLASS METHODS
137              
138             =over
139              
140             =item Time::OlsonTZ::Download->latest_version
141              
142             Returns the version number of the latest available version of the Olson
143             timezone database. This requires consulting the repository, but is much
144             cheaper than actually downloading the database.
145              
146             =cut
147              
148             sub latest_version {
149 0     0 1   my($class) = @_;
150 0 0         croak "@{[__PACKAGE__]}->latest_version not called as a class method"
  0            
151             unless is_string($class);
152 0           return _latest_version({});
153             }
154              
155             =back
156              
157             =cut
158              
159             sub DESTROY {
160 0     0     my($self) = @_;
161 0           local($., $@, $!, $^E, $?);
162 0 0         rmtree($self->{cleanup_dir}, 0, 0) if exists $self->{cleanup_dir};
163             }
164              
165             =head1 CONSTRUCTORS
166              
167             =over
168              
169             =item Time::OlsonTZ::Download->new([VERSION])
170              
171             Downloads a copy of the source of the Olson database, and returns an
172             object representing that copy.
173              
174             I, if supplied, is a version number specifying which version of
175             the database is to be downloaded. If not supplied, the latest available
176             version will be downloaded. Version numbers for the Olson database
177             currently consist of a year number and a lowercase letter, such as
178             "C<2010k>". Availability of versions other than the latest is limited:
179             until 2011 there was no official archive, so this module is at the mercy
180             of historical mirror administrators' whims.
181              
182             =cut
183              
184             sub new {
185 0     0 1   my($class, $version) = @_;
186 0 0 0       die "malformed Olson version number `$version'\n"
      0        
187             unless is_undef($version) ||
188             (is_string($version) &&
189             $version =~ /\A[0-9]{2}(?:[0-9]{2})?[a-z]\z/);
190 0           my $self = bless({}, $class);
191 0           my $latest_version = $self->_latest_version;
192 0   0       $version ||= $latest_version;
193 0 0         _cmp_version($version, $latest_version) <= 0
194             or die "Olson DB version $version doesn't exist yet\n";
195 0           $self->{version} = $version;
196 0           $self->{dir} = $self->{cleanup_dir} = tempdir();
197 0           my $vers = $self->_all_versions;
198 0 0         unless(exists $vers->{data}->{$version}) {
199 0           $vers = $self->_all_versions;
200 0 0         unless(exists $vers->{data}->{$version}) {
201 0           die "Olson DB version $version not available on ".
202 0           "@{[$self->{ftp_hostname}]}\n";
203             }
204             }
205 0           my @cversions = sort { _cmp_version($b, $a) }
206 0           grep { _cmp_version($_, $version) <= 0 } keys %{$vers->{code}};
  0            
  0            
207 0 0         die "no matching code available for data version $version\n"
208             unless @cversions;
209 0           my $cversion = $cversions[0];
210 0           $self->{code_version} = $cversion;
211 0           $self->{data_version} = $version;
212 0           $self->_ftp_op("get", $vers->{code}->{$cversion},
213             $self->dir."/tzcode.tar.gz");
214 0           $self->_ftp_op("get", $vers->{data}->{$version},
215             $self->dir."/tzdata.tar.gz");
216 0           delete $self->{ftp};
217 0           delete $self->{ftp_hostname};
218 0           $self->{downloaded} = 1;
219 0           return $self;
220             }
221              
222             =item Time::OlsonTZ::Download->new_from_local_source(ATTR => VALUE, ...)
223              
224             Acquires Olson database source locally, without downloading, and returns
225             an object representing a copy of it ready to use like a download.
226             This can be used to work with locally-modified versions of the database.
227             The following attributes may be given:
228              
229             =over
230              
231             =item B
232              
233             Local directory containing Olson source files. Must be supplied.
234             The entire directory will be copied into a temporary location to be
235             worked on.
236              
237             =item B
238              
239             Olson version number to attribute to the source files. Must be supplied.
240              
241             =item B
242              
243             =item B
244              
245             Olson version number to attribute to the code and data parts of the
246             source files. Both default to the main version number.
247              
248             =back
249              
250             =cut
251              
252             sub new_from_local_source {
253 0     0 1   my $class = shift;
254 0           my $self = bless({}, $class);
255 0           my $srcdir;
256 0           while(@_) {
257 0           my $attr = shift;
258 0           my $value = shift;
259 0 0         if($attr eq "source_dir") {
    0          
260 0 0         croak "source directory specified redundantly"
261             if defined $srcdir;
262 0 0         croak "source directory must be a string"
263             unless is_string($value);
264 0           $srcdir = $value;
265             } elsif($attr =~ /\A(?:(?:code|data)_)?version\z/) {
266             croak "$attr specified redundantly"
267 0 0         if exists $self->{$attr};
268 0 0 0       die "malformed Olson version number `$value'\n"
269             unless is_string($value) &&
270             $value =~ /\A
271             [0-9]{2}(?:[0-9]{2})?[a-z]
272             \z/x;
273 0           $self->{$attr} = $value;
274             } else {
275 0           croak "unrecognised attribute `$attr'";
276             }
277             }
278 0 0         croak "source directory not specified" unless defined $srcdir;
279 0 0         croak "version number not specified" unless exists $self->{version};
280 0           foreach(qw(code_version data_version)) {
281 0 0         $self->{$_} = $self->{version} unless exists $self->{$_};
282             }
283 0           my $tdir = tempdir();
284 0           $self->{cleanup_dir} = $tdir;
285 0           $self->{dir} = "$tdir/c";
286 0           filter("", "cp -pr @{[shell_quote($srcdir)]} ".
  0            
287 0           "@{[shell_quote($self->{dir})]}");
288 0           $self->{downloaded} = 1;
289 0           $self->{unpacked} = 1;
290 0           return $self;
291             }
292              
293             =back
294              
295             =head1 METHODS
296              
297             =head2 Basic information
298              
299             =over
300              
301             =item $download->version
302              
303             Returns the version number of the database of which a copy is represented
304             by this object.
305              
306             The database consists of code and data parts which are updated
307             semi-independently. The latest version of the database as a whole
308             consists of the latest version of the code and the latest version of
309             the data. If both parts are updated at once then they will both get the
310             same version number, and that will be the version number of the database
311             as a whole. However, in general they may be updated at different times,
312             and a single version of the database may be made up of code and data
313             parts that have different version numbers. The version number of the
314             database as a whole will then be the version number of the most recently
315             updated part.
316              
317             =cut
318              
319             sub version {
320 0     0 1   my($self) = @_;
321             die "Olson database version not determined\n"
322 0 0         unless exists $self->{version};
323 0           return $self->{version};
324             }
325              
326             =item $download->code_version
327              
328             Returns the version number of the code part of the database of which a
329             copy is represented by this object.
330              
331             =cut
332              
333             sub code_version {
334 0     0 1   my($self) = @_;
335             die "Olson database code version not determined\n"
336 0 0         unless exists $self->{code_version};
337 0           return $self->{code_version};
338             }
339              
340             =item $download->data_version
341              
342             Returns the version number of the data part of the database of which a
343             copy is represented by this object.
344              
345             =cut
346              
347             sub data_version {
348 0     0 1   my($self) = @_;
349             die "Olson database data version not determined\n"
350 0 0         unless exists $self->{data_version};
351 0           return $self->{data_version};
352             }
353              
354             =item $download->dir
355              
356             Returns the pathname of the directory in which the files of this download
357             are located. With this method, there is no guarantee of particular
358             files being available in the directory; see other directory-related
359             methods below that establish particular directory contents.
360              
361             The directory does not move during the lifetime of the download object:
362             this method will always return the same pathname. The directory and
363             all of its contents, including subdirectories, will be automatically
364             deleted when this object is destroyed. This will be when the main
365             program terminates, if it is not otherwise destroyed. Any files that
366             it is desired to keep must be copied to a permanent location.
367              
368             =cut
369              
370             sub dir {
371 0     0 1   my($self) = @_;
372             die "download directory not created\n"
373 0 0         unless exists $self->{dir};
374 0           return $self->{dir};
375             }
376              
377             sub _ensure_downloaded {
378 0     0     my($self) = @_;
379             die "can't use download because downloading failed\n"
380 0 0         unless $self->{downloaded};
381             }
382              
383             sub _ensure_unpacked {
384 0     0     my($self) = @_;
385 0 0         unless($self->{unpacked}) {
386 0           $self->_ensure_downloaded;
387 0           foreach my $part (qw(tzcode tzdata)) {
388 0           filter("", "cd @{[shell_quote($self->dir)]} && ".
  0            
389             "gunzip < $part.tar.gz | tar xf -");
390             }
391 0           $self->{unpacked} = 1;
392             }
393             }
394              
395             =item $download->unpacked_dir
396              
397             Returns the pathname of the directory in which the downloaded source
398             files have been unpacked. This is the local temporary directory used
399             by this download. This method will unpack the files there if they have
400             not already been unpacked.
401              
402             =cut
403              
404             sub unpacked_dir {
405 0     0 1   my($self) = @_;
406 0           $self->_ensure_unpacked;
407 0           return $self->dir;
408             }
409              
410             =back
411              
412             =head2 Zone metadata
413              
414             =over
415              
416             =cut
417              
418             sub _ensure_canonnames_and_rawlinks {
419 0     0     my($self) = @_;
420 0 0         unless(exists $self->{canonical_names}) {
421 0           my %seen;
422             my %canonnames;
423 0           my %rawlinks;
424 0           foreach(@{$self->data_files}) {
  0            
425 0 0         my $fh = IO::File->new($_, "r")
426             or die "data file $_ unreadable: $!\n";
427 0           local $/ = "\n";
428 0           while(defined(my $line = $fh->getline)) {
429 0 0         if($line =~ /\AZone[ \t]+([!-~]+)[ \t\n]/) {
    0          
430 0           my $name = $1;
431             die "zone $name multiply defined\n"
432 0 0         if exists $seen{$name};
433 0           $seen{$name} = undef;
434 0           $canonnames{$name} = undef;
435             } elsif($line =~ /\ALink[\ \t]+
436             ([!-~]+)[\ \t]+
437             ([!-~]+)[\ \t\n]/x) {
438 0           my($target, $name) = ($1, $2);
439             die "zone $name multiply defined\n"
440 0 0         if exists $seen{$name};
441 0           $seen{$name} = undef;
442 0           $rawlinks{$name} = $target;
443             }
444             }
445             }
446 0           $self->{raw_links} = \%rawlinks;
447 0           $self->{canonical_names} = \%canonnames;
448             }
449             }
450              
451             =item $download->canonical_names
452              
453             Returns the set of timezone names that this version of the database
454             defines as canonical. These are the timezone names that are directly
455             associated with a set of observance data. The return value is a reference
456             to a hash, in which the keys are the canonical timezone names and the
457             values are all C.
458              
459             =cut
460              
461             sub canonical_names {
462 0     0 1   my($self) = @_;
463 0           $self->_ensure_canonnames_and_rawlinks;
464 0           return $self->{canonical_names};
465             }
466              
467             =item $download->link_names
468              
469             Returns the set of timezone names that this version of the database
470             defines as links. These are the timezone names that are aliases for
471             other names. The return value is a reference to a hash, in which the
472             keys are the link timezone names and the values are all C.
473              
474             =cut
475              
476             sub link_names {
477 0     0 1   my($self) = @_;
478 0 0         unless(exists $self->{link_names}) {
479             $self->{link_names} =
480 0           { map { ($_ => undef) } keys %{$self->raw_links} };
  0            
  0            
481             }
482 0           return $self->{link_names};
483             }
484              
485             =item $download->all_names
486              
487             Returns the set of timezone names that this version of the database
488             defines. These are the L and the L.
489             The return value is a reference to a hash, in which the keys are the
490             timezone names and the values are all C.
491              
492             =cut
493              
494             sub all_names {
495 0     0 1   my($self) = @_;
496 0 0         unless(exists $self->{all_names}) {
497             $self->{all_names} = {
498 0           %{$self->canonical_names},
499 0           %{$self->link_names},
  0            
500             };
501             }
502 0           return $self->{all_names};
503             }
504              
505             =item $download->raw_links
506              
507             Returns details of the timezone name links in this version of the
508             database. Each link defines one timezone name as an alias for some
509             other timezone name. The return value is a reference to a hash, in
510             which the keys are the aliases and each value is the preferred timezone
511             name to which that alias directly refers. It is possible for an alias
512             to point to another alias, or to point to a non-existent name. For a
513             more processed view of links, see L.
514              
515             =cut
516              
517             sub raw_links {
518 0     0 1   my($self) = @_;
519 0           $self->_ensure_canonnames_and_rawlinks;
520 0           return $self->{raw_links};
521             }
522              
523             =item $download->threaded_links
524              
525             Returns details of the timezone name links in this version of the
526             database. Each link defines one timezone name as an alias for some
527             other timezone name. The return value is a reference to a hash, in
528             which the keys are the aliases and each value is the canonical name of
529             the timezone to which that alias refers. All such canonical names can
530             be found in the L hash.
531              
532             =cut
533              
534             sub threaded_links {
535 0     0 1   my($self) = @_;
536 0 0         unless(exists $self->{threaded_links}) {
537 0           my $raw_links = $self->raw_links;
538 0           my %links = %$raw_links;
539 0           while(1) {
540 0           my $done_any;
541 0           foreach(keys %links) {
542 0 0         next unless exists $raw_links->{$links{$_}};
543 0           $links{$_} = $raw_links->{$links{$_}};
544 0 0         die "circular link at $_\n" if $links{$_} eq $_;
545 0           $done_any = 1;
546             }
547 0 0         last unless $done_any;
548             }
549 0           my $canonical_names = $self->canonical_names;
550 0           foreach(keys %links) {
551             die "link from $_ to non-existent zone $links{$_}\n"
552 0 0         unless exists $canonical_names->{$links{$_}};
553             }
554 0           $self->{threaded_links} = \%links;
555             }
556 0           return $self->{threaded_links};
557             }
558              
559             =item $download->country_selection
560              
561             Returns information about how timezones relate to countries, intended
562             to aid humans in selecting a geographical timezone. This information
563             is derived from the C and C files in the database
564             source.
565              
566             The return value is a reference to a hash, keyed by (ISO 3166 alpha-2
567             uppercase) country code. The value for each country is a hash containing
568             these values:
569              
570             =over
571              
572             =item B
573              
574             The ISO 3166 alpha-2 uppercase country code.
575              
576             =item B
577              
578             An English name for the country, possibly in a modified form, optimised
579             to help humans find the right entry in alphabetical lists. This is
580             not necessarily identical to the country's standard short or long name.
581             (For other forms of the name, consult a database of countries, keying
582             by the country code.)
583              
584             =item B
585              
586             Information about the regions of the country that use distinct
587             timezones. This is a hash, keyed by English description of the region.
588             The description is empty if there is only one region. The value for
589             each region is a hash containing these values:
590              
591             =over
592              
593             =item B
594              
595             Brief English description of the region, used to distinguish between
596             the regions of a single country. Empty string if the country has only
597             one region for timezone purposes. (This is the same string used as the
598             key in the B hash.)
599              
600             =item B
601              
602             Name of the Olson timezone used in this region. This is not necessarily
603             a canonical name (it may be a link). Typically, where there are aliases
604             or identical canonical zones, a name is chosen that refers to a location
605             in the country of interest. It is not guaranteed that the named timezone
606             exists in the database (though it always should).
607              
608             =item B
609              
610             Geographical coordinates of some point within the location referred to in
611             the timezone name. This is a latitude and longitude, in ISO 6709 format.
612              
613             =back
614              
615             =back
616              
617             This data structure is intended to help a human select the appropriate
618             timezone based on political geography, specifically working from a
619             selection of country. It is of essentially no use for any other purpose.
620             It is not strictly guaranteed that every geographical timezone in the
621             database is listed somewhere in this structure, so it is of limited use
622             in providing information about an already-selected timezone. It does
623             not include non-geographic timezones at all. It also does not claim
624             to be a comprehensive list of countries, and does not make any claims
625             regarding the political status of any entity listed: the "country"
626             classification is loose, and used only for identification purposes.
627              
628             =cut
629              
630             sub country_selection {
631 0     0 1   my($self) = @_;
632 0 0         unless(exists $self->{country_selection}) {
633 0           my $itabname = $self->unpacked_dir."/iso3166.tab";
634 0           my $ztabname = $self->unpacked_dir."/zone.tab";
635 0           local $/ = "\n";
636 0           my %itab;
637 0 0         my $itabfh = IO::File->new($itabname, "r")
638             or die "data file $itabname unreadable: $!\n";
639 0           while(defined(my $line = $itabfh->getline)) {
640 0           $line = decode("UTF-8", $line, FB_CROAK);
641 0           utf8::upgrade($line);
642 0 0         if($line =~ /\A([A-Z]{2})\t(\S[^\t\n]*\S)\n\z/) {
    0          
643             die "duplicate $itabname entry for $1\n"
644 0 0         if exists $itab{$1};
645 0           $itab{$1} = $2;
646             } elsif($line !~ /\A#[^\n]*\n\z/) {
647 0           die "bad line in $itabname\n";
648             }
649             }
650 0           my %sel;
651 0 0         my $ztabfh = IO::File->new($ztabname, "r")
652             or die "data file $ztabname unreadable: $!\n";
653 0           while(defined(my $line = $ztabfh->getline)) {
654 0 0         if($line =~ /\A([A-Z]{2})
    0          
655             \t([-+][0-9]{4}(?:[0-9]{2})?
656             [-+][0-9]{5}(?:[0-9]{2})?)
657             \t([!-~]+)
658             (?:\t([!-~][ -~]*[!-~]))?
659             \n\z/x) {
660 0           my($cc, $coord, $zn, $reg) = ($1, $2, $3, $4);
661 0 0         $reg = "" unless defined $reg;
662 0   0       $sel{$cc} ||= { regions => {} };
663             die "duplicate $ztabname entry for $cc\n"
664 0 0         if exists $sel{$cc}->{regions}->{$reg};
665 0           $sel{$cc}->{regions}->{$reg} = {
666             olson_description => $reg,
667             timezone_name => $zn,
668             location_coords => $coord,
669             };
670             } elsif($line !~ /\A#[^\n]*\n\z/) {
671 0           die "bad line in $ztabname\n";
672             }
673             }
674 0           foreach(keys %sel) {
675 0 0         die "unknown country $_\n" unless exists $itab{$_};
676 0           $sel{$_}->{alpha2_code} = $_;
677 0           $sel{$_}->{olson_name} = $itab{$_};
678             die "bad region description in $_\n"
679 0           if keys(%{$sel{$_}->{regions}}) == 1 xor
680 0 0 0       exists($sel{$_}->{regions}->{""});
681             }
682 0           $self->{country_selection} = \%sel;
683             }
684 0           return $self->{country_selection};
685             }
686              
687             =back
688              
689             =head2 Compiling zone data
690              
691             =over
692              
693             =item $download->data_files
694              
695             Returns a reference to an array containing the pathnames of all the
696             source data files in the database. These are located in the local
697             temporary directory used by this download.
698              
699             There is approximately one source data file per continent. Each data
700             file, in a human-editable textual format, describes the known civil
701             timezones used on the file's continent. The textual format is not
702             standardised, and is peculiar to the Olson database, so parsing it
703             directly is in principle a dubious proposition, but in practice it is
704             very stable.
705              
706             =cut
707              
708             sub data_files {
709 0     0 1   my($self) = @_;
710 0 0         unless(exists $self->{data_files}) {
711 0           my $list;
712 0           $self->_ensure_unpacked;
713 0           my $mf = IO::File->new($self->dir."/Makefile", "r");
714 0 0         my $mfc = $mf ? do { local $/ = undef; $mf->getline } : "";
  0            
  0            
715 0 0         if($mfc =~ m#
    0          
716             \ncheck_links:[\ \t]+checklinks\.awk[\ \t]+
717             \$\(TDATA\)[\ \t]*\n
718             \t[\ \t]*\$\(AWK\)\ -f\ checklinks\.awk\ \$\(TDATA\)\n
719             \t[\ \t]*\$\(AWK\)\ -f\ checklinks\.awk\ tzdata\.zi\n\n
720             #x) {
721 0           $list = filter("", "cd @{[shell_quote($self->dir)]} ".
  0            
722             "&& make check_links ".
723             "AWK='\@echo x' VERSION_DEPS=");
724 0           $list =~ s#\Ax -f checklinks\.awk ##;
725 0           $list =~ s#\n.*\z##s;
726             } elsif($mfc =~ m#
727             \nzonenames:[\ \t]+\$\(TDATA\)[\ \t]*\n
728             \t[\ \t]*\@\$\(AWK\)\ '
729             /\^Zone/\ \{\ print\ \$\$2\ \}
730             \ /\^Link/\ \{\ print\ \$\$3\ \}
731             '\ \$\(TDATA\)[\ \t]*\n\n
732             #x) {
733 0           $list = filter("", "cd @{[shell_quote($self->dir)]} ".
  0            
734             "&& make zonenames ".
735             "AWK=echo VERSION_DEPS=");
736 0           $list =~ s#\A.*\{.*\} ##s;
737 0           $list =~ s#\n\z##;
738             } else {
739 0           die "don't know how to extract data file names ".
740             "from this form of Olson distribution";
741             }
742             $self->{data_files} =
743 0           [ map { $self->dir."/".$_ } split(/ /, $list) ];
  0            
744             }
745 0           return $self->{data_files};
746             }
747              
748             sub _ensure_zic_built {
749 0     0     my($self) = @_;
750 0 0         unless($self->{zic_built}) {
751 0           $self->_ensure_unpacked;
752 0           filter("", "cd @{[shell_quote($self->dir)]} && ".
  0            
753             "make zic VERSION_DEPS=");
754 0           $self->{zic_built} = 1;
755             }
756             }
757              
758             =item $download->zic_exe
759              
760             Returns the pathname of the C executable that has been built from
761             the downloaded source. This is located in the local temporary directory
762             used by this download. This method will build C if it has not
763             already been built.
764              
765             =cut
766              
767             sub zic_exe {
768 0     0 1   my($self) = @_;
769 0           $self->_ensure_zic_built;
770 0           return $self->dir."/zic";
771             }
772              
773             =item $download->zoneinfo_dir([OPTIONS])
774              
775             Returns the pathname of the directory containing binary tzfiles (in
776             L format) that have been generated from the downloaded source.
777             This is located in the local temporary directory used by this download,
778             and the files within it have names that match the timezone names (as
779             returned by L). This method will generate the tzfiles if
780             they have not already been generated.
781              
782             The optional parameter I controls which kind of tzfiles are
783             desired. If supplied, it must be a reference to a hash, in which these
784             keys are permitted:
785              
786             =over
787              
788             =item B
789              
790             Truth value, controls whether the tzfiles incorporate information about
791             known leap seconds offsets that account for the known leap seconds.
792             If false (which is the default), the tzfiles have no knowledge of leap
793             seconds, and are intended to be used on a system where C is some
794             flavour of UT (as is conventional on Unix and is the POSIX standard).
795             If true, the tzfiles know about leap seconds that have occurred between
796             1972 and the date of the database, and are intended to be used on a
797             system where C is (from 1972 onwards) a linear count of TAI
798             seconds (which is a non-standard arrangement).
799              
800             =back
801              
802             =cut
803              
804             sub _foreach_nondir_under($$);
805             sub _foreach_nondir_under($$) {
806 0     0     my($dir, $callback) = @_;
807 0 0         my $dh = IO::Dir->new($dir) or die "can't examine $dir: $!\n";
808 0           while(defined(my $ent = $dh->read)) {
809 0 0         next if $ent =~ /\A\.\.?\z/;
810 0           my $entpath = $dir."/".$ent;
811 0 0         if(-d $entpath) {
812 0           _foreach_nondir_under($entpath, $callback);
813             } else {
814 0           $callback->($entpath);
815             }
816             }
817             }
818              
819             sub zoneinfo_dir {
820 0     0 1   my($self, $options) = @_;
821 0 0         $options = {} if is_undef($options);
822 0           foreach(keys %$options) {
823 0 0         die "bad option `$_'\n" unless /\Aleaps\z/;
824             }
825 0 0         my $type = $options->{leaps} ? "right" : "posix";
826 0           my $zidir = $self->dir."/zoneinfo_$type";
827 0 0         unless($self->{"zoneinfo_built_$type"}) {
828 0           filter("", "cd @{[shell_quote($self->unpacked_dir)]} && ".
  0            
829 0           "make ${type}_only TZDIR=@{[shell_quote($zidir)]} ".
830             "VERSION_DEPS=");
831 0           my %expect_names = %{$self->all_names};
  0            
832 0           my $skiplen = length($zidir) + 1;
833             _foreach_nondir_under($zidir, sub {
834 0     0     my($fname) = @_;
835 0           my $lname = substr($fname, $skiplen);
836 0 0         unless(exists $expect_names{$lname}) {
837 0           die "unexpected file $lname\n";
838             }
839 0           delete $expect_names{$lname};
840 0           });
841 0 0         if(keys %expect_names) {
842 0           die "missing file @{[(sort keys %expect_names)[0]]}\n";
  0            
843             }
844 0           $self->{"zoneinfo_built_$type"} = 1;
845             }
846 0           return $zidir;
847             }
848              
849             =back
850              
851             =head1 BUGS
852              
853             Most of what this class does will only work on Unix platforms. This is
854             largely because the Olson database source is heavily Unix-oriented.
855              
856             It also won't be much good if you're not connected to the Internet.
857              
858             This class is liable to break if the format of the Olson database source
859             ever changes substantially. If that happens, an update of this class
860             will be required. It should at least recognise that it can't perform,
861             rather than do the wrong thing.
862              
863             =head1 SEE ALSO
864              
865             L,
866             L,
867             L
868              
869             =head1 AUTHOR
870              
871             Andrew Main (Zefram)
872              
873             =head1 COPYRIGHT
874              
875             Copyright (C) 2010, 2011, 2012, 2017
876             Andrew Main (Zefram)
877              
878             =head1 LICENSE
879              
880             This module is free software; you can redistribute it and/or modify it
881             under the same terms as Perl itself.
882              
883             =cut
884              
885             1;