File Coverage

blib/lib/DateTime/TimeZone/Tzfile.pm
Criterion Covered Total %
statement 240 244 98.3
branch 95 112 84.8
condition 33 50 66.0
subroutine 34 34 100.0
pod 11 11 100.0
total 413 451 91.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             DateTime::TimeZone::Tzfile - tzfile (zoneinfo) timezone files
4              
5             =head1 SYNOPSIS
6              
7             use DateTime::TimeZone::Tzfile;
8              
9             $tz = DateTime::TimeZone::Tzfile->new(
10             name => "local timezone",
11             filename => "/etc/localtime");
12             $tz = DateTime::TimeZone::Tzfile->new("/etc/localtime");
13              
14             if($tz->is_floating) { ...
15             if($tz->is_utc) { ...
16             if($tz->is_olson) { ...
17             $category = $tz->category;
18             $tz_string = $tz->name;
19              
20             if($tz->has_dst_changes) { ...
21             if($tz->is_dst_for_datetime($dt)) { ...
22             $offset = $tz->offset_for_datetime($dt);
23             $abbrev = $tz->short_name_for_datetime($dt);
24             $offset = $tz->offset_for_local_datetime($dt);
25              
26             =head1 DESCRIPTION
27              
28             An instance of this class represents a timezone that was encoded in a
29             file in the L format. These can express arbitrary patterns
30             of offsets from Universal Time, changing over time. Offsets and change
31             times are limited to a resolution of one second.
32              
33             This class implements the L interface, so that its
34             instances can be used with L objects.
35              
36             =cut
37              
38             package DateTime::TimeZone::Tzfile;
39              
40 5     5   3719 { use 5.006; }
  5         18  
41 5     5   31 use warnings;
  5         11  
  5         159  
42 5     5   26 use strict;
  5         11  
  5         121  
43              
44 5     5   25 use Carp qw(croak);
  5         20  
  5         318  
45 5     5   1430 use Date::ISO8601 0.000 qw(present_ymd);
  5         9964  
  5         309  
46 5     5   2203 use IO::File 1.13;
  5         34839  
  5         777  
47 5     5   45 use IO::Handle 1.08;
  5         88  
  5         299  
48 5     5   3006 use Params::Classify 0.000 qw(is_undef is_string is_ref);
  5         12002  
  5         773  
49              
50             our $VERSION = "0.011";
51              
52             my $rdn_epoch_cjdn = 1721425;
53              
54             # _fdiv(A, B), _fmod(A, B): divide A by B, flooring remainder
55             #
56             # B must be a positive Perl integer. A must be a Perl integer.
57              
58             sub _fdiv($$) {
59 14406     14406   23050 my($a, $b) = @_;
60 14406 100       24261 if($a < 0) {
61 5     5   53 use integer;
  5         15  
  5         43  
62 2507         5617 return -(($b - 1 - $a) / $b);
63             } else {
64 5     5   258 use integer;
  5         17  
  5         26  
65 11899         21699 return $a / $b;
66             }
67             }
68              
69 14406     14406   27225 sub _fmod($$) { $_[0] % $_[1] }
70              
71             =head1 CONSTRUCTOR
72              
73             =over
74              
75             =item DateTime::TimeZone::Tzfile->new(ATTR => VALUE, ...)
76              
77             Reads and parses a L format file, then constructs and returns
78             a L-compatible timezone object that implements the timezone
79             encoded in the file. The following attributes may be given:
80              
81             =over
82              
83             =item B
84              
85             Name for the timezone object. This will be returned by the C
86             method described below, and will be included in certain error messages.
87              
88             =item B
89              
90             The string or C that will be returned by the C method
91             described below. Default C.
92              
93             =item B
94              
95             The truth value that will be returned by the C method described
96             below. Default false.
97              
98             =item B
99              
100             Name of the file from which to read the timezone data. The filename
101             must be understood by L.
102              
103             =item B
104              
105             An L object from which the timezone data can be read.
106             This does not need to be a regular seekable file; it is read sequentially.
107             After the constructor has finished, the handle can still be used to read
108             any data that follows the timezone data.
109              
110             =back
111              
112             Either a filename or filehandle must be given. If a timezone name is not
113             given, then the filename is used instead if supplied; a timezone name
114             must be given explicitly if no filename is given.
115              
116             =item DateTime::TimeZone::Tzfile->new(FILENAME)
117              
118             Simpler way to invoke the above constructor in the usual case. Only the
119             filename is given; this will also be used as the timezone name.
120              
121             =cut
122              
123             sub _saferead($$) {
124 16532     16532   28420 my($fh, $len) = @_;
125 16532         22886 my $data;
126 16532         34626 my $rlen = $fh->read($data, $len);
127 16532 50       92743 croak "can't read tzfile: $!" unless defined($rlen);
128 16532 50       29475 croak "bad tzfile: premature EOF" unless $rlen == $len;
129 16532         44944 return $data;
130             }
131              
132 9153     9153   15045 sub _read_u32($) { unpack("N", _saferead($_[0], 4)) }
133              
134             sub _read_s32($) {
135 6040     6040   9999 my $uval = _read_u32($_[0]);
136 6040 100       13607 return ($uval & 0x80000000) ? ($uval & 0x7fffffff) - 0x80000000 :
137             $uval;
138             }
139              
140 6329     6329   10737 sub _read_u8($) { ord(_saferead($_[0], 1)) }
141              
142             my $unix_epoch_rdn = 719163;
143              
144             sub _read_tm32($) {
145 2866     2866   4778 my $t = _read_s32($_[0]);
146 2866         5523 return [ $unix_epoch_rdn + _fdiv($t, 86400), _fmod($t, 86400) ];
147             }
148              
149             sub _read_tm64($) {
150 2885     2885   4755 my($fh) = @_;
151 2885         4623 my $th = _read_s32($fh);
152 2885         5281 my $tl = _read_u32($fh);
153 2885         5548 my $dh = _fdiv($th, 86400);
154 2885         5037 $th = (_fmod($th, 86400) << 10) | ($tl >> 22);
155 2885         4697 my $d2 = _fdiv($th, 86400);
156 2885         4919 $th = (_fmod($th, 86400) << 10) | (($tl >> 12) & 0x3ff);
157 2885         4736 my $d3 = _fdiv($th, 86400);
158 2885         4788 $th = (_fmod($th, 86400) << 12) | ($tl & 0xfff);
159 2885         4759 my $d4 = _fdiv($th, 86400);
160 2885         4869 $th = _fmod($th, 86400);
161 2885         5173 my $d = $dh * 4294967296 + $d2 * 4194304 + (($d3 << 12) + $d4);
162 2885         8218 return [ $unix_epoch_rdn + $d, $th ];
163             }
164              
165             my $factory_abbr = "Local time zone must be set--see zic manual page";
166              
167             sub new {
168 43     43 1 28747 my $class = shift;
169 43 100       199 unshift @_, "filename" if @_ == 1;
170 43         126 my $self = bless({}, $class);
171 43         107 my($filename, $fh);
172 43         137 while(@_) {
173 59         119 my $attr = shift;
174 59         122 my $value = shift;
175 59 100       311 if($attr eq "name") {
    100          
    100          
    100          
    100          
176             croak "timezone name specified redundantly"
177 13 100       126 if exists $self->{name};
178 12 100       535 croak "timezone name must be a string"
179             unless is_string($value);
180 8         35 $self->{name} = $value;
181             } elsif($attr eq "category") {
182             croak "category value specified redundantly"
183 7 100       108 if exists $self->{category};
184 6 100 100     453 croak "category value must be a string or undef"
185             unless is_undef($value) || is_string($value);
186 3         11 $self->{category} = $value;
187             } elsif($attr eq "is_olson") {
188             croak "is_olson flag specified redundantly"
189 4 100       145 if exists $self->{is_olson};
190 3         13 $self->{is_olson} = !!$value;
191             } elsif($attr eq "filename") {
192 27 100 100     377 croak "filename specified redundantly"
193             if defined($filename) || defined($fh);
194 25 100       679 croak "filename must be a string"
195             unless is_string($value);
196 21         74 $filename = $value;
197             } elsif($attr eq "filehandle") {
198 7 100 100     229 croak "filehandle specified redundantly"
199             if defined($filename) || defined($fh);
200 5         19 $fh = $value;
201             } else {
202 1         99 croak "unrecognised attribute `$attr'";
203             }
204             }
205 24 100 100     282 croak "file not specified" unless defined($filename) || defined($fh);
206 22 100       100 unless(exists $self->{name}) {
207 16 100       157 croak "timezone name not specified" unless defined $filename;
208 15         44 $self->{name} = $filename;
209             }
210 21 100       77 unless(exists $self->{category}) {
211 19         55 $self->{category} = undef;
212             }
213 21 100       70 unless(exists $self->{is_olson}) {
214 19         55 $self->{is_olson} = !!0;
215             }
216 21 100       72 if(defined $filename) {
217 19 100 66     174 ($fh = IO::File->new($filename, "r")) && $fh->binmode
218             or croak "can't read $filename: $!";
219             }
220 20 100       2781 croak "bad tzfile: wrong magic number"
221             unless _saferead($fh, 4) eq "TZif";
222 19         64 my $fmtversion = _saferead($fh, 1);
223 19 50       99 croak "bad tzfile: malformed version number"
224             unless $fmtversion =~ /\A[2-9\0]\z/;
225 19         63 _saferead($fh, 15);
226             my($ttisgmtcnt, $ttisstdcnt, $leapcnt, $timecnt, $typecnt, $charcnt) =
227 19         61 map { _read_u32($fh) } 1 .. 6;
  114         230  
228 19 50       71 croak "bad tzfile: no local time types" if $typecnt == 0;
229 19         155 my @trn_times = map { _read_tm32($fh) } 1 .. $timecnt;
  2866         4925  
230 19         197 my @obs_types = map { _read_u8($fh) } 1 .. $timecnt;
  2866         5415  
231             my @types = map {
232 19         153 [ _read_s32($fh), !!_read_u8($fh), _read_u8($fh) ]
  136         305  
233             } 1 .. $typecnt;
234 19         64 my $chars = _saferead($fh, $charcnt);
235 19         90 for(my $i = $leapcnt; $i--; ) { _saferead($fh, 8); }
  0         0  
236 19         130 for(my $i = $ttisstdcnt; $i--; ) { _saferead($fh, 1); }
  136         263  
237 19         69 for(my $i = $ttisgmtcnt; $i--; ) { _saferead($fh, 1); }
  136         235  
238 19         34 my $late_rule;
239 19 50       71 if($fmtversion ge "2") {
240 19 50       93 croak "bad tzfile: wrong magic number"
241             unless _saferead($fh, 4) eq "TZif";
242 19         71 _saferead($fh, 16);
243             ($ttisgmtcnt, $ttisstdcnt, $leapcnt,
244             $timecnt, $typecnt, $charcnt) =
245 19         60 map { _read_u32($fh) } 1 .. 6;
  114         217  
246 19 50       79 croak "bad tzfile: no local time types" if $typecnt == 0;
247 19         115 @trn_times = map { _read_tm64($fh) } 1 .. $timecnt;
  2885         5042  
248 19         227 @obs_types = map { _read_u8($fh) } 1 .. $timecnt;
  2885         5046  
249             @types = map {
250 19         178 [ _read_s32($fh), !!_read_u8($fh), _read_u8($fh) ]
  153         349  
251             } 1 .. $typecnt;
252 19         61 $chars = _saferead($fh, $charcnt);
253 19         123 for(my $i = $leapcnt; $i--; ) { _saferead($fh, 12); }
  0         0  
254 19         74 for(my $i = $ttisstdcnt; $i--; ) { _saferead($fh, 1); }
  153         278  
255 19         73 for(my $i = $ttisgmtcnt; $i--; ) { _saferead($fh, 1); }
  153         262  
256 19 50       54 croak "bad tzfile: missing newline"
257             unless _saferead($fh, 1) eq "\x0a";
258 19         56 $late_rule = "";
259 19         44 while(1) {
260 319         529 my $c = _saferead($fh, 1);
261 319 100       654 last if $c eq "\x0a";
262 300         547 $late_rule .= $c;
263             }
264             }
265 19         463 $fh = undef;
266 19         109 for(my $i = @trn_times - 1; $i-- > 0; ) {
267 2866 50 33     7917 unless(($trn_times[$i]->[0] <=> $trn_times[$i+1]->[0] ||
268             $trn_times[$i]->[1] <=> $trn_times[$i+1]->[1]) == -1) {
269 0         0 croak "bad tzfile: unsorted change times";
270             }
271             }
272 19         49 my $first_std_type_index;
273             my %offsets;
274 19         82 for(my $i = 0; $i != $typecnt; $i++) {
275 153         297 my $abbrind = $types[$i]->[2];
276 153 50       319 croak "bad tzfile: invalid abbreviation index"
277             if $abbrind > $charcnt;
278 153         357 pos($chars) = $abbrind;
279 153         535 $chars =~ /\G([^\0]*)/g;
280 153         427 $types[$i]->[2] = $1;
281 153 50 66     412 $first_std_type_index = $i
282             if !defined($first_std_type_index) && !$types[$i]->[1];
283 153 100       363 $self->{has_dst} = 1 if $types[$i]->[1];
284 153 100 66     513 if($types[$i]->[0] == 0 && !$types[$i]->[1] &&
      100        
285             $types[$i]->[2] eq "zzz") {
286             # "zzz" means the zone is not defined at this time,
287             # due for example to the location being uninhabited
288 6         23 $types[$i] = "zone disuse";
289             } else {
290 147         512 $offsets{$types[$i]->[0]} = undef;
291             }
292             }
293 19 50       149 unshift @obs_types,
294             defined($first_std_type_index) ? $first_std_type_index : 0;
295 19         65 foreach my $obs_type (@obs_types) {
296 2904 50       4878 croak "bad tzfile: invalid local time type index"
297             if $obs_type >= $typecnt;
298 2904         4547 $obs_type = $types[$obs_type];
299             }
300 19 0 33     171 if(defined($late_rule) && $late_rule eq "<$factory_abbr>0" &&
      33        
      33        
      0        
      0        
301             defined($obs_types[-1]) && $obs_types[-1]->[0] == 0 &&
302             !$obs_types[-1]->[1] &&
303             $obs_types[-1]->[2] eq $factory_abbr) {
304             # This bizarre timezone abbreviation is used in the Factory
305             # timezone in the Olson database. It's not valid in a
306             # SysV-style TZ value, because it contains spaces, but zic
307             # puts it into one anyway because the file format demands
308             # it. DT:TZ:SystemV would object, so as a special
309             # exception we ignore the TZ value in this case.
310 0         0 $late_rule = undef;
311             }
312 19 50       65 if(defined $late_rule) {
313 19 100       132 if($late_rule eq "") {
    100          
314 2         8 $obs_types[-1] = "missing data";
315             } elsif($late_rule =~
316             /\A(?:zzz|)[-+]?00?(?::00(?::00)?)?\z/) {
317 2         7 $obs_types[-1] = "zone disuse";
318             } else {
319 15         3819 require DateTime::TimeZone::SystemV;
320 15         15993 DateTime::TimeZone::SystemV->VERSION("0.009");
321 15 100       154 $obs_types[-1] =
322             DateTime::TimeZone::SystemV->new(
323             system => $fmtversion ge "3" ?
324             "tzfile3" : "posix",
325             recipe => $late_rule);
326             }
327             }
328 19         3576 $self->{trn_times} = \@trn_times;
329 19         63 $self->{obs_types} = \@obs_types;
330 19         154 $self->{offsets} = [ sort { $a <=> $b } keys %offsets ];
  91         256  
331 19         900 return $self;
332             }
333              
334             sub _present_rdn_sod($$) {
335 60     60   126 my($rdn, $sod) = @_;
336 60         195 return sprintf("%sT%02d:%02d:%02d",
337             present_ymd($rdn + $rdn_epoch_cjdn),
338             int($sod/3600), int($sod/60)%60, $sod%60);
339             }
340              
341             =back
342              
343             =head1 METHODS
344              
345             These methods are all part of the L interface.
346             See that class for the general meaning of these methods; the documentation
347             below only comments on the specific behaviour of this class.
348              
349             =head2 Identification
350              
351             =over
352              
353             =item $tz->is_floating
354              
355             Returns false.
356              
357             =cut
358              
359 3     3 1 2475 sub is_floating { 0 }
360              
361             =item $tz->is_utc
362              
363             Returns false.
364              
365             =cut
366              
367 3     3 1 13 sub is_utc { 0 }
368              
369             =item $tz->is_olson
370              
371             Returns the truth value that was provided to the constructor for this
372             purpose, default false. This nominally indicates whether the timezone
373             data is from the Olson database. The files interpreted by this class
374             are very likely to be from the Olson database, but there is no explicit
375             indicator for this in the file, so this information must be supplied to
376             the constructor if required.
377              
378             =cut
379              
380 3     3 1 23 sub is_olson { $_[0]->{is_olson} }
381              
382             =item $tz->category
383              
384             Returns the value that was provided to the constructor for this purpose,
385             default C. This is intended to indicate the general region
386             (continent or ocean) in which a geographical timezone is used, when
387             the timezone is named according to the hierarchical scheme of the Olson
388             timezone database.
389              
390             =cut
391              
392 3     3 1 22 sub category { $_[0]->{category} }
393              
394             =item $tz->name
395              
396             Returns the timezone name. Usually this is the filename that was supplied
397             to the constructor, but it can be overridden by the constructor's B
398             attribute.
399              
400             =cut
401              
402 8     8 1 2578 sub name { $_[0]->{name} }
403              
404             =back
405              
406             =head2 Offsets
407              
408             =over
409              
410             =item $tz->has_dst_changes
411              
412             Returns a truth value indicating whether any of the observances in the file
413             are marked as DST. These DST flags are potentially arbitrary, and don't
414             affect any of the zone's behaviour.
415              
416             =cut
417              
418 3     3 1 15 sub has_dst_changes { $_[0]->{has_dst} }
419              
420             #
421             # observance lookup
422             #
423              
424             sub _type_for_rdn_sod {
425 1986     1986   3997 my($self, $utc_rdn, $utc_sod) = @_;
426 1986         3453 my $lo = 0;
427 1986         3009 my $hi = @{$self->{trn_times}};
  1986         4142  
428 1986         4611 while($lo != $hi) {
429 5     5   12609 my $try = do { use integer; ($lo + $hi) / 2 };
  5         20  
  5         31  
  14395         21296  
  14395         24919  
430 14395 100 100     39546 if(($utc_rdn <=> $self->{trn_times}->[$try]->[0] ||
431             $utc_sod <=> $self->{trn_times}->[$try]->[1]) == -1) {
432 6871         15117 $hi = $try;
433             } else {
434 7524         16022 $lo = $try + 1;
435             }
436             }
437 1986         4866 return $self->{obs_types}->[$lo];
438             }
439              
440             sub _type_for_datetime {
441 1752     1752   3282 my($self, $dt) = @_;
442 1752         4593 my($utc_rdn, $utc_sod) = $dt->utc_rd_values;
443 1752 100       10333 $utc_sod = 86399 if $utc_sod >= 86400;
444 1752         3832 my $type = $self->_type_for_rdn_sod($utc_rdn, $utc_sod);
445 1752 100       4669 if(is_string($type)) {
446 33         48 croak "time @{[_present_rdn_sod($utc_rdn, $utc_sod)]}Z ".
  33         61  
447             "is not represented ".
448 33         4897 "in the @{[$self->{name}]} timezone ".
449             "due to $type";
450             }
451 1719         3650 return $type;
452             }
453              
454             =item $tz->offset_for_datetime(DT)
455              
456             I
must be a L-compatible object (specifically, it must
457             implement the C method). Returns the offset from UT that
458             is in effect at the instant represented by I
, in seconds.
459              
460             =cut
461              
462             sub offset_for_datetime {
463 584     584 1 8325 my($self, $dt) = @_;
464 584         1719 my $type = $self->_type_for_datetime($dt);
465 573 100       4096 return is_ref($type, "ARRAY") ? $type->[0] :
466             $type->offset_for_datetime($dt);
467             }
468              
469             =item $tz->is_dst_for_datetime(DT)
470              
471             I
must be a L-compatible object (specifically, it must
472             implement the C method). Returns a truth value indicating
473             whether the timezone's observance at the instant represented by I
474             is marked as DST. This DST flag is potentially arbitrary, and doesn't
475             affect anything else.
476              
477             =cut
478              
479             sub is_dst_for_datetime {
480 584     584 1 500470 my($self, $dt) = @_;
481 584         1758 my $type = $self->_type_for_datetime($dt);
482 573 100       4305 return is_ref($type, "ARRAY") ? $type->[1] :
483             $type->is_dst_for_datetime($dt);
484             }
485              
486             =item $tz->short_name_for_datetime(DT)
487              
488             I
must be a L-compatible object (specifically, it must
489             implement the C method). Returns the abbreviation
490             used to label the time scale at the instant represented by I
.
491             This abbreviation is potentially arbitrary, and does not uniquely identify
492             either the timezone or the offset.
493              
494             =cut
495              
496             sub short_name_for_datetime {
497 584     584 1 8082 my($self, $dt) = @_;
498 584         1711 my $type = $self->_type_for_datetime($dt);
499 573 100       4170 return is_ref($type, "ARRAY") ? $type->[2] :
500             $type->short_name_for_datetime($dt);
501             }
502              
503             =item $tz->offset_for_local_datetime(DT)
504              
505             I
must be a L-compatible object (specifically, it
506             must implement the C method). Takes the local
507             time represented by I
(regardless of what absolute time it also
508             represents), and interprets that as a local time in the timezone of the
509             timezone object (not the timezone used in I
). Returns the offset
510             from UT that is in effect at that local time, in seconds.
511              
512             If the local time given is ambiguous due to a nearby offset change,
513             the numerically lowest offset (usually the standard one) is returned
514             with no warning of the situation. (Equivalently: the latest possible
515             absolute time is indicated.) If the local time given does not exist
516             due to a nearby offset change, the method Cs saying so.
517              
518             =cut
519              
520             sub _local_to_utc_rdn_sod($$$) {
521 234     234   479 my($rdn, $sod, $offset) = @_;
522 234         359 $sod -= $offset;
523 234         510 while($sod < 0) {
524 15         35 $rdn--;
525 15         47 $sod += 86400;
526             }
527 234         458 while($sod >= 86400) {
528 32         42 $rdn++;
529 32         54 $sod -= 86400;
530             }
531 234         514 return ($rdn, $sod);
532             }
533              
534             sub offset_for_local_datetime {
535 85     85 1 10517 my($self, $dt) = @_;
536 85         245 my($lcl_rdn, $lcl_sod) = $dt->local_rd_values;
537 85 50       501 $lcl_sod = 86399 if $lcl_sod >= 86400;
538 85         138 my %seen_error;
539 85         145 foreach my $offset (@{$self->{offsets}}) {
  85         254  
540 234         557 my($utc_rdn, $utc_sod) =
541             _local_to_utc_rdn_sod($lcl_rdn, $lcl_sod, $offset);
542 234         556 my $ttype = $self->_type_for_rdn_sod($utc_rdn, $utc_sod);
543 234 100       587 if(is_string($ttype)) {
544 31         71 $seen_error{$ttype} = undef;
545 31         64 next;
546             }
547             my $local_offset = is_ref($ttype, "ARRAY") ? $ttype->[0] :
548 203 100       498 eval { local $SIG{__DIE__};
  22         113  
549 22         99 $ttype->offset_for_local_datetime($dt);
550             };
551 203 100 100     81318 return $offset
552             if defined($local_offset) && $local_offset == $offset;
553             }
554 27         53 my $error;
555 27         77 foreach("zone disuse", "missing data") {
556 45 100       118 if(exists $seen_error{$_}) {
557 11         22 $error = $_;
558 11         22 last;
559             }
560             }
561 27   100     313 $error ||= "offset change";
562 27         65 croak "local time @{[_present_rdn_sod($lcl_rdn, $lcl_sod)]} ".
  27         86  
563 27         6274 "does not exist in the @{[$self->{name}]} timezone ".
564             "due to $error";
565             }
566              
567             =back
568              
569             =head1 SEE ALSO
570              
571             L,
572             L,
573             L,
574             L,
575             L,
576             L
577              
578             =head1 AUTHOR
579              
580             Andrew Main (Zefram)
581              
582             =head1 COPYRIGHT
583              
584             Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2017
585             Andrew Main (Zefram)
586              
587             =head1 LICENSE
588              
589             This module is free software; you can redistribute it and/or modify it
590             under the same terms as Perl itself.
591              
592             =cut
593              
594             1;