File Coverage

blib/lib/DateTime/TimeZone/Tzfile.pm
Criterion Covered Total %
statement 241 245 98.3
branch 95 112 84.8
condition 33 50 66.0
subroutine 34 34 100.0
pod 11 11 100.0
total 414 452 91.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             DateTime::TimeZone::Tzfile - tzfile (zoneinfo) timezone files
4              
5             =head1 SYNOPSIS
6              
7             use DateTime::TimeZone::Tzfile;
8              
9             $tz = DateTime::TimeZone::Tzfile->new(
10             name => "local timezone",
11             filename => "/etc/localtime");
12             $tz = DateTime::TimeZone::Tzfile->new("/etc/localtime");
13              
14             if($tz->is_floating) { ...
15             if($tz->is_utc) { ...
16             if($tz->is_olson) { ...
17             $category = $tz->category;
18             $tz_string = $tz->name;
19              
20             if($tz->has_dst_changes) { ...
21             if($tz->is_dst_for_datetime($dt)) { ...
22             $offset = $tz->offset_for_datetime($dt);
23             $abbrev = $tz->short_name_for_datetime($dt);
24             $offset = $tz->offset_for_local_datetime($dt);
25              
26             =head1 DESCRIPTION
27              
28             An instance of this class represents a timezone that was encoded in a
29             file in the L format. These can express arbitrary patterns
30             of offsets from Universal Time, changing over time. Offsets and change
31             times are limited to a resolution of one second.
32              
33             This class implements the L interface, so that its
34             instances can be used with L objects.
35              
36             =cut
37              
38             package DateTime::TimeZone::Tzfile;
39              
40 5     5   15818 { use 5.006; }
  5         20  
  5         242  
41 5     5   31 use warnings;
  5         10  
  5         198  
42 5     5   30 use strict;
  5         21  
  5         183  
43              
44 5     5   26 use Carp qw(croak);
  5         10  
  5         377  
45 5     5   3199 use Date::ISO8601 0.000 qw(present_ymd);
  5         15307  
  5         392  
46 5     5   3968 use IO::File 1.13;
  5         45918  
  5         950  
47 5     5   40 use IO::Handle 1.08;
  5         91  
  5         223  
48 5     5   5057 use Params::Classify 0.000 qw(is_undef is_string is_ref);
  5         14664  
  5         725  
49              
50             our $VERSION = "0.010";
51              
52             my $rdn_epoch_cjdn = 1721425;
53              
54             # _fdiv(A, B), _fmod(A, B): divide A by B, flooring remainder
55             #
56             # B must be a positive Perl integer. A must be a Perl integer.
57              
58             sub _fdiv($$) {
59 14406     14406   17741 my($a, $b) = @_;
60 14406 100       25184 if($a < 0) {
61 5     5   34 use integer;
  5         10  
  5         30  
62 2507         5439 return -(($b - 1 - $a) / $b);
63             } else {
64 5     5   177 use integer;
  5         543  
  5         19  
65 11899         21226 return $a / $b;
66             }
67             }
68              
69 14406     14406   30833 sub _fmod($$) { $_[0] % $_[1] }
70              
71             =head1 CONSTRUCTOR
72              
73             =over
74              
75             =item DateTime::TimeZone::Tzfile->new(ATTR => VALUE, ...)
76              
77             Reads and parses a L format file, then constructs and returns
78             a L-compatible timezone object that implements the timezone
79             encoded in the file. The following attributes may be given:
80              
81             =over
82              
83             =item B
84              
85             Name for the timezone object. This will be returned by the C
86             method described below, and will be included in certain error messages.
87              
88             =item B
89              
90             The string or C that will be returned by the C method
91             described below. Default C.
92              
93             =item B
94              
95             The truth value that will be returned by the C method described
96             below. Default false.
97              
98             =item B
99              
100             Name of the file from which to read the timezone data. The filename
101             must be understood by L.
102              
103             =item B
104              
105             An L object from which the timezone data can be read.
106             This does not need to be a regular seekable file; it is read sequentially.
107             After the constructor has finished, the handle can still be used to read
108             any data that follows the timezone data.
109              
110             =back
111              
112             Either a filename or filehandle must be given. If a timezone name is not
113             given, then the filename is used instead if supplied; a timezone name
114             must be given explicitly if no filename is given.
115              
116             =item DateTime::TimeZone::Tzfile->new(FILENAME)
117              
118             Simpler way to invoke the above constructor in the usual case. Only the
119             filename is given; this will also be used as the timezone name.
120              
121             =cut
122              
123             sub _saferead($$) {
124 16532     16532   21156 my($fh, $len) = @_;
125 16532         16711 my $data;
126 16532         43388 my $rlen = $fh->read($data, $len);
127 16532 50       148331 croak "can't read tzfile: $!" unless defined($rlen);
128 16532 50       32436 croak "bad tzfile: premature EOF" unless $rlen == $len;
129 16532         47169 return $data;
130             }
131              
132 9153     9153   16338 sub _read_u32($) { unpack("N", _saferead($_[0], 4)) }
133              
134             sub _read_s32($) {
135 6040     6040   12308 my $uval = _read_u32($_[0]);
136 6040 100       23051 return ($uval & 0x80000000) ? ($uval & 0x7fffffff) - 0x80000000 :
137             $uval;
138             }
139              
140 6329     6329   12554 sub _read_u8($) { ord(_saferead($_[0], 1)) }
141              
142             my $unix_epoch_rdn = 719163;
143              
144             sub _read_tm32($) {
145 2866     2866   5669 my $t = _read_s32($_[0]);
146 2866         5136 return [ $unix_epoch_rdn + _fdiv($t, 86400), _fmod($t, 86400) ];
147             }
148              
149             sub _read_tm64($) {
150 2885     2885   3787 my($fh) = @_;
151 2885         4628 my $th = _read_s32($fh);
152 2885         5226 my $tl = _read_u32($fh);
153 2885         11122 my $dh = _fdiv($th, 86400);
154 2885         4691 $th = (_fmod($th, 86400) << 10) | ($tl >> 22);
155 2885         4475 my $d2 = _fdiv($th, 86400);
156 2885         4607 $th = (_fmod($th, 86400) << 10) | (($tl >> 12) & 0x3ff);
157 2885         4764 my $d3 = _fdiv($th, 86400);
158 2885         4955 $th = (_fmod($th, 86400) << 12) | ($tl & 0xfff);
159 2885         4450 my $d4 = _fdiv($th, 86400);
160 2885         4682 $th = _fmod($th, 86400);
161 2885         7423 my $d = $dh * 4294967296 + $d2 * 4194304 + (($d3 << 12) + $d4);
162 2885         10464 return [ $unix_epoch_rdn + $d, $th ];
163             }
164              
165             my $factory_abbr = "Local time zone must be set--see zic manual page";
166              
167             sub new {
168 43     43 1 25050 my $class = shift;
169 43 100       181 unshift @_, "filename" if @_ == 1;
170 43         306 my $self = bless({}, $class);
171 43         69 my($filename, $fh);
172 43         132 while(@_) {
173 59         98 my $attr = shift;
174 59         268 my $value = shift;
175 59 100       655 if($attr eq "name") {
    100          
    100          
    100          
    100          
176 13 100       190 croak "timezone name specified redundantly"
177             if exists $self->{name};
178 12 100       768 croak "timezone name must be a string"
179             unless is_string($value);
180 8         34 $self->{name} = $value;
181             } elsif($attr eq "category") {
182 7 100       174 croak "category value specified redundantly"
183             if exists $self->{category};
184 6 100 100     471 croak "category value must be a string or undef"
185             unless is_undef($value) || is_string($value);
186 3         195 $self->{category} = $value;
187             } elsif($attr eq "is_olson") {
188 4 100       338 croak "is_olson flag specified redundantly"
189             if exists $self->{is_olson};
190 3         14 $self->{is_olson} = !!$value;
191             } elsif($attr eq "filename") {
192 27 100 100     535 croak "filename specified redundantly"
193             if defined($filename) || defined($fh);
194 25 100       614 croak "filename must be a string"
195             unless is_string($value);
196 21         248 $filename = $value;
197             } elsif($attr eq "filehandle") {
198 7 100 100     345 croak "filehandle specified redundantly"
199             if defined($filename) || defined($fh);
200 5         17 $fh = $value;
201             } else {
202 1         150 croak "unrecognised attribute `$attr'";
203             }
204             }
205 24 100 100     419 croak "file not specified" unless defined($filename) || defined($fh);
206 22 100       109 unless(exists $self->{name}) {
207 16 100       199 croak "timezone name not specified" unless defined $filename;
208 15         41 $self->{name} = $filename;
209             }
210 21 100       255 unless(exists $self->{category}) {
211 19         46 $self->{category} = undef;
212             }
213 21 100       61 unless(exists $self->{is_olson}) {
214 19         56 $self->{is_olson} = !!0;
215             }
216 21 100       59 if(defined $filename) {
217 19 100 66     464 ($fh = IO::File->new($filename, "r")) && $fh->binmode
218             or croak "can't read $filename: $!";
219             }
220 20 100       4351 croak "bad tzfile: wrong magic number"
221             unless _saferead($fh, 4) eq "TZif";
222 19         69 my $fmtversion = _saferead($fh, 1);
223 19 50       189 croak "bad tzfile: malformed version number"
224             unless $fmtversion =~ /\A[2-9\0]\z/;
225 19         44 _saferead($fh, 15);
226 114         309 my($ttisgmtcnt, $ttisstdcnt, $leapcnt, $timecnt, $typecnt, $charcnt) =
227 19         52 map { _read_u32($fh) } 1 .. 6;
228 19 50       68 croak "bad tzfile: no local time types" if $typecnt == 0;
229 19         230 my @trn_times = map { _read_tm32($fh) } 1 .. $timecnt;
  2866         18438  
230 19         335 my @obs_types = map { _read_u8($fh) } 1 .. $timecnt;
  2866         4268  
231 136         242 my @types = map {
232 19         203 [ _read_s32($fh), !!_read_u8($fh), _read_u8($fh) ]
233             } 1 .. $typecnt;
234 19         67 my $chars = _saferead($fh, $charcnt);
235 19         86 for(my $i = $leapcnt; $i--; ) { _saferead($fh, 8); }
  0         0  
236 19         67 for(my $i = $ttisstdcnt; $i--; ) { _saferead($fh, 1); }
  136         205  
237 19         78 for(my $i = $ttisgmtcnt; $i--; ) { _saferead($fh, 1); }
  136         230  
238 19         31 my $late_rule;
239 19 50       73 if($fmtversion ge "2") {
240 19 50       49 croak "bad tzfile: wrong magic number"
241             unless _saferead($fh, 4) eq "TZif";
242 19         51 _saferead($fh, 16);
243 114         202 ($ttisgmtcnt, $ttisstdcnt, $leapcnt,
244             $timecnt, $typecnt, $charcnt) =
245 19         47 map { _read_u32($fh) } 1 .. 6;
246 19 50       71 croak "bad tzfile: no local time types" if $typecnt == 0;
247 19         629 @trn_times = map { _read_tm64($fh) } 1 .. $timecnt;
  2885         4774  
248 19         561 @obs_types = map { _read_u8($fh) } 1 .. $timecnt;
  2885         7444  
249 153         329 @types = map {
250 19         287 [ _read_s32($fh), !!_read_u8($fh), _read_u8($fh) ]
251             } 1 .. $typecnt;
252 19         63 $chars = _saferead($fh, $charcnt);
253 19         400 for(my $i = $leapcnt; $i--; ) { _saferead($fh, 12); }
  0         0  
254 19         79 for(my $i = $ttisstdcnt; $i--; ) { _saferead($fh, 1); }
  153         228  
255 19         66 for(my $i = $ttisgmtcnt; $i--; ) { _saferead($fh, 1); }
  153         239  
256 19 50       42 croak "bad tzfile: missing newline"
257             unless _saferead($fh, 1) eq "\x0a";
258 19         141 $late_rule = "";
259 19         30 while(1) {
260 319         742 my $c = _saferead($fh, 1);
261 319 100       675 last if $c eq "\x0a";
262 300         351 $late_rule .= $c;
263             }
264             }
265 19         34 $fh = undef;
266 19         6585 for(my $i = @trn_times - 1; $i-- > 0; ) {
267 2866 50 33     10680 unless(($trn_times[$i]->[0] <=> $trn_times[$i+1]->[0] ||
268             $trn_times[$i]->[1] <=> $trn_times[$i+1]->[1]) == -1) {
269 0         0 croak "bad tzfile: unsorted change times";
270             }
271             }
272 19         36 my $first_std_type_index;
273             my %offsets;
274 19         82 for(my $i = 0; $i != $typecnt; $i++) {
275 153         248 my $abbrind = $types[$i]->[2];
276 153 50       296 croak "bad tzfile: invalid abbreviation index"
277             if $abbrind > $charcnt;
278 153         464 pos($chars) = $abbrind;
279 153         484 $chars =~ /\G([^\0]*)/g;
280 153         384 $types[$i]->[2] = $1;
281 153 50 66     370 $first_std_type_index = $i
282             if !defined($first_std_type_index) && !$types[$i]->[1];
283 153 100       371 $self->{has_dst} = 1 if $types[$i]->[1];
284 153 100 66     1270 if($types[$i]->[0] == 0 && !$types[$i]->[1] &&
      100        
285             $types[$i]->[2] eq "zzz") {
286             # "zzz" means the zone is not defined at this time,
287             # due for example to the location being uninhabited
288 6         30 $types[$i] = "zone disuse";
289             } else {
290 147         644 $offsets{$types[$i]->[0]} = undef;
291             }
292             }
293 19 50       159 unshift @obs_types,
294             defined($first_std_type_index) ? $first_std_type_index : 0;
295 19         58 foreach my $obs_type (@obs_types) {
296 2904 50       5128 croak "bad tzfile: invalid local time type index"
297             if $obs_type >= $typecnt;
298 2904         3320 $obs_type = $types[$obs_type];
299             }
300 19 0 33     192 if(defined($late_rule) && $late_rule eq "<$factory_abbr>0" &&
      33        
      33        
      0        
      0        
301             defined($obs_types[-1]) && $obs_types[-1]->[0] == 0 &&
302             !$obs_types[-1]->[1] &&
303             $obs_types[-1]->[2] eq $factory_abbr) {
304             # This bizarre timezone abbreviation is used in the Factory
305             # timezone in the Olson database. It's not valid in a
306             # SysV-style TZ value, because it contains spaces, but zic
307             # puts it into one anyway because the file format demands
308             # it. DT:TZ:SystemV would object, so as a special
309             # exception we ignore the TZ value in this case.
310 0         0 $late_rule = undef;
311             }
312 19 50       55 if(defined $late_rule) {
313 19 100       12089 if($late_rule eq "") {
    100          
314 2         99 $obs_types[-1] = "missing data";
315             } elsif($late_rule =~
316             /\A(?:zzz|)[-+]?00?(?::00(?::00)?)?\z/) {
317 2         6 $obs_types[-1] = "zone disuse";
318             } else {
319 15         11556 require DateTime::TimeZone::SystemV;
320 15         24227 DateTime::TimeZone::SystemV->VERSION("0.009");
321 15 100       175 $obs_types[-1] =
322             DateTime::TimeZone::SystemV->new(
323             system => $fmtversion ge "3" ?
324             "tzfile3" : "posix",
325             recipe => $late_rule);
326             }
327             }
328 19         3838 $self->{trn_times} = \@trn_times;
329 19         63 $self->{obs_types} = \@obs_types;
330 19         174 $self->{offsets} = [ sort { $a <=> $b } keys %offsets ];
  85         221  
331 19         424 return $self;
332             }
333              
334             sub _present_rdn_sod($$) {
335 60     60   84 my($rdn, $sod) = @_;
336 60         222 return sprintf("%sT%02d:%02d:%02d",
337             present_ymd($rdn + $rdn_epoch_cjdn),
338             int($sod/3600), int($sod/60)%60, $sod%60);
339             }
340              
341             =back
342              
343             =head1 METHODS
344              
345             These methods are all part of the L interface.
346             See that class for the general meaning of these methods; the documentation
347             below only comments on the specific behaviour of this class.
348              
349             =head2 Identification
350              
351             =over
352              
353             =item $tz->is_floating
354              
355             Returns false.
356              
357             =cut
358              
359 3     3 1 1066 sub is_floating { 0 }
360              
361             =item $tz->is_utc
362              
363             Returns false.
364              
365             =cut
366              
367 3     3 1 12 sub is_utc { 0 }
368              
369             =item $tz->is_olson
370              
371             Returns the truth value that was provided to the constructor for this
372             purpose, default false. This nominally indicates whether the timezone
373             data is from the Olson database. The files interpreted by this class
374             are very likely to be from the Olson database, but there is no explicit
375             indicator for this in the file, so this information must be supplied to
376             the constructor if required.
377              
378             =cut
379              
380 3     3 1 19 sub is_olson { $_[0]->{is_olson} }
381              
382             =item $tz->category
383              
384             Returns the value that was provided to the constructor for this purpose,
385             default C. This is intended to indicate the general region
386             (continent or ocean) in which a geographical timezone is used, when
387             the timezone is named according to the hierarchical scheme of the Olson
388             timezone database.
389              
390             =cut
391              
392 3     3 1 18 sub category { $_[0]->{category} }
393              
394             =item $tz->name
395              
396             Returns the timezone name. Usually this is the filename that was supplied
397             to the constructor, but it can be overridden by the constructor's B
398             attribute.
399              
400             =cut
401              
402 8     8 1 4552 sub name { $_[0]->{name} }
403              
404             =back
405              
406             =head2 Offsets
407              
408             =over
409              
410             =item $tz->has_dst_changes
411              
412             Returns a truth value indicating whether any of the observances in the file
413             are marked as DST. These DST flags are potentially arbitrary, and don't
414             affect any of the zone's behaviour.
415              
416             =cut
417              
418 3     3 1 12 sub has_dst_changes { $_[0]->{has_dst} }
419              
420             #
421             # observance lookup
422             #
423              
424             sub _type_for_rdn_sod {
425 1986     1986   4145 my($self, $utc_rdn, $utc_sod) = @_;
426 1986         2618 my $lo = 0;
427 1986         2036 my $hi = @{$self->{trn_times}};
  1986         3907  
428 1986         4922 while($lo != $hi) {
429 5     5   12872 my $try = do { use integer; ($lo + $hi) / 2 };
  5         12  
  5         21  
  14395         14078  
  14395         18568  
430 14395 100 100     73631 if(($utc_rdn <=> $self->{trn_times}->[$try]->[0] ||
431             $utc_sod <=> $self->{trn_times}->[$try]->[1]) == -1) {
432 6871         13926 $hi = $try;
433             } else {
434 7524         16272 $lo = $try + 1;
435             }
436             }
437 1986         4863 return $self->{obs_types}->[$lo];
438             }
439              
440             sub _type_for_datetime {
441 1752     1752   20175 my($self, $dt) = @_;
442 1752         5146 my($utc_rdn, $utc_sod) = $dt->utc_rd_values;
443 1752 100       10097 $utc_sod = 86399 if $utc_sod >= 86400;
444 1752         3815 my $type = $self->_type_for_rdn_sod($utc_rdn, $utc_sod);
445 1752 100       4507 if(is_string($type)) {
446 33         51 croak "time @{[_present_rdn_sod($utc_rdn, $utc_sod)]}Z ".
  33         70  
447             "is not represented ".
448 33         7823 "in the @{[$self->{name}]} timezone ".
449             "due to $type";
450             }
451 1719         4600 return $type;
452             }
453              
454             =item $tz->offset_for_datetime(DT)
455              
456             I
must be a L-compatible object (specifically, it must
457             implement the C method). Returns the offset from UT that
458             is in effect at the instant represented by I
, in seconds.
459              
460             =cut
461              
462             sub offset_for_datetime {
463 584     584 1 8122 my($self, $dt) = @_;
464 584         1319 my $type = $self->_type_for_datetime($dt);
465 573 100       3721 return is_ref($type, "ARRAY") ? $type->[0] :
466             $type->offset_for_datetime($dt);
467             }
468              
469             =item $tz->is_dst_for_datetime(DT)
470              
471             I
must be a L-compatible object (specifically, it must
472             implement the C method). Returns a truth value indicating
473             whether the timezone's observance at the instant represented by I
474             is marked as DST. This DST flag is potentially arbitrary, and doesn't
475             affect anything else.
476              
477             =cut
478              
479             sub is_dst_for_datetime {
480 584     584 1 369138 my($self, $dt) = @_;
481 584         1423 my $type = $self->_type_for_datetime($dt);
482 573 100       3905 return is_ref($type, "ARRAY") ? $type->[1] :
483             $type->is_dst_for_datetime($dt);
484             }
485              
486             =item $tz->short_name_for_datetime(DT)
487              
488             I
must be a L-compatible object (specifically, it must
489             implement the C method). Returns the abbreviation
490             used to label the time scale at the instant represented by I
.
491             This abbreviation is potentially arbitrary, and does not uniquely identify
492             either the timezone or the offset.
493              
494             =cut
495              
496             sub short_name_for_datetime {
497 584     584 1 6945 my($self, $dt) = @_;
498 584         1236 my $type = $self->_type_for_datetime($dt);
499 573 100       8116 return is_ref($type, "ARRAY") ? $type->[2] :
500             $type->short_name_for_datetime($dt);
501             }
502              
503             =item $tz->offset_for_local_datetime(DT)
504              
505             I
must be a L-compatible object (specifically, it
506             must implement the C method). Takes the local
507             time represented by I
(regardless of what absolute time it also
508             represents), and interprets that as a local time in the timezone of the
509             timezone object (not the timezone used in I
). Returns the offset
510             from UT that is in effect at that local time, in seconds.
511              
512             If the local time given is ambiguous due to a nearby offset change,
513             the numerically lowest offset (usually the standard one) is returned
514             with no warning of the situation. (Equivalently: the latest possible
515             absolute time is indicated.) If the local time given does not exist
516             due to a nearby offset change, the method Cs saying so.
517              
518             =cut
519              
520             sub _local_to_utc_rdn_sod($$$) {
521 234     234   342 my($rdn, $sod, $offset) = @_;
522 234         389 $sod -= $offset;
523 234         2027 while($sod < 0) {
524 15         23 $rdn--;
525 15         194 $sod += 86400;
526             }
527 234         1433 while($sod >= 86400) {
528 32         37 $rdn++;
529 32         71 $sod -= 86400;
530             }
531 234         1020 return ($rdn, $sod);
532             }
533              
534             sub offset_for_local_datetime {
535 85     85 1 12799 my($self, $dt) = @_;
536 85         305 my($lcl_rdn, $lcl_sod) = $dt->local_rd_values;
537 85 50       643 $lcl_sod = 86399 if $lcl_sod >= 86400;
538 85         106 my %seen_error;
539 85         302 foreach my $offset (@{$self->{offsets}}) {
  85         332  
540 234         789 my($utc_rdn, $utc_sod) =
541             _local_to_utc_rdn_sod($lcl_rdn, $lcl_sod, $offset);
542 234         1328 my $ttype = $self->_type_for_rdn_sod($utc_rdn, $utc_sod);
543 234 100       720 if(is_string($ttype)) {
544 31         60 $seen_error{$ttype} = undef;
545 31         296 next;
546             }
547             my $local_offset = is_ref($ttype, "ARRAY") ? $ttype->[0] :
548 203 100       944 eval { local $SIG{__DIE__};
  22         67  
549 22         63 $ttype->offset_for_local_datetime($dt);
550             };
551 203 100 100     55936 return $offset
552             if defined($local_offset) && $local_offset == $offset;
553             }
554 27         61 my $error;
555 27         50 foreach("zone disuse", "missing data") {
556 45 100       129 if(exists $seen_error{$_}) {
557 11         15 $error = $_;
558 11         20 last;
559             }
560             }
561 27   100     119 $error ||= "offset change";
562 27         43 croak "local time @{[_present_rdn_sod($lcl_rdn, $lcl_sod)]} ".
  27         156  
563 27         10493 "does not exist in the @{[$self->{name}]} timezone ".
564             "due to $error";
565             }
566              
567             =back
568              
569             =head1 SEE ALSO
570              
571             L,
572             L,
573             L,
574             L,
575             L,
576             L
577              
578             =head1 AUTHOR
579              
580             Andrew Main (Zefram)
581              
582             =head1 COPYRIGHT
583              
584             Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013
585             Andrew Main (Zefram)
586              
587             =head1 LICENSE
588              
589             This module is free software; you can redistribute it and/or modify it
590             under the same terms as Perl itself.
591              
592             =cut
593              
594             1;