| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/env perl |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Date::Parse::Modern; |
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
141506
|
use strict; |
|
|
2
|
|
|
|
|
14
|
|
|
|
2
|
|
|
|
|
60
|
|
|
6
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
50
|
|
|
7
|
2
|
|
|
2
|
|
26
|
use v5.10; |
|
|
2
|
|
|
|
|
7
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
10
|
use Carp; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
202
|
|
|
10
|
2
|
|
|
2
|
|
991
|
use Time::Local 1.26; |
|
|
2
|
|
|
|
|
4984
|
|
|
|
2
|
|
|
|
|
117
|
|
|
11
|
2
|
|
|
2
|
|
15
|
use Exporter 'import'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
3814
|
|
|
12
|
|
|
|
|
|
|
our @EXPORT = ('strtotime'); |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
############################################################################### |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# https://pause.perl.org/pause/query?ACTION=pause_operating_model#3_5_factors_considering_in_the_indexing_phase |
|
17
|
|
|
|
|
|
|
our $VERSION = 0.5; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# https://timezonedb.com/download |
|
20
|
|
|
|
|
|
|
my $TZ_OFFSET = { |
|
21
|
|
|
|
|
|
|
'ACDT' => 10, 'ACST' => 9, 'ACT' => -5, 'ACWST' => 8, 'ADT' => -3, 'AEDT' => 11, 'AEST' => 10, 'AFT' => 4, |
|
22
|
|
|
|
|
|
|
'AKDT' => -8, 'AKST' => -9, 'ALMT' => 6, 'AMST' => 5, 'AMT' => 4, 'ANAST' => 12, 'ANAT' => 12, 'AQTT' => 5, |
|
23
|
|
|
|
|
|
|
'ART' => -3, 'AST' => -4, 'AWDT' => 9, 'AWST' => 8, 'AZOST' => 0, 'AZOT' => -1, 'AZST' => 5, 'AZT' => 4, |
|
24
|
|
|
|
|
|
|
'AoE' => -12, 'BNT' => 8, 'BOT' => -4, 'BRST' => -2, 'BRT' => -3, 'BST' => 1, 'BTT' => 6, 'CAST' => 8, |
|
25
|
|
|
|
|
|
|
'CAT' => 2, 'CCT' => 6, 'CDT' => -5, 'CEST' => 2, 'CET' => 1, 'CHADT' => 13, 'CHAST' => 12, 'CHOST' => 9, |
|
26
|
|
|
|
|
|
|
'CHOT' => 8, 'CHUT' => 10, 'CIDST' => -4, 'CIST' => -5, 'CKT' => -10, 'CLST' => -3, 'CLT' => -4, 'COT' => -5, |
|
27
|
|
|
|
|
|
|
'CST' => -6, 'CVT' => -1, 'CXT' => 7, 'ChST' => 10, 'DAVT' => 7, 'DDUT' => 10, 'EASST' => -5, 'EAST' => -6, |
|
28
|
|
|
|
|
|
|
'EAT' => 3, 'ECT' => -5, 'EDT' => -4, 'EEST' => 3, 'EET' => 2, 'EGST' => 0, 'EGT' => -1, 'EST' => -5, |
|
29
|
|
|
|
|
|
|
'FET' => 3, 'FJST' => 13, 'FJT' => 12, 'FKST' => -3, 'FKT' => -4, 'FNT' => -2, 'GALT' => -6, 'GAMT' => -9, |
|
30
|
|
|
|
|
|
|
'GET' => 4, 'GFT' => -3, 'GILT' => 12, 'GMT' => 0, 'GST' => -2, 'GYT' => -4, 'HDT' => -9, 'HKT' => 8, |
|
31
|
|
|
|
|
|
|
'HOVST' => 8, 'HOVT' => 7, 'HST' => -10, 'ICT' => 7, 'IDT' => 3, 'IOT' => 6, 'IRDT' => 4, 'IRKST' => 9, |
|
32
|
|
|
|
|
|
|
'IRKT' => 8, 'IRST' => 3, 'IST' => 2, 'JST' => 9, 'KGT' => 6, 'KOST' => 11, 'KRAST' => 8, 'KRAT' => 7, |
|
33
|
|
|
|
|
|
|
'KST' => 9, 'KUYT' => 4, 'LHDT' => 11, 'LHST' => 10, 'LINT' => 14, 'MAGST' => 12, 'MAGT' => 11, 'MART' => -9, |
|
34
|
|
|
|
|
|
|
'MAWT' => 5, 'MDT' => -6, 'MHT' => 12, 'MMT' => 6, 'MSD' => 4, 'MSK' => 3, 'MST' => -7, 'MUT' => 4, |
|
35
|
|
|
|
|
|
|
'MVT' => 5, 'MYT' => 8, 'NCT' => 11, 'NDT' => -2, 'NFDT' => 12, 'NFT' => 11, 'NOVST' => 7, 'NOVT' => 7, |
|
36
|
|
|
|
|
|
|
'NPT' => 5, 'NRT' => 12, 'NST' => -3, 'NUT' => -11, 'NZDT' => 13, 'NZST' => 12, 'OMSST' => 7, 'OMST' => 6, |
|
37
|
|
|
|
|
|
|
'ORAT' => 5, 'PDT' => -7, 'PET' => -5, 'PETST' => 12, 'PETT' => 12, 'PGT' => 10, 'PHOT' => 13, 'PHT' => 8, |
|
38
|
|
|
|
|
|
|
'PKT' => 5, 'PMDT' => -2, 'PMST' => -3, 'PONT' => 11, 'PST' => -8, 'PWT' => 9, 'PYST' => -3, 'PYT' => 8, |
|
39
|
|
|
|
|
|
|
'QYZT' => 6, 'RET' => 4, 'ROTT' => -3, 'SAKT' => 11, 'SAMT' => 4, 'SAST' => 2, 'SBT' => 11, 'SCT' => 4, |
|
40
|
|
|
|
|
|
|
'SGT' => 8, 'SRET' => 11, 'SRT' => -3, 'SST' => -11, 'SYOT' => 3, 'TAHT' => -10, 'TFT' => 5, 'TJT' => 5, |
|
41
|
|
|
|
|
|
|
'TKT' => 13, 'TLT' => 9, 'TMT' => 5, 'TOST' => 14, 'TOT' => 13, 'TRT' => 3, 'TVT' => 12, 'ULAST' => 9, |
|
42
|
|
|
|
|
|
|
'ULAT' => 8, 'UYST' => -2, 'UYT' => -3, 'UZT' => 5, 'VET' => -4, 'VLAST' => 11, 'VLAT' => 10, 'VOST' => 6, |
|
43
|
|
|
|
|
|
|
'VUT' => 11, 'WAKT' => 12, 'WARST' => -3, 'WAST' => 2, 'WAT' => 1, 'WEST' => 1, 'WET' => 0, 'WFT' => 12, |
|
44
|
|
|
|
|
|
|
'WGST' => -2, 'WGT' => -3, 'WIB' => 7, 'WIT' => 9, 'WITA' => 8, 'WST' => 1, 'YAKST' => 10, 'YAKT' => 9, |
|
45
|
|
|
|
|
|
|
'YAPT' => 10, 'YEKST' => 6, 'YEKT' => 5, 'Z' => 0, |
|
46
|
|
|
|
|
|
|
}; |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Separator between date pieces: '-' or '/' or '\' |
|
49
|
|
|
|
|
|
|
my $sep = qr/[\/\\-]/; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Force a local timezone offset (used for unit tests) |
|
52
|
|
|
|
|
|
|
our $LOCAL_TZ_OFFSET = undef; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Use caching for repeated lookups for the same TZ offset |
|
55
|
|
|
|
|
|
|
our $USE_TZ_CACHE = 1; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# These are undocumented package variables. They could be changed to support |
|
58
|
|
|
|
|
|
|
# alternate languages but there are caveats. These are cached and changing |
|
59
|
|
|
|
|
|
|
# them after strtotime() is called won't affect anything. No one has requested |
|
60
|
|
|
|
|
|
|
# alternate languages, so I'm leaving this undocumented for now. |
|
61
|
|
|
|
|
|
|
our $MONTH_MAP = { |
|
62
|
|
|
|
|
|
|
'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4 , 'may' => 5 , 'jun' => 6 , |
|
63
|
|
|
|
|
|
|
'jul' => 7, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'nov' => 11, 'dec' => 12, |
|
64
|
|
|
|
|
|
|
}; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
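# See the note above $MONTH_MAP. Each abbreviation is listed before its full name, so an |
# unanchored match such as the fallback in strtotime() captures the three-letter form used as the $MONTH_MAP key |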
|
67
|
|
|
|
|
|
|
our $MONTH_REGEXP = qr/ |
|
68
|
|
|
|
|
|
|
Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June| |
|
69
|
|
|
|
|
|
|
Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December |
|
70
|
|
|
|
|
|
|
/ix; |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
############################################################################### |
|
73
|
|
|
|
|
|
|
############################################################################### |
|
74
|
|
|
|
|
|
|
############################################################################### |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 NAME |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
C<Date::Parse::Modern> - Provide string to unixtime conversions |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
C<Date::Parse::Modern> provides a single function C<strtotime()> which takes a datetime string |
|
83
|
|
|
|
|
|
|
and returns a unixtime. Care was given to support the most modern style strings that you would |
|
84
|
|
|
|
|
|
|
commonly find in log files or on the internet. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 USAGE |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
use Date::Parse::Modern; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
C<Date::Parse::Modern> exports the C<strtotime()> function automatically. |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head2 strtotime($string) |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
my $unixtime = strtotime('1979-02-24'); # 288691200 |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Simply feed C<strtotime()> a string with some type of date or time in it, and it will return an |
|
99
|
|
|
|
|
|
|
integer unixtime. If the string is unparseable, or a weird error occurs, it will return C<undef>. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
All the "magic" in C<strtotime()> is done using regular expressions that look for common datetime |
|
102
|
|
|
|
|
|
|
formats. Common formats like YYYY-MM-DD and HH:II:SS are easily detected and converted to the |
|
103
|
|
|
|
|
|
|
appropriate formats. This allows the date or time to be found anywhere in the string, in (almost) any |
|
104
|
|
|
|
|
|
|
order. In all cases, the day of the week is ignored in the input string. |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
B<Note:> Strings without a year are assumed to be in the current year. Example: C<May 15th, 10:15am> |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
B<Note:> Strings with only a date are assumed to occur at midnight. Example: C<2023-01-15> |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
B<Note:> Strings with only a time are assumed to be the current day. Example: C<10:15am> |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
B<Note:> In strings with numeric B<and> textual time zone offsets, the numeric is used. Example: |
|
113
|
|
|
|
|
|
|
C<14 Nov 1994 11:34:32 -0500 (EST)> |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 Will you support XYZ format? |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Everyone has their B<favorite> date/time format, and we'd like to support as many |
|
118
|
|
|
|
|
|
|
as possible. We have tried to support as much of |
|
119
|
|
|
|
|
|
|
L<ISO 8601|https://en.wikipedia.org/wiki/ISO_8601> as possible, but we |
|
120
|
|
|
|
|
|
|
cannot support everything. Every new format we support runs the risk of slowing |
|
121
|
|
|
|
|
|
|
down things for existing formats. You can submit a feature request on Github |
|
122
|
|
|
|
|
|
|
for new formats but we may reject them if adding support would slow down others. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 Bugs/Features |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Please submit bugs and feature requests on Github: |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
https://github.com/scottchiefbaker/perl-Date-Parse-Modern |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 AUTHORS |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Scott Baker - https://www.perturb.org/ |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
############################################################################### |
|
137
|
|
|
|
|
|
|
############################################################################### |
|
138
|
|
|
|
|
|
|
############################################################################### |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Convert a free-form date/time string into a unixtime.
#
# The logic here is that we use regular expressions to pull out various
# patterns (YYYY/MM/DD, H:I:S, DD MonthWord YYYY), fill in any missing date
# pieces from the current local date, build a UTC unixtime from the pieces,
# and finally subtract the timezone offset found in the string (or the local
# offset if the string does not carry one).
#
# Parameters:
#   $str   - the string to parse
#   $debug - optional; when true a summary of the parsed pieces is printed
#            to STDERR
#
# Returns the unixtime (fractional when the input has fractional seconds),
# or undef when $str is undefined, contains nothing that looks like a date
# or time, or describes a date/time that cannot be converted.
sub strtotime {
    my ($str, $debug) = @_;

    if (!defined($str)) {
        return undef;
    }

    my ($year, $month, $day) = (0, 0, 0);
    my ($hour, $min , $sec, $ms) = (0, 0, 0, 0);

    ###########################################################################
    ###########################################################################

    state $rule_1 = qr/
        \b
        ((\d{4})$sep(\d{2})$sep(\d{2}) # YYYY-MM-DD
        |
        (\d{2})$sep(\d{2})$sep(\d{4})) # DD-MM-YYYY
    /x;

    # First we look to see if we have anything that matches YYYY-MM-DD (numerically)
    if ($str =~ $rule_1) {
        # YYYY-MM-DD: 1999-12-24
        if ($2 || $3) {
            $year  = $2;
            $month = $3;
            $day   = $4;
        }

        # DD-MM-YYYY: 12-24-1999
        if ($5 || $6) {
            $day   = $5;
            $month = $6;
            $year  = $7;

            # It might be American format (MM-DD-YYYY) so we do a quick flip/flop
            if ($month > 12) {
                ($day, $month) = ($month, $day);
            }
        }
    }

    # The year may be on the end of the string: Sat May 8 21:24:31 2021
    if (!$year) {
        ($year) = $str =~ m/\s(\d{4})\b/;
    }

    ###########################################################################

    state $rule_2 = qr/
        (\d{1,2})?             # Maybe some digits before month
        \s*
        ($MONTH_REGEXP)        # A textual month
        \s+
        (\d{1,4})              # Digits
        [\s\$]                 # Whitespace or a literal dollar sign (inside a class this is NOT an end of string anchor)
        ((\d{2}|\d{4})[ \$])?  # If there are two or four digits after, it's a year (same caveat as above)
    /x;

    # Next we look for alpha months followed by a digit if we didn't find a numeric month above
    # This will find: "April 13 " and also "13 April 1995 " (the digits must be followed by whitespace)
    if (!$month && $str =~ $rule_2) {

        # Get the numerical number for this month
        my $month_name = lc(substr($2,0,3));
        $month = $MONTH_MAP->{$month_name};

        # 17 March 94
        if ($1) {
            $day  = int($1);
            $year = int($3);
        # April 13 or April 13 94
        } else {
            $day = int($3);

            # *IF* we still don't have a year
            if (!$year) {
                my $part = $5 || 0;
                $year = int($part)
            }
        }
    }

    ###########################################################################

    # Alternate date string like: 21/dec/93 or dec/21/93 much less common
    if (!$month && $str =~ /(.*)($MONTH_REGEXP)(.*)/) {
        my $before = $1;
        my $after  = $3;

        # Lookup the numeric month based on the string name
        $month = $MONTH_MAP->{lc($2)} || 0;

        # Month starts string: dec/21/93 or feb/14/1999
        if ($before eq "") {
            if ($after =~ m/(\d{2})$sep(\d{2,4})/) {
                $day  = $1;
                $year = $2;
            }
        # Month in the middle: 21/dec/93
        } elsif ($before && $after) {
            # Capture in list context: after a FAILED match $1 and $2 still
            # hold the captures of the previous successful match, so reading
            # them unconditionally would pick up unrelated text.
            my ($day_digits) = $before =~ m/(\d+)\D/; # Just the digits
            $day = $day_digits || 0;

            # Get the digits AFTER the separator together with whatever
            # follows them. Requiring a non-digit (or the end of the string)
            # after the digits keeps a trailing four digit year intact
            # instead of splitting "1995" into "199" + "5".
            my ($year_digits, $next_char) = $after =~ m/\D(\d{2,4})(\D|\z)/;

            # If it's not a time (has a colon) it's the year
            if (defined($year_digits) && $next_char ne ":") {
                $year = $year_digits;
            }
        }
    }

    ###########################################################################

    state $rule_3 = qr/
        (\b|T)                # Anchor point
        (\d{1,2}):            # Hours
        (\d{1,2}):?           # Minutes
        (\d{2}(Z|\.\d+)?)?    # Seconds (optional)
        \ ?(am|pm|AM|PM)?     # AMPM (optional)
    /x;

    # Now we look for times: 10:14, 10:14:17, 08:15pm
    if ($str =~ $rule_3) {
        $hour = int($2);
        $min  = int($3);
        $sec  = $4 || 0; # Not int() cuz it might be float for milliseconds
        $sec  =~ s/Z$//; # Remove any Z at the end

        # The string of AM or PM
        my $ampm = lc($6 || "");

        # PM means add 12 hours
        if ($ampm eq "pm") {
            $hour += 12;
        }

        # 12:15am = 00:15 / 12:15pm = 12:15 so we have to compensate
        if ($ampm && ($hour == 24 || $hour == 12)) {
            $hour -= 12;
        }
    }

    # Just some basic sanity checking
    my $has_time = ($hour || $min   || $sec);
    my $has_date = ($year || $month || $day);

    if (!$has_time && !$has_date) {
        # One final check if NOTHING else has matched, we lookup a weird format: 20020722T100000Z
        if ($str =~ m/(\d{4})(\d{2})(\d{2})T(\d\d)(\d\d)(\d\d)Z/) {
            $year  = $1;
            $month = $2;
            $day   = $3;

            $hour  = $4;
            $min   = $5;
            $sec   = $6;
        } else {
            return undef;
        }
    }

    ###########################################################################
    ###########################################################################

    # If there is no month, we assume the current month
    if (!$month) {
        $month = (localtime())[4] + 1;
    }

    # If there is no day, we assume the current day
    if (!$day) {
        $day = (localtime())[3];
    }

    # If we STILL don't have a year it may be a time only string so we assume it's the current year
    if (!$year) {
        $year = (localtime())[5] + 1900;
    }

    # Convert any two digit years to four digits
    if ($year < 100) {
        $year += 1900;
    }

    # Time::Local doesn't support fractional seconds, so we make an int version
    # and then add the ms after the timegm_modern() conversion
    $ms  = $sec - int($sec);
    $sec = int($sec);

    # If we have all the requisite pieces we build a unixtime
    my $ret;
    my $ok = eval {
        $ret = Time::Local::timegm_modern($sec, $min, $hour, $day, $month - 1, $year);

        return 1;
    };
    # This has to be *immediately* after the eval or something else might
    # tromp on the error message
    my $err = $@;

    if ($err && $err =~ /Undefined subroutine/) {
        print STDERR $err;
        return undef;
    };

    # timegm_modern() dies on impossible dates (month 13, day 45, ...). Without
    # this check the failure would silently turn into unixtime 0.
    if (!$ok || !defined($ret)) {
        return undef;
    }

    $ret += $ms;

    # If we find a timezone offset we take that in to account now
    # Either: +1000 or -0700
    # or
    # 11:53 PST (One to four chars after a time)
    my $tz_offset_seconds = 0;
    my $tz_str            = '';
    state $tz_rule = qr/
        (
            (\s|:\d\d)                # Start AFTER a space, or time (:12)
            ([+-])(\d{1,2})(\d{2})    # +1000 or -700 (three or four digits)
            |
            \d{2}\                    # Only match chars if they're AFTER a time
            ([A-Z]{1,4})\b            # Capitalized TZ at end of string
            |
            \d{2}(Z)$                 # Just a simple Z at the end
        )
    /x;

    # If we have a string with a timezone piece. $ret is always defined at
    # this point and 0 is a perfectly valid unixtime, so we must not test it
    # for truth here.
    if ($str =~ $tz_rule) {
        my $str_offset = 0;

        # String timezone: 11:53 PST
        if ($6 || $7) {
            # Whichever form matches, the TZ is that one
            my $tz_code = $6 || $7 || '';

            # Lookup the timezone offset in the table
            $str_offset  = $TZ_OFFSET->{$tz_code} || 0;
            # Timezone offsets are in hours, so we convert to seconds
            $str_offset *= 3600;

            $tz_str = $tz_code;
        # Numeric format: +1000 or -0700
        } else {
            # Break the input string into parts so we can do math
            # +1000 = 10 hours, -0700 = 7 hours, +0430 = 4.5 hours
            $str_offset = ($4 + ($5 / 60)) * 3600;

            if ($3 eq "-") {
                $str_offset *= -1;
            }

            $tz_str = "$3$4$5";
        }

        $tz_offset_seconds = $str_offset;
    # No timezone info found so we assume the local timezone
    } else {
        my $local_offset = get_local_offset($ret);

        $tz_offset_seconds = $local_offset;
        $tz_str            = 'UNSPECIFIED';
    }

    # Subtract the timezone offset from the unixtime
    $ret -= $tz_offset_seconds;

    if ($debug) {
        my $color  = "\e[38;5;45m";
        my $reset  = "\e[0m";
        my $header = sprintf("%*s = YYYY-MM-DD HH:II:SS (timezone offset)", length($str) + 2, "Input string");
        my $output = sprintf("'%s' = %02d-%02d-%02d %02d:%02d:%02d (%s = %d seconds)", $str, $year || -1, $month || -1, $day || -1, $hour, $min, $sec, $tz_str, $tz_offset_seconds);

        print STDERR $color . $header . $reset . "\n";
        print STDERR $output . "\n";
    }

    return $ret;
}
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Return the timezone offset (in seconds) of the local machine at the moment
# given by $unixtime.
#
# When the package variable $LOCAL_TZ_OFFSET is defined it is returned as-is
# and nothing else is computed (used by the unit tests). Otherwise the offset
# is the difference between the local broken-down time interpreted as UTC and
# interpreted as local time, memoized per half-hour bucket while
# $USE_TZ_CACHE is true.
sub get_local_offset {
    my $unixtime = $_[0];

    # If we have a forced LOCAL_TZ_OFFSET we use that (unit tests)
    if (defined($LOCAL_TZ_OFFSET)) {
        return $LOCAL_TZ_OFFSET;
    }

    # Since timezones only change on the half-hour (at most), we
    # round down the nearest half hour "bucket" and then cache
    # that result. We probably could get away with a full hour
    # here but we don't gain much performance/memory by doing that
    my $bucket_size = 1800;

    # The caller may hand us a fractional unixtime (milliseconds). Drop the
    # fraction first so every time inside a bucket maps to the SAME key
    # instead of one key per distinct fraction.
    my $whole_secs = int($unixtime);
    my $cache_key  = $whole_secs - ($whole_secs % $bucket_size);

    # Simple memoizing (improves repeated performance a LOT)
    # Note: this is even faster than `use Memoize`
    state $x = {};

    # exists() rather than truth: an offset of 0 (UTC) is a valid cached value
    if ($USE_TZ_CACHE && exists($x->{$cache_key})) {
        return $x->{$cache_key};
    }

    # Get a time obj for this local timezone and UTC for the Unixtime
    # Then compare the two to get the local TZ offset
    my @t   = localtime($unixtime);
    my $ret = (Time::Local::timegm(@t) - Time::Local::timelocal(@t));

    # Cache the result
    if ($USE_TZ_CACHE) {
        $x->{$cache_key} = $ret;
    }

    return $ret;
}
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
1; |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
__END__ |