File Coverage

blib/lib/Date/Parse/Modern.pm
Criterion Covered Total %
statement 116 132 87.8
branch 45 56 80.3
condition 49 76 64.4
subroutine 8 8 100.0
pod 1 2 50.0
total 219 274 79.9


line stmt bran cond sub pod time code
1             #!/usr/bin/env perl
2              
3             package Date::Parse::Modern;
4              
5 2     2   145737 use strict;
  2         20  
  2         62  
6 2     2   10 use warnings;
  2         8  
  2         46  
7 2     2   46 use v5.10;
  2         8  
8              
9 2     2   11 use Carp;
  2         4  
  2         183  
10 2     2   1003 use Time::Local 1.26;
  2         5191  
  2         119  
11 2     2   13 use Exporter 'import';
  2         4  
  2         3776  
12             our @EXPORT = ('strtotime');
13              
14             ###############################################################################
15              
16             our $VERSION = 0.3;
17              
18             # https://timezonedb.com/download
19             my $TZ_OFFSET = {
20             'ACDT' => 10, 'ACST' => 9, 'ACT' => -5, 'ACWST' => 8, 'ADT' => -3, 'AEDT' => 11, 'AEST' => 10, 'AFT' => 4,
21             'AKDT' => -8, 'AKST' => -9, 'ALMT' => 6, 'AMST' => 5, 'AMT' => 4, 'ANAST' => 12, 'ANAT' => 12, 'AQTT' => 5,
22             'ART' => -3, 'AST' => -4, 'AWDT' => 9, 'AWST' => 8, 'AZOST' => 0, 'AZOT' => -1, 'AZST' => 5, 'AZT' => 4,
23             'AoE' => -12, 'BNT' => 8, 'BOT' => -4, 'BRST' => -2, 'BRT' => -3, 'BST' => 1, 'BTT' => 6, 'CAST' => 8,
24             'CAT' => 2, 'CCT' => 6, 'CDT' => -5, 'CEST' => 2, 'CET' => 1, 'CHADT' => 13, 'CHAST' => 12, 'CHOST' => 9,
25             'CHOT' => 8, 'CHUT' => 10, 'CIDST' => -4, 'CIST' => -5, 'CKT' => -10, 'CLST' => -3, 'CLT' => -4, 'COT' => -5,
26             'CST' => -6, 'CVT' => -1, 'CXT' => 7, 'ChST' => 10, 'DAVT' => 7, 'DDUT' => 10, 'EASST' => -5, 'EAST' => -6,
27             'EAT' => 3, 'ECT' => -5, 'EDT' => -4, 'EEST' => 3, 'EET' => 2, 'EGST' => 0, 'EGT' => -1, 'EST' => -5,
28             'FET' => 3, 'FJST' => 13, 'FJT' => 12, 'FKST' => -3, 'FKT' => -4, 'FNT' => -2, 'GALT' => -6, 'GAMT' => -9,
29             'GET' => 4, 'GFT' => -3, 'GILT' => 12, 'GMT' => 0, 'GST' => -2, 'GYT' => -4, 'HDT' => -9, 'HKT' => 8,
30             'HOVST' => 8, 'HOVT' => 7, 'HST' => -10, 'ICT' => 7, 'IDT' => 3, 'IOT' => 6, 'IRDT' => 4, 'IRKST' => 9,
31             'IRKT' => 8, 'IRST' => 3, 'IST' => 2, 'JST' => 9, 'KGT' => 6, 'KOST' => 11, 'KRAST' => 8, 'KRAT' => 7,
32             'KST' => 9, 'KUYT' => 4, 'LHDT' => 11, 'LHST' => 10, 'LINT' => 14, 'MAGST' => 12, 'MAGT' => 11, 'MART' => -9,
33             'MAWT' => 5, 'MDT' => -6, 'MHT' => 12, 'MMT' => 6, 'MSD' => 4, 'MSK' => 3, 'MST' => -7, 'MUT' => 4,
34             'MVT' => 5, 'MYT' => 8, 'NCT' => 11, 'NDT' => -2, 'NFDT' => 12, 'NFT' => 11, 'NOVST' => 7, 'NOVT' => 7,
35             'NPT' => 5, 'NRT' => 12, 'NST' => -3, 'NUT' => -11, 'NZDT' => 13, 'NZST' => 12, 'OMSST' => 7, 'OMST' => 6,
36             'ORAT' => 5, 'PDT' => -7, 'PET' => -5, 'PETST' => 12, 'PETT' => 12, 'PGT' => 10, 'PHOT' => 13, 'PHT' => 8,
37             'PKT' => 5, 'PMDT' => -2, 'PMST' => -3, 'PONT' => 11, 'PST' => -8, 'PWT' => 9, 'PYST' => -3, 'PYT' => 8,
38             'QYZT' => 6, 'RET' => 4, 'ROTT' => -3, 'SAKT' => 11, 'SAMT' => 4, 'SAST' => 2, 'SBT' => 11, 'SCT' => 4,
39             'SGT' => 8, 'SRET' => 11, 'SRT' => -3, 'SST' => -11, 'SYOT' => 3, 'TAHT' => -10, 'TFT' => 5, 'TJT' => 5,
40             'TKT' => 13, 'TLT' => 9, 'TMT' => 5, 'TOST' => 14, 'TOT' => 13, 'TRT' => 3, 'TVT' => 12, 'ULAST' => 9,
41             'ULAT' => 8, 'UYST' => -2, 'UYT' => -3, 'UZT' => 5, 'VET' => -4, 'VLAST' => 11, 'VLAT' => 10, 'VOST' => 6,
42             'VUT' => 11, 'WAKT' => 12, 'WARST' => -3, 'WAST' => 2, 'WAT' => 1, 'WEST' => 1, 'WET' => 0, 'WFT' => 12,
43             'WGST' => -2, 'WGT' => -3, 'WIB' => 7, 'WIT' => 9, 'WITA' => 8, 'WST' => 1, 'YAKST' => 10, 'YAKT' => 9,
44             'YAPT' => 10, 'YEKST' => 6, 'YEKT' => 5, 'Z' => 0,
45             };
46              
47             # Separator between dates pieces: '-' or '/' or '\'
48             my $sep = qr/[\/\\-]/;
49              
50             # Force a local timezone offset (used for unit tests)
51             our $LOCAL_TZ_OFFSET = undef;
52              
53             # Use caching for repeated lookups for the same TZ offset
54             our $USE_TZ_CACHE = 1;
55              
56             # These are undocumented package variables. They could be changed to support
57             # alternate languages but there are caveats. These are cached and changing
58             # them after strtotime() is called won't affect anything. No one has requested
59             # alternate languages, so I'm leaving this undocumented for now.
60             our $MONTH_MAP = {
61             'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4 , 'may' => 5 , 'jun' => 6 ,
62             'jul' => 7, 'aug' => 8, 'sep' => 9, 'oct' => 10, 'nov' => 11, 'dec' => 12,
63             };
64              
65             # See above
66             our $MONTH_REGEXP = qr/
67             Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|
68             Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December
69             /ix;
70              
71             ###############################################################################
72             ###############################################################################
73             ###############################################################################
74              
75             =head1 NAME
76              
77             C - Provide string to unixtime conversions
78              
79             =head1 DESCRIPTION
80              
81             C provides a single function C which takes a datetime string
82             and returns a unixtime. Care was given to support the most modern style strings that you would
83             commonly find in log files or on the internet.
84              
85             =head1 USAGE
86              
87             use Date::Parse::Modern;
88              
89             C exports the C function automatically.
90              
91             =head1 FUNCTIONS
92              
93             =head2 strtotime($string)
94              
95             my $unixtime = strtotime('1979-02-24'); # 288691200
96              
97             Simply feed C a string with some type of date or time in it, and it will return an
98             integer unixtime. If the string is unparseable, or a weird error occurs, it will return C.
99              
100             All the "magic" in C is done using regular expressions that look for common datetime
101             formats. Common formats like YYYY-MM-DD and HH:II:SS are easily detected and converted to the
102             appropriate formats. This allows the date or time to be found anywhere in the string, in (almost) any
103             order. In all cases, the day of the week is ignored in the input string.
104              
105             B Strings without a year are assumed to be in the current year. Example: C
106              
107             B Strings with only a date are assumed to occur at the midnight. Example: C<2023-01-15>
108              
109             B Strings with only time are assumed to be the current day. Example: C<10:15am>
110              
111             B In strings with numeric B textual time zone offsets, the numeric is used. Example:
112             C<14 Nov 1994 11:34:32 -0500 (EST)>
113              
114             =head1 Bugs/Features
115              
116             Please submit bugs and feature requests on Github:
117              
118             https://github.com/scottchiefbaker/perl-Date-Parse-Modern
119              
120             =head1 AUTHORS
121              
122             Scott Baker - https://www.perturb.org/
123              
124             =cut
125              
126             ###############################################################################
127             ###############################################################################
128             ###############################################################################
129              
130             # The logic here is that we use regular expressions to pull out various patterns
131             # YYYY/MM/DD, H:I:S, DD MonthWord YYYY
132             sub strtotime {
133 38     38 1 195 my ($str, $debug) = @_;
134              
135 38 100       93 if (!defined($str)) {
136 1         4 return undef;
137             }
138              
139 37         74 my ($year, $month, $day) = (0, 0, 0);
140 37         69 my ($hour, $min , $sec) = (0, 0, 0);
141              
142             ###########################################################################
143             ###########################################################################
144              
145 37         91 state $rule_1 = qr/
146             \b
147             ((\d{4})$sep(\d{2})$sep(\d{2}) # YYYY-MM-DD
148             |
149             (\d{2})$sep(\d{2})$sep(\d{4})) # DD-MM-YYYY
150             /x;
151              
152             # First we look to see if we have anything that mathches YYYY-MM-DD (numerically)
153 37 100       324 if ($str =~ $rule_1) {
154             # YYYY-MM-DD: 1999-12-24
155 14 100 66     56 if ($2 || $3) {
156 13         25 $year = $2;
157 13         23 $month = $3;
158 13         27 $day = $4;
159             }
160              
161             # DD-MM-YYYY: 12-24-1999
162 14 100 66     63 if ($5 || $6) {
163 1         3 $day = $5;
164 1         2 $month = $6;
165 1         3 $year = $7;
166              
167             # It might be American format (MM-DD-YYYY) so we do a quick flip/flop
168 1 50       5 if ($month > 12) {
169 1         5 ($day, $month) = ($month, $day);
170             }
171             }
172             }
173              
174             # The year may be on the end of the string: Sat May 8 21:24:31 2021
175 37 100       81 if (!$year) {
176 23         75 ($year) = $str =~ m/\s(\d{4})\b/;
177             }
178              
179             ###########################################################################
180              
181 37         202 state $rule_2 = qr/
182             (\d{1,2})? # Maybe some digits before month
183             \s*
184             ($MONTH_REGEXP) # A textual month
185             \s+
186             (\d{1,4}) # Digits
187             [\s\$] # Whitespace OR end of line
188             ((\d{4}) )? # If there are digits ater the space it's 'Jan 13 2000'
189             /x;
190              
191             # Next we look for alpha months followed by a digit if we didn't find a numeric month above
192             # This will find: "April 13" and also "13 April 1995"
193 37 100 100     283 if (!$month && $str =~ $rule_2) {
194              
195             # Get the numerical number for this month
196 13         47 my $month_name = lc(substr($2,0,3));
197 13         27 $month = $MONTH_MAP->{$month_name};
198              
199             # 17 March 94
200 13 100       34 if ($1) {
201 4         13 $day = int($1);
202 4         20 $year = int($3);
203             # April 13 or April 13 94
204             } else {
205 9         21 $day = int($3);
206              
207             # *IF* there is a $5 it's a year
208 9   50     31 $year ||= int($5 || 0);
      66        
209             }
210             }
211              
212             ###########################################################################
213              
214             # Alternate date string like like: 21/dec/93 or dec/21/93 much less common
215 37 100 100     269 if (!$month && $str =~ /(.*)($MONTH_REGEXP)(.*)/) {
216 5         15 my $before = $1;
217 5         11 my $after = $3;
218              
219             # Lookup the numeric month based on the string name
220 5   50     22 $month = $MONTH_MAP->{lc($2)} || 0;
221              
222             # Month starts string: dec/21/93 or feb/14/1999
223 5 100 33     22 if ($before eq "") {
    50          
224 2         27 $after =~ m/(\d{2})$sep(\d{2,4})/;
225              
226 2         6 $day = $1;
227 2         4 $year = $2;
228              
229             # Month in the middle: 21/dec/93
230             } elsif ($before && $after) {
231 3         11 $before =~ m/(\d+)\D/; # Just the digits
232 3   50     8 $day = $1 || 0;
233              
234 3         10 $after =~ m/\D(\d{2,4})(.)/; # Get the digits AFTER the separator
235              
236             # If it's not a time (has a colon) it's the year
237 3 100       9 if ($2 ne ":") {
238 1         3 $year = $1;
239             }
240             }
241             }
242              
243             ###########################################################################
244              
245 37         57 state $rule_3 = qr/
246             (\b|T) # Anchor point
247             (\d{1,2}): # Hours
248             (\d{1,2}):? # Minutes
249             (\d{2}(Z|\.\d+)?)? # Seconds (optional)
250             \ ?(am|pm|AM|PM)? # AMPM (optional)
251             /x;
252              
253             # Now we look for times: 10:14, 10:14:17, 08:15pm
254 37 100       250 if ($str =~ $rule_3) {
255 27         73 $hour = int($2);
256 27         36 $min = int($3);
257 27   100     85 $sec = $4 || 0; # Not int() cuz it might be float for milliseconds
258              
259 27         58 $sec =~ s/Z$//;
260              
261             # The string of AM or PM
262 27   100     89 my $ampm = lc($6 || "");
263              
264             # PM means add 12 hours
265 27 100       57 if ($ampm eq "pm") {
266 1         2 $hour += 12;
267             }
268              
269             # 12:15am = 00:15 / 12:15pm = 12:15 so we have to compensate
270 27 50 66     58 if ($ampm && ($hour == 24 || $hour == 12)) {
      100        
271 2         5 $hour -= 12;
272             }
273             }
274              
275             # Just some basic sanity checking
276 37   100     108 my $has_time = ($hour || $min || $sec);
277 37   66     111 my $has_date = ($year || $month || $day);
278              
279 37 100 100     86 if (!$has_time && !$has_date) {
280             # One final check if NOTHING else has matched, we lookup a weird format: 20020722T100000Z
281 4 100       14 if ($str =~ m/(\d{4})(\d{2})(\d{2})T(\d\d)(\d\d)(\d\d)Z/) {
282 1         4 $year = $1;
283 1         2 $month = $2;
284 1         2 $day = $3;
285              
286 1         2 $hour = $4;
287 1         3 $min = $5;
288 1         2 $sec = $6;
289             } else {
290 3         18 return undef;
291             }
292             }
293              
294             ###########################################################################
295             ###########################################################################
296              
297             # Sanity check some basic boundaries
298             # I don't think we need this any more since we eval() and timegm_modern() will barf and return undef
299             #if ($month > 12 || $day > 31 || $hour > 23 || $min > 60 || $sec > 61) {
300             # return undef;
301             #}
302              
303 34   66     92 $month ||= (localtime())[4] + 1; # If there is no month, we assume the current month
304 34   66     74 $day ||= (localtime())[3]; # If there is no day, we assume the current day
305             # If we STILL don't have a year it may be a time only string so we assume it's the current year
306 34   66     210 $year ||= (localtime())[5] + 1900;
307              
308             # Convert any two digit years to four digits
309 34 100       79 if ($year < 100) {
310 5         11 $year += 1900;
311             }
312              
313             # If we have all the requisite pieces we build a unixtime
314 34         46 my $ret;
315 34 50 0     50 my $err = $@ || 'Error' unless eval {
316 34         116 $ret = Time::Local::timegm_modern($sec, $min, $hour, $day, $month - 1, $year);
317              
318 34         1318 return 1;
319             };
320              
321 34 50 33     76 if ($err && $err =~ /Undefined subroutine/) {
322 0         0 print STDERR $err;
323 0         0 return undef;
324             };
325              
326             # If we find a timezone offset we take that in to account now
327             # Either: +1000 or -0700
328             # or
329             # 11:53 PST (One to four chars after a time)
330 34         56 my $tz_offset_seconds = 0;
331 34         49 my $tz_str = '';
332 34         55 state $tz_rule = qr/
333             (
334             (\s|:\d\d) # Start AFTER a space, or time (:12)
335             ([+-])(\d{1,2})(\d{2}) # +1000 or -700 (three or four digits)
336             |
337             \d{2}\ # Only match chars if they're AFTER a time
338             ([A-Z]{1,4})\b # Capitalized TZ at end of string
339             |
340             \d{2}(Z)$ # Just a simple Z at the end
341             )
342             /x;
343              
344 34 100 100     439 if ($ret && $str =~ $tz_rule) {
    100          
345 15         22 my $str_offset = 0;
346              
347             # String timezone: 11:53 PST
348 15 100 100     56 if ($6 || $7) {
349             # Whichever form matches, the TZ is that one
350 10   50     29 my $tz_code = $6 || $7 || '';
351              
352             # Lookup the timezone offset in the table
353 10   100     45 $str_offset = $TZ_OFFSET->{$tz_code} || 0;
354             # Timezone offsets are in hours, so we convert to seconds
355 10         15 $str_offset *= 3600;
356              
357 10         16 $tz_str = $tz_code;
358             # Numeric format: +1000 or -0700
359             } else {
360             # Break the input string into parts so we can do math
361             # +1000 = 10 hours, -0700 = 7 hours, +0430 = 4.5 hours
362 5         18 $str_offset = ($4 + ($5 / 60)) * 3600;
363              
364 5 100       13 if ($3 eq "-") {
365 4         7 $str_offset *= -1;
366             }
367              
368 5         14 $tz_str = "$2$3$4";
369             }
370              
371 15         26 $tz_offset_seconds = $str_offset;
372             # No timezone info found so we assume the local timezone
373             } elsif ($ret) {
374 17         38 my $local_offset = get_local_offset($ret);
375              
376 17         24 $tz_offset_seconds = $local_offset;
377 17         29 $tz_str = 'UNSPECIFIED';
378             }
379              
380             # Subtract the timezone offset from the unixtime
381 34         58 $ret -= $tz_offset_seconds;
382              
383 34 50       60 if ($debug) {
384 0         0 my $color = "\e[38;5;45m";
385 0         0 my $reset = "\e[0m";
386 0         0 my $header = sprintf("%*s = YYYY-MM-DD HH:II:SS (timezone offset)", length($str) + 2, "Input string");
387 0   0     0 my $output = sprintf("'%s' = %02d-%02d-%02d %02d:%02d:%02d (%s = %d seconds)", $str, $year || -1, $month || -1, $day || -1, $hour, $min, $sec, $tz_str, $tz_offset_seconds);
      0        
      0        
388              
389 0         0 print STDERR $color . $header . $reset . "\n";
390 0         0 print STDERR $output . "\n";
391             }
392              
393              
394 34         176 return $ret;
395             }
396              
397             # Return the timezone offset for the local machine
398             sub get_local_offset {
399 17     17 0 28 my $unixtime = $_[0];
400              
401             # Since timezones only change on the half-hour (at most), we
402             # round down the nearest half hour "bucket" and then cache
403             # that result. We probably could get away with a full hour
404             # here but we don't gain much performance/memory by doing that
405 17         21 my $bucket_size = 1800;
406 17         33 my $cache_key = $unixtime - ($unixtime % $bucket_size);
407              
408             # If we have a forced LOCAL_TZ_OFFSET we use that (unit tests)
409 17 50       32 if (defined($LOCAL_TZ_OFFSET)) {
410 17         45 return $LOCAL_TZ_OFFSET;
411             }
412              
413             # Simple memoizing (improves repeated performance a LOT)
414             # Note: this is even faster than `use Memoize`
415 0           state $x = {};
416 0 0 0       if ($USE_TZ_CACHE && $x->{$cache_key}) {
417 0           return $x->{$cache_key};
418             }
419              
420             # Get a time obj for this local timezone and UTC for the Unixtime
421             # Then compare the two to get the local TZ offset
422 0           my @t = localtime($unixtime);
423 0           my $ret = (Time::Local::timegm(@t) - Time::Local::timelocal(@t));
424              
425             # Cache the result
426 0 0         if ($USE_TZ_CACHE) {
427 0           $x->{$cache_key} = $ret;
428             }
429              
430 0           return $ret;
431             }
432              
433             1;
434              
435             __END__