File Coverage

blib/lib/Data/Password/zxcvbn/Match/Date.pm
Criterion Covered Total %
statement 84 84 100.0
branch 43 48 89.5
condition 20 21 95.2
subroutine 10 10 100.0
pod 4 4 100.0
total 161 167 96.4


line stmt bran cond sub pod time code
1             package Data::Password::zxcvbn::Match::Date;
2 3     3   9301 use Moo;
  3         9  
  3         18  
3             with 'Data::Password::zxcvbn::Match';
4 3     3   3249 use List::AllUtils 0.14 qw(max min_by);
  3         111  
  3         3769  
5             our $VERSION = '1.1.1'; # VERSION
6             # ABSTRACT: match class for digit sequences that look like dates
7              
8              
9             my $MIN_YEAR_SPACE = 20;
10             my $REFERENCE_YEAR = 2017;
11              
12             has year => ( is => 'ro', required => 1 );
13             has separator => ( is => 'ro', default => '' );
14              
15              
16             sub estimate_guesses {
17 370     370 1 9028 my ($self, $min_guesses) = @_;
18              
19             # base guesses: (year distance from REFERENCE_YEAR) * num_days * num_years
20 370         1970 my $year_space = max(abs($self->year - $REFERENCE_YEAR),$MIN_YEAR_SPACE);
21 370         1037 my $guesses = $year_space * 365;
22             # add factor of 4 for separator selection (one of ~4 choices)
23 370 100       1316 $guesses *=4 if $self->separator;
24              
25 370         1150 return $guesses;
26             }
27              
28              
29             my $MAYBE_DATE_NO_SEP_RE = qr{\A ([0-9]{4,8}) \z}x;
30             my $MAYBE_DATE_WITH_SEP_RE = qr{\A ([0-9]{1,4}) ([\s/\\_.-]) ([0-9]{1,2}) \2 ([0-9]{1,4}) \z}x;
31             my $MAX_YEAR = 2050;
32             my $MIN_YEAR = 1000;
33             my %SPLITS = (
34             4 => [ # for length-4 strings, eg 1191 or 9111, two ways to split:
35             [1, 2], # 1 1 91 (2nd split starts at index 1, 3rd at index 2)
36             [2, 3], # 91 1 1
37             ],
38             5 => [
39             [1, 3], # 1 11 91
40             [2, 3], # 11 1 91
41             ],
42             6 => [
43             [1, 2], # 1 1 1991
44             [2, 4], # 11 11 91
45             [4, 5], # 1991 1 1
46             ],
47             7 => [
48             [1, 3], # 1 11 1991
49             [2, 3], # 11 1 1991
50             [4, 5], # 1991 1 11
51             [4, 6], # 1991 11 1
52             ],
53             8 => [
54             [2, 4], # 11 11 1991
55             [4, 6], # 1991 11 11
56             ],
57             );
58              
59             sub make {
60 1533     1533 1 146217 my ($class, $password) = @_;
61             # a "date" is recognized as:
62             # * any 3-tuple that starts or ends with a 2- or 4-digit year,
63             # * with 2 or 0 separator chars (1.1.91 or 1191),
64             # * maybe zero-padded (01-01-91 vs 1-1-91),
65             # * a month between 1 and 12,
66             # * a day between 1 and 31.
67             #
68             # note: this isn't true date parsing in that "feb 31st" is allowed,
69             # this doesn't check for leap years, etc.
70             #
71             # recipe:
72             #
73             # start with regex to find maybe-dates, then attempt to map the
74             # integers onto month-day-year to filter the maybe-dates into
75             # dates.
76             #
77             # finally, remove matches that are substrings of other matches to
78             # reduce noise.
79             #
80             # note: instead of using a lazy or greedy regex to find many dates
81             # over the full string, this uses a ^...$ regex against every
82             # substring of the password -- less performant but leads to every
83             # possible date match.
84              
85 1533         3649 my $length = length($password);
86             # dates without separators are between length 4 '1191' and 8 '11111991'
87 1533 100       6256 return [] if $length < 4;
88              
89 1052         3597 my @matches;
90              
91 1052         4183 for my $i (0..$length-3) {
92 5757         13274 for my $j ($i+3 .. $i+8) {
93 19409 100       85832 last if $j >= $length;
94              
95 14105         26302 my $token = substr($password,$i,$j-$i+1);
96 14105 100       52195 next unless $token =~ $MAYBE_DATE_NO_SEP_RE;
97              
98 1901         3814 my @candidates;
99 1901 50       3188 for my $split (@{ $SPLITS{length($token)} || [] }) {
  1901         7278  
100 4454         7406 my ($k,$l) = @{$split};
  4454         9314  
101              
102 4454 100       15305 my $year = $class->_map_ints_to_year(
103             substr($token,0,$k),
104             substr($token,$k,$l-$k),
105             substr($token,$l),
106             ) or next;
107              
108 2111         6121 push @candidates,$year;
109             }
110 1901 100       5349 next unless @candidates;
111              
112             # at this point: different possible year mappings for the
113             # same i,j substring. match the candidate date that likely
114             # takes the fewest guesses: a year closest to
115             # 2017. ($REFERENCE_YEAR).
116             #
117             # ie, considering '111504', prefer 11-15-04 to 1-1-1504
118             # (interpreting '04' as 2004)
119 1411     2111   8360 my $best_candidate = min_by { abs($_ - $REFERENCE_YEAR) } @candidates;
  2111         17337  
120 1411         44543 push @matches, $class->new({
121             token => $token,
122             i => $i, j => $j,
123             separator => '',
124             year => $best_candidate,
125             });
126             }
127             }
128              
129             # dates with separators are between length 6 '1/1/91' and 10 '11/11/1991'
130 1052         4709 for my $i (0..$length-5) {
131 3653         7547 for my $j ($i+5 .. $i+10) {
132 9657 100       25349 last if $j >= $length;
133              
134 6158         11940 my $token = substr($password,$i,$j-$i+1);
135 6158 100       25595 my @pieces = $token =~ $MAYBE_DATE_WITH_SEP_RE
136             or next;
137              
138 178 100       483 my $year = $class->_map_ints_to_year(
139             $pieces[0],
140             $pieces[2],
141             $pieces[3]
142             ) or next;
143              
144 148         3244 push @matches, $class->new({
145             token => $token,
146             i => $i, j => $j,
147             separator => $pieces[1],
148             year => $year,
149             });
150             }
151             }
152              
153             # matches now contains all valid date strings in a way that is
154             # tricky to capture with regexes only. while thorough, it will
155             # contain some unintuitive noise:
156             #
157             # '2015_06_04', in addition to matching 2015_06_04, will also
158             # contain 5(!) other date matches: 15_06_04, 5_06_04, ..., even
159             # 2015 (matched as 5/1/2020)
160             #
161             # to reduce noise, remove date matches that are strict substrings
162             # of others
163              
164             @matches = grep {
165 1052         4432 my $match = $_;
  1559         2736  
166             my $is_submatch = grep {
167 1559 100 100     2676 $_ == $match
  12631 100       26733  
168             ? 0
169             : $_->i <= $match->i && $_->j >= $match->j
170             ? 1
171             : 0
172             } @matches;
173 1559         4708 !$is_submatch;
174             } @matches;
175              
176 1052         3185 @matches = sort @matches;
177 1052         5994 return \@matches;
178             }
179              
180             sub _map_ints_to_year {
181 4632     4632   16815 my ($class,@ints) = @_;
182              
183             ## no critic (ProhibitBooleanGrep)
184              
185             # given a 3-tuple, discard if:
186             # middle int is over 31 (for all dmy formats, years are never allowed in
187             # the middle)
188             # middle int is zero
189 4632 100 100     20315 return undef if $ints[1] > 31 or $ints[1] <= 0;
190             # any int is over the max allowable year
191             # any int is over two digits but under the min allowable year
192 3403 100 100     6312 return undef if grep { $_ > $MAX_YEAR ||
  10209 100       40334  
193             ( $_ > 99 && $_ < $MIN_YEAR ) } @ints;
194             # 2 ints are over 31, the max allowable day
195 2634 50       5153 return undef if grep { $_ > 31 } @ints >= 2;
  2634         7141  
196             # 2 ints are zero
197 2634 50       5005 return undef if grep { $_ == 0 } @ints >= 2;
  2634         6828  
198             # all ints are over 12, the max allowable month
199 2634 50       4962 return undef if grep { $_ > 12 } @ints == 3;
  2634         6769  
200              
201             # first look for a four digit year: yyyy + daymonth or daymonth + yyyy
202 2634         8864 my @possible_four_digit_splits = (
203             [ $ints[2], $ints[0], $ints[1] ],
204             [ $ints[0], $ints[1], $ints[2] ],
205             );
206 2634         5083 for my $split (@possible_four_digit_splits) {
207 5098         7793 my ($year,@rest) = @{$split};
  5098         11252  
208 5098 100 66     14002 if ( $year >= $MIN_YEAR && $year <= $MAX_YEAR) {
209             # for a candidate that includes a four-digit year,
210             # when the remaining ints don't match to a day and month,
211             # it is not a date.
212 335 100       806 if ($class->_map_ints_to_dm(@rest)) {
213 255         1142 return $year;
214             }
215             else {
216 80         358 return undef;
217             }
218             }
219             }
220              
221             # given no four-digit year, two digit years are the most flexible
222             # int to match, so try to parse a day-month out of @ints[0,1] or
223             # @ints[1,0]
224 2299         4250 for my $split (@possible_four_digit_splits) {
225 3112         5146 my ($year,@rest) = @{$split};
  3112         7133  
226 3112 100       7642 if ($class->_map_ints_to_dm(@rest)) {
227 2004         4764 $year = $class->_two_to_four_digit_year($year);
228 2004         8169 return $year;
229             }
230             }
231              
232 295         1350 return undef;
233             }
234              
235             sub _map_ints_to_dm {
236 3447     3447   8104 my ($class,@ints) = @_;
237 3447         10498 for my $case ([@ints],[reverse @ints]) {
238 4904         7134 my ($d,$m) = @{$case};
  4904         9167  
239 4904 100 100     25443 if ( $d >= 1 && $d <= 31 && $m >= 1 && $m <= 12) {
      100        
      100        
240 2259         7459 return 1
241             }
242             }
243 1188         4035 return undef;
244             }
245              
246             sub _two_to_four_digit_year {
247 2004     2004   4563 my ($class, $year) = @_;
248 2004 50       4804 return $year if $year > 99;
249 2004 100       4693 return 1900 + $year if $year > 50;
250 1190         2449 return 2000 + $year;
251             }
252              
253              
254             sub feedback_warning {
255 134     134 1 452 my ($self) = @_;
256              
257 134         757 return 'Dates are often easy to guess';
258             }
259              
260             sub feedback_suggestions {
261 134     134 1 677 return [ 'Avoid dates and years that are associated with you' ];
262             }
263              
264              
265             around fields_for_json => sub {
266             my ($orig,$self) = @_;
267             ( $self->$orig(), qw(year separator) )
268             };
269              
270             1;
271              
272             __END__
273              
274             =pod
275              
276             =encoding UTF-8
277              
278             =head1 NAME
279              
280             Data::Password::zxcvbn::Match::Date - match class for digit sequences that look like dates
281              
282             =head1 VERSION
283              
284             version 1.1.1
285              
286             =head1 DESCRIPTION
287              
288             This class represents the guess that a certain substring of a
289             password, consisting of digits and maybe separators, can be guessed by
290             scanning dates in the recent past (like birthdays, or recent events).
291              
292             =head1 ATTRIBUTES
293              
294             =head2 C<year>
295              
296             Integer, the year extracted from the token.
297              
298             =head2 C<separator>
299              
300             String, possibly empty: the separator used between digits in the
301             token.
302              
303             =head1 METHODS
304              
305             =head2 C<estimate_guesses>
306              
307             The number of guesses is the number of days between the extracted
308             L</year> and a reference year (currently 2017), multiplied by the
309             possible separators.
310              
311             =head2 C<make>
312              
313             my @matches = @{ Data::Password::zxcvbn::Match::Date->make(
314             $password,
315             ) };
316              
317             Scans the C<$password> for sequences of digits and separators that
318             look like dates. Some examples:
319              
320             =over 4
321              
322             =item *
323              
324             1/1/91
325              
326             =item *
327              
328             1191
329              
330             =item *
331              
332             1991-01-01
333              
334             =item *
335              
336             910101
337              
338             =back
339              
340             =head2 C<feedback_warning>
341              
342             =head2 C<feedback_suggestions>
343              
344             This class suggests not using dates.
345              
346             =head2 C<fields_for_json>
347              
348             The JSON serialisation for matches of this class will contain C<token
349             i j guesses guesses_log10 year separator>.
350              
351             =head1 AUTHOR
352              
353             Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
354              
355             =head1 COPYRIGHT AND LICENSE
356              
357             This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
358              
359             This is free software; you can redistribute it and/or modify it under
360             the same terms as the Perl 5 programming language system itself.
361              
362             =cut