File Coverage

blib/lib/Data/Password/zxcvbn/MatchList.pm
Criterion Covered Total %
statement 123 128 96.0
branch 20 24 83.3
condition 6 8 75.0
subroutine 17 18 94.4
pod 8 8 100.0
total 174 186 93.5


line stmt bran cond sub pod time code
1             package Data::Password::zxcvbn::MatchList;
2 4     4   8874 use Moo;
  4         13530  
  4         37  
3 4     4   7855 use Data::Password::zxcvbn::Match::BruteForce;
  4         13  
  4         154  
4 4     4   1469 use Data::Password::zxcvbn::Combinatorics qw(factorial);
  4         14  
  4         262  
5 4     4   1916 use Data::Password::zxcvbn::TimeEstimate qw(guesses_to_score);
  4         14  
  4         268  
6 4     4   41 use Module::Runtime qw(use_module);
  4         8  
  4         36  
7 4     4   353 use List::AllUtils 0.14 qw(max_by);
  4         113  
  4         6261  
8              
9             our $VERSION = '1.1.0'; # VERSION
10             # ABSTRACT: a collection of matches for a password
11              
12              
13             has password => (is => 'ro', required => 1); # string
14             has matches => (is => 'ro', default => sub { [] });
15             has guesses => (is => 'ro');
16              
17              
18             sub omnimatch {
19 1492     1492 1 74043 my ($class, $password, $opts) = @_;
20              
21             # let's protect people who try to pass BruteForce in
22             my @modules = $opts->{modules}
23 0         0 ? grep { $_ ne 'Data::Password::zxcvbn::Match::BruteForce' } @{$opts->{modules}}
  0         0  
24 1492 50       8697 : map { "Data::Password::zxcvbn::Match::$_" }
  10444         25361  
25             qw(
26             Dictionary
27             UserInput
28             Spatial
29             Repeat
30             Sequence
31             Regex
32             Date
33             );
34              
35             # here, we need to pass the whole $opts down, because some
36             # matchers (e.g. Repeat) will use it to call us recursively, and
37             # we don't want to lose any option
38             my @matches = map {
39 1492         4631 @{ use_module($_)->make($password,$opts) },
  10444         18138  
  10444         25802  
40             } @modules;
41 1492         7175 @matches = sort @matches;
42              
43 1492         43789 return $class->new({
44             password => $password,
45             matches => \@matches,
46             });
47             }
48              
49              
50             # the following is a O($l_max * ($n + $m)) dynamic programming
51             # algorithm for a length-$n password with $m candidate matches. $l_max
52             # is the maximum optimal sequence length spanning each prefix of the
53             # password. In practice it rarely exceeds 5 and the search terminates
54             # rapidly.
55             #
56             # the optimal "minimum guesses" sequence is here defined to be the
57             # sequence that minimizes the following function:
58             #
59             # $g = $l! * Product($_->guesses for $sequence) + $D^($l - 1)
60             #
61             # where $l is the length of the $sequence.
62             #
63             # the factorial term is the number of ways to order $l patterns.
64             #
65             # the $D^($l-1) term is another length penalty, roughly capturing the
66             # idea that an attacker will try lower-length sequences first before
67             # trying length-$l sequences.
68             #
69             # for example, consider a sequence that is date-repeat-dictionary.
70             #
71             # - an attacker would need to try other date-repeat-dictionary
72             # combinations, hence the product term.
73             #
74             # - an attacker would need to try repeat-date-dictionary,
75             # dictionary-repeat-date, ..., hence the factorial term.
76             #
77             # - an attacker would also likely try length-1 (dictionary) and
78             # length-2 (dictionary-date) sequences before length-3. assuming at
79             # minimum $D guesses per pattern type, $D^($l-1) approximates
80             # Sum($D**$_ for 1..$l-1)
81              
82             my $MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10000;
83              
84             sub most_guessable_match_list { ## no critic(ProhibitExcessComplexity)
85 1509     1509 1 105599 my ($self, $exclude_additive) = @_;
86              
87 1509         5404 my $password = $self->password;
88 1509         3998 my $n = length($password);
89              
90             # partition matches into sublists according to ending index j
91 1509         3034 my %matches_by_j;
92 1509         2685 for my $match (@{$self->matches}) {
  1509         6750  
93 6329         9210 push @{$matches_by_j{$match->j}},$match;
  6329         22460  
94             }
95             # small detail: for deterministic output, sort each sublist by i.
96 1509         5833 for my $list (values %matches_by_j) {
97 3599         5551 $list = [ sort {$a->i <=> $b->i} @{$list} ];
  3913         10633  
  3599         10248  
98             }
99              
100             # $optimal{m}{$k}{$l} holds final match in the best length-$l
101             # match sequence covering the password prefix up to $k, inclusive.
102             # if there is no length-$l sequence that scores better (fewer
103             # guesses) than a shorter match sequence spanning the same prefix,
104             # this is undefined.
105             #
106             # $optimal{pi} has the same structure as $optimal{m} -- holds the
107             # product term Prod(m.guesses for m in sequence). $optimal{pi}
108             # allows for fast (non-looping) updates to the minimization
109             # function.
110             #
111             # $optimal{g} again same structure, holds the overall metric
112 1509         3440 my %optimal;
113              
114             # helper: considers whether a length-$length sequence ending at
115             # $match is better (fewer guesses) than previously encountered
116             # sequences, updating state if so.
117             my $update = sub {
118 25100     25100   48223 my ($match,$length) = @_;
119              
120 25100         59882 my $k = $match->j;
121 25100         357629 my $pi = $match->guesses_for_password($password);
122              
123 25100 100       62142 if ($length > 1) {
124             # we're considering a length-$length sequence ending with
125             # $match: obtain the product term in the minimization
126             # function by multiplying $match->guesses by the product
127             # of the length-($length-1) sequence ending just before
128             # $match, at $match->i - 1
129 15130         47765 $pi *= $optimal{pi}->{$match->i-1}{$length-1};
130             }
131 25100         61119 my $guesses = factorial($length) * $pi;
132 25100 100       58010 $guesses += $MIN_GUESSES_BEFORE_GROWING_SEQUENCE ** ($length-1)
133             unless $exclude_additive;
134              
135             # update state if new best. first see if any competing
136             # sequences covering this prefix, with $length or fewer
137             # matches, fare better than this sequence. if so, skip it and
138             # return.
139 25100         37475 for my $competing_length (keys %{$optimal{g}->{$k}}) {
  25100         78768  
140 18596 100       44967 next if $competing_length > $length;
141 15500         30069 my $competing_g = $optimal{g}->{$k}{$competing_length};
142 15500 50       29763 next unless defined $competing_g;
143 15500 100       52425 return if $competing_g <= $guesses;
144             }
145              
146 12323         29898 $optimal{g}->{$k}{$length} = $guesses;
147 12323         29914 $optimal{m}->{$k}{$length} = $match;
148 12323         32883 $optimal{pi}->{$k}{$length} = $pi;
149 1509         12789 };
150              
151             # helper: evaluate bruteforce matches ending at k.
152             my $bruteforce_update = sub {
153 8197     8197   17140 my ($k) = @_;
154             # see if a single bruteforce match spanning the k-prefix is optimal.
155 8197         173811 my $match = Data::Password::zxcvbn::Match::BruteForce->new({
156             password => $password,
157             i => 0, j => $k,
158             });
159 8197         145079 $update->($match, 1);
160              
161 8197         18478 for my $i (1..$k) {
162             # generate $k bruteforce matches, spanning from (i=1, j=$k) up to
163             # (i=$k, j=$k). see if adding these new matches to any of the
164             # sequences in $optimal{m}->[i-1] leads to new bests.
165 26710         499066 my $other_match = Data::Password::zxcvbn::Match::BruteForce->new({
166             password => $password,
167             i => $i, j => $k,
168             });
169              
170 26710         380277 for my $length (keys %{$optimal{m}->{$i-1}}) {
  26710         85678  
171 33786         65632 my $last_match = $optimal{m}->{$i-1}{$length};
172              
173             # corner: an optimal sequence will never have two adjacent
174             # bruteforce matches. it is strictly better to have a single
175             # bruteforce match spanning the same region: same contribution
176             # to the guess product with a lower length.
177             # --> safe to skip those cases.
178 33786 100       151527 next if $last_match->isa('Data::Password::zxcvbn::Match::BruteForce');
179             # try adding m to this length-l sequence.
180 9417         27706 $update->($other_match, $length + 1);
181             }
182             }
183 1509         7370 };
184              
185             # helper: step backwards through optimal.m starting at the end,
186             # constructing the final optimal match sequence.
187             my $unwind = sub {
188 1509     1509   4133 my ($k) = @_;
189              
190 1509         2880 my @optimal_match_sequence;
191 1509         2579 --$k;
192             # find the final best sequence length and score
193 1509         3178 my $length; my $guesses;
194 1509         2655 for my $candidate_length (keys %{$optimal{g}->{$k}}) {
  1509         5292  
195 2271         4677 my $candidate_guesses = $optimal{g}->{$k}{$candidate_length};
196 2271 100 100     10722 if (!defined($guesses) || $candidate_guesses < $guesses) {
197 1891         4360 $length = $candidate_length;
198 1891         4476 $guesses = $candidate_guesses;
199             }
200             }
201              
202 1509         5501 while ($k >= 0) {
203 2057         4596 my $match = $optimal{m}->{$k}{$length};
204 2057         4444 unshift @optimal_match_sequence,$match;
205 2057         4890 $k = $match->i - 1;
206 2057         7162 --$length;
207             }
208              
209 1509         4739 return \@optimal_match_sequence;
210 1509         6261 };
211              
212 1509         5151 for my $k (0..$n-1) {
213 8197         15438 for my $match (@{$matches_by_j{$k}}) {
  8197         24223  
214 6329 100       20005 if ($match->i > 0) {
215 4556         8125 for my $l (keys %{$optimal{m}->{$match->i - 1}}) {
  4556         15179  
216 5713         14329 $update->($match, $l+1);
217             }
218             }
219             else {
220 1773         3948 $update->($match,1);
221             }
222             }
223 8197         20107 $bruteforce_update->($k);
224             }
225              
226 1509         5238 my $optimal_match_sequence = $unwind->($n);
227 1509         3004 my $optimal_length = @{$optimal_match_sequence};
  1509         3211  
228              
229 1509         2635 my $guesses;
230             # corner: empty password
231 1509 50       4722 if ($n==0) {
232 0         0 $guesses = 1;
233             }
234             else {
235 1509         4979 $guesses = $optimal{g}->{$n - 1}{$optimal_length};
236             }
237              
238 1509         36035 return ref($self)->new({
239             password => $password,
240             guesses => $guesses,
241             matches => $optimal_match_sequence,
242             });
243             }
244              
245              
246             sub guesses_log10 {
247 996     996 1 33156 return log(shift->guesses)/log(10);
248             }
249              
250              
251 1992     1992 1 7224 sub score { guesses_to_score(shift->guesses) }
252              
253              
254             sub get_feedback {
255 996     996 1 5426 my ($self, $max_score_for_feedback) = @_;
256             # yes, if someone passes a 0, they'll get the default; I consider
257             # this a feature
258 996   50     6222 $max_score_for_feedback ||= 2;
259              
260 996         3393 my $matches = $self->matches;
261 996         2470 my $matches_count = @{$matches};
  996         2583  
262              
263 996 50       4238 if ($matches_count == 0) {
264 0         0 return $self->feedback_for_no_matches;
265             }
266              
267 996 100       3853 if ($self->score > $max_score_for_feedback) {
268 98         535 return $self->feedback_above_threshold;
269             }
270              
271 898     1345   4931 my $longest_match = max_by { length($_->token) } @{$matches};
  1345         15489  
  898         6913  
272 898         9949 my $is_sole_match = $matches_count == 1;
273 898         4081 my $feedback = $longest_match->get_feedback($is_sole_match);
274              
275 898         3685 my $extra_feedback = $self->feedback_below_threshold;
276 898         2058 push @{$feedback->{suggestions}}, @{$extra_feedback->{suggestions}};
  898         2519  
  898         2596  
277 898   66     5635 $feedback->{warning} ||= $extra_feedback->{warning};
278              
279 898         3675 return $feedback;
280             }
281              
282              
283             sub feedback_for_no_matches {
284             return {
285 0     0 1 0 warning => '',
286             suggestions => [
287             'Use a few words, avoid common phrases.',
288             'No need for symbols, digits, or uppercase letters.',
289             ],
290             };
291             }
292              
293              
294             sub feedback_above_threshold {
295 98     98 1 618 return { warning => '', suggestions => [] };
296             }
297              
298              
299             sub feedback_below_threshold {
300             return {
301 898     898 1 4377 warning => '',
302             suggestions => [
303             'Add another word or two. Uncommon words are better.'
304             ],
305             };
306             }
307              
308             1;
309              
310             __END__
311              
312             =pod
313              
314             =encoding UTF-8
315              
316             =for :stopwords JS
317              
318             =for :stopwords precendence
319              
320             =head1 NAME
321              
322             Data::Password::zxcvbn::MatchList - a collection of matches for a password
323              
324             =head1 VERSION
325              
326             version 1.1.0
327              
328             =head1 SYNOPSIS
329              
330             use Data::Password::zxcvbn::MatchList;
331              
332             my $list = Data::Password::zxcvbn::MatchList->omnimatch($password)
333             ->most_guessable_match_list;
334              
335             =head1 DESCRIPTION
336              
337             zxcvbn estimates the strength of a password by guessing which way a
338             generic password cracker would produce it, and then guessing after how
339             many tries it would produce it.
340              
341             This class represents a list of guesses ("matches"), covering
342             different substrings of a password.
343              
344             =head1 ATTRIBUTES
345              
346             =head2 C<password>
347              
348             Required string, the password this list is about.
349              
350             =head2 C<matches>
351              
352             Arrayref, the actual list of matches.
353              
354             =head2 C<guesses>
355              
356             The estimated number of attempts that a generic password cracker would
357             need to guess the whole L</password>. This will be set for objects
358             returned by L<< /C<most_guessable_match_list> >>, not for those
359             returned by L<< /C<omnimatch> >>.
360              
361             =head1 METHODS
362              
363             =head2 C<omnimatch>
364              
365             my $match_list = Data::Password::zxcvbn::MatchList->omnimatch($password,\%opts);
366              
367             Main constructor (the name comes from the original JS
368             implementation). Calls C<< ->make($password,\%opts) >> on all the
369             C<Data::Password::zxcvbn::Match::*> classes (or the ones in C<<
370             @{$opts{modules}} >>), combines all the matches, and returns a
371             C<MatchList> holding them.
372              
373             =head2 C<most_guessable_match_list>
374              
375             my $minimal_list = $match_list->most_guessable_match_list;
376              
377             This method extracts, from the L</matches> of the invocant, a list of
378             non-overlapping matches with minimum guesses. That list should
379             represent the way that a generic password cracker would guess the
380             L</password>, and as such is the one that the L<main
381             function|Data::Password::zxcvbn/password_strength> will use.
382              
383             =head2 C<guesses_log10>
384              
385             The logarithm in base 10 of L<< /C<guesses> >>.
386              
387             =head2 C<score>
388              
389             my $score = $match_list->score;
390              
391             Returns an integer from 0-4 (useful for implementing a strength
392             bar). See L<<
393             C<Data::Password::zxcvbn::TimeEstimate::guesses_to_score>|Data::Password::zxcvbn::TimeEstimate/guesses_to_score
394             >>.
395              
396             =head2 C<get_feedback>
397              
398             my %feedback = %{ $match_list->get_feedback };
399              
400             my %feedback = %{ $match_list->get_feedback($max_score_for_feedback) };
401              
402             If there's no matches, returns the result of L<<
403             /C<feedback_for_no_matches> >>.
404              
405             If the match list L</score> is above C<$max_score_for_feedback>
406             (default 2), returns the result of L<< /C<feedback_above_threshold>
407             >>.
408              
409             Otherwise, collects all the feedback from the L</matches>, and returns
410             it, merged with the result of L<< /C<feedback_below_threshold> >>
411             (suggestions are appended, but the warning from the matches takes
412             precendence).
413              
414             =head2 C<feedback_for_no_matches>
415              
416             Returns a feedback for when the password didn't match any of our
417             heuristics. It contains no warning, and some simple common
418             suggestions.
419              
420             =head2 C<feedback_above_threshold>
421              
422             Returns a feedback for when the password scored above the threshold
423             passed to L<< /C<get_feedback> >> (i.e. the password is "good"). It's
424             an empty feedback.
425              
426             =head2 C<feedback_below_threshold>
427              
428             Returns a feedback for when the password scored below the threshold
429             passed to L<< /C<get_feedback> >> (i.e. the password is "bad"). It
430             suggests to add some words.
431              
432             =head1 AUTHOR
433              
434             Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
435              
436             =head1 COPYRIGHT AND LICENSE
437              
438             This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
439              
440             This is free software; you can redistribute it and/or modify it under
441             the same terms as the Perl 5 programming language system itself.
442              
443             =cut