File Coverage

blib/lib/Data/Password/zxcvbn/MatchList.pm

Criterion	Covered	Total	%
statement	123	128	96.0
branch	20	24	83.3
condition	6	8	75.0
subroutine	17	18	94.4
pod	8	8	100.0
total	174	186	93.5

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Password::zxcvbn::MatchList;
2	4			4		9259	use Moo;
	4					13394
	4					28
3	4			4		7921	use Data::Password::zxcvbn::Match::BruteForce;
	4					13
	4					124
4	4			4		1343	use Data::Password::zxcvbn::Combinatorics qw(factorial);
	4					12
	4					235
5	4			4		1756	use Data::Password::zxcvbn::TimeEstimate qw(guesses_to_score);
	4					11
	4					247
6	4			4		29	use Module::Runtime qw(use_module);
	4					10
	4					29
7	4			4		282	use List::AllUtils 0.14 qw(max_by);
	4					97
	4					5670
8
9							our $VERSION = '1.1.1'; # VERSION
10							# ABSTRACT: a collection of matches for a password
11
12
13							has password => (is => 'ro', required => 1); # string
14							has matches => (is => 'ro', default => sub { [] });
15							has guesses => (is => 'ro');
16
17
18							sub omnimatch {
19	1492			1492	1	69799	my ($class, $password, $opts) = @_;
20
21							# let's protect people who try to pass BruteForce in
22							my @modules = $opts->{modules}
23	0					0	? grep { $_ ne 'Data::Password::zxcvbn::Match::BruteForce' } @{$opts->{modules}}
	0					0
24	1492	50				7481	: map { "Data::Password::zxcvbn::Match::$_" }
	10444					25820
25							qw(
26							Dictionary
27							UserInput
28							Spatial
29							Repeat
30							Sequence
31							Regex
32							Date
33							);
34
35							# here, we need to pass the whole $opts down, because some
36							# matchers (e.g. Repeat) will use it to call us recursively, and
37							# we don't want to lose any option
38							my @matches = map {
39	1492					5128	@{ use_module($_)->make($password,$opts) },
	10444					16986
	10444					25334
40							} @modules;
41	1492					6423	@matches = sort @matches;
42
43	1492					42168	return $class->new({
44							password => $password,
45							matches => \@matches,
46							});
47							}
48
49
50							# the following is a O($l_max * ($n + $m)) dynamic programming
51							# algorithm for a length-$n password with $m candidate matches. $l_max
52							# is the maximum optimal sequence length spanning each prefix of the
53							# password. In practice it rarely exceeds 5 and the search terminates
54							# rapidly.
55							#
56							# the optimal "minimum guesses" sequence is here defined to be the
57							# sequence that minimizes the following function:
58							#
59							# $g = $l! * Product($_->guesses for $sequence) + $D^($l - 1)
60							#
61							# where $l is the length of the $sequence.
62							#
63							# the factorial term is the number of ways to order $l patterns.
64							#
65							# the $D^($l-1) term is another length penalty, roughly capturing the
66							# idea that an attacker will try lower-length sequences first before
67							# trying length-$l sequences.
68							#
69							# for example, consider a sequence that is date-repeat-dictionary.
70							#
71							# - an attacker would need to try other date-repeat-dictionary
72							# combinations, hence the product term.
73							#
74							# - an attacker would need to try repeat-date-dictionary,
75							# dictionary-repeat-date, ..., hence the factorial term.
76							#
77							# - an attacker would also likely try length-1 (dictionary) and
78							# length-2 (dictionary-date) sequences before length-3. assuming at
79							# minimum $D guesses per pattern type, $D^($l-1) approximates
80							# Sum($D**$_ for 1..$l-1)
81
82							my $MIN_GUESSES_BEFORE_GROWING_SEQUENCE = 10000;
83
84							sub most_guessable_match_list { ## no critic(ProhibitExcessComplexity)
85	1509			1509	1	107858	my ($self, $exclude_additive) = @_;
86
87	1509					5246	my $password = $self->password;
88	1509					3558	my $n = length($password);
89
90							# partition matches into sublists according to ending index j
91	1509					3212	my %matches_by_j;
92	1509					2778	for my $match (@{$self->matches}) {
	1509					5262
93	6329					9449	push @{$matches_by_j{$match->j}},$match;
	6329					21340
94							}
95							# small detail: for deterministic output, sort each sublist by i.
96	1509					6082	for my $list (values %matches_by_j) {
97	3599					5535	$list = [ sort {$a->i <=> $b->i} @{$list} ];
	3913					10036
	3599					10619
98							}
99
100							# $optimal{m}{$k}{$l} holds final match in the best length-$l
101							# match sequence covering the password prefix up to $k, inclusive.
102							# if there is no length-$l sequence that scores better (fewer
103							# guesses) than a shorter match sequence spanning the same prefix,
104							# this is undefined.
105							#
106							# $optimal{pi} has the same structure as $optimal{m} -- holds the
107							# product term Prod(m.guesses for m in sequence). $optimal{pi}
108							# allows for fast (non-looping) updates to the minimization
109							# function.
110							#
111							# $optimal{g} again same structure, holds the overall metric
112	1509					4209	my %optimal;
113
114							# helper: considers whether a length-$length sequence ending at
115							# $match is better (fewer guesses) than previously encountered
116							# sequences, updating state if so.
117							my $update = sub {
118	25161			25161		49096	my ($match,$length) = @_;
119
120	25161					57383	my $k = $match->j;
121	25161					357741	my $pi = $match->guesses_for_password($password);
122
123	25161	100				60172	if ($length > 1) {
124							# we're considering a length-$length sequence ending with
125							# $match: obtain the product term in the minimization
126							# function by multiplying $match->guesses by the product
127							# of the length-($length-1) sequence ending just before
128							# $match, at $match->i - 1
129	15191					50016	$pi *= $optimal{pi}->{$match->i-1}{$length-1};
130							}
131	25161					62598	my $guesses = factorial($length) * $pi;
132	25161	100				61177	$guesses += $MIN_GUESSES_BEFORE_GROWING_SEQUENCE ** ($length-1)
133							unless $exclude_additive;
134
135							# update state if new best. first see if any competing
136							# sequences covering this prefix, with $length or fewer
137							# matches, fare better than this sequence. if so, skip it and
138							# return.
139	25161					38157	for my $competing_length (keys %{$optimal{g}->{$k}}) {
	25161					80118
140	18676	100				46886	next if $competing_length > $length;
141	15470					27451	my $competing_g = $optimal{g}->{$k}{$competing_length};
142	15470	50				32676	next unless defined $competing_g;
143	15470	100				55127	return if $competing_g <= $guesses;
144							}
145
146	12347					30259	$optimal{g}->{$k}{$length} = $guesses;
147	12347					29460	$optimal{m}->{$k}{$length} = $match;
148	12347					33562	$optimal{pi}->{$k}{$length} = $pi;
149	1509					12097	};
150
151							# helper: evaluate bruteforce matches ending at k.
152							my $bruteforce_update = sub {
153	8197			8197		18193	my ($k) = @_;
154							# see if a single bruteforce match spanning the k-prefix is optimal.
155	8197					170656	my $match = Data::Password::zxcvbn::Match::BruteForce->new({
156							password => $password,
157							i => 0, j => $k,
158							});
159	8197					145192	$update->($match, 1);
160
161	8197					18642	for my $i (1..$k) {
162							# generate $k bruteforce matches, spanning from (i=1, j=$k) up to
163							# (i=$k, j=$k). see if adding these new matches to any of the
164							# sequences in $optimal{m}->[i-1] leads to new bests.
165	26710					495936	my $other_match = Data::Password::zxcvbn::Match::BruteForce->new({
166							password => $password,
167							i => $i, j => $k,
168							});
169
170	26710					384944	for my $length (keys %{$optimal{m}->{$i-1}}) {
	26710					84128
171	33836					70094	my $last_match = $optimal{m}->{$i-1}{$length};
172
173							# corner: an optimal sequence will never have two adjacent
174							# bruteforce matches. it is strictly better to have a single
175							# bruteforce match spanning the same region: same contribution
176							# to the guess product with a lower length.
177							# --> safe to skip those cases.
178	33836	100				157775	next if $last_match->isa('Data::Password::zxcvbn::Match::BruteForce');
179							# try adding m to this length-l sequence.
180	9464					25871	$update->($other_match, $length + 1);
181							}
182							}
183	1509					7199	};
184
185							# helper: step backwards through optimal.m starting at the end,
186							# constructing the final optimal match sequence.
187							my $unwind = sub {
188	1509			1509		4241	my ($k) = @_;
189
190	1509					2684	my @optimal_match_sequence;
191	1509					2873	--$k;
192							# find the final best sequence length and score
193	1509					3558	my $length; my $guesses;
194	1509					2593	for my $candidate_length (keys %{$optimal{g}->{$k}}) {
	1509					5717
195	2272					5114	my $candidate_guesses = $optimal{g}->{$k}{$candidate_length};
196	2272	100	100			9779	if (!defined($guesses) \|\| $candidate_guesses < $guesses) {
197	1866					4038	$length = $candidate_length;
198	1866					5090	$guesses = $candidate_guesses;
199							}
200							}
201
202	1509					4720	while ($k >= 0) {
203	2057					4291	my $match = $optimal{m}->{$k}{$length};
204	2057					4579	unshift @optimal_match_sequence,$match;
205	2057					5474	$k = $match->i - 1;
206	2057					7163	--$length;
207							}
208
209	1509					4662	return \@optimal_match_sequence;
210	1509					6669	};
211
212	1509					5104	for my $k (0..$n-1) {
213	8197					14494	for my $match (@{$matches_by_j{$k}}) {
	8197					23558
214	6329	100				19791	if ($match->i > 0) {
215	4556					7565	for my $l (keys %{$optimal{m}->{$match->i - 1}}) {
	4556					14804
216	5727					14750	$update->($match, $l+1);
217							}
218							}
219							else {
220	1773					3899	$update->($match,1);
221							}
222							}
223	8197					20334	$bruteforce_update->($k);
224							}
225
226	1509					4977	my $optimal_match_sequence = $unwind->($n);
227	1509					2802	my $optimal_length = @{$optimal_match_sequence};
	1509					3867
228
229	1509					3049	my $guesses;
230							# corner: empty password
231	1509	50				4051	if ($n==0) {
232	0					0	$guesses = 1;
233							}
234							else {
235	1509					6056	$guesses = $optimal{g}->{$n - 1}{$optimal_length};
236							}
237
238	1509					34465	return ref($self)->new({
239							password => $password,
240							guesses => $guesses,
241							matches => $optimal_match_sequence,
242							});
243							}
244
245
246							sub guesses_log10 {
247	996			996	1	31995	return log(shift->guesses)/log(10);
248							}
249
250
251	1992			1992	1	6662	sub score { guesses_to_score(shift->guesses) }
252
253
254							sub get_feedback {
255	996			996	1	4397	my ($self, $max_score_for_feedback) = @_;
256							# yes, if someone passes a 0, they'll get the default; I consider
257							# this a feature
258	996		50			6271	$max_score_for_feedback \|\|= 2;
259
260	996					3346	my $matches = $self->matches;
261	996					1798	my $matches_count = @{$matches};
	996					2462
262
263	996	50				4012	if ($matches_count == 0) {
264	0					0	return $self->feedback_for_no_matches;
265							}
266
267	996	100				3826	if ($self->score > $max_score_for_feedback) {
268	98					394	return $self->feedback_above_threshold;
269							}
270
271	898			1345		5304	my $longest_match = max_by { length($_->token) } @{$matches};
	1345					15155
	898					5923
272	898					10438	my $is_sole_match = $matches_count == 1;
273	898					4404	my $feedback = $longest_match->get_feedback($is_sole_match);
274
275	898					3857	my $extra_feedback = $self->feedback_below_threshold;
276	898					2557	push @{$feedback->{suggestions}}, @{$extra_feedback->{suggestions}};
	898					3166
	898					2590
277	898		66			4630	$feedback->{warning} \|\|= $extra_feedback->{warning};
278
279	898					4204	return $feedback;
280							}
281
282
283							sub feedback_for_no_matches {
284							return {
285	0			0	1	0	warning => '',
286							suggestions => [
287							'Use a few words, avoid common phrases.',
288							'No need for symbols, digits, or uppercase letters.',
289							],
290							};
291							}
292
293
294							sub feedback_above_threshold {
295	98			98	1	618	return { warning => '', suggestions => [] };
296							}
297
298
299							sub feedback_below_threshold {
300							return {
301	898			898	1	4685	warning => '',
302							suggestions => [
303							'Add another word or two. Uncommon words are better.'
304							],
305							};
306							}
307
308							1;
309
310							__END__
311
312							=pod
313
314							=encoding UTF-8
315
316							=for :stopwords JS
317
318							=for :stopwords precendence
319
320							=head1 NAME
321
322							Data::Password::zxcvbn::MatchList - a collection of matches for a password
323
324							=head1 VERSION
325
326							version 1.1.1
327
328							=head1 SYNOPSIS
329
330							use Data::Password::zxcvbn::MatchList;
331
332							my $list = Data::Password::zxcvbn::MatchList->omnimatch($password)
333							->most_guessable_match_list;
334
335							=head1 DESCRIPTION
336
337							zxcvbn estimates the strength of a password by guessing which way a
338							generic password cracker would produce it, and then guessing after how
339							many tries it would produce it.
340
341							This class represents a list of guesses ("matches"), covering
342							different substrings of a password.
343
344							=head1 ATTRIBUTES
345
346							=head2 C<password>
347
348							Required string, the password this list is about.
349
350							=head2 C<matches>
351
352							Arrayref, the actual list of matches.
353
354							=head2 C<guesses>
355
356							The estimated number of attempts that a generic password cracker would
357							need to guess the whole L</password>. This will be set for objects
358							returned by L<< /C<most_guessable_match_list> >>, not for those
359							returned by L<< /C<omnimatch> >>.
360
361							=head1 METHODS
362
363							=head2 C<omnimatch>
364
365							my $match_list = Data::Password::zxcvbn::MatchList->omnimatch($password,\%opts);
366
367							Main constructor (the name comes from the original JS
368							implementation). Calls C<< ->make($password,\%opts) >> on all the
369							C<Data::Password::zxcvbn::Match::*> classes (or the ones in C<<
370							@{$opts{modules}} >>), combines all the matches, and returns a
371							C<MatchList> holding them.
372
373							=head2 C<most_guessable_match_list>
374
375							my $minimal_list = $match_list->most_guessable_match_list;
376
377							This method extracts, from the L</matches> of the invocant, a list of
378							non-overlapping matches with minimum guesses. That list should
379							represent the way that a generic password cracker would guess the
380							L</password>, and as such is the one that the L<main
381							function\|Data::Password::zxcvbn/password_strength> will use.
382
383							=head2 C<guesses_log10>
384
385							The logarithm in base 10 of L<< /C<guesses> >>.
386
387							=head2 C<score>
388
389							my $score = $match_list->score;
390
391							Returns an integer from 0-4 (useful for implementing a strength
392							bar). See L<<
393							C<Data::Password::zxcvbn::TimeEstimate::guesses_to_score>\|Data::Password::zxcvbn::TimeEstimate/guesses_to_score
394							>>.
395
396							=head2 C<get_feedback>
397
398							my %feedback = %{ $match_list->get_feedback };
399
400							my %feedback = %{ $match_list->get_feedback($max_score_for_feedback) };
401
402							If there's no matches, returns the result of L<<
403							/C<feedback_for_no_matches> >>.
404
405							If the match list L</score> is above C<$max_score_for_feedback>
406							(default 2), returns the result of L<< /C<feedback_above_threshold>
407							>>.
408
409							Otherwise, collects all the feedback from the L</matches>, and returns
410							it, merged with the result of L<< /C<feedback_below_threshold> >>
411							(suggestions are appended, but the warning from the matches takes
412							precendence).
413
414							=head2 C<feedback_for_no_matches>
415
416							Returns a feedback for when the password didn't match any of our
417							heuristics. It contains no warning, and some simple common
418							suggestions.
419
420							=head2 C<feedback_above_threshold>
421
422							Returns a feedback for when the password scored above the threshold
423							passed to L<< /C<get_feedback> >> (i.e. the password is "good"). It's
424							an empty feedback.
425
426							=head2 C<feedback_below_threshold>
427
428							Returns a feedback for when the password scored below the threshold
429							passed to L<< /C<get_feedback> >> (i.e. the password is "bad"). It
430							suggests to add some words.
431
432							=head1 AUTHOR
433
434							Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
435
436							=head1 COPYRIGHT AND LICENSE
437
438							This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
439
440							This is free software; you can redistribute it and/or modify it under
441							the same terms as the Perl 5 programming language system itself.
442
443							=cut