File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm

Criterion	Covered	Total	%
statement	132	137	96.3
branch	50	52	96.1
condition	18	20	90.0
subroutine	19	22	86.3
pod	10	10	100.0
total	229	241	95.0

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Password::zxcvbn::Match::Dictionary;
2	4			4		10884	use Moo;
	4					12
	4					36
3							with 'Data::Password::zxcvbn::Match';
4	4			4		4948	use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
	4					14
	4					296
5	4			4		31	use List::AllUtils qw(min);
	4					9
	4					4638
6							our $VERSION = '1.1.0'; # VERSION
7							# ABSTRACT: match class for words in passwords
8
9
10							has reversed => (is => 'ro', default => 0); # bool
11							has substitutions => ( is => 'ro', default => sub { +{} } );
12							has rank => ( is => 'ro', default => 1 ); # int
13							# this should be constrained to the keys of %ranked_dictionaries, but
14							# we can't do that because users can pass their own dictionaries to
15							# ->make
16							has dictionary_name => ( is => 'ro', default => 'passwords' );
17
18
19							sub l33t {
20	451			451	1	61213	return scalar(keys %{shift->substitutions})!=0;
	451					2639
21							}
22
23
24							our %l33t_table = ( ## no critic (ProhibitPackageVars)
25							a => ['4', '@'],
26							b => ['8'],
27							c => ['(', '{', '[', '<'],
28							e => ['3'],
29							g => ['6', '9'],
30							i => ['1', '!', '\|'],
31							l => ['1', '\|', '7'],
32							o => ['0'],
33							s => ['$', '5'],
34							t => ['+', '7'],
35							x => ['%'],
36							z => ['2'],
37							);
38
39							sub make {
40	1533			1533	1	106829	my ($class, $password, $opts) = @_;
41							## no critic (ProhibitPackageVars)
42							my $dictionaries = $opts->{ranked_dictionaries}
43	1533		66			8234	\|\| do {
44							require Data::Password::zxcvbn::RankedDictionaries;
45							\%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46							};
47	1533		100			8620	my $l33t_table = $opts->{l33t_table} \|\| \%l33t_table;
48
49	1533					4485	my @matches;
50	1533					7633	$class->_make_simple(\@matches,$password,$dictionaries);
51	1533					8570	$class->_make_reversed(\@matches,$password,$dictionaries);
52	1533					11174	$class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53
54	1533					15366	@matches = sort @matches;
55	1533					8911	return \@matches;
56							}
57
58							sub _make_simple {
59	4648			4648		12956	my ($class, $matches, $password, $dictionaries) = @_;
60	4648					13495	my $password_lc = lc($password);
61							# lc may change the length of the password...
62	4648					8914	my $length = length($password_lc);
63
64	4648					7285	for my $dictionary_name (keys %{$dictionaries}) {
	4648					17746
65	27453					47865	my $ranked_dict = $dictionaries->{$dictionary_name};
66	27453					48029	for my $i (0..$length-1) {
67	156745					260003	for my $j ($i..$length-1) {
68	674738					1068870	my $word = substr($password_lc,$i,$j-$i+1);
69	674738	100				1747619	if (my $rank = $ranked_dict->{$word}) {
70	8139					12769	push @{$matches}, $class->new({
	8139					192085
71							token => substr($password,$i,$j-$i+1),
72							i => $i, j=> $j,
73							rank => $rank,
74							dictionary_name => $dictionary_name,
75							});
76							}
77							}
78							}
79							}
80							}
81
82							sub _make_reversed {
83	1533			1533		5118	my ($class, $matches, $password, $dictionaries) = @_;
84
85	1533					4548	my $rev_password = reverse($password);
86	1533					3275	my @rev_matches;
87	1533					5809	$class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88
89	1533					6697	my $rev_length = length($password)-1;
90	1533					4996	for my $rev_match (@rev_matches) {
91	1553					8642	my $word = $rev_match->token;
92							# no need to add this, the normal matching will have produced
93							# it already
94	1553	100				7041	next if $word eq reverse($word);
95	661					1634	push @{$matches}, $class->new({
	661					17086
96							token => reverse($word),
97							i => $rev_length - $rev_match->j,
98							j=> $rev_length - $rev_match->i,
99							rank => $rev_match->rank,
100							dictionary_name => $rev_match->dictionary_name,
101							reversed => 1,
102							});
103							}
104							}
105
106							# makes a pruned copy of l33t_table that only includes password's
107							# possible substitutions
108							sub _relevant_l33t_subtable {
109	1539			1539		24524	my ($class, $password, $l33t_table) = @_;
110							# set of characters
111	1539					3704	my %password_chars; @password_chars{split //,$password} = ();
	1539					12214
112
113	1539					4545	my %subtable;
114	1539					3994	for my $letter (keys %{$l33t_table}) {
	1539					9075
115	34681					74062	my @relevant_subs = grep { exists $password_chars{$_} }
116	18084					27249	@{$l33t_table->{$letter}};
	18084					37273
117	18084	100				44575	$subtable{$letter} = \@relevant_subs
118							if @relevant_subs;
119							}
120
121	1539					13443	return \%subtable;
122							}
123
124							sub _translate {
125	1582			1582		4133	my ($class, $string, $table) = @_;
126	1582					2927	my $keys = join '', keys %{$table};
	1582					5303
127	1582					37349	$string =~ s{([\Q$keys\E])}
128	1582					7099	{$table->{$1}}g;
129							return $string;
130							}
131
132	1533			1533		5834	sub _make_l33t {
133							my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134	1533					6748
135							my $subs = enumerate_substitution_maps(
136							$class->_relevant_l33t_subtable($password,$l33t_table)
137	1533					5373	);
	1533					4798
138	2246	100				8960	for my $sub (@{$subs}) {
	2246					7742
139	1582					6184	next unless %{$sub};
140	1582					3797	my $subbed_password = $class->_translate($password,$sub);
141	1582					5827	my @subbed_matches;
142							$class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143	1582					5900
144	3651					24755	for my $subbed_match (@subbed_matches) {
145							my $token = substr($password,
146							$subbed_match->i,
147							$subbed_match->j - $subbed_match->i + 1);
148	3651	100				11267	# too short, ignore
149							next if length($token) <= 1;
150	1930	100				8332	# only return the matches that contain an actual substitution
151							next if lc($token) eq lc($subbed_match->token);
152							# subset of mappings in $sub that are in use for this match
153							my %min_subs = map {
154	2164	100				20289	$token =~ m{\Q$_}
155							? ( $_ => $sub->{$_} )
156	589					1255	: ()
	589					2023
157	589					1631	} keys %{$sub};
	589					14751
158							push @{$matches}, $class->new({
159							token => $token,
160							substitutions => \%min_subs,
161							i => $subbed_match->i,
162							j=> $subbed_match->j,
163							rank => $subbed_match->rank,
164							dictionary_name => $subbed_match->dictionary_name,
165							});
166							}
167							}
168							}
169
170
171	4179			4179	1	67147	sub estimate_guesses {
172							my ($self,$min_guesses) = @_;
173	4179					16532
174							return $self->rank *
175							$self->_uppercase_variations *
176							$self->_l33t_variations *
177							$self->_reversed_variations;
178							}
179
180
181							# an uppercase letter, followed by stuff that is not uppercase
182	3			3		1850	# letters
	3					47
	3					42
183							my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184							# stuff that is not uppercase letters, followed by an uppercase
185							# letter
186							my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187							# no characters that are not uppercase letters
188							my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189							# no characters that are not lowercase letters
190							my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191
192	386			386	1	4044
193	0			0	1	0	sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194	0			0	1	0	sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195	0			0	1	0	sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196	356		100	356	1	3639	sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197							sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198
199	4179			4179		8869	sub _uppercase_variations {
200							my ($self) = @_;
201	4179					10066
202							my $word = $self->token;
203
204	4179	100				31028	# if the word has no uppercase letters, count it as 1 variation
205	270	100				992	return 1 if $word =~ $ALL_NOT_UPPER_RE;
206							return 1 if lc($word) eq $word;
207
208							# a capitalized word is the most common capitalization scheme, so
209							# it only doubles the search space (uncapitalized + capitalized).
210							# allcaps and end-capitalized are common enough too, underestimate
211	268	100				1500	# as 2x factor to be safe.
212	148	100				764	return 2 if $word =~ $START_UPPER_RE;
213	127	100				810	return 2 if $word =~ $END_UPPER_RE;
214							return 2 if $word =~ $ALL_NOT_LOWER_RE;
215
216							# otherwise calculate the number of ways to capitalize U+L
217							# uppercase+lowercase letters with U uppercase letters or
218							# less. or, if there's more uppercase than lower (for
219							# eg. PASSwORD), the number of ways to lowercase U+L letters with
220	37					190	# L lowercase letters or less.
221	37					209	my $U = () = $word =~ m/\p{Lu}/g;
222							my $L = () = $word =~ m/\p{Ll}/g;
223	37					95
224	37					253	my $variations = 0;
225	37					133	$variations += nCk($U+$L,$_) for 1..min($U,$L);
226							return $variations;
227							}
228
229	4179			4179		9775	sub _l33t_variations {
230							my ($self) = @_;
231	4179					9321
232							my $word = $self->token;
233	4179					7973
234	4179					7445	my $variations = 1;
	4179					15487
235	1098					3030	for my $subbed (keys %{$self->substitutions}) {
236							my $unsubbed = $self->substitutions->{$subbed};
237
238	1098					10335	# number of Substituted characters
239							my $S = () = $word =~ m{\Q$subbed}gi;
240	1098					6808	# number of Unsubstituted characters
241							my $U = () = $word =~ m{\Q$unsubbed}gi;
242	1098	100	66			4858
243							if ($S==0 \|\| $U==0) {
244							# for this substitution, password is either fully subbed
245							# (444) or fully unsubbed (aaa); treat that as doubling
246							# the space (attacker needs to try fully subbed chars in
247	1054					2397	# addition to unsubbed.)
248							$variations *= 2;
249							}
250							else {
251							# this case is similar to capitalization: with aa44a, U =
252							# 3, S = 2, attacker needs to try unsubbed + one sub + two
253	44					102	# subs
254	44					307	my $possibilities = 0;
255	44					111	$possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256							$variations *= $possibilities;
257							}
258							}
259	4179					13023
260							return $variations;
261							}
262
263	4179	100		4179		23473	sub _reversed_variations {
264							return shift->reversed ? 2 : 1;
265							}
266
267
268	386			386	1	1213	sub feedback_warning {
269							my ($self, $is_sole_match) = @_;
270	386	100				3371
		100
		100
271	107	100	100			1040	if ($self->dictionary_name eq 'passwords') {
		100	100
272	26	50				179	if ($is_sole_match && !$self->l33t && !$self->reversed) {
		50
273	0					0	if ($self->rank <= 10) {
274							return 'This is a top-10 common password';
275							}
276	0					0	elsif ($self->rank <= 100) {
277							return 'This is a top-100 common password';
278							}
279	26					196	else {
280							return 'This is a very common password';
281							}
282							}
283	64					1366	elsif ($self->guesses_log10 <= 4) {
284							return 'This is similar to a commonly used password';
285							}
286							}
287	131	100				434	elsif ($self->dictionary_name =~ /names$/) {
288	22					144	if ($is_sole_match) {
289							return 'Names and surnames by themselves are easy to guess'
290							}
291	109					730	else {
292							return 'Common names and surnames are easy to guess';
293							}
294							}
295	27					190	elsif ($is_sole_match) {
296							return 'A word by itself is easy to guess';
297							}
298	138					1169
299							return undef;
300							}
301
302	386			386	1	1296	sub feedback_suggestions {
303							my ($self) = @_;
304	386					1348
305	386					886	my $word = $self->token;
306							my @suggestions;
307	386	100				1674
		100
308	30					149	if ($self->does_word_start_upper($word)) {
309							push @suggestions, q{Capitalization doesn't help very much};
310							}
311	7					31	elsif ($self->is_word_all_upper($word)) {
312							push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313							}
314	386	100	100			2123
315	13					45	if ($self->reversed && length($word) >= 4) {
316							push @suggestions, q{Reversed words aren't much harder to guess};
317							}
318	386	100				1957
319	10					32	if ($self->l33t) {
320							push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321							}
322	386					2180
323							return \@suggestions;
324							}
325
326
327							around fields_for_json => sub {
328							my ($orig,$self) = @_;
329							( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330							};
331
332							1;
333
334							__END__
335
336							=pod
337
338							=encoding UTF-8
339
340							=for :stopwords Wiktionary xato
341
342							=head1 NAME
343
344							Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345
346							=head1 VERSION
347
348							version 1.1.0
349
350							=head1 DESCRIPTION
351
352							This class represents the guess that a certain substring of a password
353							can be guessed by going through a dictionary.
354
355							=head1 ATTRIBUTES
356
357							=head2 C<reversed>
358
359							Boolean, true if the token appears to be a dictionary word that's been
360							reversed (i.e. last letter first)
361
362							=head2 C<substitutions>
363
364							Hashref representing the characters that need to be substituted to
365							make the token match a dictionary work (e.g. if the token is
366							C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367
368							=head2 C<rank>
369
370							Number, indicating how common the dictionary word is. 1 means "most
371							common".
372
373							=head2 C<dictionary_name>
374
375							String, the name of the dictionary that the word was found in. Usually one of:
376
377							=over 4
378
379							=item *
380
381							C<english_wikipedia>
382
383							words extracted from a dump of the English edition of Wikipedia
384
385							=item *
386
387							C<male_names>, C<female_names>, C<surnames>
388
389							common names from the 1990 US census
390
391							=item *
392
393							C<passwords>
394
395							most common passwords, extracted from the "xato" password dump
396
397							=item *
398
399							C<us_tv_and_film>
400
401							words from a 2006 Wiktionary word frequency study over American
402							television and movies
403
404							=back
405
406							=head1 METHODS
407
408							=head2 C<l33t>
409
410							Returns true if the token had any L</substitutions> (i.e. it was
411							written in "l33t-speak")
412
413							=head2 C<make>
414
415							my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416							$password,
417							{ # these are the defaults
418							ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419							l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420							},
421							) };
422
423							Scans the C<$password> for substrings that match words in the
424							C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425							from the C<l33t_table>.
426
427							The C<ranked_dictionaries> should look like:
428
429							{ some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430							... }
431
432							(i.e. a hash of dictionaries, each mapping words to their frequency
433							rank) and the C<l33t_table> should look like:
434
435							{ a => [ '4', '@' ], ... }
436
437							(i.e. a hash mapping characters to arrays of other characters)
438
439							=head2 C<estimate_guesses>
440
441							The number of guesses is the product of the rank of the word, how many
442							case combinations match it, how many substitutions were used, doubled
443							if the token is reversed.
444
445							=head2 C<does_word_start_upper>
446
447							=head2 C<does_word_end_upper>
448
449							=head2 C<is_word_all_not_upper>
450
451							=head2 C<is_word_all_not_lower>
452
453							=head2 C<is_word_all_upper>
454
455							if ($self->does_word_start_upper($word)) { ... }
456
457							These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458							>> and L<< /C<feedback_suggestions> >>.
459
460							=head2 C<feedback_warning>
461
462							=head2 C<feedback_suggestions>
463
464							This class suggests not using common words or passwords, especially on
465							their own. It also suggests that capitalisation, "special characters"
466							substitutions, and writing things backwards are not very useful.
467
468							=head2 C<fields_for_json>
469
470							The JSON serialisation for matches of this class will contain C<token
471							i j guesses guesses_log10 dictionary_name reversed rank
472							substitutions>.
473
474							=head1 AUTHOR
475
476							Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477
478							=head1 COPYRIGHT AND LICENSE
479
480							This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481
482							This is free software; you can redistribute it and/or modify it under
483							the same terms as the Perl 5 programming language system itself.
484
485							=cut