File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm

Criterion	Covered	Total	%
statement	132	137	96.3
branch	50	52	96.1
condition	18	20	90.0
subroutine	19	22	86.3
pod	10	10	100.0
total	229	241	95.0

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Password::zxcvbn::Match::Dictionary;
2	4			4		9878	use Moo;
	4					12
	4					32
3							with 'Data::Password::zxcvbn::Match';
4	4			4		4775	use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
	4					13
	4					280
5	4			4		32	use List::AllUtils qw(min);
	4					9
	4					4577
6							our $VERSION = '1.1.1'; # VERSION
7							# ABSTRACT: match class for words in passwords
8
9
10							has reversed => (is => 'ro', default => 0); # bool
11							has substitutions => ( is => 'ro', default => sub { +{} } );
12							has rank => ( is => 'ro', default => 1 ); # int
13							# this should be constrained to the keys of %ranked_dictionaries, but
14							# we can't do that because users can pass their own dictionaries to
15							# ->make
16							has dictionary_name => ( is => 'ro', default => 'passwords' );
17
18
19							sub l33t {
20	451			451	1	59050	return scalar(keys %{shift->substitutions})!=0;
	451					2750
21							}
22
23
24							our %l33t_table = ( ## no critic (ProhibitPackageVars)
25							a => ['4', '@'],
26							b => ['8'],
27							c => ['(', '{', '[', '<'],
28							e => ['3'],
29							g => ['6', '9'],
30							i => ['1', '!', '\|'],
31							l => ['1', '\|', '7'],
32							o => ['0'],
33							s => ['$', '5'],
34							t => ['+', '7'],
35							x => ['%'],
36							z => ['2'],
37							);
38
39							sub make {
40	1533			1533	1	107751	my ($class, $password, $opts) = @_;
41							## no critic (ProhibitPackageVars)
42							my $dictionaries = $opts->{ranked_dictionaries}
43	1533		66			8483	\|\| do {
44							require Data::Password::zxcvbn::RankedDictionaries;
45							\%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46							};
47	1533		100			8658	my $l33t_table = $opts->{l33t_table} \|\| \%l33t_table;
48
49	1533					3600	my @matches;
50	1533					6625	$class->_make_simple(\@matches,$password,$dictionaries);
51	1533					8860	$class->_make_reversed(\@matches,$password,$dictionaries);
52	1533					11059	$class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53
54	1533					14487	@matches = sort @matches;
55	1533					8306	return \@matches;
56							}
57
58							sub _make_simple {
59	4648			4648		12801	my ($class, $matches, $password, $dictionaries) = @_;
60	4648					9879	my $password_lc = lc($password);
61							# lc may change the length of the password...
62	4648					9747	my $length = length($password_lc);
63
64	4648					8748	for my $dictionary_name (keys %{$dictionaries}) {
	4648					16634
65	27453					49371	my $ranked_dict = $dictionaries->{$dictionary_name};
66	27453					50572	for my $i (0..$length-1) {
67	156745					264822	for my $j ($i..$length-1) {
68	674738					1121645	my $word = substr($password_lc,$i,$j-$i+1);
69	674738	100				1753953	if (my $rank = $ranked_dict->{$word}) {
70	8139					15932	push @{$matches}, $class->new({
	8139					185872
71							token => substr($password,$i,$j-$i+1),
72							i => $i, j=> $j,
73							rank => $rank,
74							dictionary_name => $dictionary_name,
75							});
76							}
77							}
78							}
79							}
80							}
81
82							sub _make_reversed {
83	1533			1533		5228	my ($class, $matches, $password, $dictionaries) = @_;
84
85	1533					3955	my $rev_password = reverse($password);
86	1533					3079	my @rev_matches;
87	1533					6382	$class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88
89	1533					6700	my $rev_length = length($password)-1;
90	1533					5413	for my $rev_match (@rev_matches) {
91	1553					7676	my $word = $rev_match->token;
92							# no need to add this, the normal matching will have produced
93							# it already
94	1553	100				7111	next if $word eq reverse($word);
95	661					1365	push @{$matches}, $class->new({
	661					16532
96							token => reverse($word),
97							i => $rev_length - $rev_match->j,
98							j=> $rev_length - $rev_match->i,
99							rank => $rev_match->rank,
100							dictionary_name => $rev_match->dictionary_name,
101							reversed => 1,
102							});
103							}
104							}
105
106							# makes a pruned copy of l33t_table that only includes password's
107							# possible substitutions
108							sub _relevant_l33t_subtable {
109	1539			1539		23472	my ($class, $password, $l33t_table) = @_;
110							# set of characters
111	1539					3537	my %password_chars; @password_chars{split //,$password} = ();
	1539					10649
112
113	1539					4093	my %subtable;
114	1539					3607	for my $letter (keys %{$l33t_table}) {
	1539					9683
115	34681					73446	my @relevant_subs = grep { exists $password_chars{$_} }
116	18084					27197	@{$l33t_table->{$letter}};
	18084					39557
117	18084	100				45890	$subtable{$letter} = \@relevant_subs
118							if @relevant_subs;
119							}
120
121	1539					12410	return \%subtable;
122							}
123
124							sub _translate {
125	1582			1582		4323	my ($class, $string, $table) = @_;
126	1582					3336	my $keys = join '', keys %{$table};
	1582					5533
127	1582					37215	$string =~ s{([\Q$keys\E])}
128	1582					7220	{$table->{$1}}g;
129							return $string;
130							}
131
132	1533			1533		6109	sub _make_l33t {
133							my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134	1533					5727
135							my $subs = enumerate_substitution_maps(
136							$class->_relevant_l33t_subtable($password,$l33t_table)
137	1533					6057	);
	1533					4178
138	2246	100				7083	for my $sub (@{$subs}) {
	2246					6583
139	1582					5598	next unless %{$sub};
140	1582					3364	my $subbed_password = $class->_translate($password,$sub);
141	1582					5858	my @subbed_matches;
142							$class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143	1582					6111
144	3651					23300	for my $subbed_match (@subbed_matches) {
145							my $token = substr($password,
146							$subbed_match->i,
147							$subbed_match->j - $subbed_match->i + 1);
148	3651	100				10755	# too short, ignore
149							next if length($token) <= 1;
150	1930	100				8298	# only return the matches that contain an actual substitution
151							next if lc($token) eq lc($subbed_match->token);
152							# subset of mappings in $sub that are in use for this match
153							my %min_subs = map {
154	2164	100				20337	$token =~ m{\Q$_}
155							? ( $_ => $sub->{$_} )
156	589					1444	: ()
	589					2016
157	589					1716	} keys %{$sub};
	589					15034
158							push @{$matches}, $class->new({
159							token => $token,
160							substitutions => \%min_subs,
161							i => $subbed_match->i,
162							j=> $subbed_match->j,
163							rank => $subbed_match->rank,
164							dictionary_name => $subbed_match->dictionary_name,
165							});
166							}
167							}
168							}
169
170
171	4179			4179	1	67598	sub estimate_guesses {
172							my ($self,$min_guesses) = @_;
173	4179					14523
174							return $self->rank *
175							$self->_uppercase_variations *
176							$self->_l33t_variations *
177							$self->_reversed_variations;
178							}
179
180
181							# an uppercase letter, followed by stuff that is not uppercase
182	3			3		1809	# letters
	3					44
	3					42
183							my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184							# stuff that is not uppercase letters, followed by an uppercase
185							# letter
186							my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187							# no characters that are not uppercase letters
188							my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189							# no characters that are not lowercase letters
190							my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191
192	386			386	1	3970
193	0			0	1	0	sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194	0			0	1	0	sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195	0			0	1	0	sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196	356		100	356	1	3557	sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197							sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198
199	4179			4179		8488	sub _uppercase_variations {
200							my ($self) = @_;
201	4179					9600
202							my $word = $self->token;
203
204	4179	100				29166	# if the word has no uppercase letters, count it as 1 variation
205	270	100				1065	return 1 if $word =~ $ALL_NOT_UPPER_RE;
206							return 1 if lc($word) eq $word;
207
208							# a capitalized word is the most common capitalization scheme, so
209							# it only doubles the search space (uncapitalized + capitalized).
210							# allcaps and end-capitalized are common enough too, underestimate
211	268	100				1638	# as 2x factor to be safe.
212	148	100				810	return 2 if $word =~ $START_UPPER_RE;
213	127	100				829	return 2 if $word =~ $END_UPPER_RE;
214							return 2 if $word =~ $ALL_NOT_LOWER_RE;
215
216							# otherwise calculate the number of ways to capitalize U+L
217							# uppercase+lowercase letters with U uppercase letters or
218							# less. or, if there's more uppercase than lower (for
219							# eg. PASSwORD), the number of ways to lowercase U+L letters with
220	37					191	# L lowercase letters or less.
221	37					177	my $U = () = $word =~ m/\p{Lu}/g;
222							my $L = () = $word =~ m/\p{Ll}/g;
223	37					100
224	37					289	my $variations = 0;
225	37					127	$variations += nCk($U+$L,$_) for 1..min($U,$L);
226							return $variations;
227							}
228
229	4179			4179		9910	sub _l33t_variations {
230							my ($self) = @_;
231	4179					9155
232							my $word = $self->token;
233	4179					7202
234	4179					7172	my $variations = 1;
	4179					14773
235	1098					2910	for my $subbed (keys %{$self->substitutions}) {
236							my $unsubbed = $self->substitutions->{$subbed};
237
238	1098					9990	# number of Substituted characters
239							my $S = () = $word =~ m{\Q$subbed}gi;
240	1098					7274	# number of Unsubstituted characters
241							my $U = () = $word =~ m{\Q$unsubbed}gi;
242	1098	100	66			4826
243							if ($S==0 \|\| $U==0) {
244							# for this substitution, password is either fully subbed
245							# (444) or fully unsubbed (aaa); treat that as doubling
246							# the space (attacker needs to try fully subbed chars in
247	1054					2465	# addition to unsubbed.)
248							$variations *= 2;
249							}
250							else {
251							# this case is similar to capitalization: with aa44a, U =
252							# 3, S = 2, attacker needs to try unsubbed + one sub + two
253	44					98	# subs
254	44					309	my $possibilities = 0;
255	44					113	$possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256							$variations *= $possibilities;
257							}
258							}
259	4179					12522
260							return $variations;
261							}
262
263	4179	100		4179		23224	sub _reversed_variations {
264							return shift->reversed ? 2 : 1;
265							}
266
267
268	386			386	1	1360	sub feedback_warning {
269							my ($self, $is_sole_match) = @_;
270	386	100				3592
		100
		100
271	107	100	100			913	if ($self->dictionary_name eq 'passwords') {
		100	100
272	26	50				196	if ($is_sole_match && !$self->l33t && !$self->reversed) {
		50
273	0					0	if ($self->rank <= 10) {
274							return 'This is a top-10 common password';
275							}
276	0					0	elsif ($self->rank <= 100) {
277							return 'This is a top-100 common password';
278							}
279	26					174	else {
280							return 'This is a very common password';
281							}
282							}
283	64					1463	elsif ($self->guesses_log10 <= 4) {
284							return 'This is similar to a commonly used password';
285							}
286							}
287	131	100				443	elsif ($self->dictionary_name =~ /names$/) {
288	22					148	if ($is_sole_match) {
289							return 'Names and surnames by themselves are easy to guess'
290							}
291	109					687	else {
292							return 'Common names and surnames are easy to guess';
293							}
294							}
295	27					166	elsif ($is_sole_match) {
296							return 'A word by itself is easy to guess';
297							}
298	138					1134
299							return undef;
300							}
301
302	386			386	1	1280	sub feedback_suggestions {
303							my ($self) = @_;
304	386					1288
305	386					1037	my $word = $self->token;
306							my @suggestions;
307	386	100				1998
		100
308	30					140	if ($self->does_word_start_upper($word)) {
309							push @suggestions, q{Capitalization doesn't help very much};
310							}
311	7					24	elsif ($self->is_word_all_upper($word)) {
312							push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313							}
314	386	100	100			2473
315	13					55	if ($self->reversed && length($word) >= 4) {
316							push @suggestions, q{Reversed words aren't much harder to guess};
317							}
318	386	100				1847
319	10					35	if ($self->l33t) {
320							push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321							}
322	386					2036
323							return \@suggestions;
324							}
325
326
327							around fields_for_json => sub {
328							my ($orig,$self) = @_;
329							( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330							};
331
332							1;
333
334							__END__
335
336							=pod
337
338							=encoding UTF-8
339
340							=for :stopwords Wiktionary xato
341
342							=head1 NAME
343
344							Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345
346							=head1 VERSION
347
348							version 1.1.1
349
350							=head1 DESCRIPTION
351
352							This class represents the guess that a certain substring of a password
353							can be guessed by going through a dictionary.
354
355							=head1 ATTRIBUTES
356
357							=head2 C<reversed>
358
359							Boolean, true if the token appears to be a dictionary word that's been
360							reversed (i.e. last letter first)
361
362							=head2 C<substitutions>
363
364							Hashref representing the characters that need to be substituted to
365							make the token match a dictionary work (e.g. if the token is
366							C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367
368							=head2 C<rank>
369
370							Number, indicating how common the dictionary word is. 1 means "most
371							common".
372
373							=head2 C<dictionary_name>
374
375							String, the name of the dictionary that the word was found in. Usually one of:
376
377							=over 4
378
379							=item *
380
381							C<english_wikipedia>
382
383							words extracted from a dump of the English edition of Wikipedia
384
385							=item *
386
387							C<male_names>, C<female_names>, C<surnames>
388
389							common names from the 1990 US census
390
391							=item *
392
393							C<passwords>
394
395							most common passwords, extracted from the "xato" password dump
396
397							=item *
398
399							C<us_tv_and_film>
400
401							words from a 2006 Wiktionary word frequency study over American
402							television and movies
403
404							=back
405
406							=head1 METHODS
407
408							=head2 C<l33t>
409
410							Returns true if the token had any L</substitutions> (i.e. it was
411							written in "l33t-speak")
412
413							=head2 C<make>
414
415							my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416							$password,
417							{ # these are the defaults
418							ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419							l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420							},
421							) };
422
423							Scans the C<$password> for substrings that match words in the
424							C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425							from the C<l33t_table>.
426
427							The C<ranked_dictionaries> should look like:
428
429							{ some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430							... }
431
432							(i.e. a hash of dictionaries, each mapping words to their frequency
433							rank) and the C<l33t_table> should look like:
434
435							{ a => [ '4', '@' ], ... }
436
437							(i.e. a hash mapping characters to arrays of other characters)
438
439							=head2 C<estimate_guesses>
440
441							The number of guesses is the product of the rank of the word, how many
442							case combinations match it, how many substitutions were used, doubled
443							if the token is reversed.
444
445							=head2 C<does_word_start_upper>
446
447							=head2 C<does_word_end_upper>
448
449							=head2 C<is_word_all_not_upper>
450
451							=head2 C<is_word_all_not_lower>
452
453							=head2 C<is_word_all_upper>
454
455							if ($self->does_word_start_upper($word)) { ... }
456
457							These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458							>> and L<< /C<feedback_suggestions> >>.
459
460							=head2 C<feedback_warning>
461
462							=head2 C<feedback_suggestions>
463
464							This class suggests not using common words or passwords, especially on
465							their own. It also suggests that capitalisation, "special characters"
466							substitutions, and writing things backwards are not very useful.
467
468							=head2 C<fields_for_json>
469
470							The JSON serialisation for matches of this class will contain C<token
471							i j guesses guesses_log10 dictionary_name reversed rank
472							substitutions>.
473
474							=head1 AUTHOR
475
476							Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477
478							=head1 COPYRIGHT AND LICENSE
479
480							This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481
482							This is free software; you can redistribute it and/or modify it under
483							the same terms as the Perl 5 programming language system itself.
484
485							=cut