File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm
Criterion Covered Total %
statement 132 137 96.3
branch 50 52 96.1
condition 18 20 90.0
subroutine 19 22 86.3
pod 10 10 100.0
total 229 241 95.0


line stmt bran cond sub pod time code
1             package Data::Password::zxcvbn::Match::Dictionary;
2 4     4   10051 use Moo;
  4         14  
  4         35  
3             with 'Data::Password::zxcvbn::Match';
4 4     4   4929 use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
  4         12  
  4         273  
5 4     4   32 use List::AllUtils qw(min);
  4         9  
  4         4629  
6             our $VERSION = '1.1.2'; # VERSION
7             # ABSTRACT: match class for words in passwords
8              
9              
10             has reversed => (is => 'ro', default => 0); # bool
11             has substitutions => ( is => 'ro', default => sub { +{} } );
12             has rank => ( is => 'ro', default => 1 ); # int
13             # this should be constrained to the keys of %ranked_dictionaries, but
14             # we can't do that because users can pass their own dictionaries to
15             # ->make
16             has dictionary_name => ( is => 'ro', default => 'passwords' );
17              
18              
19             sub l33t {
20 451     451 1 60692 return scalar(keys %{shift->substitutions})!=0;
  451         2615  
21             }
22              
23              
24             our %l33t_table = ( ## no critic (ProhibitPackageVars)
25             a => ['4', '@'],
26             b => ['8'],
27             c => ['(', '{', '[', '<'],
28             e => ['3'],
29             g => ['6', '9'],
30             i => ['1', '!', '|'],
31             l => ['1', '|', '7'],
32             o => ['0'],
33             s => ['$', '5'],
34             t => ['+', '7'],
35             x => ['%'],
36             z => ['2'],
37             );
38              
39             sub make {
40 1533     1533 1 98953 my ($class, $password, $opts) = @_;
41             ## no critic (ProhibitPackageVars)
42             my $dictionaries = $opts->{ranked_dictionaries}
43 1533   66     8091 || do {
44             require Data::Password::zxcvbn::RankedDictionaries;
45             \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46             };
47 1533   100     9263 my $l33t_table = $opts->{l33t_table} || \%l33t_table;
48              
49 1533         3976 my @matches;
50 1533         7295 $class->_make_simple(\@matches,$password,$dictionaries);
51 1533         9556 $class->_make_reversed(\@matches,$password,$dictionaries);
52 1533         10853 $class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53              
54 1533         15710 @matches = sort @matches;
55 1533         10503 return \@matches;
56             }
57              
58             sub _make_simple {
59 4648     4648   11931 my ($class, $matches, $password, $dictionaries) = @_;
60 4648         9812 my $password_lc = lc($password);
61             # lc may change the length of the password...
62 4648         9014 my $length = length($password_lc);
63              
64 4648         8496 for my $dictionary_name (keys %{$dictionaries}) {
  4648         16640  
65 27453         49745 my $ranked_dict = $dictionaries->{$dictionary_name};
66 27453         50353 for my $i (0..$length-1) {
67 156745         262934 for my $j ($i..$length-1) {
68 674738         1098536 my $word = substr($password_lc,$i,$j-$i+1);
69 674738 100       1938980 if (my $rank = $ranked_dict->{$word}) {
70 8139         14111 push @{$matches}, $class->new({
  8139         191337  
71             token => substr($password,$i,$j-$i+1),
72             i => $i, j=> $j,
73             rank => $rank,
74             dictionary_name => $dictionary_name,
75             });
76             }
77             }
78             }
79             }
80             }
81              
82             sub _make_reversed {
83 1533     1533   5458 my ($class, $matches, $password, $dictionaries) = @_;
84              
85 1533         5554 my $rev_password = reverse($password);
86 1533         3128 my @rev_matches;
87 1533         6461 $class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88              
89 1533         5399 my $rev_length = length($password)-1;
90 1533         7375 for my $rev_match (@rev_matches) {
91 1553         9099 my $word = $rev_match->token;
92             # no need to add this, the normal matching will have produced
93             # it already
94 1553 100       7438 next if $word eq reverse($word);
95 661         1607 push @{$matches}, $class->new({
  661         17623  
96             token => reverse($word),
97             i => $rev_length - $rev_match->j,
98             j=> $rev_length - $rev_match->i,
99             rank => $rev_match->rank,
100             dictionary_name => $rev_match->dictionary_name,
101             reversed => 1,
102             });
103             }
104             }
105              
106             # makes a pruned copy of l33t_table that only includes password's
107             # possible substitutions
108             sub _relevant_l33t_subtable {
109 1539     1539   22071 my ($class, $password, $l33t_table) = @_;
110             # set of characters
111 1539         3232 my %password_chars; @password_chars{split //,$password} = ();
  1539         11235  
112              
113 1539         4769 my %subtable;
114 1539         3606 for my $letter (keys %{$l33t_table}) {
  1539         9244  
115 34681         73973 my @relevant_subs = grep { exists $password_chars{$_} }
116 18084         26989 @{$l33t_table->{$letter}};
  18084         38767  
117 18084 100       44279 $subtable{$letter} = \@relevant_subs
118             if @relevant_subs;
119             }
120              
121 1539         12429 return \%subtable;
122             }
123              
124             sub _translate {
125 1582     1582   4973 my ($class, $string, $table) = @_;
126 1582         3102 my $keys = join '', keys %{$table};
  1582         5283  
127 1582         41200 $string =~ s{([\Q$keys\E])}
128 1582         7082 {$table->{$1}}g;
129             return $string;
130             }
131              
132 1533     1533   5555 sub _make_l33t {
133             my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134 1533         5668  
135             my $subs = enumerate_substitution_maps(
136             $class->_relevant_l33t_subtable($password,$l33t_table)
137 1533         5920 );
  1533         4879  
138 2246 100       8551 for my $sub (@{$subs}) {
  2246         7897  
139 1582         6240 next unless %{$sub};
140 1582         3355 my $subbed_password = $class->_translate($password,$sub);
141 1582         5940 my @subbed_matches;
142             $class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143 1582         6453  
144 3651         24611 for my $subbed_match (@subbed_matches) {
145             my $token = substr($password,
146             $subbed_match->i,
147             $subbed_match->j - $subbed_match->i + 1);
148 3651 100       11422 # too short, ignore
149             next if length($token) <= 1;
150 1930 100       7759 # only return the matches that contain an actual substitution
151             next if lc($token) eq lc($subbed_match->token);
152             # subset of mappings in $sub that are in use for this match
153             my %min_subs = map {
154 2164 100       20416 $token =~ m{\Q$_}
155             ? ( $_ => $sub->{$_} )
156 589         1388 : ()
  589         2008  
157 589         1778 } keys %{$sub};
  589         15472  
158             push @{$matches}, $class->new({
159             token => $token,
160             substitutions => \%min_subs,
161             i => $subbed_match->i,
162             j=> $subbed_match->j,
163             rank => $subbed_match->rank,
164             dictionary_name => $subbed_match->dictionary_name,
165             });
166             }
167             }
168             }
169              
170              
171 4179     4179 1 63436 sub estimate_guesses {
172             my ($self,$min_guesses) = @_;
173 4179         16080  
174             return $self->rank *
175             $self->_uppercase_variations *
176             $self->_l33t_variations *
177             $self->_reversed_variations;
178             }
179              
180              
181             # an uppercase letter, followed by stuff that is *not* uppercase
182 3     3   1829 # letters
  3         47  
  3         42  
183             my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184             # stuff that is *not* uppercase letters, followed by an uppercase
185             # letter
186             my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187             # no characters that are *not* uppercase letters
188             my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189             # no characters that are *not* lowercase letters
190             my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191              
192 386     386 1 4430  
193 0     0 1 0 sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194 0     0 1 0 sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195 0     0 1 0 sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196 356   100 356 1 3564 sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197             sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198              
199 4179     4179   8505 sub _uppercase_variations {
200             my ($self) = @_;
201 4179         9749  
202             my $word = $self->token;
203              
204 4179 100       30188 # if the word has no uppercase letters, count it as 1 variation
205 270 100       1073 return 1 if $word =~ $ALL_NOT_UPPER_RE;
206             return 1 if lc($word) eq $word;
207              
208             # a capitalized word is the most common capitalization scheme, so
209             # it only doubles the search space (uncapitalized + capitalized).
210             # allcaps and end-capitalized are common enough too, underestimate
211 268 100       1632 # as 2x factor to be safe.
212 148 100       821 return 2 if $word =~ $START_UPPER_RE;
213 127 100       826 return 2 if $word =~ $END_UPPER_RE;
214             return 2 if $word =~ $ALL_NOT_LOWER_RE;
215              
216             # otherwise calculate the number of ways to capitalize U+L
217             # uppercase+lowercase letters with U uppercase letters or
218             # less. or, if there's more uppercase than lower (for
219             # eg. PASSwORD), the number of ways to lowercase U+L letters with
220 37         198 # L lowercase letters or less.
221 37         161 my $U = () = $word =~ m/\p{Lu}/g;
222             my $L = () = $word =~ m/\p{Ll}/g;
223 37         88  
224 37         243 my $variations = 0;
225 37         137 $variations += nCk($U+$L,$_) for 1..min($U,$L);
226             return $variations;
227             }
228              
229 4179     4179   10325 sub _l33t_variations {
230             my ($self) = @_;
231 4179         9931  
232             my $word = $self->token;
233 4179         7587  
234 4179         7232 my $variations = 1;
  4179         16445  
235 1098         3292 for my $subbed (keys %{$self->substitutions}) {
236             my $unsubbed = $self->substitutions->{$subbed};
237              
238 1098         10603 # number of Substituted characters
239             my $S = () = $word =~ m{\Q$subbed}gi;
240 1098         7425 # number of Unsubstituted characters
241             my $U = () = $word =~ m{\Q$unsubbed}gi;
242 1098 100 66     4809  
243             if ($S==0 || $U==0) {
244             # for this substitution, password is either fully subbed
245             # (444) or fully unsubbed (aaa); treat that as doubling
246             # the space (attacker needs to try fully subbed chars in
247 1054         3239 # addition to unsubbed.)
248             $variations *= 2;
249             }
250             else {
251             # this case is similar to capitalization: with aa44a, U =
252             # 3, S = 2, attacker needs to try unsubbed + one sub + two
253 44         135 # subs
254 44         305 my $possibilities = 0;
255 44         133 $possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256             $variations *= $possibilities;
257             }
258             }
259 4179         13522  
260             return $variations;
261             }
262              
263 4179 100   4179   22109 sub _reversed_variations {
264             return shift->reversed ? 2 : 1;
265             }
266              
267              
268 386     386 1 1565 sub feedback_warning {
269             my ($self, $is_sole_match) = @_;
270 386 100       3422  
    100          
    100          
271 107 100 100     1051 if ($self->dictionary_name eq 'passwords') {
    100 100        
272 26 50       171 if ($is_sole_match && !$self->l33t && !$self->reversed) {
    50          
273 0         0 if ($self->rank <= 10) {
274             return 'This is a top-10 common password';
275             }
276 0         0 elsif ($self->rank <= 100) {
277             return 'This is a top-100 common password';
278             }
279 26         166 else {
280             return 'This is a very common password';
281             }
282             }
283 64         1757 elsif ($self->guesses_log10 <= 4) {
284             return 'This is similar to a commonly used password';
285             }
286             }
287 131 100       364 elsif ($self->dictionary_name =~ /names$/) {
288 22         134 if ($is_sole_match) {
289             return 'Names and surnames by themselves are easy to guess'
290             }
291 109         769 else {
292             return 'Common names and surnames are easy to guess';
293             }
294             }
295 27         173 elsif ($is_sole_match) {
296             return 'A word by itself is easy to guess';
297             }
298 138         1191  
299             return undef;
300             }
301              
302 386     386 1 1374 sub feedback_suggestions {
303             my ($self) = @_;
304 386         1295  
305 386         824 my $word = $self->token;
306             my @suggestions;
307 386 100       1829  
    100          
308 30         155 if ($self->does_word_start_upper($word)) {
309             push @suggestions, q{Capitalization doesn't help very much};
310             }
311 7         31 elsif ($self->is_word_all_upper($word)) {
312             push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313             }
314 386 100 100     2157  
315 13         48 if ($self->reversed && length($word) >= 4) {
316             push @suggestions, q{Reversed words aren't much harder to guess};
317             }
318 386 100       1408  
319 10         39 if ($self->l33t) {
320             push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321             }
322 386         1991  
323             return \@suggestions;
324             }
325              
326              
327             around fields_for_json => sub {
328             my ($orig,$self) = @_;
329             ( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330             };
331              
332             1;
333              
334             __END__
335              
336             =pod
337              
338             =encoding UTF-8
339              
340             =for :stopwords Wiktionary xato
341              
342             =head1 NAME
343              
344             Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345              
346             =head1 VERSION
347              
348             version 1.1.2
349              
350             =head1 DESCRIPTION
351              
352             This class represents the guess that a certain substring of a password
353             can be guessed by going through a dictionary.
354              
355             =head1 ATTRIBUTES
356              
357             =head2 C<reversed>
358              
359             Boolean, true if the token appears to be a dictionary word that's been
360             reversed (i.e. last letter first)
361              
362             =head2 C<substitutions>
363              
364             Hashref representing the characters that need to be substituted to
365             make the token match a dictionary work (e.g. if the token is
366             C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367              
368             =head2 C<rank>
369              
370             Number, indicating how common the dictionary word is. 1 means "most
371             common".
372              
373             =head2 C<dictionary_name>
374              
375             String, the name of the dictionary that the word was found in. Usually one of:
376              
377             =over 4
378              
379             =item *
380              
381             C<english_wikipedia>
382              
383             words extracted from a dump of the English edition of Wikipedia
384              
385             =item *
386              
387             C<male_names>, C<female_names>, C<surnames>
388              
389             common names from the 1990 US census
390              
391             =item *
392              
393             C<passwords>
394              
395             most common passwords, extracted from the "xato" password dump
396              
397             =item *
398              
399             C<us_tv_and_film>
400              
401             words from a 2006 Wiktionary word frequency study over American
402             television and movies
403              
404             =back
405              
406             =head1 METHODS
407              
408             =head2 C<l33t>
409              
410             Returns true if the token had any L</substitutions> (i.e. it was
411             written in "l33t-speak")
412              
413             =head2 C<make>
414              
415             my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416             $password,
417             { # these are the defaults
418             ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419             l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420             },
421             ) };
422              
423             Scans the C<$password> for substrings that match words in the
424             C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425             from the C<l33t_table>.
426              
427             The C<ranked_dictionaries> should look like:
428              
429             { some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430             ... }
431              
432             (i.e. a hash of dictionaries, each mapping words to their frequency
433             rank) and the C<l33t_table> should look like:
434              
435             { a => [ '4', '@' ], ... }
436              
437             (i.e. a hash mapping characters to arrays of other characters)
438              
439             =head2 C<estimate_guesses>
440              
441             The number of guesses is the product of the rank of the word, how many
442             case combinations match it, how many substitutions were used, doubled
443             if the token is reversed.
444              
445             =head2 C<does_word_start_upper>
446              
447             =head2 C<does_word_end_upper>
448              
449             =head2 C<is_word_all_not_upper>
450              
451             =head2 C<is_word_all_not_lower>
452              
453             =head2 C<is_word_all_upper>
454              
455             if ($self->does_word_start_upper($word)) { ... }
456              
457             These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458             >> and L<< /C<feedback_suggestions> >>.
459              
460             =head2 C<feedback_warning>
461              
462             =head2 C<feedback_suggestions>
463              
464             This class suggests not using common words or passwords, especially on
465             their own. It also suggests that capitalisation, "special characters"
466             substitutions, and writing things backwards are not very useful.
467              
468             =head2 C<fields_for_json>
469              
470             The JSON serialisation for matches of this class will contain C<token
471             i j guesses guesses_log10 dictionary_name reversed rank
472             substitutions>.
473              
474             =head1 AUTHOR
475              
476             Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477              
478             =head1 COPYRIGHT AND LICENSE
479              
480             This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481              
482             This is free software; you can redistribute it and/or modify it under
483             the same terms as the Perl 5 programming language system itself.
484              
485             =cut