File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm
Criterion Covered Total %
statement 132 137 96.3
branch 50 52 96.1
condition 18 20 90.0
subroutine 19 22 86.3
pod 10 10 100.0
total 229 241 95.0


line stmt bran cond sub pod time code
1             package Data::Password::zxcvbn::Match::Dictionary;
2 4     4   10884 use Moo;
  4         12  
  4         36  
3             with 'Data::Password::zxcvbn::Match';
4 4     4   4948 use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
  4         14  
  4         296  
5 4     4   31 use List::AllUtils qw(min);
  4         9  
  4         4638  
6             our $VERSION = '1.1.0'; # VERSION
7             # ABSTRACT: match class for words in passwords
8              
9              
10             has reversed => (is => 'ro', default => 0); # bool
11             has substitutions => ( is => 'ro', default => sub { +{} } );
12             has rank => ( is => 'ro', default => 1 ); # int
13             # this should be constrained to the keys of %ranked_dictionaries, but
14             # we can't do that because users can pass their own dictionaries to
15             # ->make
16             has dictionary_name => ( is => 'ro', default => 'passwords' );
17              
18              
19             sub l33t {
20 451     451 1 61213 return scalar(keys %{shift->substitutions})!=0;
  451         2639  
21             }
22              
23              
24             our %l33t_table = ( ## no critic (ProhibitPackageVars)
25             a => ['4', '@'],
26             b => ['8'],
27             c => ['(', '{', '[', '<'],
28             e => ['3'],
29             g => ['6', '9'],
30             i => ['1', '!', '|'],
31             l => ['1', '|', '7'],
32             o => ['0'],
33             s => ['$', '5'],
34             t => ['+', '7'],
35             x => ['%'],
36             z => ['2'],
37             );
38              
39             sub make {
40 1533     1533 1 106829 my ($class, $password, $opts) = @_;
41             ## no critic (ProhibitPackageVars)
42             my $dictionaries = $opts->{ranked_dictionaries}
43 1533   66     8234 || do {
44             require Data::Password::zxcvbn::RankedDictionaries;
45             \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46             };
47 1533   100     8620 my $l33t_table = $opts->{l33t_table} || \%l33t_table;
48              
49 1533         4485 my @matches;
50 1533         7633 $class->_make_simple(\@matches,$password,$dictionaries);
51 1533         8570 $class->_make_reversed(\@matches,$password,$dictionaries);
52 1533         11174 $class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53              
54 1533         15366 @matches = sort @matches;
55 1533         8911 return \@matches;
56             }
57              
58             sub _make_simple {
59 4648     4648   12956 my ($class, $matches, $password, $dictionaries) = @_;
60 4648         13495 my $password_lc = lc($password);
61             # lc may change the length of the password...
62 4648         8914 my $length = length($password_lc);
63              
64 4648         7285 for my $dictionary_name (keys %{$dictionaries}) {
  4648         17746  
65 27453         47865 my $ranked_dict = $dictionaries->{$dictionary_name};
66 27453         48029 for my $i (0..$length-1) {
67 156745         260003 for my $j ($i..$length-1) {
68 674738         1068870 my $word = substr($password_lc,$i,$j-$i+1);
69 674738 100       1747619 if (my $rank = $ranked_dict->{$word}) {
70 8139         12769 push @{$matches}, $class->new({
  8139         192085  
71             token => substr($password,$i,$j-$i+1),
72             i => $i, j=> $j,
73             rank => $rank,
74             dictionary_name => $dictionary_name,
75             });
76             }
77             }
78             }
79             }
80             }
81              
82             sub _make_reversed {
83 1533     1533   5118 my ($class, $matches, $password, $dictionaries) = @_;
84              
85 1533         4548 my $rev_password = reverse($password);
86 1533         3275 my @rev_matches;
87 1533         5809 $class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88              
89 1533         6697 my $rev_length = length($password)-1;
90 1533         4996 for my $rev_match (@rev_matches) {
91 1553         8642 my $word = $rev_match->token;
92             # no need to add this, the normal matching will have produced
93             # it already
94 1553 100       7041 next if $word eq reverse($word);
95 661         1634 push @{$matches}, $class->new({
  661         17086  
96             token => reverse($word),
97             i => $rev_length - $rev_match->j,
98             j=> $rev_length - $rev_match->i,
99             rank => $rev_match->rank,
100             dictionary_name => $rev_match->dictionary_name,
101             reversed => 1,
102             });
103             }
104             }
105              
106             # makes a pruned copy of l33t_table that only includes password's
107             # possible substitutions
108             sub _relevant_l33t_subtable {
109 1539     1539   24524 my ($class, $password, $l33t_table) = @_;
110             # set of characters
111 1539         3704 my %password_chars; @password_chars{split //,$password} = ();
  1539         12214  
112              
113 1539         4545 my %subtable;
114 1539         3994 for my $letter (keys %{$l33t_table}) {
  1539         9075  
115 34681         74062 my @relevant_subs = grep { exists $password_chars{$_} }
116 18084         27249 @{$l33t_table->{$letter}};
  18084         37273  
117 18084 100       44575 $subtable{$letter} = \@relevant_subs
118             if @relevant_subs;
119             }
120              
121 1539         13443 return \%subtable;
122             }
123              
124             sub _translate {
125 1582     1582   4133 my ($class, $string, $table) = @_;
126 1582         2927 my $keys = join '', keys %{$table};
  1582         5303  
127 1582         37349 $string =~ s{([\Q$keys\E])}
128 1582         7099 {$table->{$1}}g;
129             return $string;
130             }
131              
132 1533     1533   5834 sub _make_l33t {
133             my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134 1533         6748  
135             my $subs = enumerate_substitution_maps(
136             $class->_relevant_l33t_subtable($password,$l33t_table)
137 1533         5373 );
  1533         4798  
138 2246 100       8960 for my $sub (@{$subs}) {
  2246         7742  
139 1582         6184 next unless %{$sub};
140 1582         3797 my $subbed_password = $class->_translate($password,$sub);
141 1582         5827 my @subbed_matches;
142             $class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143 1582         5900  
144 3651         24755 for my $subbed_match (@subbed_matches) {
145             my $token = substr($password,
146             $subbed_match->i,
147             $subbed_match->j - $subbed_match->i + 1);
148 3651 100       11267 # too short, ignore
149             next if length($token) <= 1;
150 1930 100       8332 # only return the matches that contain an actual substitution
151             next if lc($token) eq lc($subbed_match->token);
152             # subset of mappings in $sub that are in use for this match
153             my %min_subs = map {
154 2164 100       20289 $token =~ m{\Q$_}
155             ? ( $_ => $sub->{$_} )
156 589         1255 : ()
  589         2023  
157 589         1631 } keys %{$sub};
  589         14751  
158             push @{$matches}, $class->new({
159             token => $token,
160             substitutions => \%min_subs,
161             i => $subbed_match->i,
162             j=> $subbed_match->j,
163             rank => $subbed_match->rank,
164             dictionary_name => $subbed_match->dictionary_name,
165             });
166             }
167             }
168             }
169              
170              
171 4179     4179 1 67147 sub estimate_guesses {
172             my ($self,$min_guesses) = @_;
173 4179         16532  
174             return $self->rank *
175             $self->_uppercase_variations *
176             $self->_l33t_variations *
177             $self->_reversed_variations;
178             }
179              
180              
181             # an uppercase letter, followed by stuff that is *not* uppercase
182 3     3   1850 # letters
  3         47  
  3         42  
183             my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184             # stuff that is *not* uppercase letters, followed by an uppercase
185             # letter
186             my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187             # no characters that are *not* uppercase letters
188             my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189             # no characters that are *not* lowercase letters
190             my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191              
192 386     386 1 4044  
193 0     0 1 0 sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194 0     0 1 0 sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195 0     0 1 0 sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196 356   100 356 1 3639 sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197             sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198              
199 4179     4179   8869 sub _uppercase_variations {
200             my ($self) = @_;
201 4179         10066  
202             my $word = $self->token;
203              
204 4179 100       31028 # if the word has no uppercase letters, count it as 1 variation
205 270 100       992 return 1 if $word =~ $ALL_NOT_UPPER_RE;
206             return 1 if lc($word) eq $word;
207              
208             # a capitalized word is the most common capitalization scheme, so
209             # it only doubles the search space (uncapitalized + capitalized).
210             # allcaps and end-capitalized are common enough too, underestimate
211 268 100       1500 # as 2x factor to be safe.
212 148 100       764 return 2 if $word =~ $START_UPPER_RE;
213 127 100       810 return 2 if $word =~ $END_UPPER_RE;
214             return 2 if $word =~ $ALL_NOT_LOWER_RE;
215              
216             # otherwise calculate the number of ways to capitalize U+L
217             # uppercase+lowercase letters with U uppercase letters or
218             # less. or, if there's more uppercase than lower (for
219             # eg. PASSwORD), the number of ways to lowercase U+L letters with
220 37         190 # L lowercase letters or less.
221 37         209 my $U = () = $word =~ m/\p{Lu}/g;
222             my $L = () = $word =~ m/\p{Ll}/g;
223 37         95  
224 37         253 my $variations = 0;
225 37         133 $variations += nCk($U+$L,$_) for 1..min($U,$L);
226             return $variations;
227             }
228              
229 4179     4179   9775 sub _l33t_variations {
230             my ($self) = @_;
231 4179         9321  
232             my $word = $self->token;
233 4179         7973  
234 4179         7445 my $variations = 1;
  4179         15487  
235 1098         3030 for my $subbed (keys %{$self->substitutions}) {
236             my $unsubbed = $self->substitutions->{$subbed};
237              
238 1098         10335 # number of Substituted characters
239             my $S = () = $word =~ m{\Q$subbed}gi;
240 1098         6808 # number of Unsubstituted characters
241             my $U = () = $word =~ m{\Q$unsubbed}gi;
242 1098 100 66     4858  
243             if ($S==0 || $U==0) {
244             # for this substitution, password is either fully subbed
245             # (444) or fully unsubbed (aaa); treat that as doubling
246             # the space (attacker needs to try fully subbed chars in
247 1054         2397 # addition to unsubbed.)
248             $variations *= 2;
249             }
250             else {
251             # this case is similar to capitalization: with aa44a, U =
252             # 3, S = 2, attacker needs to try unsubbed + one sub + two
253 44         102 # subs
254 44         307 my $possibilities = 0;
255 44         111 $possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256             $variations *= $possibilities;
257             }
258             }
259 4179         13023  
260             return $variations;
261             }
262              
263 4179 100   4179   23473 sub _reversed_variations {
264             return shift->reversed ? 2 : 1;
265             }
266              
267              
268 386     386 1 1213 sub feedback_warning {
269             my ($self, $is_sole_match) = @_;
270 386 100       3371  
    100          
    100          
271 107 100 100     1040 if ($self->dictionary_name eq 'passwords') {
    100 100        
272 26 50       179 if ($is_sole_match && !$self->l33t && !$self->reversed) {
    50          
273 0         0 if ($self->rank <= 10) {
274             return 'This is a top-10 common password';
275             }
276 0         0 elsif ($self->rank <= 100) {
277             return 'This is a top-100 common password';
278             }
279 26         196 else {
280             return 'This is a very common password';
281             }
282             }
283 64         1366 elsif ($self->guesses_log10 <= 4) {
284             return 'This is similar to a commonly used password';
285             }
286             }
287 131 100       434 elsif ($self->dictionary_name =~ /names$/) {
288 22         144 if ($is_sole_match) {
289             return 'Names and surnames by themselves are easy to guess'
290             }
291 109         730 else {
292             return 'Common names and surnames are easy to guess';
293             }
294             }
295 27         190 elsif ($is_sole_match) {
296             return 'A word by itself is easy to guess';
297             }
298 138         1169  
299             return undef;
300             }
301              
302 386     386 1 1296 sub feedback_suggestions {
303             my ($self) = @_;
304 386         1348  
305 386         886 my $word = $self->token;
306             my @suggestions;
307 386 100       1674  
    100          
308 30         149 if ($self->does_word_start_upper($word)) {
309             push @suggestions, q{Capitalization doesn't help very much};
310             }
311 7         31 elsif ($self->is_word_all_upper($word)) {
312             push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313             }
314 386 100 100     2123  
315 13         45 if ($self->reversed && length($word) >= 4) {
316             push @suggestions, q{Reversed words aren't much harder to guess};
317             }
318 386 100       1957  
319 10         32 if ($self->l33t) {
320             push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321             }
322 386         2180  
323             return \@suggestions;
324             }
325              
326              
327             around fields_for_json => sub {
328             my ($orig,$self) = @_;
329             ( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330             };
331              
332             1;
333              
334             __END__
335              
336             =pod
337              
338             =encoding UTF-8
339              
340             =for :stopwords Wiktionary xato
341              
342             =head1 NAME
343              
344             Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345              
346             =head1 VERSION
347              
348             version 1.1.0
349              
350             =head1 DESCRIPTION
351              
352             This class represents the guess that a certain substring of a password
353             can be guessed by going through a dictionary.
354              
355             =head1 ATTRIBUTES
356              
357             =head2 C<reversed>
358              
359             Boolean, true if the token appears to be a dictionary word that's been
360             reversed (i.e. last letter first)
361              
362             =head2 C<substitutions>
363              
364             Hashref representing the characters that need to be substituted to
365             make the token match a dictionary work (e.g. if the token is
366             C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367              
368             =head2 C<rank>
369              
370             Number, indicating how common the dictionary word is. 1 means "most
371             common".
372              
373             =head2 C<dictionary_name>
374              
375             String, the name of the dictionary that the word was found in. Usually one of:
376              
377             =over 4
378              
379             =item *
380              
381             C<english_wikipedia>
382              
383             words extracted from a dump of the English edition of Wikipedia
384              
385             =item *
386              
387             C<male_names>, C<female_names>, C<surnames>
388              
389             common names from the 1990 US census
390              
391             =item *
392              
393             C<passwords>
394              
395             most common passwords, extracted from the "xato" password dump
396              
397             =item *
398              
399             C<us_tv_and_film>
400              
401             words from a 2006 Wiktionary word frequency study over American
402             television and movies
403              
404             =back
405              
406             =head1 METHODS
407              
408             =head2 C<l33t>
409              
410             Returns true if the token had any L</substitutions> (i.e. it was
411             written in "l33t-speak")
412              
413             =head2 C<make>
414              
415             my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416             $password,
417             { # these are the defaults
418             ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419             l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420             },
421             ) };
422              
423             Scans the C<$password> for substrings that match words in the
424             C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425             from the C<l33t_table>.
426              
427             The C<ranked_dictionaries> should look like:
428              
429             { some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430             ... }
431              
432             (i.e. a hash of dictionaries, each mapping words to their frequency
433             rank) and the C<l33t_table> should look like:
434              
435             { a => [ '4', '@' ], ... }
436              
437             (i.e. a hash mapping characters to arrays of other characters)
438              
439             =head2 C<estimate_guesses>
440              
441             The number of guesses is the product of the rank of the word, how many
442             case combinations match it, how many substitutions were used, doubled
443             if the token is reversed.
444              
445             =head2 C<does_word_start_upper>
446              
447             =head2 C<does_word_end_upper>
448              
449             =head2 C<is_word_all_not_upper>
450              
451             =head2 C<is_word_all_not_lower>
452              
453             =head2 C<is_word_all_upper>
454              
455             if ($self->does_word_start_upper($word)) { ... }
456              
457             These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458             >> and L<< /C<feedback_suggestions> >>.
459              
460             =head2 C<feedback_warning>
461              
462             =head2 C<feedback_suggestions>
463              
464             This class suggests not using common words or passwords, especially on
465             their own. It also suggests that capitalisation, "special characters"
466             substitutions, and writing things backwards are not very useful.
467              
468             =head2 C<fields_for_json>
469              
470             The JSON serialisation for matches of this class will contain C<token
471             i j guesses guesses_log10 dictionary_name reversed rank
472             substitutions>.
473              
474             =head1 AUTHOR
475              
476             Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477              
478             =head1 COPYRIGHT AND LICENSE
479              
480             This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481              
482             This is free software; you can redistribute it and/or modify it under
483             the same terms as the Perl 5 programming language system itself.
484              
485             =cut