| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Unicode::Security; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 7 |  |  | 7 |  | 597460 | use 5.008; | 
|  | 7 |  |  |  |  | 30 |  | 
|  | 7 |  |  |  |  | 376 |  | 
| 4 | 7 |  |  | 7 |  | 43 | use strict; | 
|  | 7 |  |  |  |  | 16 |  | 
|  | 7 |  |  |  |  | 272 |  | 
| 5 | 7 |  |  | 7 |  | 36 | use warnings; | 
|  | 7 |  |  |  |  | 33 |  | 
|  | 7 |  |  |  |  | 276 |  | 
| 6 | 7 |  |  | 7 |  | 39 | use Exporter qw(import); | 
|  | 7 |  |  |  |  | 13 |  | 
|  | 7 |  |  |  |  | 276 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 7 |  |  | 7 |  | 29605 | use Unicode::Security::Confusables; | 
|  | 7 |  |  |  |  | 47 |  | 
|  | 7 |  |  |  |  | 4651 |  | 
| 9 | 7 |  |  | 7 |  | 27371 | use Unicode::Normalize qw(NFD); | 
|  | 7 |  |  |  |  | 57334 |  | 
|  | 7 |  |  |  |  | 1285 |  | 
| 10 | 7 |  |  | 7 |  | 39985 | use Unicode::UCD qw(charinfo charscript); | 
|  | 7 |  |  |  |  | 902614 |  | 
|  | 7 |  |  |  |  | 1593 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our $VERSION = '0.06'; | 
| 13 |  |  |  |  |  |  | $VERSION = eval $VERSION; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 16 |  |  |  |  |  |  | skeleton confusable soss restriction_level mixed_script | 
| 17 |  |  |  |  |  |  | mixed_number mixed_num | 
| 18 |  |  |  |  |  |  | whole_script_confusable mixed_script_confusable | 
| 19 |  |  |  |  |  |  | ws_confusable ms_confusable | 
| 20 |  |  |  |  |  |  | ); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | our (%MA, %WS); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | use constant { | 
| 25 | 7 |  |  |  |  | 8457 | UNRESTRICTED           => 0, | 
| 26 |  |  |  |  |  |  | ASCII_ONLY             => 1, | 
| 27 |  |  |  |  |  |  | SINGLE_SCRIPT          => 2, | 
| 28 |  |  |  |  |  |  | HIGHLY_RESTRICTIVE     => 3, | 
| 29 |  |  |  |  |  |  | MODERATELY_RESTRICTIVE => 4, | 
| 30 |  |  |  |  |  |  | MINIMALLY_RESTRICTIVE  => 5, | 
| 31 | 7 |  |  | 7 |  | 138 | }; | 
|  | 7 |  |  |  |  | 17 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | my %recommended_script = map { $_ => \1 } qw( | 
| 34 |  |  |  |  |  |  | Common Inherited Arabic Armenian Bengali Bopomofo Cyrillic Devanagari | 
| 35 |  |  |  |  |  |  | Ethiopic Georgian Greek Gujarati Gurmukhi Han Hangul Hebrew Hiragana | 
| 36 |  |  |  |  |  |  | Kannada Katakana Khmer Lao Latin Malayalam Myanmar Oriya Sinhala Tamil | 
| 37 |  |  |  |  |  |  | Telugu Thaana Thai Tibetan | 
| 38 |  |  |  |  |  |  | ); | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | my %aspirational_script = map { $_ => \1 } qw( | 
| 41 |  |  |  |  |  |  | Canadian_Aboriginal Miao Mongolian Tifinagh Yi | 
| 42 |  |  |  |  |  |  | ); | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | my %highly_restrictive = map { $_ => \1 } ( | 
| 45 |  |  |  |  |  |  | '', 'Hiragana', 'Katakana', 'Hiragana, Katakana', 'Bopomofo', 'Hangul', | 
| 46 |  |  |  |  |  |  | ); | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub skeleton { | 
| 50 | 10 |  |  | 10 | 1 | 70 | my $str = NFD shift; | 
| 51 | 10 | 100 |  |  |  | 115 | my $m = $str =~ s{(.)}{ my $c = $MA{$1}; defined $c ? $c : $1 }eg; | 
|  | 49 |  |  |  |  | 227 |  | 
|  | 49 |  |  |  |  | 177 |  | 
| 52 | 10 | 50 |  |  |  | 81 | return $m ? NFD $str : $str; | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub confusable { | 
| 57 | 5 |  |  | 5 | 1 | 25 | return skeleton($_[0]) eq skeleton($_[1]); | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | # Algorithm described here: | 
| 62 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Whole_Script_Confusables | 
| 63 |  |  |  |  |  |  | sub whole_script_confusable { | 
| 64 | 3 |  |  | 3 | 1 | 18 | my ($target, $str) = @_; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | # Canonicalize the script name to match the format used in %WS. | 
| 67 | 3 |  |  |  |  | 12 | $target = ucfirst lc $target; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 3 |  |  |  |  | 52 | my %soss = soss(NFD $str); | 
| 70 | 3 |  |  |  |  | 10 | delete @soss{qw(Common Inherited)}; | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 3 | 50 |  |  |  | 183 | my $count = keys %soss or return ''; | 
| 73 | 3 | 50 |  |  |  | 13 | return if 1 < $count; | 
| 74 | 3 |  |  |  |  | 8 | my ($source) = keys %soss; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 3 |  |  |  |  | 13 | my $chars = $WS{$source}{$target}; | 
| 77 | 3 | 100 |  |  |  | 14 | do { return 1 if $chars->{$_} } for keys %{ $soss{$source} }; | 
|  | 3 |  |  |  |  | 15 |  | 
|  | 7 |  |  |  |  | 114 |  | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | *ws_confusable = *ws_confusable = \&whole_script_confusable; | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | # Algorithm described here: | 
| 83 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Mixed_Script_Confusables | 
| 84 |  |  |  |  |  |  | sub mixed_script_confusable { | 
| 85 | 3 |  |  | 3 | 1 | 64 | my %soss = soss(NFD $_[0]); | 
| 86 | 3 |  |  |  |  | 13 | delete @soss{qw(Common Inherited)}; | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 3 |  |  |  |  | 13 | my @soss = keys %soss; | 
| 89 | 3 |  |  |  |  | 8 | for my $source (@soss) { | 
| 90 | 4 |  |  |  |  | 9 | my $sum = 0; | 
| 91 | 4 |  |  |  |  | 8 | for my $target (@soss) { | 
| 92 | 8 | 100 |  |  |  | 23 | next if $target eq $source; | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 5 |  |  |  |  | 7 | my $nok = 0; | 
| 95 | 5 |  |  |  |  | 18 | my $chars = $WS{$target}{$source}; | 
| 96 | 5 |  |  |  |  | 9 | for my $char (keys %{ $soss{$target} }) { | 
|  | 5 |  |  |  |  | 31 |  | 
| 97 | 5 | 100 |  |  |  | 28 | $nok = 1, last unless $chars->{$char}; | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 5 | 100 |  |  |  | 18 | last if $nok; | 
| 100 | 3 |  |  |  |  | 8 | $sum++; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 4 | 100 |  |  |  | 33 | return 1 if 1 == @soss - $sum; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 1 |  |  |  |  | 11 | return ''; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  | *ms_confusable = *ms_confusable = \&mixed_script_confusable; | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | sub soss { | 
| 112 | 24 |  |  | 24 | 1 | 46 | my %soss; | 
| 113 | 24 |  |  |  |  | 129 | for my $char (split //, $_[0]) { | 
| 114 | 146 |  |  |  |  | 546 | my $script = charscript(ord($char)); | 
| 115 | 146 | 50 |  |  |  | 133830 | $script = 'Unknown' unless defined $script; | 
| 116 | 146 |  |  |  |  | 683 | $soss{$script}{$char} = \1; | 
| 117 |  |  |  |  |  |  | } | 
| 118 | 24 |  |  |  |  | 180 | return %soss; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub mixed_script { | 
| 123 | 4 |  |  | 4 | 1 | 7788 | my %soss = soss($_[0]); | 
| 124 | 4 |  |  |  |  | 19 | delete @soss{qw(Common Inherited)}; | 
| 125 | 4 |  |  |  |  | 108 | return 1 < keys %soss; | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub mixed_number { | 
| 130 | 7 |  |  | 7 | 1 | 9979 | my %z; | 
| 131 | 7 |  |  |  |  | 58 | for my $char (split //, $_[0]) { | 
| 132 | 36 | 50 |  |  |  | 155 | my $info = charinfo(ord $char) or next; | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 36 |  |  |  |  | 692222 | my $num = $info->{decimal}; | 
| 135 | 36 | 100 |  |  |  | 149 | next unless length $num; | 
| 136 |  |  |  |  |  |  |  | 
| 137 | 28 |  |  |  |  | 301 | $z{ ord($char) - $num } = \1; | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  |  | 
| 140 | 7 |  |  |  |  | 89 | return 1 < keys %z; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | *mixed_num = *mixed_num = \&mixed_number; | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | # Algorithm described here: | 
| 146 |  |  |  |  |  |  | #   http://www.unicode.org/reports/tr39/#Restriction_Level_Detection | 
| 147 |  |  |  |  |  |  | sub restriction_level { | 
| 148 | 17 |  |  | 17 | 1 | 12650 | my ($str, $non_id_regex) = @_; | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 17 | 100 |  |  |  | 105 | $non_id_regex = qr/\P{ID_Continue}/ unless defined $non_id_regex; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 17 | 100 |  |  |  | 181 | return UNRESTRICTED if $str =~ /$non_id_regex/; | 
| 153 | 16 | 100 |  |  |  | 80 | return ASCII_ONLY   if $str !~ /\P{ASCII}/; | 
| 154 |  |  |  |  |  |  |  | 
| 155 | 14 |  |  |  |  | 37 | my %soss = soss($str); | 
| 156 | 14 |  |  |  |  | 38 | delete @soss{qw(Common Inherited)}; | 
| 157 | 14 | 100 |  |  |  | 58 | return SINGLE_SCRIPT if 1 == keys %soss; | 
| 158 |  |  |  |  |  |  |  | 
| 159 | 13 |  |  |  |  | 39 | delete $soss{Latin}; | 
| 160 | 13 |  |  |  |  | 36 | my %copy = %soss; | 
| 161 | 13 |  |  |  |  | 26 | delete $copy{Han}; | 
| 162 | 13 |  |  |  |  | 60 | my $soss = join ', ', sort keys %copy; | 
| 163 | 13 | 100 |  |  |  | 89 | return HIGHLY_RESTRICTIVE if $highly_restrictive{$soss}; | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 6 | 50 |  |  |  | 19 | if (1 == keys %soss) { | 
| 166 | 6 |  |  |  |  | 13 | my ($script) = keys %soss; | 
| 167 | 6 | 100 | 66 |  |  | 176 | return MODERATELY_RESTRICTIVE | 
|  |  |  | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 168 |  |  |  |  |  |  | if ($recommended_script{$script} or $aspirational_script{$script}) | 
| 169 |  |  |  |  |  |  | and not ($soss{Cyrillic} or $soss{Greek}); | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 4 |  |  |  |  | 28 | return MINIMALLY_RESTRICTIVE; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | 1; | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | __END__ |