File Coverage

blib/lib/Hash/Normalize.pm
Criterion Covered Total %
statement 45 45 100.0
branch 12 12 100.0
condition 3 3 100.0
subroutine 11 11 100.0
pod 2 2 100.0
total 73 73 100.0


line stmt bran cond sub pod time code
1             package Hash::Normalize;
2              
3 6     6   75917 use 5.010;
  6         24  
4              
5 6     6   33 use strict;
  6         15  
  6         135  
6 6     6   30 use warnings;
  6         16  
  6         334  
7              
8             =encoding UTF-8
9              
10             =head1 NAME
11              
12             Hash::Normalize - Automatically normalize Unicode hash keys.
13              
14             =head1 VERSION
15              
16             Version 0.01
17              
18             =cut
19              
20             our $VERSION;
21             BEGIN {
22 6     6   125 $VERSION = '0.01';
23             }
24              
25             =head1 SYNOPSIS
26              
27             use Hash::Normalize qw;
28              
29             normalize my %hash, 'NFC';
30              
31             $hash{café} = 'coffee'; # NFD, "cafe\x{301}"
32              
33             print $hash{café}; # NFD, "cafe\x{301}"
34             # 'coffee' is printed
35              
36             print $hash{café}; # NFC, "caf\x{e9}"
37             # 'coffee' is also printed
38              
39             =head1 DESCRIPTION
40              
41             This module provides an utility routine that augments a given Perl hash table so that its keys are automatically normalized following one of the Unicode normalization schemes.
42             All the following actions on this hash will be made regardless of how the key used for the action is normalized.
43              
44             Since this module does not use the C mechanism, normalized hashes are indistinguishable from regular hashes as far as Perl is concerned, but this module also provides L to identify them if necessary.
45              
46             =cut
47              
48 6     6   2571 use Variable::Magic;
  6         5939  
  6         278  
49 6     6   2911 use Unicode::Normalize ();
  6         9201  
  6         2408  
50              
51             =head1 FUNCTIONS
52              
53             =head2 C
54              
55             normalize %hash;
56             normalize %hash, $mode;
57              
58             Applies the Unicode normalization scheme C<$mode> onto C<%hash>.
59             C<$mode> defaults to C<'NFC'> if omitted, and should match C otherwise.
60              
61             C will first try to forcefully normalize the existing keys in C<%hash> to the new mode, but it will throw an exception if there are distinct keys that have the same normalization.
62             All the keys subsequently used for fetches, stores, exists, deletes and list assignments are then first passed through the according normalization procedure.
63             C will also return the list of normalized keys.
64              
65             =cut
66              
67 33     33   10969 sub _remap { $_[2] = Unicode::Normalize::normalize($_[1], "$_[2]"); undef }
  33         1240  
68              
69             my $wiz = Variable::Magic::wizard(
70             data => sub { $_[1] },
71             fetch => \&_remap,
72             store => \&_remap,
73             exists => \&_remap,
74             delete => \&_remap,
75             copy_key => 1,
76             );
77              
78             sub _validate_mode {
79 15     15   36 my $mode = shift;
80              
81 15 100       81 $mode = 'nfc' unless defined $mode;
82 15 100       130 if ($mode =~ /^(?:nf)?(k?[cd])$/i) {
    100          
83 12         60 $mode = uc "NF$1";
84             } elsif ($mode =~ /^(fc[cd])$/i) {
85 2         10 $mode = uc "$1";
86             } else {
87 1         13 require Carp;
88 1         216 Carp::croak('Invalid normalization');
89             }
90              
91 14         42 return $mode
92             }
93              
94             sub normalize (\%;$) {
95 15     15 1 4552 my ($hash, $mode) = @_;
96              
97 15         48 my $previous_mode = &get_normalization($hash);
98 15         49 my $new_mode = _validate_mode($mode);
99 14 100 100     82 return $hash if defined $previous_mode and $previous_mode eq $new_mode;
100              
101 13         58 &Variable::Magic::dispell($hash, $wiz);
102              
103 13 100       45 if (%$hash) {
104 6         10 my %dup;
105 6         24 for my $key (keys %$hash) {
106 12         41 my $norm = Unicode::Normalize::normalize($new_mode, $key);
107 12 100       199 if (exists $dup{$norm}) {
108 1         9 require Carp;
109 1         163 Carp::croak('Key collision after normalization');
110             }
111 11         38 $dup{$norm} = $hash->{$key};
112             }
113 5         38 %$hash = %dup;
114             }
115              
116 12         76 &Variable::Magic::cast($hash, $wiz, $new_mode);
117              
118 12         81 return $hash;
119             }
120              
121             =head2 C
122              
123             my $mode = get_normalization %hash;
124             normalize %hash, $mode;
125              
126             Returns the current Unicode normalization scheme in use for C<%hash>, or C if it is a plain hash.
127              
128             =cut
129              
130 23     23 1 134 sub get_normalization (\%) { &Variable::Magic::getdata($_[0], $wiz) }
131              
132             =head1 NORMALIZED SYMBOL LOOKUPS
133              
134             Stashes (Perl symbol tables) are implemented as plain hashes, therefore one can use C on them to make sure that Unicode symbol lookups are made regardless of normalization.
135              
136             package Foo;
137              
138             BEGIN {
139             require Hash::Normalize;
140             # Enforce NFC normalization
141             Hash::Normalize::normalize(%Foo::, 'NFC')
142             }
143              
144             sub café { # NFD, "cafe\x{301}"
145             return 'coffee'
146             }
147              
148             sub coffee_nfc {
149             café() # NFC, "cafe\x{e9}"
150             }
151              
152             sub coffee_nfd {
153             café() # NFD, "cafe\x{301}"
154             }
155              
156             # Both coffee_nfc() and coffee_nfd() return 'coffee'
157              
158             =head1 CAVEATS
159              
160             Using a normalized hash is slightly slower than a plain hash, due to the normalization procedure and the overhead of magic.
161              
162             If a hash is initialized from a normalized hash by list assignment (C<%new = %normalized>), then the normalization scheme will not be carried over to the new hash, although its keys will initially be normalized like the ones from the original hash.
163              
164             =head1 EXPORT
165              
166             The functions L and L are only exported on request by specifying their names in the module import list.
167              
168             =cut
169              
170 6     6   54 use base 'Exporter';
  6         11  
  6         576  
171              
172             our @EXPORT = ();
173             our %EXPORT_TAGS = ();
174             our @EXPORT_OK = qw;
175              
176             =head1 DEPENDENCIES
177              
178             L 5.10.
179              
180             L, L (core since perl 5).
181              
182             L (core since perl 5.8).
183              
184             L 0.51.
185              
186             =head1 AUTHOR
187              
188             Vincent Pit, C<< >>, L.
189              
190             You can contact me by mail or on C (vincent).
191              
192             =head1 BUGS
193              
194             Please report any bugs or feature requests to C, or through the web interface at L.
195             I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
196              
197             =head1 SUPPORT
198              
199             You can find documentation for this module with the perldoc command.
200              
201             perldoc Hash::Normalize
202              
203             =head1 COPYRIGHT & LICENSE
204              
205             Copyright 2017 Vincent Pit, all rights reserved.
206              
207             This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
208              
209             =cut
210              
211             1; # End of Hash::Normalize