File Coverage

blib/lib/Locale/Maketext/Fuzzy.pm
Criterion Covered Total %
statement 63 63 100.0
branch 22 28 78.5
condition 3 5 60.0
subroutine 9 9 100.0
pod 2 2 100.0
total 99 107 92.5


line stmt bran cond sub pod time code
1             package Locale::Maketext::Fuzzy;
2             $Locale::Maketext::Fuzzy::VERSION = '0.11';
3              
4 1     1   30338 use 5.005;
  1         4  
  1         54  
5 1     1   6 use strict;
  1         2  
  1         36  
6 1     1   1153 use Locale::Maketext;
  1         13260  
  1         38  
7 1     1   12 use base 'Locale::Maketext';
  1         3  
  1         162  
8              
9             sub override_maketext {
10 6     6 1 1074 my ( $class, $flag ) = @_;
11 6 50       17 $class = ref($class) if ref($class);
12              
13 1     1   6 no strict 'refs';
  1         2  
  1         994  
14              
15 6 100       20 if ($flag) {
    100          
16 2         3 *{"$class\::maketext"} = \&maketext_fuzzy;
  2         12  
17             }
18             elsif ( @_ >= 2 ) {
19 2         3 delete ${"$class\::"}{maketext};
  2         8  
20             }
21              
22 6 100       7 return ( defined &{"$class\::maketext"} ? 1 : 0 );
  6         40  
23             }
24              
25             # Global cache of entries and their regexified forms
26             my %regex_cache;
27              
28             sub maketext_fuzzy {
29 7     7 1 4568 my ( $handle, $phrase ) = splice( @_, 0, 2 );
30              
31             # An array of all lexicon hashrefs
32 7         7 my @lexicons = @{ $handle->_lex_refs };
  7         31  
33              
34             # Try exact match if possible at all.
35 7         118 foreach my $lex (@lexicons) {
36 7 100       33 return $handle->SUPER::maketext( $phrase, @_ )
37             if exists $lex->{$phrase};
38             }
39              
40             # Keys are matched entries; values are arrayrefs of extracted params
41 5         8 my %candidate;
42              
43             # Fuzzy match phase 1 -- extract all candidates
44 5         7 foreach my $lex (@lexicons) {
45              
46             # We're not interested in non-bracketed entries, so ignore them
47 5         6 foreach my $entry ( grep /(?:(?
  5         31  
48             # Skip entries which are _only_ brackets and whitespace.
49             # The most value they could add is rearrangement, and that
50             # is almost certainly incorrect.
51 10 50       49 next if $entry =~ /^\s*(\[[^]]+\]\s*)+$/;
52              
53 10   100     28 my $re = ( $regex_cache{$entry} ||= [ _regexify($entry) ] );
54 10 100       76 my @vars = ( $phrase =~ $re->[0] ) or next;
55 7         15 $candidate{$entry} ||=
56 7 50 33     20 ( @{ $re->[1] } ? [ @vars[ @{ $re->[1] } ] ] : \@vars );
  7         35  
57             }
58             }
59              
60             # Fail early if we cannot find anything that matches
61 5 100       17 return $phrase unless %candidate;
62              
63             # Fuzzy match phase 2 -- select the best candidate
64 3 50       10 $phrase = (
65             sort {
66              
67             # For now, we just use a very crude heuristic: "Longer is better"
68 4         12 length($b) <=> length($a)
69             or $b cmp $a
70             } keys %candidate
71             )[0];
72              
73 4         5 return $handle->SUPER::maketext( $phrase, @{ $candidate{$phrase} }, @_ );
  4         17  
74             }
75              
76             sub _regexify {
77 2     2   6 my $text = quotemeta(shift);
78 2         2 my @ords;
79              
80 2         27 $text =~ s{
81             ( # capture into $1...
82             (?
83             ) # (to be restored back)
84             \\\[ # opening bracket
85              
86             ( # capture into $2...
87             (?: # any numbers of
88             [^~\]] # ordinary non-] characters
89             | # or
90             ~\\?. # escaped characters
91             )*
92             )
93             \\\] # closing bracket
94             }{
95 3         8 $1._paramify($2, \@ords)
96             }egx;
97              
98 2         5 $text =~ s/\Q.*?\E$/.*/;
99 2         81 return qr/^$text$/, \@ords;
100             }
101              
102             sub _paramify {
103 3     3   5 my ( $text, $ordref ) = @_;
104 3         5 my $out = '(.*?)';
105 3         7 my @choices = split( /\\,/, $text );
106              
107 3 50       13 if ( $choices[0] =~ /^(?:\w+|\\#|\\\*)$/ ) {
108              
109             # Do away with the function name
110 3 100       9 shift @choices unless $choices[0] =~ /^_(?:\d+|\\\*)$/;
111              
112             # Build an alternate regex to weed out vars
113 2         22 $out .= '(?:' . join(
114             '|',
115             sort {
116             length($b) <=> length($a) # longest first
117             } map {
118 3         5 /^_(?:(\d+)|\\\*)$/
119 5 100       17 ? do {
120 3 50       8 push @{$ordref}, ( $1 - 1 ) if defined $1;
  3         7  
121 3         7 '';
122             }
123             : $_ # turn _1, _2, _*... into ''
124             } @choices
125             ) . ')';
126              
127 3         7 $out =~ s/\Q(?:)\E$//;
128             }
129              
130 3         15 return $out;
131             }
132              
133             1;
134              
135             =head1 NAME
136              
137             Locale::Maketext::Fuzzy - Maketext from already interpolated strings
138              
139             =head1 SYNOPSIS
140              
141             package MyApp::L10N;
142             use base 'Locale::Maketext::Fuzzy'; # instead of Locale::Maketext
143              
144             package MyApp::L10N::de;
145             use base 'MyApp::L10N';
146             our %Lexicon = (
147             # Exact match should always be preferred if possible
148             "0 camels were released."
149             => "Exact match",
150              
151             # Fuzzy match candidate
152             "[quant,_1,camel was,camels were] released."
153             => "[quant,_1,Kamel wurde,Kamele wurden] freigegeben.",
154              
155             # This could also match fuzzily, but is less preferred
156             "[_2] released[_1]"
157             => "[_1][_2] ist frei[_1]",
158             );
159              
160             package main;
161             my $lh = MyApp::L10N->get_handle('de');
162              
163             # All ->maketext calls below will become ->maketext_fuzzy instead
164             $lh->override_maketext(1);
165              
166             # This prints "Exact match"
167             print $lh->maketext('0 camels were released.');
168              
169             # "1 Kamel wurde freigegeben." -- quant() gets 1
170             print $lh->maketext('1 camel was released.');
171              
172             # "2 Kamele wurden freigegeben." -- quant() gets 2
173             print $lh->maketext('2 camels were released.');
174              
175             # "3 Kamele wurden freigegeben." -- parameters are ignored
176             print $lh->maketext('3 released.');
177              
178             # "4 Kamele wurden freigegeben." -- normal usage
179             print $lh->maketext('[*,_1,camel was,camels were] released.', 4);
180              
181             # "!Perl ist frei!" -- matches the broader one
182             # Note that the sequence ([_2] before [_1]) is preserved
183             print $lh->maketext('Perl released!');
184              
185             =head1 DESCRIPTION
186              
187             This module is a subclass of C, with additional
188             support for localizing messages that already contains interpolated
189             variables.
190              
191             This is most useful when the messages are returned by external sources
192             -- for example, to match C against
193             C<[_1]: command not found>.
194              
195             Of course, this module is also useful if you're simply too lazy
196             to use the
197              
198             $lh->maketext("[quant,_1,file,files] deleted.", $count);
199              
200             syntax, but wish to write
201              
202             $lh->maketext_fuzzy("$count files deleted");
203              
204             instead, and have the correct plural form figured out automatically.
205              
206             If C seems too long to type for you, this module
207             also provides a C method to turn I C
208             calls into C calls.
209              
210             =head1 METHODS
211              
212             =head2 $lh->maketext_fuzzy(I[, I]);
213              
214             That method takes exactly the same arguments as the C method
215             of C.
216              
217             If I is found in lexicons, it is applied in the same way as
218             C. Otherwise, it looks at all lexicon entries that could
219             possibly yield I, by turning C<[...]> sequences into C<(.*?)> and
220             match the resulting regular expression against I.
221              
222             Once it finds all candidate entries, the longest one replaces the
223             I for the real C call. Variables matched by its bracket
224             sequences (C<$1>, C<$2>...) are placed before I; the order
225             of variables in the matched entry are correctly preserved.
226              
227             For example, if the matched entry in C<%Lexicon> is C,
228             this call:
229              
230             $fh->maketext_fuzzy("Test string", "param");
231              
232             is equivalent to this:
233              
234             $fh->maketext("Test [_1]", "string", "param");
235              
236             However, most of the time you won't need to supply I to
237             a C call, since all parameters are already interpolated
238             into the string.
239              
240             =head2 $lh->override_maketext([I]);
241              
242             If I is true, this accessor method turns C<$lh-Emaketext>
243             into an alias for C<$lh-Emaketext_fuzzy>, so all consecutive
244             C calls in the C<$lh>'s packages are automatically fuzzy.
245             A false I restores the original behaviour. If the flag is not
246             specified, returns the current status of override; the default is
247             0 (no overriding).
248              
249             Note that this call only modifies the symbol table of the I
250             class> that C<$lh> belongs to, so other languages are not affected.
251             If you want to override all language handles in a certain application,
252             try this:
253              
254             MyApp::L10N->override_maketext(1);
255              
256             =head1 CAVEATS
257              
258             =over 4
259              
260             =item *
261              
262             The "longer is better" heuristic to determine the best match is
263             reasonably good, but could certainly be improved.
264              
265             =item *
266              
267             Currently, C<"[quant,_1,file] deleted"> won't match C<"3 files deleted">;
268             you'll have to write C<"[quant,_1,file,files] deleted"> instead, or
269             simply use C<"[_1] file deleted"> as the lexicon key and put the correct
270             plural form handling into the corresponding value.
271              
272             =item *
273              
274             When used in combination with C's C
275             backend, all keys would be iterated over each time a fuzzy match is
276             performed, and may cause serious speed penalty. Patches welcome.
277              
278             =back
279              
280             =head1 SEE ALSO
281              
282             L, L
283              
284             =head1 HISTORY
285              
286             This particular module was written to facilitate an I
287             layer for Slashcode's I