File Coverage

blib/lib/Locale/Maketext.pm
Criterion Covered Total %
statement 328 386 84.9
branch 113 176 64.2
condition 46 100 46.0
subroutine 40 44 90.9
pod 12 19 63.1
total 539 725 74.3


line stmt bran cond sub pod time code
1             package Locale::Maketext;
2 19     19   870087 use strict;
  19         165  
  19         602  
3             our $USE_LITERALS;
4 17     17   80 use Carp ();
  17         26  
  17         198  
5 17     17   7806 use I18N::LangTags ();
  17         44094  
  17         389  
6 17     17   7266 use I18N::LangTags::Detect ();
  17         28413  
  17         736  
7              
8             #--------------------------------------------------------------------------
9              
10 17 50   17   87 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
  17         1282  
11             # define the constant 'DEBUG' at compile-time
12              
13             # turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
14             # use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
15             BEGIN {
16              
17             # if we have it || we can load it
18 17 50 33 17   106 if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) {
  17         94  
  17         8237  
19 17         387 utf8->import();
20 17         6051 DEBUG and warn " utf8 on for _compile()\n";
21             }
22             else {
23 0         0 DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
24             }
25             }
26              
27              
28             our $VERSION = '1.32';
29             our @ISA = ();
30              
31             our $MATCH_SUPERS = 1;
32             our $MATCH_SUPERS_TIGHTLY = 1;
33             our $USING_LANGUAGE_TAGS = 1;
34             # Turning this off is somewhat of a security risk in that little or no
35             # checking will be done on the legality of tokens passed to the
36             # eval("use $module_name") in _try_use. If you turn this off, you have
37             # to do your own taint checking.
38              
39             $USE_LITERALS = 1 unless defined $USE_LITERALS;
40             # a hint for compiling bracket-notation things.
41              
42             my %isa_scan = ();
43              
44             ###########################################################################
45              
46             sub quant {
47 2     2 1 14 my($handle, $num, @forms) = @_;
48              
49 2 50       5 return $num if @forms == 0; # what should this mean?
50 2 50 33     7 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
51              
52             # Normal case:
53             # Note that the formatting of $num is preserved.
54 2         8 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
55             # Most human languages put the number phrase before the qualified phrase.
56             }
57              
58              
59             sub numerate {
60             # return this lexical item in a form appropriate to this number
61 0     0 1 0 my($handle, $num, @forms) = @_;
62 0         0 my $s = ($num == 1);
63              
64 0 0       0 return '' unless @forms;
65 0 0       0 if(@forms == 1) { # only the headword form specified
66 0 0       0 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
67             }
68             else { # sing and plural were specified
69 0 0       0 return $s ? $forms[0] : $forms[1];
70             }
71             }
72              
73             #--------------------------------------------------------------------------
74              
75             sub numf {
76 2     2 1 4 my($handle, $num) = @_[0,1];
77 2 50 33     13 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
      33        
78 2         4 $num += 0; # Just use normal integer stringification.
79             # Specifically, don't let %G turn ten million into 1E+007
80             }
81             else {
82 0         0 $num = CORE::sprintf('%G', $num);
83             # "CORE::" is there to avoid confusion with the above sub sprintf.
84             }
85 2         4 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5
  0         0  
86             # The initial \d+ gobbles as many digits as it can, and then we
87             # backtrack so it un-eats the rightmost three, and then we
88             # insert the comma there.
89              
90 2 50 33     10 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
91             # This is just a lame hack instead of using Number::Format
92 2         14 return $num;
93             }
94              
95             sub sprintf {
96 17     17   123 no integer;
  17         31  
  17         72  
97 6     6 1 21 my($handle, $format, @params) = @_;
98 6         30 return CORE::sprintf($format, @params);
99             # "CORE::" is there to avoid confusion with myself!
100             }
101              
102             #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
103              
104 17     17   1067 use integer; # vroom vroom... applies to the whole rest of the module
  17         54  
  17         88  
105              
106             sub language_tag {
107 0   0 0 1 0 my $it = ref($_[0]) || $_[0];
108 0 0       0 return undef unless $it =~ m/([^':]+)(?:::)?$/s;
109 0         0 $it = lc($1);
110 0         0 $it =~ tr<_><->;
111 0         0 return $it;
112             }
113              
114             sub encoding {
115 0     0 1 0 my $it = $_[0];
116             return(
117 0   0     0 (ref($it) && $it->{'encoding'})
118             || 'iso-8859-1' # Latin-1
119             );
120             }
121              
122             #--------------------------------------------------------------------------
123              
124 17     17 0 53 sub fallback_languages { return('i-default', 'en', 'en-US') }
125              
126 17     17 0 37 sub fallback_language_classes { return () }
127              
128             #--------------------------------------------------------------------------
129              
130             sub fail_with { # an actual attribute method!
131 1     1 1 18 my($handle, @params) = @_;
132 1 50       4 return unless ref($handle);
133 1 50       4 $handle->{'fail'} = $params[0] if @params;
134 1         2 return $handle->{'fail'};
135             }
136              
137             #--------------------------------------------------------------------------
138              
139             sub _exclude {
140 40     40   62 my ( $handle, @methods ) = @_;
141              
142 40 100       130 unless ( defined $handle->{'denylist'} ) {
143 17     17   4518 no strict 'refs';
  17         39  
  17         36417  
144              
145             # Don't let people call methods they're not supposed to from maketext.
146             # Explicitly exclude all methods in this package that start with an
147             # underscore on principle.
148             $handle->{'denylist'} = {
149 408         779 map { $_ => 1 } (
150             qw/
151             blacklist
152             denylist
153             encoding
154             fail_with
155             failure_handler_auto
156             fallback_language_classes
157             fallback_languages
158             get_handle
159             init
160             language_tag
161             maketext
162             new
163             whitelist
164             allowlist
165 17         70 /, grep { /^_/ } keys %{ __PACKAGE__ . "::" }
  650         974  
  17         220  
166             ),
167             };
168             }
169              
170 40 100       176 if ( scalar @methods ) {
171 6         9 $handle->{'denylist'} = { %{ $handle->{'denylist'} }, map { $_ => 1 } @methods };
  6         36  
  8         57  
172             }
173              
174 40         89 delete $handle->{'_external_lex_cache'};
175 40         60 return;
176             }
177              
178             sub blacklist {
179 20     20 1 1777 my ( $handle, @methods ) = @_;
180 20         70 _exclude ( $handle, @methods );
181 20         27 return;
182             }
183              
184             sub denylist {
185 20     20 1 1692 my ( $handle, @methods ) = @_;
186 20         45 _exclude ( $handle, @methods );
187 20         23 return;
188             }
189              
190             sub _include {
191 6     6   10 my ( $handle, @methods ) = @_;
192 6 50       14 if ( scalar @methods ) {
193 6 100       15 $handle->{'allowlist'} = {} unless defined $handle->{'allowlist'};
194 6         7 $handle->{'allowlist'} = { %{ $handle->{'allowlist'} }, map { $_ => 1 } @methods };
  6         18  
  6         21  
195             }
196              
197 6         27 delete $handle->{'_external_lex_cache'};
198 6         9 return;
199             }
200              
201             sub whitelist {
202 3     3 1 2645 my ( $handle, @methods ) = @_;
203 3         7 _include ( $handle, @methods );
204 3         6 return;
205             }
206              
207             sub allowlist {
208 3     3 1 2505 my ( $handle, @methods ) = @_;
209 3         10 _include ( $handle, @methods );
210 3         3 return;
211             }
212              
213             #--------------------------------------------------------------------------
214              
215             sub failure_handler_auto {
216             # Meant to be used like:
217             # $handle->fail_with('failure_handler_auto')
218              
219 5     5 1 9 my $handle = shift;
220 5         6 my $phrase = shift;
221              
222 5   100     22 $handle->{'failure_lex'} ||= {};
223 5         32 my $lex = $handle->{'failure_lex'};
224              
225 5   66     43 my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase));
      33        
226              
227             # Dumbly copied from sub maketext:
228 5 100       13 return ${$value} if ref($value) eq 'SCALAR';
  3         18  
229 2 50       8 return $value if ref($value) ne 'CODE';
230             {
231 2         4 local $SIG{'__DIE__'};
  2         7  
232 2         3 eval { $value = &$value($handle, @_) };
  2         29  
233             }
234             # If we make it here, there was an exception thrown in the
235             # call to $value, and so scream:
236 2 100       15 if($@) {
237             # pretty up the error message
238 1         2 $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
239             {\n in bracket code [compiled line $1],}s;
240 1         85 #$err =~ s/\n?$/\n/s;
241             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
242             # Rather unexpected, but suppose that the sub tried calling
243             # a method that didn't exist.
244             }
245 1         5 else {
246             return $value;
247             }
248             }
249              
250             #==========================================================================
251              
252             sub new {
253 17   33 17 0 108 # Nothing fancy!
254 17         53 my $class = ref($_[0]) || $_[0];
255 17         67 my $handle = bless {}, $class;
256 17         90 $handle->blacklist;
257 17         70 $handle->denylist;
258 17         79 $handle->init;
259             return $handle;
260             }
261 17     17 0 31  
262             sub init { return } # no-op
263              
264             ###########################################################################
265              
266             sub maketext {
267 52 50   52 0 19554 # Remember, this can fail. Failure is controllable many ways.
268             Carp::croak 'maketext requires at least one parameter' unless @_ > 1;
269 52         142  
270 52 50 33     211 my($handle, $phrase) = splice(@_,0,2);
271             Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase));
272              
273             # backup $@ in case it's still being used in the calling code.
274 52         85 # If no failures, we'll re-set it back to what it was later.
275             my $at = $@;
276              
277 52         94 # Copy @_ case one of its elements is $@.
278             @_ = @_;
279              
280             # Look up the value:
281 52         70  
282 52 100       125 my $value;
283 1         1 if (exists $handle->{'_external_lex_cache'}{$phrase}) {
284 1         3 DEBUG and warn "* Using external lex cache version of \"$phrase\"\n";
285             $value = $handle->{'_external_lex_cache'}{$phrase};
286             }
287 51         61 else {
288 51 100 33     248 foreach my $h_r (
289             @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs }
290 51         82 ) {
291 51 50 66     301 DEBUG and warn "* Looking up \"$phrase\" in $h_r\n";
    100          
    100          
    100          
292 11         13 if(exists $h_r->{$phrase}) {
293 11 50       37 DEBUG and warn " Found \"$phrase\" in $h_r\n";
294             unless(ref($value = $h_r->{$phrase})) {
295 11 100       37 # Nonref means it's not yet compiled. Compile and replace.
296 1         18 if ($handle->{'use_external_lex_cache'}) {
297             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value);
298             }
299 10         37 else {
300             $value = $h_r->{$phrase} = $handle->_compile($value);
301             }
302 11         21 }
303             last;
304             }
305             # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;"
306             # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;"
307             elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) {
308 36         46 # it's an auto lex, and this is an autoable key!
309 36 100       68 DEBUG and warn " Automaking \"$phrase\" into $h_r\n";
310 33         85 if ($handle->{'use_external_lex_cache'}) {
311             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase);
312             }
313 3         7 else {
314             $value = $h_r->{$phrase} = $handle->_compile($phrase);
315 17         31 }
316             last;
317 4         6 }
318             DEBUG>1 and print " Not found in $h_r, nor automakable\n";
319             # else keep looking
320             }
321             }
322 33 100       83  
323 4         6 unless(defined($value)) {
324 4 50 33     20 DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n";
325 4         7 if(ref($handle) and $handle->{'fail'}) {
326 4         4 DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n";
327 4 50       9 my $fail;
328 0         0 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
329 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
  0         0  
330             return &{$fail}($handle, $phrase, @_);
331             # If it ever returns, it should return a good value.
332             }
333 4         7 else { # It's a method name
334 4         15 $@ = $at; # Put $@ back in case we altered it along the way.
335             return $handle->$fail($phrase, @_);
336             # If it ever returns, it should return a good value.
337             }
338             }
339             else {
340 0         0 # All we know how to do is this;
341             Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
342             }
343             }
344 29 100       89  
345 6         37 if(ref($value) eq 'SCALAR'){
346 6         29 $@ = $at; # Put $@ back in case we altered it along the way.
347             return $$value ;
348 23 50       52 }
349 0         0 if(ref($value) ne 'CODE'){
350 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
351             return $value ;
352             }
353              
354 23         30 {
  23         72  
355 23         35 local $SIG{'__DIE__'};
  23         505  
356             eval { $value = &$value($handle, @_) };
357             }
358             # If we make it here, there was an exception thrown in the
359 23 50       119 # call to $value, and so scream:
360             if ($@) {
361 0         0 # pretty up the error message
362             $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
363 0         0 {\n in bracket code [compiled line $1],}s;
364             #$err =~ s/\n?$/\n/s;
365             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
366             # Rather unexpected, but suppose that the sub tried calling
367             # a method that didn't exist.
368 23         34 }
369 23         80 else {
370             $@ = $at; # Put $@ back in case we altered it along the way.
371 0         0 return $value;
372             }
373             $@ = $at; # Put $@ back in case we altered it along the way.
374             }
375              
376             ###########################################################################
377              
378             sub get_handle { # This is a constructor and, yes, it CAN FAIL.
379             # Its class argument has to be the base class for the current
380 17     17 0 2622 # application's l10n files.
381 17   33     98  
382             my($base_class, @languages) = @_;
383             $base_class = ref($base_class) || $base_class;
384 17 100       52 # Complain if they use __PACKAGE__ as a project base class?
385 11         17  
386 11 50       31 if( @languages ) {
387             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
388 11         57 if($USING_LANGUAGE_TAGS) { # An explicit language-list was given!
  11         270  
389             @languages =
390             map {; $_, I18N::LangTags::alternate_language_tags($_) }
391             # Catch alternation
392             map I18N::LangTags::locale2language_tag($_),
393             # If it's a lg tag, fine, pass thru (untainted)
394             # If it's a locale ID, try converting to a lg tag (untainted),
395 11         328 # otherwise nix it.
396             @languages;
397             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
398             }
399 6         16 }
400             else {
401             @languages = $base_class->_ambient_langprefs;
402 17         844 }
403              
404 17         31 @languages = $base_class->_langtag_munging(@languages);
405 17         42  
  104         231  
406 24 50       123 my %seen;
407 24 100 66     144 foreach my $module_name ( map { $base_class . '::' . $_ } @languages ) {
408             next unless length $module_name; # sanity
409 17         99 next if $seen{$module_name}++ # Already been here, and it was no-go
410             || !&_try_use($module_name); # Try to use() it, but can't it.
411             return($module_name->new); # Make it!
412 0         0 }
413              
414             return undef; # Fail!
415             }
416              
417             ###########################################################################
418 17     17   53  
419             sub _langtag_munging {
420             my($base_class, @languages) = @_;
421              
422             # We have all these DEBUG statements because otherwise it's hard as hell
423 17         25 # to diagnose if/when something goes wrong.
424              
425 17 50       44 DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n";
426 17         18  
427 17         63 if($USING_LANGUAGE_TAGS) {
428             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
429 17         49 @languages = $base_class->_add_supers( @languages );
430 17         324  
431             push @languages, I18N::LangTags::panic_languages(@languages);
432             DEBUG and warn "After adding panic languages:\n",
433 17         81 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
434              
435 17         26 push @languages, $base_class->fallback_languages;
436             # You are free to override fallback_languages to return empty-list!
437             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
438              
439 17         46 @languages = # final bit of processing to turn them into classname things
  104         125  
440 104         125 map {
441 104         124 my $it = $_; # copy
442 104         162 $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
443             $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
444             $it;
445 17         25 } @languages
446             ;
447             DEBUG and warn "Nearing end of munging:\n",
448             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
449 0         0 }
450             else {
451             DEBUG and warn "Bypassing language-tags.\n",
452             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
453 17         21 }
454              
455             DEBUG and warn "Before adding fallback classes:\n",
456 17         68 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
457              
458             push @languages, $base_class->fallback_language_classes;
459 17         21 # You are free to override that to return whatever.
460              
461             DEBUG and warn "Finally:\n",
462 17         54 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
463              
464             return @languages;
465             }
466              
467             ###########################################################################
468 6     6   16  
469             sub _ambient_langprefs {
470             return I18N::LangTags::Detect::detect();
471             }
472              
473             ###########################################################################
474 60     60   19441  
475             sub _add_supers {
476 60 50       156 my($base_class, @languages) = @_;
    100          
477              
478 0         0 if (!$MATCH_SUPERS) {
479             # Nothing
480             DEBUG and warn "Bypassing any super-matching.\n",
481             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
482              
483 42         68 }
484             elsif( $MATCH_SUPERS_TIGHTLY ) {
485 42         116 DEBUG and warn "Before adding new supers tightly:\n",
486 42         6306 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
487             @languages = I18N::LangTags::implicate_supers( @languages );
488             DEBUG and warn "After adding new supers tightly:\n",
489             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
490              
491 18         19 }
492             else {
493 18         39 DEBUG and warn "Before adding supers to end:\n",
494 18         1352 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
495             @languages = I18N::LangTags::implicate_supers_strictly( @languages );
496             DEBUG and warn "After adding supers to end:\n",
497             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
498 60         156 }
499              
500             return @languages;
501             }
502              
503             ###########################################################################
504             #
505             # This is where most people should stop reading.
506             #
507             ###########################################################################
508              
509             my %tried = ();
510             # memoization of whether we've used this module, or found it unusable.
511              
512 26 100   26   929 sub _try_use { # Basically a wrapper around "require Modulename"
513             # "Many men have tried..." "They tried and failed?" "They tried and died."
514 18         52 return $tried{$_[0]} if exists $tried{$_[0]}; # memoization
515 17     17   133  
  17         41  
  17         533  
  18         31  
516 17     17   95 my $module = $_[0]; # ASSUME sane module name!
  17         29  
  17         3454  
517             { no strict 'refs';
518 18 100 100     25 no warnings 'once';
  18         183  
  7         44  
519             return($tried{$module} = 1)
520             if %{$module . '::Lexicon'} or @{$module . '::ISA'};
521             # weird case: we never use'd it, but there it is!
522 6         8 }
523              
524 6         19 DEBUG and warn " About to use $module ...\n";
525 6         8  
526 6         31 local $SIG{'__DIE__'};
527 6 100       17 local $@;
528 6         348 local @INC = @INC;
529             pop @INC if $INC[-1] eq '.';
530 6 100       35 eval "require $module"; # used to be "use $module", but no point in that.
531 5         5  
532 5         38 if($@) {
533             DEBUG and warn "Error using $module \: $@\n";
534             return $tried{$module} = 0;
535 1         1 }
536 1         8 else {
537             DEBUG and warn " OK, $module is used\n";
538             return $tried{$module} = 1;
539             }
540             }
541              
542             #--------------------------------------------------------------------------
543              
544 17     17   112 sub _lex_refs { # report the lexicon references for this handle's class
  17         33  
  17         559  
545 17     17   93 # returns an arrayREF!
  17         30  
  17         31377  
546 36   66 36   133 no strict 'refs';
547 36         46 no warnings 'once';
548 36 50       93 my $class = ref($_[0]) || $_[0];
549             DEBUG and warn "Lex refs lookup on $class\n";
550 36         81 return $isa_scan{$class} if exists $isa_scan{$class}; # memoization!
551 36 100       101  
552             my @lex_refs;
553 36 100       50 my $seen_r = ref($_[1]) ? $_[1] : {};
  36         191  
554 18         34  
  18         67  
555             if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
556 18         34 push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
557             DEBUG and warn '%' . $class . '::Lexicon contains ',
558             scalar(keys %{$class . '::Lexicon'}), " entries\n";
559             }
560              
561 36         54 # Implements depth(height?)-first recursive searching of superclasses.
  36         141  
562 24         66 # In hindsight, I suppose I could have just used Class::ISA!
563 24 50       116 foreach my $superclass (@{$class . '::ISA'}) {
564 24         40 DEBUG and warn " Super-class search into $superclass\n";
  24         91  
565             next if $seen_r->{$superclass}++;
566             push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself
567 36         67 }
568 36         108  
569             $isa_scan{$class} = \@lex_refs; # save for next time
570             return \@lex_refs;
571 0     0 0 0 }
  0         0  
572              
573             sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
574              
575             #--------------------------------------------------------------------------
576              
577             sub _compile {
578             # This big scary routine compiles an entry.
579             # It returns either a coderef if there's brackety bits in this, or
580 51     51   1009 # otherwise a ref to a scalar.
581              
582             my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344
583              
584             # The while() regex is more expensive than this check on strings that don't need a compile.
585 51 100       221 # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
586             # on strings that don't need compiling.
587 43         58 return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
588              
589 43         54 my $handle = $_[0];
590 43         72  
591 43         70 my(@code);
592 43         67 my(@c) = (''); # "chunks" -- scratch.
593             my $call_count = 0;
594 43         55 my $big_pile = '';
  43         60  
595 43         59 {
596             my $in_group = 0; # start out outside a group
597 43         202 my($m, @params); # scratch
598              
599             while($string_to_compile =~ # Iterate over chunks.
600             m/(
601             [^\~\[\]]+ # non-~[] stuff (Capture everything else here)
602             |
603             ~. # ~[, ~], ~~, ~other
604             |
605             \[ # [ presumably opening a group
606             |
607             \] # ] presumably closing a group
608             |
609             ~ # terminal ~ ?
610             |
611             $
612 162         208 )/xgs
613             ) {
614 162 100 100     676 DEBUG>2 and warn qq{ "$1"\n};
    100          
    50          
    0          
    0          
    0          
    0          
    0          
615              
616             if($1 eq '[' or $1 eq '') { # "[" or end
617 66 50       152 # Whether this is "[" or end, force processing of any
618 0 0       0 # preceding literal.
619 0         0 if($in_group) {
620             if($1 eq '') {
621             $handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
622 0         0 }
623             else {
624             $handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
625             }
626 66 100       219 }
627 24         35 else {
628             if ($1 eq '') {
629             DEBUG>2 and warn " [end-string]\n";
630 42         67 }
631             else {
632 66 50       131 $in_group = 1;
633 66 100       244 }
634             die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
635 10         17 if(length $c[-1]) {
636 10 100 66     63 # Now actually processing the preceding literal
637             $big_pile .= $c[-1];
638             if($USE_LITERALS and (
639             (ord('A') == 65)
640             ? $c[-1] !~ m/[^\x20-\x7E]/s
641             # ASCII very safe chars
642             : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
643             # EBCDIC very safe chars
644 7         17 )) {
645 7         18 # normal case -- all very safe chars
646 7         37 $c[-1] =~ s/'/\\'/g;
647             push @code, q{ '} . $c[-1] . "',\n";
648             $c[-1] = ''; # reuse this slot
649 3         11 }
650 3         10 else {
651 3         12 $c[-1] =~ s/\\\\/\\/g;
652             push @code, ' $c[' . $#c . "],\n";
653             push @c, ''; # new chunk
654             }
655             }
656             # else just ignore the empty string.
657             }
658              
659             }
660 43 100       79 elsif($1 eq ']') { # "]"
661 42         54 # close group -- go back in-band
662             if($in_group) {
663 42         45 $in_group = 0;
664              
665             DEBUG>2 and warn " --Closing group [$c[-1]]\n";
666              
667 42 50 33     228 # And now process the group...
668 0         0  
669 0         0 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
670 0         0 DEBUG>2 and warn " -- (Ignoring)\n";
671             $c[-1] = ''; # reset out chink
672             next;
673             }
674              
675 42         137 #$c[-1] =~ s/^\s+//s;
676             #$c[-1] =~ s/\s+$//s;
677             ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
678              
679 42         59 # A bit of a hack -- we've turned "~,"'s into DELs, so turn
680 42         78 # 'em into real commas here.
  73         149  
681             if (ord('A') == 65) { # ASCII, etc
682             foreach($m, @params) { tr/\x7F/,/ }
683             }
684             else { # EBCDIC (1047, 0037, POSIX-BC)
685             # Thanks to Peter Prymmer for the EBCDIC handling
686             foreach($m, @params) { tr/\x07/,/ }
687             }
688 42 100 66     235  
    100          
    50          
689             # Special-case handling of some method names:
690 5         11 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
691 5         8 # Treat [_1,...] as [,_1,...], etc.
692             unshift @params, $m;
693             $m = '';
694 1         2 }
695             elsif($m eq '*') {
696             $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
697 0         0 }
698             elsif($m eq '#') {
699             $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
700             }
701 42 100 33     422  
    100 66        
      33        
      66        
      66        
      33        
702             # Most common case: a simple, legal-looking method name
703 5         9 if($m eq '') {
704             # 0-length method name means to just interpolate:
705             push @code, ' (';
706             }
707             elsif($m =~ /^\w+$/s
708             && !$handle->{'blacklist'}{$m}
709             && !$handle->{'denylist'}{$m}
710             && ( !defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m} )
711             && ( !defined $handle->{'allowlist'} || $handle->{'allowlist'}{$m} )
712 19         49 # exclude anything fancy and restrict to the allowlist/denylist (and historical whitelist/blacklist).
713             ) {
714             push @code, ' $_[0]->' . $m . '(';
715             }
716 18         81 else {
717             # TODO: implement something? or just too icky to consider?
718             $handle->_die_pointing(
719             $string_to_compile,
720             "Can't use \"$m\" as a method name in bracket group",
721             2 + length($c[-1])
722             );
723 24         48 }
724 24         40  
725             pop @c; # we don't need that chunk anymore
726 24         49 ++$call_count;
727 24 50 33     159  
    100          
    50          
728             foreach my $p (@params) {
729 0         0 if($p eq '_*') {
730             # Meaning: all parameters except $_[0]
731             $code[-1] .= ' @_[1 .. $#_], ';
732             # and yes, that does the right thing for all @_ < 3
733             }
734 10         47 elsif($p =~ m/^_(-?\d+)$/s) {
735             # _3 meaning $_[3]
736             $code[-1] .= '$_[' . (0 + $1) . '], ';
737             }
738             elsif($USE_LITERALS and (
739             (ord('A') == 65)
740             ? $p !~ m/[^\x20-\x7E]/s
741             # ASCII very safe chars
742             : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
743             # EBCDIC very safe chars
744 14         24 )) {
745 14         34 # Normal case: a literal containing only safe characters
746             $p =~ s/'/\\'/g;
747             $code[-1] .= q{'} . $p . q{', };
748             }
749 0         0 else {
750 0         0 # Stow it on the chunk-stack, and just refer to that.
751             push @c, $p;
752             push @code, ' $c[' . $#c . '], ';
753 24         38 }
754             }
755 24         89 $code[-1] .= "),\n";
756              
757             push @c, '';
758 1         5 }
759             else {
760             $handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
761             }
762              
763             }
764             elsif(substr($1,0,1) ne '~') {
765 53         194 # it's stuff not containing "~" or "[" or "]"
766 53         108 # i.e., a literal blob
767 53         262 my $text = $1;
768             $text =~ s/\\/\\\\/g;
769             $c[-1] .= $text;
770              
771 0         0 }
772             elsif($1 eq '~~') { # "~~"
773             $c[-1] .= '~';
774              
775 0         0 }
776             elsif($1 eq '~[') { # "~["
777             $c[-1] .= '[';
778              
779 0         0 }
780             elsif($1 eq '~]') { # "~]"
781             $c[-1] .= ']';
782              
783 0 0       0 }
784             elsif($1 eq '~,') { # "~,"
785             if($in_group) {
786 0         0 # This is a hack, based on the assumption that no-one will actually
787 0         0 # want a DEL inside a bracket group. Let's hope that's it's true.
788             if (ord('A') == 65) { # ASCII etc
789             $c[-1] .= "\x7F";
790             }
791             else { # EBCDIC (cp 1047, 0037, POSIX-BC)
792             $c[-1] .= "\x07";
793             }
794 0         0 }
795             else {
796             $c[-1] .= '~,';
797             }
798              
799 0         0 }
800             elsif($1 eq '~') { # possible only at string-end, it seems.
801             $c[-1] .= '~';
802              
803             }
804             else {
805 0         0 # It's a "~X" where X is not a special character.
806 0         0 # Consider it a literal ~ and X.
807 0         0 my $text = $1;
808             $text =~ s/\\/\\\\/g;
809             $c[-1] .= $text;
810             }
811             }
812 24 50       58 }
813 24         45  
814             if($call_count) {
815             undef $big_pile; # Well, nevermind that.
816             }
817             else {
818 0         0 # It's all literals! Ahwell, that can happen.
819             # So don't bother with the eval. Return a SCALAR reference.
820             return \$big_pile;
821 24 50 33     125 }
822 24         33  
823 24 50       85 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
    100          
824 0         0 DEBUG and warn scalar(@c), " chunks under closure\n";
825 0         0 if(@code == 0) { # not possible?
826             DEBUG and warn "Empty code\n";
827             return \'';
828 10         27 }
829             elsif(@code > 1) { # most cases, presumably!
830 24         47 unshift @code, "join '',\n";
831 24         62 }
832             unshift @code, "use strict; sub {\n";
833 24         33 push @code, "}\n";
834 24     10   1550  
  10     6   70  
  10     6   17  
  10         488  
  6         36  
  6         17  
  6         266  
  6         36  
  6         8  
  6         222  
835 24 50       94 DEBUG and warn @code;
836 24         99 my $sub = eval(join '', @code);
837             die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
838             return $sub;
839             }
840              
841             #--------------------------------------------------------------------------
842              
843 19     19   28 sub _die_pointing {
844 19   33     41 # This is used by _compile to throw a fatal error
845             my $target = shift;
846             $target = ref($target) || $target; # class name
847 19         34 # ...leaving $_[0] the error-causing text, and $_[1] the error message
848              
849 19         24 my $i = index($_[0], "\n");
850 19 100       48  
851 19 100       37 my $pointy;
852 18         22 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
853             if($pos < 1) {
854             $pointy = "^=== near there\n";
855 1         2 }
856 1 50 33     17 else { # we need to space over
      33        
857             my $first_tab = index($_[0], "\t");
858             if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
859 1         4 # No tabs, or the first tab is harmlessly after where we will point to,
860             # AND we're far enough from the margin that we can draw a proper arrow.
861             $pointy = ('=' x $pos) . "^ near there\n";
862             }
863 0         0 else {
864 0         0 # tabs screw everything up!
865             $pointy = substr($_[0],0,$pos);
866 0         0 $pointy =~ tr/\t //cd;
867             # make everything into whitespace, but preserving tabs
868             $pointy .= "^=== near there\n";
869             }
870 19         41 }
871              
872 19 50       30 my $errmsg = "$_[1], in\:\n$_[0]";
    0          
873              
874 19         39 if($i == -1) {
875             # No newline.
876             $errmsg .= "\n" . $pointy;
877             }
878 0         0 elsif($i == (length($_[0]) - 1) ) {
879             # Already has a newline at end.
880             $errmsg .= $pointy;
881             }
882             else {
883 19         1815 # don't bother with the pointy bit, I guess.
884             }
885             Carp::croak( "$errmsg via $target, as used" );
886             }
887              
888             1;