File Coverage

blib/lib/Locale/Maketext.pm
Criterion Covered Total %
statement 328 386 84.9
branch 113 176 64.2
condition 46 100 46.0
subroutine 40 44 90.9
pod 12 19 63.1
total 539 725 74.3


line stmt bran cond sub pod time code
1             package Locale::Maketext;
2 19     19   848264 use strict;
  19         168  
  19         604  
3             our $USE_LITERALS;
4 17     17   84 use Carp ();
  17         27  
  17         196  
5 17     17   8017 use I18N::LangTags ();
  17         42732  
  17         370  
6 17     17   6999 use I18N::LangTags::Detect ();
  17         26828  
  17         727  
7              
8             #--------------------------------------------------------------------------
9              
10 17 50   17   86 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
  17         1188  
11             # define the constant 'DEBUG' at compile-time
12              
13             # turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
14             # use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
15             BEGIN {
16              
17             # if we have it || we can load it
18 17 50 33 17   120 if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) {
  17         98  
  17         8074  
19 17         11824 utf8->import();
20 17         5992 DEBUG and warn " utf8 on for _compile()\n";
21             }
22             else {
23 0         0 DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
24             }
25             }
26              
27              
28             our $VERSION = '1.30';
29             our @ISA = ();
30              
31             our $MATCH_SUPERS = 1;
32             our $MATCH_SUPERS_TIGHTLY = 1;
33             our $USING_LANGUAGE_TAGS = 1;
34             # Turning this off is somewhat of a security risk in that little or no
35             # checking will be done on the legality of tokens passed to the
36             # eval("use $module_name") in _try_use. If you turn this off, you have
37             # to do your own taint checking.
38              
39             $USE_LITERALS = 1 unless defined $USE_LITERALS;
40             # a hint for compiling bracket-notation things.
41              
42             my %isa_scan = ();
43              
44             ###########################################################################
45              
46             sub quant {
47 2     2 1 7 my($handle, $num, @forms) = @_;
48              
49 2 50       5 return $num if @forms == 0; # what should this mean?
50 2 50 33     6 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
51              
52             # Normal case:
53             # Note that the formatting of $num is preserved.
54 2         10 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
55             # Most human languages put the number phrase before the qualified phrase.
56             }
57              
58              
59             sub numerate {
60             # return this lexical item in a form appropriate to this number
61 0     0 1 0 my($handle, $num, @forms) = @_;
62 0         0 my $s = ($num == 1);
63              
64 0 0       0 return '' unless @forms;
65 0 0       0 if(@forms == 1) { # only the headword form specified
66 0 0       0 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
67             }
68             else { # sing and plural were specified
69 0 0       0 return $s ? $forms[0] : $forms[1];
70             }
71             }
72              
73             #--------------------------------------------------------------------------
74              
75             sub numf {
76 2     2 1 7 my($handle, $num) = @_[0,1];
77 2 50 33     14 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
      33        
78 2         3 $num += 0; # Just use normal integer stringification.
79             # Specifically, don't let %G turn ten million into 1E+007
80             }
81             else {
82 0         0 $num = CORE::sprintf('%G', $num);
83             # "CORE::" is there to avoid confusion with the above sub sprintf.
84             }
85 2         6 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5
  0         0  
86             # The initial \d+ gobbles as many digits as it can, and then we
87             # backtrack so it un-eats the rightmost three, and then we
88             # insert the comma there.
89              
90 2 50 33     12 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
91             # This is just a lame hack instead of using Number::Format
92 2         12 return $num;
93             }
94              
95             sub sprintf {
96 17     17   122 no integer;
  17         28  
  17         83  
97 6     6 1 22 my($handle, $format, @params) = @_;
98 6         31 return CORE::sprintf($format, @params);
99             # "CORE::" is there to avoid confusion with myself!
100             }
101              
102             #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
103              
104 17     17   1135 use integer; # vroom vroom... applies to the whole rest of the module
  17         44  
  17         69  
105              
106             sub language_tag {
107 0   0 0 1 0 my $it = ref($_[0]) || $_[0];
108 0 0       0 return undef unless $it =~ m/([^':]+)(?:::)?$/s;
109 0         0 $it = lc($1);
110 0         0 $it =~ tr<_><->;
111 0         0 return $it;
112             }
113              
114             sub encoding {
115 0     0 1 0 my $it = $_[0];
116             return(
117 0   0     0 (ref($it) && $it->{'encoding'})
118             || 'iso-8859-1' # Latin-1
119             );
120             }
121              
122             #--------------------------------------------------------------------------
123              
124 17     17 0 51 sub fallback_languages { return('i-default', 'en', 'en-US') }
125              
126 17     17 0 31 sub fallback_language_classes { return () }
127              
128             #--------------------------------------------------------------------------
129              
130             sub fail_with { # an actual attribute method!
131 1     1 1 9 my($handle, @params) = @_;
132 1 50       4 return unless ref($handle);
133 1 50       4 $handle->{'fail'} = $params[0] if @params;
134 1         3 return $handle->{'fail'};
135             }
136              
137             #--------------------------------------------------------------------------
138              
139             sub _exclude {
140 40     40   66 my ( $handle, @methods ) = @_;
141              
142 40 100       138 unless ( defined $handle->{'denylist'} ) {
143 17     17   4359 no strict 'refs';
  17         39  
  17         34421  
144              
145             # Don't let people call methods they're not supposed to from maketext.
146             # Explicitly exclude all methods in this package that start with an
147             # underscore on principle.
148             $handle->{'denylist'} = {
149 408         750 map { $_ => 1 } (
150             qw/
151             blacklist
152             denylist
153             encoding
154             fail_with
155             failure_handler_auto
156             fallback_language_classes
157             fallback_languages
158             get_handle
159             init
160             language_tag
161             maketext
162             new
163             whitelist
164             allowlist
165 17         54 /, grep { /^_/ } keys %{ __PACKAGE__ . "::" }
  650         1005  
  17         240  
166             ),
167             };
168             }
169              
170 40 100       150 if ( scalar @methods ) {
171 6         7 $handle->{'denylist'} = { %{ $handle->{'denylist'} }, map { $_ => 1 } @methods };
  6         38  
  8         59  
172             }
173              
174 40         88 delete $handle->{'_external_lex_cache'};
175 40         66 return;
176             }
177              
178             sub blacklist {
179 20     20 1 2022 my ( $handle, @methods ) = @_;
180 20         71 _exclude ( $handle, @methods );
181 20         26 return;
182             }
183              
184             sub denylist {
185 20     20 1 2055 my ( $handle, @methods ) = @_;
186 20         48 _exclude ( $handle, @methods );
187 20         28 return;
188             }
189              
190             sub _include {
191 6     6   12 my ( $handle, @methods ) = @_;
192 6 50       15 if ( scalar @methods ) {
193 6 100       14 $handle->{'allowlist'} = {} unless defined $handle->{'allowlist'};
194 6         11 $handle->{'allowlist'} = { %{ $handle->{'allowlist'} }, map { $_ => 1 } @methods };
  6         16  
  6         24  
195             }
196              
197 6         26 delete $handle->{'_external_lex_cache'};
198 6         12 return;
199             }
200              
201             sub whitelist {
202 3     3 1 3638 my ( $handle, @methods ) = @_;
203 3         8 _include ( $handle, @methods );
204 3         5 return;
205             }
206              
207             sub allowlist {
208 3     3 1 3443 my ( $handle, @methods ) = @_;
209 3         8 _include ( $handle, @methods );
210 3         4 return;
211             }
212              
213             #--------------------------------------------------------------------------
214              
215             sub failure_handler_auto {
216             # Meant to be used like:
217             # $handle->fail_with('failure_handler_auto')
218              
219 5     5 1 7 my $handle = shift;
220 5         6 my $phrase = shift;
221              
222 5   100     29 $handle->{'failure_lex'} ||= {};
223 5         33 my $lex = $handle->{'failure_lex'};
224              
225 5   66     37 my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase));
      33        
226              
227             # Dumbly copied from sub maketext:
228 5 100       15 return ${$value} if ref($value) eq 'SCALAR';
  3         18  
229 2 50       6 return $value if ref($value) ne 'CODE';
230             {
231 2         3 local $SIG{'__DIE__'};
  2         7  
232 2         4 eval { $value = &$value($handle, @_) };
  2         28  
233             }
234             # If we make it here, there was an exception thrown in the
235             # call to $value, and so scream:
236 2 100       13 if($@) {
237             # pretty up the error message
238 1         4 $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
239             {\n in bracket code [compiled line $1],}s;
240 1         92 #$err =~ s/\n?$/\n/s;
241             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
242             # Rather unexpected, but suppose that the sub tried calling
243             # a method that didn't exist.
244             }
245 1         6 else {
246             return $value;
247             }
248             }
249              
250             #==========================================================================
251              
252             sub new {
253 17   33 17 0 119 # Nothing fancy!
254 17         58 my $class = ref($_[0]) || $_[0];
255 17         85 my $handle = bless {}, $class;
256 17         90 $handle->blacklist;
257 17         74 $handle->denylist;
258 17         80 $handle->init;
259             return $handle;
260             }
261 17     17 0 26  
262             sub init { return } # no-op
263              
264             ###########################################################################
265              
266             sub maketext {
267 52 50   52 0 24122 # Remember, this can fail. Failure is controllable many ways.
268             Carp::croak 'maketext requires at least one parameter' unless @_ > 1;
269 52         144  
270 52 50 33     220 my($handle, $phrase) = splice(@_,0,2);
271             Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase));
272              
273             # backup $@ in case it's still being used in the calling code.
274 52         84 # If no failures, we'll re-set it back to what it was later.
275             my $at = $@;
276              
277 52         88 # Copy @_ case one of its elements is $@.
278             @_ = @_;
279              
280             # Look up the value:
281 52         111  
282 52 100       142 my $value;
283 1         1 if (exists $handle->{'_external_lex_cache'}{$phrase}) {
284 1         2 DEBUG and warn "* Using external lex cache version of \"$phrase\"\n";
285             $value = $handle->{'_external_lex_cache'}{$phrase};
286             }
287 51         66 else {
288 51 100 33     251 foreach my $h_r (
289             @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs }
290 51         83 ) {
291 51 50 66     291 DEBUG and warn "* Looking up \"$phrase\" in $h_r\n";
    100          
    100          
    100          
292 11         18 if(exists $h_r->{$phrase}) {
293 11 50       39 DEBUG and warn " Found \"$phrase\" in $h_r\n";
294             unless(ref($value = $h_r->{$phrase})) {
295 11 100       33 # Nonref means it's not yet compiled. Compile and replace.
296 1         11 if ($handle->{'use_external_lex_cache'}) {
297             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value);
298             }
299 10         44 else {
300             $value = $h_r->{$phrase} = $handle->_compile($value);
301             }
302 11         27 }
303             last;
304             }
305             # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;"
306             # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;"
307             elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) {
308 36         48 # it's an auto lex, and this is an autoable key!
309 36 100       73 DEBUG and warn " Automaking \"$phrase\" into $h_r\n";
310 33         82 if ($handle->{'use_external_lex_cache'}) {
311             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase);
312             }
313 3         10 else {
314             $value = $h_r->{$phrase} = $handle->_compile($phrase);
315 17         35 }
316             last;
317 4         13 }
318             DEBUG>1 and print " Not found in $h_r, nor automakable\n";
319             # else keep looking
320             }
321             }
322 33 100       91  
323 4         5 unless(defined($value)) {
324 4 50 33     13 DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n";
325 4         5 if(ref($handle) and $handle->{'fail'}) {
326 4         4 DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n";
327 4 50       11 my $fail;
328 0         0 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
329 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
  0         0  
330             return &{$fail}($handle, $phrase, @_);
331             # If it ever returns, it should return a good value.
332             }
333 4         5 else { # It's a method name
334 4         16 $@ = $at; # Put $@ back in case we altered it along the way.
335             return $handle->$fail($phrase, @_);
336             # If it ever returns, it should return a good value.
337             }
338             }
339             else {
340 0         0 # All we know how to do is this;
341             Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
342             }
343             }
344 29 100       79  
345 6         47 if(ref($value) eq 'SCALAR'){
346 6         31 $@ = $at; # Put $@ back in case we altered it along the way.
347             return $$value ;
348 23 50       62 }
349 0         0 if(ref($value) ne 'CODE'){
350 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
351             return $value ;
352             }
353              
354 23         30 {
  23         73  
355 23         36 local $SIG{'__DIE__'};
  23         502  
356             eval { $value = &$value($handle, @_) };
357             }
358             # If we make it here, there was an exception thrown in the
359 23 50       118 # call to $value, and so scream:
360             if ($@) {
361 0         0 # pretty up the error message
362             $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
363 0         0 {\n in bracket code [compiled line $1],}s;
364             #$err =~ s/\n?$/\n/s;
365             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
366             # Rather unexpected, but suppose that the sub tried calling
367             # a method that didn't exist.
368 23         39 }
369 23         81 else {
370             $@ = $at; # Put $@ back in case we altered it along the way.
371 0         0 return $value;
372             }
373             $@ = $at; # Put $@ back in case we altered it along the way.
374             }
375              
376             ###########################################################################
377              
378             sub get_handle { # This is a constructor and, yes, it CAN FAIL.
379             # Its class argument has to be the base class for the current
380 17     17 0 2678 # application's l10n files.
381 17   33     117  
382             my($base_class, @languages) = @_;
383             $base_class = ref($base_class) || $base_class;
384 17 100       57 # Complain if they use __PACKAGE__ as a project base class?
385 11         19  
386 11 50       33 if( @languages ) {
387             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
388 11         67 if($USING_LANGUAGE_TAGS) { # An explicit language-list was given!
  11         555  
389             @languages =
390             map {; $_, I18N::LangTags::alternate_language_tags($_) }
391             # Catch alternation
392             map I18N::LangTags::locale2language_tag($_),
393             # If it's a lg tag, fine, pass thru (untainted)
394             # If it's a locale ID, try converting to a lg tag (untainted),
395 11         348 # otherwise nix it.
396             @languages;
397             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
398             }
399 6         20 }
400             else {
401             @languages = $base_class->_ambient_langprefs;
402 17         917 }
403              
404 17         30 @languages = $base_class->_langtag_munging(@languages);
405 17         37  
  104         224  
406 24 50       117 my %seen;
407 24 100 66     143 foreach my $module_name ( map { $base_class . '::' . $_ } @languages ) {
408             next unless length $module_name; # sanity
409 17         103 next if $seen{$module_name}++ # Already been here, and it was no-go
410             || !&_try_use($module_name); # Try to use() it, but can't it.
411             return($module_name->new); # Make it!
412 0         0 }
413              
414             return undef; # Fail!
415             }
416              
417             ###########################################################################
418 17     17   50  
419             sub _langtag_munging {
420             my($base_class, @languages) = @_;
421              
422             # We have all these DEBUG statements because otherwise it's hard as hell
423 17         24 # to diagnose if/when something goes wrong.
424              
425 17 50       46 DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n";
426 17         21  
427 17         77 if($USING_LANGUAGE_TAGS) {
428             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
429 17         55 @languages = $base_class->_add_supers( @languages );
430 17         344  
431             push @languages, I18N::LangTags::panic_languages(@languages);
432             DEBUG and warn "After adding panic languages:\n",
433 17         90 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
434              
435 17         28 push @languages, $base_class->fallback_languages;
436             # You are free to override fallback_languages to return empty-list!
437             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
438              
439 17         31 @languages = # final bit of processing to turn them into classname things
  104         128  
440 104         129 map {
441 104         126 my $it = $_; # copy
442 104         164 $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
443             $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
444             $it;
445 17         30 } @languages
446             ;
447             DEBUG and warn "Nearing end of munging:\n",
448             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
449 0         0 }
450             else {
451             DEBUG and warn "Bypassing language-tags.\n",
452             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
453 17         20 }
454              
455             DEBUG and warn "Before adding fallback classes:\n",
456 17         74 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
457              
458             push @languages, $base_class->fallback_language_classes;
459 17         25 # You are free to override that to return whatever.
460              
461             DEBUG and warn "Finally:\n",
462 17         55 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
463              
464             return @languages;
465             }
466              
467             ###########################################################################
468 6     6   17  
469             sub _ambient_langprefs {
470             return I18N::LangTags::Detect::detect();
471             }
472              
473             ###########################################################################
474 60     60   20450  
475             sub _add_supers {
476 60 50       166 my($base_class, @languages) = @_;
    100          
477              
478 0         0 if (!$MATCH_SUPERS) {
479             # Nothing
480             DEBUG and warn "Bypassing any super-matching.\n",
481             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
482              
483 42         52 }
484             elsif( $MATCH_SUPERS_TIGHTLY ) {
485 42         130 DEBUG and warn "Before adding new supers tightly:\n",
486 42         6319 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
487             @languages = I18N::LangTags::implicate_supers( @languages );
488             DEBUG and warn "After adding new supers tightly:\n",
489             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
490              
491 18         19 }
492             else {
493 18         42 DEBUG and warn "Before adding supers to end:\n",
494 18         1320 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
495             @languages = I18N::LangTags::implicate_supers_strictly( @languages );
496             DEBUG and warn "After adding supers to end:\n",
497             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
498 60         160 }
499              
500             return @languages;
501             }
502              
503             ###########################################################################
504             #
505             # This is where most people should stop reading.
506             #
507             ###########################################################################
508              
509             my %tried = ();
510             # memoization of whether we've used this module, or found it unusable.
511              
512 26 100   26   1235 sub _try_use { # Basically a wrapper around "require Modulename"
513             # "Many men have tried..." "They tried and failed?" "They tried and died."
514 18         57 return $tried{$_[0]} if exists $tried{$_[0]}; # memoization
515 17     17   133  
  17         46  
  17         548  
  18         41  
516 17     17   92 my $module = $_[0]; # ASSUME sane module name!
  17         58  
  17         3253  
517             { no strict 'refs';
518 18 100 100     28 no warnings 'once';
  18         208  
  7         42  
519             return($tried{$module} = 1)
520             if %{$module . '::Lexicon'} or @{$module . '::ISA'};
521             # weird case: we never use'd it, but there it is!
522 6         8 }
523              
524 6         20 DEBUG and warn " About to use $module ...\n";
525 6         10  
526 6         32 local $SIG{'__DIE__'};
527 6 100       19 local $@;
528 6         400 local @INC = @INC;
529             pop @INC if $INC[-1] eq '.';
530 6 100       36 eval "require $module"; # used to be "use $module", but no point in that.
531 5         7  
532 5         39 if($@) {
533             DEBUG and warn "Error using $module \: $@\n";
534             return $tried{$module} = 0;
535 1         2 }
536 1         8 else {
537             DEBUG and warn " OK, $module is used\n";
538             return $tried{$module} = 1;
539             }
540             }
541              
542             #--------------------------------------------------------------------------
543              
544 17     17   115 sub _lex_refs { # report the lexicon references for this handle's class
  17         32  
  17         504  
545 17     17   81 # returns an arrayREF!
  17         30  
  17         31506  
546 36   66 36   172 no strict 'refs';
547 36         48 no warnings 'once';
548 36 50       98 my $class = ref($_[0]) || $_[0];
549             DEBUG and warn "Lex refs lookup on $class\n";
550 36         47 return $isa_scan{$class} if exists $isa_scan{$class}; # memoization!
551 36 100       102  
552             my @lex_refs;
553 36 100       49 my $seen_r = ref($_[1]) ? $_[1] : {};
  36         174  
554 18         43  
  18         64  
555             if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
556 18         29 push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
557             DEBUG and warn '%' . $class . '::Lexicon contains ',
558             scalar(keys %{$class . '::Lexicon'}), " entries\n";
559             }
560              
561 36         57 # Implements depth(height?)-first recursive searching of superclasses.
  36         134  
562 24         59 # In hindsight, I suppose I could have just used Class::ISA!
563 24 50       117 foreach my $superclass (@{$class . '::ISA'}) {
564 24         39 DEBUG and warn " Super-class search into $superclass\n";
  24         98  
565             next if $seen_r->{$superclass}++;
566             push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself
567 36         68 }
568 36         115  
569             $isa_scan{$class} = \@lex_refs; # save for next time
570             return \@lex_refs;
571 0     0 0 0 }
  0         0  
572              
573             sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
574              
575             #--------------------------------------------------------------------------
576              
577             sub _compile {
578             # This big scary routine compiles an entry.
579             # It returns either a coderef if there's brackety bits in this, or
580 51     51   709 # otherwise a ref to a scalar.
581              
582             my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344
583              
584             # The while() regex is more expensive than this check on strings that don't need a compile.
585 51 100       219 # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
586             # on strings that don't need compiling.
587 43         59 return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
588              
589 43         51 my $handle = $_[0];
590 43         79  
591 43         65 my(@code);
592 43         75 my(@c) = (''); # "chunks" -- scratch.
593             my $call_count = 0;
594 43         64 my $big_pile = '';
  43         51  
595 43         84 {
596             my $in_group = 0; # start out outside a group
597 43         197 my($m, @params); # scratch
598              
599             while($string_to_compile =~ # Iterate over chunks.
600             m/(
601             [^\~\[\]]+ # non-~[] stuff (Capture everything else here)
602             |
603             ~. # ~[, ~], ~~, ~other
604             |
605             \[ # [ presumably opening a group
606             |
607             \] # ] presumably closing a group
608             |
609             ~ # terminal ~ ?
610             |
611             $
612 162         203 )/xgs
613             ) {
614 162 100 100     665 DEBUG>2 and warn qq{ "$1"\n};
    100          
    50          
    0          
    0          
    0          
    0          
    0          
615              
616             if($1 eq '[' or $1 eq '') { # "[" or end
617 66 50       143 # Whether this is "[" or end, force processing of any
618 0 0       0 # preceding literal.
619 0         0 if($in_group) {
620             if($1 eq '') {
621             $handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
622 0         0 }
623             else {
624             $handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
625             }
626 66 100       224 }
627 24         34 else {
628             if ($1 eq '') {
629             DEBUG>2 and warn " [end-string]\n";
630 42         60 }
631             else {
632 66 50       133 $in_group = 1;
633 66 100       274 }
634             die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
635 10         19 if(length $c[-1]) {
636 10 100 66     75 # Now actually processing the preceding literal
637             $big_pile .= $c[-1];
638             if($USE_LITERALS and (
639             (ord('A') == 65)
640             ? $c[-1] !~ m/[^\x20-\x7E]/s
641             # ASCII very safe chars
642             : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
643             # EBCDIC very safe chars
644 7         17 )) {
645 7         26 # normal case -- all very safe chars
646 7         28 $c[-1] =~ s/'/\\'/g;
647             push @code, q{ '} . $c[-1] . "',\n";
648             $c[-1] = ''; # reuse this slot
649 3         11 }
650 3         11 else {
651 3         13 $c[-1] =~ s/\\\\/\\/g;
652             push @code, ' $c[' . $#c . "],\n";
653             push @c, ''; # new chunk
654             }
655             }
656             # else just ignore the empty string.
657             }
658              
659             }
660 43 100       75 elsif($1 eq ']') { # "]"
661 42         54 # close group -- go back in-band
662             if($in_group) {
663 42         51 $in_group = 0;
664              
665             DEBUG>2 and warn " --Closing group [$c[-1]]\n";
666              
667 42 50 33     207 # And now process the group...
668 0         0  
669 0         0 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
670 0         0 DEBUG>2 and warn " -- (Ignoring)\n";
671             $c[-1] = ''; # reset out chink
672             next;
673             }
674              
675 42         138 #$c[-1] =~ s/^\s+//s;
676             #$c[-1] =~ s/\s+$//s;
677             ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
678              
679 42         64 # A bit of a hack -- we've turned "~,"'s into DELs, so turn
680 42         75 # 'em into real commas here.
  73         123  
681             if (ord('A') == 65) { # ASCII, etc
682             foreach($m, @params) { tr/\x7F/,/ }
683             }
684             else { # EBCDIC (1047, 0037, POSIX-BC)
685             # Thanks to Peter Prymmer for the EBCDIC handling
686             foreach($m, @params) { tr/\x07/,/ }
687             }
688 42 100 66     223  
    100          
    50          
689             # Special-case handling of some method names:
690 5         13 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
691 5         9 # Treat [_1,...] as [,_1,...], etc.
692             unshift @params, $m;
693             $m = '';
694 1         3 }
695             elsif($m eq '*') {
696             $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
697 0         0 }
698             elsif($m eq '#') {
699             $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
700             }
701 42 100 33     397  
    100 66        
      33        
      66        
      66        
      33        
702             # Most common case: a simple, legal-looking method name
703 5         10 if($m eq '') {
704             # 0-length method name means to just interpolate:
705             push @code, ' (';
706             }
707             elsif($m =~ /^\w+$/s
708             && !$handle->{'blacklist'}{$m}
709             && !$handle->{'denylist'}{$m}
710             && ( !defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m} )
711             && ( !defined $handle->{'allowlist'} || $handle->{'allowlist'}{$m} )
712 19         63 # exclude anything fancy and restrict to the allowlist/denylist (and historical whitelist/blacklist).
713             ) {
714             push @code, ' $_[0]->' . $m . '(';
715             }
716 18         88 else {
717             # TODO: implement something? or just too icky to consider?
718             $handle->_die_pointing(
719             $string_to_compile,
720             "Can't use \"$m\" as a method name in bracket group",
721             2 + length($c[-1])
722             );
723 24         36 }
724 24         41  
725             pop @c; # we don't need that chunk anymore
726 24         47 ++$call_count;
727 24 50 33     124  
    100          
    50          
728             foreach my $p (@params) {
729 0         0 if($p eq '_*') {
730             # Meaning: all parameters except $_[0]
731             $code[-1] .= ' @_[1 .. $#_], ';
732             # and yes, that does the right thing for all @_ < 3
733             }
734 10         48 elsif($p =~ m/^_(-?\d+)$/s) {
735             # _3 meaning $_[3]
736             $code[-1] .= '$_[' . (0 + $1) . '], ';
737             }
738             elsif($USE_LITERALS and (
739             (ord('A') == 65)
740             ? $p !~ m/[^\x20-\x7E]/s
741             # ASCII very safe chars
742             : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
743             # EBCDIC very safe chars
744 14         23 )) {
745 14         30 # Normal case: a literal containing only safe characters
746             $p =~ s/'/\\'/g;
747             $code[-1] .= q{'} . $p . q{', };
748             }
749 0         0 else {
750 0         0 # Stow it on the chunk-stack, and just refer to that.
751             push @c, $p;
752             push @code, ' $c[' . $#c . '], ';
753 24         40 }
754             }
755 24         104 $code[-1] .= "),\n";
756              
757             push @c, '';
758 1         6 }
759             else {
760             $handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
761             }
762              
763             }
764             elsif(substr($1,0,1) ne '~') {
765 53         179 # it's stuff not containing "~" or "[" or "]"
766 53         127 # i.e., a literal blob
767 53         209 my $text = $1;
768             $text =~ s/\\/\\\\/g;
769             $c[-1] .= $text;
770              
771 0         0 }
772             elsif($1 eq '~~') { # "~~"
773             $c[-1] .= '~';
774              
775 0         0 }
776             elsif($1 eq '~[') { # "~["
777             $c[-1] .= '[';
778              
779 0         0 }
780             elsif($1 eq '~]') { # "~]"
781             $c[-1] .= ']';
782              
783 0 0       0 }
784             elsif($1 eq '~,') { # "~,"
785             if($in_group) {
786 0         0 # This is a hack, based on the assumption that no-one will actually
787 0         0 # want a DEL inside a bracket group. Let's hope that's it's true.
788             if (ord('A') == 65) { # ASCII etc
789             $c[-1] .= "\x7F";
790             }
791             else { # EBCDIC (cp 1047, 0037, POSIX-BC)
792             $c[-1] .= "\x07";
793             }
794 0         0 }
795             else {
796             $c[-1] .= '~,';
797             }
798              
799 0         0 }
800             elsif($1 eq '~') { # possible only at string-end, it seems.
801             $c[-1] .= '~';
802              
803             }
804             else {
805 0         0 # It's a "~X" where X is not a special character.
806 0         0 # Consider it a literal ~ and X.
807 0         0 my $text = $1;
808             $text =~ s/\\/\\\\/g;
809             $c[-1] .= $text;
810             }
811             }
812 24 50       62 }
813 24         57  
814             if($call_count) {
815             undef $big_pile; # Well, nevermind that.
816             }
817             else {
818 0         0 # It's all literals! Ahwell, that can happen.
819             # So don't bother with the eval. Return a SCALAR reference.
820             return \$big_pile;
821 24 50 33     132 }
822 24         41  
823 24 50       76 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
    100          
824 0         0 DEBUG and warn scalar(@c), " chunks under closure\n";
825 0         0 if(@code == 0) { # not possible?
826             DEBUG and warn "Empty code\n";
827             return \'';
828 10         24 }
829             elsif(@code > 1) { # most cases, presumably!
830 24         46 unshift @code, "join '',\n";
831 24         53 }
832             unshift @code, "use strict; sub {\n";
833 24         35 push @code, "}\n";
834 24     10   1560  
  10     6   64  
  10     6   15  
  10         478  
  6         39  
  6         10  
  6         240  
  6         376  
  6         12  
  6         237  
835 24 50       79 DEBUG and warn @code;
836 24         134 my $sub = eval(join '', @code);
837             die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
838             return $sub;
839             }
840              
841             #--------------------------------------------------------------------------
842              
843 19     19   31 sub _die_pointing {
844 19   33     45 # This is used by _compile to throw a fatal error
845             my $target = shift;
846             $target = ref($target) || $target; # class name
847 19         39 # ...leaving $_[0] the error-causing text, and $_[1] the error message
848              
849 19         21 my $i = index($_[0], "\n");
850 19 100       53  
851 19 100       36 my $pointy;
852 18         23 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
853             if($pos < 1) {
854             $pointy = "^=== near there\n";
855 1         3 }
856 1 50 33     6 else { # we need to space over
      33        
857             my $first_tab = index($_[0], "\t");
858             if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
859 1         4 # No tabs, or the first tab is harmlessly after where we will point to,
860             # AND we're far enough from the margin that we can draw a proper arrow.
861             $pointy = ('=' x $pos) . "^ near there\n";
862             }
863 0         0 else {
864 0         0 # tabs screw everything up!
865             $pointy = substr($_[0],0,$pos);
866 0         0 $pointy =~ tr/\t //cd;
867             # make everything into whitespace, but preserving tabs
868             $pointy .= "^=== near there\n";
869             }
870 19         44 }
871              
872 19 50       35 my $errmsg = "$_[1], in\:\n$_[0]";
    0          
873              
874 19         40 if($i == -1) {
875             # No newline.
876             $errmsg .= "\n" . $pointy;
877             }
878 0         0 elsif($i == (length($_[0]) - 1) ) {
879             # Already has a newline at end.
880             $errmsg .= $pointy;
881             }
882             else {
883 19         1823 # don't bother with the pointy bit, I guess.
884             }
885             Carp::croak( "$errmsg via $target, as used" );
886             }
887              
888             1;