File Coverage

blib/lib/Locale/Maketext.pm
Criterion Covered Total %
statement 328 386 84.9
branch 113 176 64.2
condition 46 100 46.0
subroutine 40 44 90.9
pod 12 19 63.1
total 539 725 74.3


line stmt bran cond sub pod time code
1             package Locale::Maketext;
2 19     19   827706 use strict;
  19         142  
  19         572  
3             our $USE_LITERALS;
4 17     17   72 use Carp ();
  17         30  
  17         189  
5 17     17   7657 use I18N::LangTags ();
  17         41890  
  17         344  
6 17     17   6564 use I18N::LangTags::Detect ();
  17         26547  
  17         737  
7              
8             #--------------------------------------------------------------------------
9              
10 17 50   17   109 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
  17         1186  
11             # define the constant 'DEBUG' at compile-time
12              
13             # turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
14             # use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
15             BEGIN {
16              
17             # if we have it || we can load it
18 17 50 33 17   122 if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) {
  17         87  
  17         8133  
19 17         371 utf8->import();
20 17         5937 DEBUG and warn " utf8 on for _compile()\n";
21             }
22             else {
23 0         0 DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
24             }
25             }
26              
27              
28             our $VERSION = '1.31';
29             our @ISA = ();
30              
31             our $MATCH_SUPERS = 1;
32             our $MATCH_SUPERS_TIGHTLY = 1;
33             our $USING_LANGUAGE_TAGS = 1;
34             # Turning this off is somewhat of a security risk in that little or no
35             # checking will be done on the legality of tokens passed to the
36             # eval("use $module_name") in _try_use. If you turn this off, you have
37             # to do your own taint checking.
38              
39             $USE_LITERALS = 1 unless defined $USE_LITERALS;
40             # a hint for compiling bracket-notation things.
41              
42             my %isa_scan = ();
43              
44             ###########################################################################
45              
46             sub quant {
47 2     2 1 7 my($handle, $num, @forms) = @_;
48              
49 2 50       5 return $num if @forms == 0; # what should this mean?
50 2 50 33     6 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
51              
52             # Normal case:
53             # Note that the formatting of $num is preserved.
54 2         6 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
55             # Most human languages put the number phrase before the qualified phrase.
56             }
57              
58              
59             sub numerate {
60             # return this lexical item in a form appropriate to this number
61 0     0 1 0 my($handle, $num, @forms) = @_;
62 0         0 my $s = ($num == 1);
63              
64 0 0       0 return '' unless @forms;
65 0 0       0 if(@forms == 1) { # only the headword form specified
66 0 0       0 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
67             }
68             else { # sing and plural were specified
69 0 0       0 return $s ? $forms[0] : $forms[1];
70             }
71             }
72              
73             #--------------------------------------------------------------------------
74              
75             sub numf {
76 2     2 1 5 my($handle, $num) = @_[0,1];
77 2 50 33     12 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
      33        
78 2         3 $num += 0; # Just use normal integer stringification.
79             # Specifically, don't let %G turn ten million into 1E+007
80             }
81             else {
82 0         0 $num = CORE::sprintf('%G', $num);
83             # "CORE::" is there to avoid confusion with the above sub sprintf.
84             }
85 2         6 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5
  0         0  
86             # The initial \d+ gobbles as many digits as it can, and then we
87             # backtrack so it un-eats the rightmost three, and then we
88             # insert the comma there.
89              
90 2 50 33     11 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
91             # This is just a lame hack instead of using Number::Format
92 2         9 return $num;
93             }
94              
95             sub sprintf {
96 17     17   115 no integer;
  17         26  
  17         78  
97 6     6 1 22 my($handle, $format, @params) = @_;
98 6         30 return CORE::sprintf($format, @params);
99             # "CORE::" is there to avoid confusion with myself!
100             }
101              
102             #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
103              
104 17     17   1035 use integer; # vroom vroom... applies to the whole rest of the module
  17         43  
  17         68  
105              
106             sub language_tag {
107 0   0 0 1 0 my $it = ref($_[0]) || $_[0];
108 0 0       0 return undef unless $it =~ m/([^':]+)(?:::)?$/s;
109 0         0 $it = lc($1);
110 0         0 $it =~ tr<_><->;
111 0         0 return $it;
112             }
113              
114             sub encoding {
115 0     0 1 0 my $it = $_[0];
116             return(
117 0   0     0 (ref($it) && $it->{'encoding'})
118             || 'iso-8859-1' # Latin-1
119             );
120             }
121              
122             #--------------------------------------------------------------------------
123              
124 17     17 0 46 sub fallback_languages { return('i-default', 'en', 'en-US') }
125              
126 17     17 0 27 sub fallback_language_classes { return () }
127              
128             #--------------------------------------------------------------------------
129              
130             sub fail_with { # an actual attribute method!
131 1     1 1 8 my($handle, @params) = @_;
132 1 50       3 return unless ref($handle);
133 1 50       3 $handle->{'fail'} = $params[0] if @params;
134 1         3 return $handle->{'fail'};
135             }
136              
137             #--------------------------------------------------------------------------
138              
139             sub _exclude {
140 40     40   57 my ( $handle, @methods ) = @_;
141              
142 40 100       140 unless ( defined $handle->{'denylist'} ) {
143 17     17   4249 no strict 'refs';
  17         48  
  17         34397  
144              
145             # Don't let people call methods they're not supposed to from maketext.
146             # Explicitly exclude all methods in this package that start with an
147             # underscore on principle.
148             $handle->{'denylist'} = {
149 408         727 map { $_ => 1 } (
150             qw/
151             blacklist
152             denylist
153             encoding
154             fail_with
155             failure_handler_auto
156             fallback_language_classes
157             fallback_languages
158             get_handle
159             init
160             language_tag
161             maketext
162             new
163             whitelist
164             allowlist
165 17         54 /, grep { /^_/ } keys %{ __PACKAGE__ . "::" }
  650         1074  
  17         209  
166             ),
167             };
168             }
169              
170 40 100       139 if ( scalar @methods ) {
171 6         9 $handle->{'denylist'} = { %{ $handle->{'denylist'} }, map { $_ => 1 } @methods };
  6         37  
  8         53  
172             }
173              
174 40         82 delete $handle->{'_external_lex_cache'};
175 40         67 return;
176             }
177              
178             sub blacklist {
179 20     20 1 1685 my ( $handle, @methods ) = @_;
180 20         58 _exclude ( $handle, @methods );
181 20         25 return;
182             }
183              
184             sub denylist {
185 20     20 1 2078 my ( $handle, @methods ) = @_;
186 20         54 _exclude ( $handle, @methods );
187 20         21 return;
188             }
189              
190             sub _include {
191 6     6   14 my ( $handle, @methods ) = @_;
192 6 50       16 if ( scalar @methods ) {
193 6 100       17 $handle->{'allowlist'} = {} unless defined $handle->{'allowlist'};
194 6         10 $handle->{'allowlist'} = { %{ $handle->{'allowlist'} }, map { $_ => 1 } @methods };
  6         18  
  6         24  
195             }
196              
197 6         31 delete $handle->{'_external_lex_cache'};
198 6         10 return;
199             }
200              
201             sub whitelist {
202 3     3 1 3361 my ( $handle, @methods ) = @_;
203 3         8 _include ( $handle, @methods );
204 3         6 return;
205             }
206              
207             sub allowlist {
208 3     3 1 3508 my ( $handle, @methods ) = @_;
209 3         12 _include ( $handle, @methods );
210 3         7 return;
211             }
212              
213             #--------------------------------------------------------------------------
214              
215             sub failure_handler_auto {
216             # Meant to be used like:
217             # $handle->fail_with('failure_handler_auto')
218              
219 5     5 1 8 my $handle = shift;
220 5         7 my $phrase = shift;
221              
222 5   100     19 $handle->{'failure_lex'} ||= {};
223 5         32 my $lex = $handle->{'failure_lex'};
224              
225 5   66     33 my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase));
      33        
226              
227             # Dumbly copied from sub maketext:
228 5 100       13 return ${$value} if ref($value) eq 'SCALAR';
  3         14  
229 2 50       6 return $value if ref($value) ne 'CODE';
230             {
231 2         3 local $SIG{'__DIE__'};
  2         8  
232 2         4 eval { $value = &$value($handle, @_) };
  2         27  
233             }
234             # If we make it here, there was an exception thrown in the
235             # call to $value, and so scream:
236 2 100       13 if($@) {
237             # pretty up the error message
238 1         2 $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
239             {\n in bracket code [compiled line $1],}s;
240 1         100 #$err =~ s/\n?$/\n/s;
241             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
242             # Rather unexpected, but suppose that the sub tried calling
243             # a method that didn't exist.
244             }
245 1         6 else {
246             return $value;
247             }
248             }
249              
250             #==========================================================================
251              
252             sub new {
253 17   33 17 0 103 # Nothing fancy!
254 17         52 my $class = ref($_[0]) || $_[0];
255 17         66 my $handle = bless {}, $class;
256 17         89 $handle->blacklist;
257 17         70 $handle->denylist;
258 17         86 $handle->init;
259             return $handle;
260             }
261 17     17 0 21  
262             sub init { return } # no-op
263              
264             ###########################################################################
265              
266             sub maketext {
267 52 50   52 0 20876 # Remember, this can fail. Failure is controllable many ways.
268             Carp::croak 'maketext requires at least one parameter' unless @_ > 1;
269 52         132  
270 52 50 33     195 my($handle, $phrase) = splice(@_,0,2);
271             Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase));
272              
273             # backup $@ in case it's still being used in the calling code.
274 52         73 # If no failures, we'll re-set it back to what it was later.
275             my $at = $@;
276              
277 52         91 # Copy @_ case one of its elements is $@.
278             @_ = @_;
279              
280             # Look up the value:
281 52         65  
282 52 100       134 my $value;
283 1         2 if (exists $handle->{'_external_lex_cache'}{$phrase}) {
284 1         1 DEBUG and warn "* Using external lex cache version of \"$phrase\"\n";
285             $value = $handle->{'_external_lex_cache'}{$phrase};
286             }
287 51         68 else {
288 51 100 33     238 foreach my $h_r (
289             @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs }
290 51         76 ) {
291 51 50 66     284 DEBUG and warn "* Looking up \"$phrase\" in $h_r\n";
    100          
    100          
    100          
292 11         14 if(exists $h_r->{$phrase}) {
293 11 50       35 DEBUG and warn " Found \"$phrase\" in $h_r\n";
294             unless(ref($value = $h_r->{$phrase})) {
295 11 100       29 # Nonref means it's not yet compiled. Compile and replace.
296 1         12 if ($handle->{'use_external_lex_cache'}) {
297             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value);
298             }
299 10         37 else {
300             $value = $h_r->{$phrase} = $handle->_compile($value);
301             }
302 11         22 }
303             last;
304             }
305             # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;"
306             # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;"
307             elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) {
308 36         46 # it's an auto lex, and this is an autoable key!
309 36 100       65 DEBUG and warn " Automaking \"$phrase\" into $h_r\n";
310 33         84 if ($handle->{'use_external_lex_cache'}) {
311             $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase);
312             }
313 3         9 else {
314             $value = $h_r->{$phrase} = $handle->_compile($phrase);
315 17         34 }
316             last;
317 4         6 }
318             DEBUG>1 and print " Not found in $h_r, nor automakable\n";
319             # else keep looking
320             }
321             }
322 33 100       87  
323 4         5 unless(defined($value)) {
324 4 50 33     13 DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n";
325 4         4 if(ref($handle) and $handle->{'fail'}) {
326 4         4 DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n";
327 4 50       9 my $fail;
328 0         0 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
329 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
  0         0  
330             return &{$fail}($handle, $phrase, @_);
331             # If it ever returns, it should return a good value.
332             }
333 4         5 else { # It's a method name
334 4         13 $@ = $at; # Put $@ back in case we altered it along the way.
335             return $handle->$fail($phrase, @_);
336             # If it ever returns, it should return a good value.
337             }
338             }
339             else {
340 0         0 # All we know how to do is this;
341             Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
342             }
343             }
344 29 100       85  
345 6         35 if(ref($value) eq 'SCALAR'){
346 6         30 $@ = $at; # Put $@ back in case we altered it along the way.
347             return $$value ;
348 23 50       48 }
349 0         0 if(ref($value) ne 'CODE'){
350 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
351             return $value ;
352             }
353              
354 23         27 {
  23         74  
355 23         36 local $SIG{'__DIE__'};
  23         506  
356             eval { $value = &$value($handle, @_) };
357             }
358             # If we make it here, there was an exception thrown in the
359 23 50       161 # call to $value, and so scream:
360             if ($@) {
361 0         0 # pretty up the error message
362             $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
363 0         0 {\n in bracket code [compiled line $1],}s;
364             #$err =~ s/\n?$/\n/s;
365             Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
366             # Rather unexpected, but suppose that the sub tried calling
367             # a method that didn't exist.
368 23         37 }
369 23         74 else {
370             $@ = $at; # Put $@ back in case we altered it along the way.
371 0         0 return $value;
372             }
373             $@ = $at; # Put $@ back in case we altered it along the way.
374             }
375              
376             ###########################################################################
377              
378             sub get_handle { # This is a constructor and, yes, it CAN FAIL.
379             # Its class argument has to be the base class for the current
380 17     17 0 2645 # application's l10n files.
381 17   33     97  
382             my($base_class, @languages) = @_;
383             $base_class = ref($base_class) || $base_class;
384 17 100       49 # Complain if they use __PACKAGE__ as a project base class?
385 11         13  
386 11 50       30 if( @languages ) {
387             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
388 11         47 if($USING_LANGUAGE_TAGS) { # An explicit language-list was given!
  11         254  
389             @languages =
390             map {; $_, I18N::LangTags::alternate_language_tags($_) }
391             # Catch alternation
392             map I18N::LangTags::locale2language_tag($_),
393             # If it's a lg tag, fine, pass thru (untainted)
394             # If it's a locale ID, try converting to a lg tag (untainted),
395 11         304 # otherwise nix it.
396             @languages;
397             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
398             }
399 6         17 }
400             else {
401             @languages = $base_class->_ambient_langprefs;
402 17         833 }
403              
404 17         25 @languages = $base_class->_langtag_munging(@languages);
405 17         30  
  104         206  
406 24 50       116 my %seen;
407 24 100 66     140 foreach my $module_name ( map { $base_class . '::' . $_ } @languages ) {
408             next unless length $module_name; # sanity
409 17         95 next if $seen{$module_name}++ # Already been here, and it was no-go
410             || !&_try_use($module_name); # Try to use() it, but can't it.
411             return($module_name->new); # Make it!
412 0         0 }
413              
414             return undef; # Fail!
415             }
416              
417             ###########################################################################
418 17     17   44  
419             sub _langtag_munging {
420             my($base_class, @languages) = @_;
421              
422             # We have all these DEBUG statements because otherwise it's hard as hell
423 17         23 # to diagnose if/when something goes wrong.
424              
425 17 50       40 DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n";
426 17         20  
427 17         61 if($USING_LANGUAGE_TAGS) {
428             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
429 17         43 @languages = $base_class->_add_supers( @languages );
430 17         311  
431             push @languages, I18N::LangTags::panic_languages(@languages);
432             DEBUG and warn "After adding panic languages:\n",
433 17         68 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
434              
435 17         26 push @languages, $base_class->fallback_languages;
436             # You are free to override fallback_languages to return empty-list!
437             DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
438              
439 17         34 @languages = # final bit of processing to turn them into classname things
  104         124  
440 104         123 map {
441 104         133 my $it = $_; # copy
442 104         155 $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
443             $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
444             $it;
445 17         27 } @languages
446             ;
447             DEBUG and warn "Nearing end of munging:\n",
448             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
449 0         0 }
450             else {
451             DEBUG and warn "Bypassing language-tags.\n",
452             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
453 17         22 }
454              
455             DEBUG and warn "Before adding fallback classes:\n",
456 17         59 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
457              
458             push @languages, $base_class->fallback_language_classes;
459 17         21 # You are free to override that to return whatever.
460              
461             DEBUG and warn "Finally:\n",
462 17         54 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
463              
464             return @languages;
465             }
466              
467             ###########################################################################
468 6     6   16  
469             sub _ambient_langprefs {
470             return I18N::LangTags::Detect::detect();
471             }
472              
473             ###########################################################################
474 60     60   19736  
475             sub _add_supers {
476 60 50       167 my($base_class, @languages) = @_;
    100          
477              
478 0         0 if (!$MATCH_SUPERS) {
479             # Nothing
480             DEBUG and warn "Bypassing any super-matching.\n",
481             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
482              
483 42         47 }
484             elsif( $MATCH_SUPERS_TIGHTLY ) {
485 42         102 DEBUG and warn "Before adding new supers tightly:\n",
486 42         6155 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
487             @languages = I18N::LangTags::implicate_supers( @languages );
488             DEBUG and warn "After adding new supers tightly:\n",
489             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
490              
491 18         21 }
492             else {
493 18         39 DEBUG and warn "Before adding supers to end:\n",
494 18         1406 ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
495             @languages = I18N::LangTags::implicate_supers_strictly( @languages );
496             DEBUG and warn "After adding supers to end:\n",
497             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
498 60         162 }
499              
500             return @languages;
501             }
502              
503             ###########################################################################
504             #
505             # This is where most people should stop reading.
506             #
507             ###########################################################################
508              
509             my %tried = ();
510             # memoization of whether we've used this module, or found it unusable.
511              
512 26 100   26   969 sub _try_use { # Basically a wrapper around "require Modulename"
513             # "Many men have tried..." "They tried and failed?" "They tried and died."
514 18         53 return $tried{$_[0]} if exists $tried{$_[0]}; # memoization
515 17     17   118  
  17         41  
  17         502  
  18         29  
516 17     17   90 my $module = $_[0]; # ASSUME sane module name!
  17         44  
  17         3232  
517             { no strict 'refs';
518 18 100 100     29 no warnings 'once';
  18         185  
  7         40  
519             return($tried{$module} = 1)
520             if %{$module . '::Lexicon'} or @{$module . '::ISA'};
521             # weird case: we never use'd it, but there it is!
522 6         9 }
523              
524 6         15 DEBUG and warn " About to use $module ...\n";
525 6         11  
526 6         28 local $SIG{'__DIE__'};
527 6 100       16 local $@;
528 6         346 local @INC = @INC;
529             pop @INC if $INC[-1] eq '.';
530 6 100       34 eval "require $module"; # used to be "use $module", but no point in that.
531 5         6  
532 5         39 if($@) {
533             DEBUG and warn "Error using $module \: $@\n";
534             return $tried{$module} = 0;
535 1         1 }
536 1         14 else {
537             DEBUG and warn " OK, $module is used\n";
538             return $tried{$module} = 1;
539             }
540             }
541              
542             #--------------------------------------------------------------------------
543              
544 17     17   113 sub _lex_refs { # report the lexicon references for this handle's class
  17         35  
  17         514  
545 17     17   80 # returns an arrayREF!
  17         23  
  17         30789  
546 36   66 36   126 no strict 'refs';
547 36         45 no warnings 'once';
548 36 50       81 my $class = ref($_[0]) || $_[0];
549             DEBUG and warn "Lex refs lookup on $class\n";
550 36         44 return $isa_scan{$class} if exists $isa_scan{$class}; # memoization!
551 36 100       90  
552             my @lex_refs;
553 36 100       45 my $seen_r = ref($_[1]) ? $_[1] : {};
  36         165  
554 18         34  
  18         55  
555             if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
556 18         29 push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
557             DEBUG and warn '%' . $class . '::Lexicon contains ',
558             scalar(keys %{$class . '::Lexicon'}), " entries\n";
559             }
560              
561 36         55 # Implements depth(height?)-first recursive searching of superclasses.
  36         130  
562 24         61 # In hindsight, I suppose I could have just used Class::ISA!
563 24 50       113 foreach my $superclass (@{$class . '::ISA'}) {
564 24         39 DEBUG and warn " Super-class search into $superclass\n";
  24         87  
565             next if $seen_r->{$superclass}++;
566             push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself
567 36         64 }
568 36         104  
569             $isa_scan{$class} = \@lex_refs; # save for next time
570             return \@lex_refs;
571 0     0 0 0 }
  0         0  
572              
573             sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
574              
575             #--------------------------------------------------------------------------
576              
577             sub _compile {
578             # This big scary routine compiles an entry.
579             # It returns either a coderef if there's brackety bits in this, or
580 51     51   629 # otherwise a ref to a scalar.
581              
582             my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344
583              
584             # The while() regex is more expensive than this check on strings that don't need a compile.
585 51 100       214 # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
586             # on strings that don't need compiling.
587 43         63 return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
588              
589 43         53 my $handle = $_[0];
590 43         69  
591 43         63 my(@code);
592 43         68 my(@c) = (''); # "chunks" -- scratch.
593             my $call_count = 0;
594 43         65 my $big_pile = '';
  43         54  
595 43         57 {
596             my $in_group = 0; # start out outside a group
597 43         186 my($m, @params); # scratch
598              
599             while($string_to_compile =~ # Iterate over chunks.
600             m/(
601             [^\~\[\]]+ # non-~[] stuff (Capture everything else here)
602             |
603             ~. # ~[, ~], ~~, ~other
604             |
605             \[ # [ presumably opening a group
606             |
607             \] # ] presumably closing a group
608             |
609             ~ # terminal ~ ?
610             |
611             $
612 162         199 )/xgs
613             ) {
614 162 100 100     695 DEBUG>2 and warn qq{ "$1"\n};
    100          
    50          
    0          
    0          
    0          
    0          
    0          
615              
616             if($1 eq '[' or $1 eq '') { # "[" or end
617 66 50       137 # Whether this is "[" or end, force processing of any
618 0 0       0 # preceding literal.
619 0         0 if($in_group) {
620             if($1 eq '') {
621             $handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
622 0         0 }
623             else {
624             $handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
625             }
626 66 100       217 }
627 24         31 else {
628             if ($1 eq '') {
629             DEBUG>2 and warn " [end-string]\n";
630 42         104 }
631             else {
632 66 50       130 $in_group = 1;
633 66 100       240 }
634             die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
635 10         16 if(length $c[-1]) {
636 10 100 66     65 # Now actually processing the preceding literal
637             $big_pile .= $c[-1];
638             if($USE_LITERALS and (
639             (ord('A') == 65)
640             ? $c[-1] !~ m/[^\x20-\x7E]/s
641             # ASCII very safe chars
642             : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
643             # EBCDIC very safe chars
644 7         15 )) {
645 7         24 # normal case -- all very safe chars
646 7         28 $c[-1] =~ s/'/\\'/g;
647             push @code, q{ '} . $c[-1] . "',\n";
648             $c[-1] = ''; # reuse this slot
649 3         10 }
650 3         9 else {
651 3         12 $c[-1] =~ s/\\\\/\\/g;
652             push @code, ' $c[' . $#c . "],\n";
653             push @c, ''; # new chunk
654             }
655             }
656             # else just ignore the empty string.
657             }
658              
659             }
660 43 100       69 elsif($1 eq ']') { # "]"
661 42         56 # close group -- go back in-band
662             if($in_group) {
663 42         51 $in_group = 0;
664              
665             DEBUG>2 and warn " --Closing group [$c[-1]]\n";
666              
667 42 50 33     197 # And now process the group...
668 0         0  
669 0         0 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
670 0         0 DEBUG>2 and warn " -- (Ignoring)\n";
671             $c[-1] = ''; # reset out chink
672             next;
673             }
674              
675 42         140 #$c[-1] =~ s/^\s+//s;
676             #$c[-1] =~ s/\s+$//s;
677             ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
678              
679 42         55 # A bit of a hack -- we've turned "~,"'s into DELs, so turn
680 42         70 # 'em into real commas here.
  73         120  
681             if (ord('A') == 65) { # ASCII, etc
682             foreach($m, @params) { tr/\x7F/,/ }
683             }
684             else { # EBCDIC (1047, 0037, POSIX-BC)
685             # Thanks to Peter Prymmer for the EBCDIC handling
686             foreach($m, @params) { tr/\x07/,/ }
687             }
688 42 100 66     226  
    100          
    50          
689             # Special-case handling of some method names:
690 5         12 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
691 5         7 # Treat [_1,...] as [,_1,...], etc.
692             unshift @params, $m;
693             $m = '';
694 1         2 }
695             elsif($m eq '*') {
696             $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
697 0         0 }
698             elsif($m eq '#') {
699             $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
700             }
701 42 100 33     387  
    100 66        
      33        
      66        
      66        
      33        
702             # Most common case: a simple, legal-looking method name
703 5         9 if($m eq '') {
704             # 0-length method name means to just interpolate:
705             push @code, ' (';
706             }
707             elsif($m =~ /^\w+$/s
708             && !$handle->{'blacklist'}{$m}
709             && !$handle->{'denylist'}{$m}
710             && ( !defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m} )
711             && ( !defined $handle->{'allowlist'} || $handle->{'allowlist'}{$m} )
712 19         90 # exclude anything fancy and restrict to the allowlist/denylist (and historical whitelist/blacklist).
713             ) {
714             push @code, ' $_[0]->' . $m . '(';
715             }
716 18         85 else {
717             # TODO: implement something? or just too icky to consider?
718             $handle->_die_pointing(
719             $string_to_compile,
720             "Can't use \"$m\" as a method name in bracket group",
721             2 + length($c[-1])
722             );
723 24         44 }
724 24         40  
725             pop @c; # we don't need that chunk anymore
726 24         41 ++$call_count;
727 24 50 33     134  
    100          
    50          
728             foreach my $p (@params) {
729 0         0 if($p eq '_*') {
730             # Meaning: all parameters except $_[0]
731             $code[-1] .= ' @_[1 .. $#_], ';
732             # and yes, that does the right thing for all @_ < 3
733             }
734 10         46 elsif($p =~ m/^_(-?\d+)$/s) {
735             # _3 meaning $_[3]
736             $code[-1] .= '$_[' . (0 + $1) . '], ';
737             }
738             elsif($USE_LITERALS and (
739             (ord('A') == 65)
740             ? $p !~ m/[^\x20-\x7E]/s
741             # ASCII very safe chars
742             : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
743             # EBCDIC very safe chars
744 14         23 )) {
745 14         53 # Normal case: a literal containing only safe characters
746             $p =~ s/'/\\'/g;
747             $code[-1] .= q{'} . $p . q{', };
748             }
749 0         0 else {
750 0         0 # Stow it on the chunk-stack, and just refer to that.
751             push @c, $p;
752             push @code, ' $c[' . $#c . '], ';
753 24         41 }
754             }
755 24         87 $code[-1] .= "),\n";
756              
757             push @c, '';
758 1         6 }
759             else {
760             $handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
761             }
762              
763             }
764             elsif(substr($1,0,1) ne '~') {
765 53         168 # it's stuff not containing "~" or "[" or "]"
766 53         110 # i.e., a literal blob
767 53         192 my $text = $1;
768             $text =~ s/\\/\\\\/g;
769             $c[-1] .= $text;
770              
771 0         0 }
772             elsif($1 eq '~~') { # "~~"
773             $c[-1] .= '~';
774              
775 0         0 }
776             elsif($1 eq '~[') { # "~["
777             $c[-1] .= '[';
778              
779 0         0 }
780             elsif($1 eq '~]') { # "~]"
781             $c[-1] .= ']';
782              
783 0 0       0 }
784             elsif($1 eq '~,') { # "~,"
785             if($in_group) {
786 0         0 # This is a hack, based on the assumption that no-one will actually
787 0         0 # want a DEL inside a bracket group. Let's hope that's it's true.
788             if (ord('A') == 65) { # ASCII etc
789             $c[-1] .= "\x7F";
790             }
791             else { # EBCDIC (cp 1047, 0037, POSIX-BC)
792             $c[-1] .= "\x07";
793             }
794 0         0 }
795             else {
796             $c[-1] .= '~,';
797             }
798              
799 0         0 }
800             elsif($1 eq '~') { # possible only at string-end, it seems.
801             $c[-1] .= '~';
802              
803             }
804             else {
805 0         0 # It's a "~X" where X is not a special character.
806 0         0 # Consider it a literal ~ and X.
807 0         0 my $text = $1;
808             $text =~ s/\\/\\\\/g;
809             $c[-1] .= $text;
810             }
811             }
812 24 50       57 }
813 24         46  
814             if($call_count) {
815             undef $big_pile; # Well, nevermind that.
816             }
817             else {
818 0         0 # It's all literals! Ahwell, that can happen.
819             # So don't bother with the eval. Return a SCALAR reference.
820             return \$big_pile;
821 24 50 33     118 }
822 24         33  
823 24 50       76 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
    100          
824 0         0 DEBUG and warn scalar(@c), " chunks under closure\n";
825 0         0 if(@code == 0) { # not possible?
826             DEBUG and warn "Empty code\n";
827             return \'';
828 10         22 }
829             elsif(@code > 1) { # most cases, presumably!
830 24         49 unshift @code, "join '',\n";
831 24         49 }
832             unshift @code, "use strict; sub {\n";
833 24         33 push @code, "}\n";
834 24     10   1484  
  10     6   62  
  10     6   43  
  10         469  
  6         37  
  6         12  
  6         252  
  6         35  
  6         11  
  6         229  
835 24 50       79 DEBUG and warn @code;
836 24         106 my $sub = eval(join '', @code);
837             die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
838             return $sub;
839             }
840              
841             #--------------------------------------------------------------------------
842              
843 19     19   31 sub _die_pointing {
844 19   33     44 # This is used by _compile to throw a fatal error
845             my $target = shift;
846             $target = ref($target) || $target; # class name
847 19         36 # ...leaving $_[0] the error-causing text, and $_[1] the error message
848              
849 19         21 my $i = index($_[0], "\n");
850 19 100       49  
851 19 100       40 my $pointy;
852 18         25 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
853             if($pos < 1) {
854             $pointy = "^=== near there\n";
855 1         2 }
856 1 50 33     6 else { # we need to space over
      33        
857             my $first_tab = index($_[0], "\t");
858             if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
859 1         3 # No tabs, or the first tab is harmlessly after where we will point to,
860             # AND we're far enough from the margin that we can draw a proper arrow.
861             $pointy = ('=' x $pos) . "^ near there\n";
862             }
863 0         0 else {
864 0         0 # tabs screw everything up!
865             $pointy = substr($_[0],0,$pos);
866 0         0 $pointy =~ tr/\t //cd;
867             # make everything into whitespace, but preserving tabs
868             $pointy .= "^=== near there\n";
869             }
870 19         40 }
871              
872 19 50       40 my $errmsg = "$_[1], in\:\n$_[0]";
    0          
873              
874 19         39 if($i == -1) {
875             # No newline.
876             $errmsg .= "\n" . $pointy;
877             }
878 0         0 elsif($i == (length($_[0]) - 1) ) {
879             # Already has a newline at end.
880             $errmsg .= $pointy;
881             }
882             else {
883 19         1789 # don't bother with the pointy bit, I guess.
884             }
885             Carp::croak( "$errmsg via $target, as used" );
886             }
887              
888             1;