File Coverage

blib/lib/Regexp/Grammars.pm
Criterion Covered Total %
statement 499 772 64.6
branch 265 454 58.3
condition 81 154 52.6
subroutine 49 69 71.0
pod 0 3 0.0
total 894 1452 61.5


line stmt bran cond sub pod time code
1             =encoding ISO8859-1
2             =cut
3              
4             package Regexp::Grammars;
5 81     81   5676601 use re 'eval';
  81         908  
  81         5927  
6              
7 81     81   572 use warnings;
  81         176  
  81         2289  
8 81     81   432 use strict;
  81         164  
  81         1870  
9 81     81   2332 use 5.010;
  81         295  
10 81     81   531 use vars ();
  81         186  
  81         2138  
11              
12 81     81   590 use Scalar::Util qw< blessed reftype >;
  81         236  
  81         5444  
13 81     81   54097 use Data::Dumper qw< Dumper >;
  81         574744  
  81         9624  
14              
15             our $VERSION = '1.057';
16              
17             my $anon_scalar_ref = \do{my $var};
18             my $MAGIC_VARS = q{my ($CAPTURE, $CONTEXT, $DEBUG, $INDEX, $MATCH, %ARG, %MATCH);};
19              
20             my $PROBLEM_WITH_5_18 = <<'END_ERROR_MSG';
21             Warning: Regexp::Grammars is unsupported
22             under Perl 5.18.0 through 5.18.3 due to a bug
23             in regex parsing under those versions.
24              
25             Please upgrade to Perl 5.18.4 or later, or revert to
26             Perl 5.16 or earlier.
27             END_ERROR_MSG
28              
29             # Load the module...
30             sub import {
31             # Signal lexical scoping (active, unless something was exported)...
32 93     93   2939 $^H{'Regexp::Grammars::active'} = 1;
33              
34             # Process any regexes in module's active lexical scope...
35 81     81   100162 use overload;
  81         85908  
  81         554  
36             overload::constant(
37             qr => sub {
38 275     275   88871 my ($raw, $cooked, $type) = @_;
39             # In active scope and really a regex...
40 275 100 66     740 if (_module_is_active() && $type =~ /qq?/) {
41 173         5473 return bless \$cooked, 'Regexp::Grammars::Precursor';
42             }
43             # Ignore everything else...
44             else {
45 102         52801 return $cooked;
46             }
47             }
48 93         977 );
49              
50             # Deal with 5.18 issues...
51 93 50       2748 if ($] >= 5.018) {
52             # Issue warning...
53 93 50       492 if ($] < 5.018004) {
54 0         0 require Carp;
55 0         0 Carp::croak($PROBLEM_WITH_5_18);
56             }
57              
58             # Deal with cases where Perl 5.18+ complains about
59             # the injection of (??{...}) and (?{...})
60 93         631 require re;
61 93         1925 re->import('eval');
62              
63             # Sanctify the standard Regexp::Grammars pseudo-variables from
64             # Perl 5.18's early enforcement of strictures...
65 93         275 my $caller = caller;
66 93         2186 warnings->unimport('once');
67 93         451 @_ = ( 'vars', '$CAPTURE', '$CONTEXT', '$DEBUG', '$INDEX', '$MATCH', '%ARG', '%MATCH' );
68 93         9819 goto &vars::import;
69             }
70             }
71              
72             # Deactivate module's regex effect when it is "anti-imported" with 'no'...
73             sub unimport {
74             # Signal lexical (non-)scoping...
75 38     38   5128 $^H{'Regexp::Grammars::active'} = 0;
76 38         244 require re;
77 38         14147 re->unimport('eval');
78             }
79              
80             # Encapsulate the hoopy user-defined pragma interface...
81             sub _module_is_active {
82 275     275   3583 return (caller 1)[10]->{'Regexp::Grammars::active'};
83             }
84              
85             my $RULE_HANDLER;
86 743     743 0 22616 sub clear_rule_handler { undef $RULE_HANDLER; }
87              
88             sub Regexp::with_actions {
89 21     21   14869 my ($self, $handler) = @_;
90 21         40 $RULE_HANDLER = $handler;
91 21         462 return $self;
92             }
93              
94             #=====[ COMPILE-TIME INTERIM REPRESENTATION OF GRAMMARS ]===================
95             {
96             package Regexp::Grammars::Precursor;
97              
98             # Only translate precursors once...
99             state %grammar_cache;
100              
101             use overload (
102             # Concatenation/interpolation just concatenates to the precursor...
103             q{.} => sub {
104 92     92   249 my ($x, $y, $reversed) = @_;
105 92 50       198 if (ref $x) { $x = ${$x} }
  92         131  
  92         175  
106 92 100       198 if (ref $y) { $y = ${$y} }
  36         57  
  36         73  
107 92 100       189 if ($reversed) { ($y,$x) = ($x,$y); }
  20         65  
108 92   50     243 $x .= $y//q{};
109 92         256 return bless \$x, 'Regexp::Grammars::Precursor';
110             },
111              
112             # Using as a string (i.e. matching) preprocesses the precursor...
113             q{""} => sub {
114 137     137   412 my ($obj) = @_;
115             return $grammar_cache{ overload::StrVal($$obj) }
116 137   33     622 //= Regexp::Grammars::_build_grammar( ${$obj} );
  137         1551  
117             },
118              
119             # Everything else, as usual...
120 81         1035 fallback => 1,
121 81     81   44475 );
  81         215  
122             }
123              
124              
125             #=====[ SUPPORT FOR THE INTEGRATED DEBUGGER ]=========================
126              
127             # All messages go to STDERR by default...
128             *Regexp::Grammars::LOGFILE = *STDERR{IO};
129              
130             # Debugging levels indicate where to stop...
131             our %DEBUG_LEVEL = (
132             same => undef, # No change in debugging mode
133             off => 0, # No more debugging
134             run => 1, continue => 1, # Run to completion of regex match
135             match => 2, on => 2, # Run to next successful submatch
136             step => 3, try => 3, # Run to next reportable event
137             );
138              
139             # Debugging levels can be abbreviated to one character during interactions...
140             @DEBUG_LEVEL{ map {substr($_,0,1)} keys %DEBUG_LEVEL } = values %DEBUG_LEVEL;
141             $DEBUG_LEVEL{o} = $DEBUG_LEVEL{off}; # Not "on"
142             $DEBUG_LEVEL{s} = $DEBUG_LEVEL{step}; # Not "same"
143              
144             # Width of leading context field in debugging messages is constrained...
145             my $MAX_CONTEXT_WIDTH = 20;
146             my $MIN_CONTEXT_WIDTH = 6;
147              
148             sub set_context_width {
149 0         0 { package Regexp::Grammars::ContextRestorer;
150             sub new {
151 0     0   0 my ($class, $old_context_width) = @_;
152 0         0 bless \$old_context_width, $class;
153             }
154             sub DESTROY {
155 0     0   0 my ($old_context_width_ref) = @_;
156 0         0 $MAX_CONTEXT_WIDTH = ${$old_context_width_ref};
  0         0  
157             }
158             }
159              
160 0     0 0 0 my ($new_context_width) = @_;
  0         0  
161 0         0 my $old_context_width = $MAX_CONTEXT_WIDTH;
162 0         0 $MAX_CONTEXT_WIDTH = $new_context_width;
163 0 0       0 if (defined wantarray) {
164 0         0 return Regexp::Grammars::ContextRestorer->new($old_context_width);
165             }
166             }
167              
168             # Rewrite a string currently being matched, to make \n and \t visible
169             sub _show_metas {
170 0   0 0   0 my $context_str = shift // q{};
171              
172             # Quote newlines (\n -> \\n, without using a regex)...
173 0         0 my $index = index($context_str,"\n");
174 0         0 while ($index >= 0) {
175 0         0 substr($context_str, $index, 1, '\\n');
176 0         0 $index = index($context_str,"\n",$index+2);
177             }
178              
179             # Quote tabs (\t -> \\t, without using a regex)...
180 0         0 $index = index($context_str,"\t");
181 0         0 while ($index >= 0) {
182 0         0 substr($context_str, $index, 1, '\\t');
183 0         0 $index = index($context_str,"\t",$index+2);
184             }
185              
186 0         0 return $context_str;
187             }
188              
189             # Minimize whitespace in a string...
190             sub _squeeze_ws {
191 1792     1792   3076 my ($str) = @_;
192              
193 1792         3010 $str =~ tr/\n\t/ /;
194              
195 1792         3494 my $index = index($str,q{ });
196 1792         4010 while ($index >= 0) {
197 7089         9637 substr($str, $index, 2, q{ });
198 7089         12241 $index = index($str,q{ },$index);
199             }
200              
201 1792         4109 return $str;
202             }
203              
204             # Prepare for debugging...
205             sub _init_try_stack {
206 0     0   0 our (@try_stack, $last_try_pos, $last_context_str);
207              
208             # Start with a representation of the entire grammar match...
209 0         0 @try_stack = ({
210             subrule => '',
211             height => 0,
212             errmsg => ' \\FAIL ',
213             });
214              
215             # Initialize tracking of location and context...
216 0         0 $last_try_pos = -1;
217 0         0 $last_context_str = q{};
218              
219             # Report...
220 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context('=>')
  0         0  
221             . 'Trying from position ' . pos();
222             }
223              
224             # Create a "context string" showing where the regex is currently matching...
225             sub _debug_context {
226 0     0   0 my ($fill_chars) = @_;
227              
228             # Determine minimal sufficient width for context field...
229 0   0     0 my $field_width = length(_show_metas($_//q{}));
230 0 0       0 if ($field_width > $MAX_CONTEXT_WIDTH) {
    0          
231 0         0 $field_width = $MAX_CONTEXT_WIDTH;
232             }
233             elsif ($field_width < $MIN_CONTEXT_WIDTH) {
234 0         0 $field_width = $MIN_CONTEXT_WIDTH;
235             }
236              
237             # Get current matching position (and some additional trailing context)...
238 0   0     0 my $context_str
      0        
239             = substr(_show_metas(substr(($_//q{}).q{},pos()//0,$field_width)),0,$field_width);
240              
241             # Build the context string, handling special cases...
242 0   0     0 our $last_context_str //= q{};
243 0 0       0 if ($fill_chars) {
244             # If caller supplied a 1- or 2-char fill sequence, use that instead...
245 0 0       0 my $last_fill_char = length($fill_chars) > 1
246             ? substr($fill_chars,-1,1,q{})
247             : $fill_chars
248             ;
249 0         0 $context_str = $fill_chars x ($field_width-1) . $last_fill_char;
250             }
251             else {
252             # Make end-of-string visible in empty context string...
253 0 0       0 if ($context_str eq q{}) {
254 0         0 $context_str = '[eos]';
255             }
256              
257             # Don't repeat consecutive identical context strings...
258 0 0       0 if ($context_str eq $last_context_str) {
259 0         0 $context_str = q{ } x $field_width;
260             }
261             else {
262             # If not repeating, remember for next time...
263 0         0 $last_context_str = $context_str;
264             }
265             }
266              
267             # Left justify and return context string...
268 0         0 return sprintf("%-*s ",$field_width,$context_str);
269             }
270              
271             # Show a debugging message (mainly used for compile-time errors and info)...
272             sub _debug_notify {
273             # Single arg is a line to be printed with a null severity...
274 2 50   2   11 my ($severity, @lines) = @_==1 ? (q{},@_) : @_;
275 2         8 chomp @lines;
276              
277             # Formatting string for all lines...
278 2         4 my $format = qq{%*s | %s\n};
279              
280             # Track previous severity and avoid repeating the same level...
281 2         4 state $prev_severity = q{};
282 2 50 33     17 if ($severity !~ /\S/) {
    50          
283             # Do nothing
284             }
285             elsif ($severity eq 'info' && $prev_severity eq 'info' ) {
286 0         0 $severity = q{};
287             }
288             else {
289 2         5 $prev_severity = $severity;
290             }
291              
292             # Display first line with severity indicator (unless same as previous)...
293 2         4 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, $severity, shift @lines;
  2         134  
294              
295             # Display first line without severity indicator
296 2         15 for my $next_line (@lines) {
297 8         17 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, q{}, $next_line;
  8         94  
298             }
299             }
300              
301             # Handle user interactions during runtime debugging...
302             sub _debug_interact {
303 0     0   0 my ($stack_height, $leader, $curr_frame_ref, $min_debug_level) = @_;
304              
305 0         0 our $DEBUG; # ...stores current debug level within regex
306              
307             # Only interact with terminals, and if debug level is appropriate...
308 0 0 0     0 if (-t *Regexp::Grammars::LOGFILE
      0        
      0        
309             && defined $DEBUG
310             && ($DEBUG_LEVEL{$DEBUG}//0) >= $DEBUG_LEVEL{$min_debug_level}
311             ) {
312 0         0 local $/ = "\n"; # ...in case some caller is being clever
313             INPUT:
314 0         0 while (1) {
315 0   0     0 my $cmd = readline // q{};
316 0         0 chomp $cmd;
317              
318             # Input of 'd' means 'display current result frame'...
319 0 0       0 if ($cmd eq 'd') {
320 0         0 print {*Regexp::Grammars::LOGFILE} join "\n",
321 0 0       0 map { $leader . ($stack_height?'| ':q{})
  0         0  
322             . ' : ' . $_
323             }
324             split "\n", q{ }x8 . substr(Dumper($curr_frame_ref),8);
325 0         0 print "\t";
326             }
327             # Any other (valid) input changes debugging level and continues...
328             else {
329 0 0       0 if (defined $DEBUG_LEVEL{$cmd}) { $DEBUG = $cmd; }
  0         0  
330 0         0 last INPUT;
331             }
332             }
333             }
334             # When interaction not indicated, just complete the debugging line...
335             else {
336 0         0 print {*Regexp::Grammars::LOGFILE} "\n";
  0         0  
337             }
338             }
339              
340             # Handle reporting of unsuccessful match attempts...
341             sub _debug_handle_failures {
342 0     0   0 my ($stack_height, $subrule, $in_match) = @_;
343 0         0 our @try_stack;
344              
345             # Unsuccessful match attempts leave "leftovers" on the attempt stack...
346             CLEANUP:
347 0   0     0 while (@try_stack && $try_stack[-1]{height} >= $stack_height) {
348             # Grab record of (potentially) unsuccessful attempt...
349 0         0 my $error_ref = pop @try_stack;
350              
351             # If attempt was the one whose match is being reported, go and report...
352             last CLEANUP if $in_match
353             && $error_ref->{height} == $stack_height
354 0 0 0     0 && $error_ref->{subrule} eq $subrule;
      0        
355              
356             # Otherwise, report the match failure...
357 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context(q{ }) . $error_ref->{errmsg};
  0         0  
358             }
359             }
360              
361             # Handle attempts to call non-existent subrules...
362             sub _debug_fatal {
363 0     0   0 my ($naughty_construct) = @_;
364              
365 0         0 print {*Regexp::Grammars::LOGFILE}
  0         0  
366             "_________________________________________________________________\n",
367             "Fatal error: Entire parse terminated prematurely while attempting\n",
368             " to call non-existent rule: $naughty_construct\n",
369             "_________________________________________________________________\n";
370 0         0 $@ = "Entire parse terminated prematurely while attempting to call non-existent rule: $naughty_construct";
371             }
372              
373             # Handle objrules that don't return hashes...
374             sub _debug_non_hash {
375 331     331   16170 my ($obj, $name) = @_;
376              
377             # If the object is okay, no further action required...
378 331 100       7985 return q{} if reftype($obj) eq 'HASH';
379              
380             # Generate error messages...
381 1         3 print {*Regexp::Grammars::LOGFILE}
  1         14  
382             "_________________________________________________________________\n",
383             "Fatal error: returned a non-hash-based object\n",
384             "_________________________________________________________________\n";
385 1         6 $@ = " returned a non-hash-based object";
386              
387 1         23 return '(*COMMIT)(*FAIL)';
388             }
389              
390              
391             # Print a message in context...
392             sub _debug_logmsg {
393 0     0   0 my ($stack_height, @msg) = @_;
394              
395             # Determine indent for messages...
396 0         0 my $leader = _debug_context() . q{| } x ($stack_height-1) . '|';
397              
398             # Report the attempt...
399 0         0 print {*Regexp::Grammars::LOGFILE} map { "$leader$_\n" } @msg;
  0         0  
  0         0  
400             }
401              
402             # Print a message indicating a (sub)match attempt...
403             sub _debug_trying {
404 0     0   0 my ($stack_height, $curr_frame_ref, $subrule) = @_;
405              
406             # Clean up after any preceding unsuccessful attempts...
407 0         0 _debug_handle_failures($stack_height, $subrule);
408              
409             # Determine indent for messages...
410 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
411              
412             # Detect and report any backtracking prior to this attempt...
413 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
414 0         0 my $backtrack_distance = $last_try_pos - pos();
415 0 0       0 if ($backtrack_distance > 0) {
416 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
417             . q{| } x ($stack_height-2)
418             . qq{|...Backtracking $backtrack_distance char}
419             . ($backtrack_distance > 1 ? q{s} : q{})
420             . q{ and trying new match}
421             ;
422             }
423              
424             # Report the attempt...
425 0         0 print {*Regexp::Grammars::LOGFILE} $leader, "|...Trying $subrule\t";
  0         0  
426              
427             # Handle user interactions during debugging...
428 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'step');
429              
430             # Record the attempt, for later error handling in _debug_matched()...
431 0 0       0 if ($subrule ne 'next alternative') {
432 0         0 our @try_stack;
433 0         0 push @try_stack, {
434             height => $stack_height,
435             subrule => $subrule,
436             # errmsg should align under: |...Trying $subrule\t
437             errmsg => q{| } x ($stack_height-2) . "| \\FAIL $subrule",
438             };
439             }
440 0         0 $last_try_pos = pos();
441             }
442              
443             # Print a message indicating a successful (sub)match...
444             sub _debug_matched {
445 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $matched_text) = @_;
446              
447             # Clean up any intervening unsuccessful attempts...
448 0         0 _debug_handle_failures($stack_height, $subrule, 'in match');
449              
450             # Build debugging message...
451 0         0 my $debug_context = _debug_context();
452 0         0 my $leader = $debug_context . q{| } x ($stack_height-2);
453 0 0       0 my $message = ($stack_height ? '| ' : q{})
454             . " \\_____$subrule matched ";
455 0 0       0 my $filler = $stack_height
456             ? '| ' . q{ } x (length($message)-4)
457             : q{ } x length($message);
458              
459 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
460              
461             # Report if match required backtracking...
462 0   0     0 my $backtrack_distance = $last_try_pos - (pos()//0);
463 0 0       0 if ($backtrack_distance > 0) {
464 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
465             . q{| } x ($stack_height-2)
466             . qq{|...Backtracking $backtrack_distance char}
467             . ($backtrack_distance > 1 ? q{s} : q{})
468             . qq{ and rematching $subrule}
469             ;
470             }
471 0         0 $last_try_pos = pos();
472              
473             # Format match text (splitting multi-line texts and indent them correctly)...
474 0 0       0 $matched_text = defined($matched_text)
475             ? $matched_text = q{'} . join("\n$leader$filler", split "\n", $matched_text) . q{'}
476             : q{};
477              
478             # Print match message...
479 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $matched_text . qq{\t};
  0         0  
480              
481             # Check for user interaction...
482 0 0       0 _debug_interact($stack_height, $leader, $curr_frame_ref, $stack_height ? 'match' : 'run');
483             }
484              
485             # Print a message indicating a successful (sub)match...
486             sub _debug_require {
487 0     0   0 my ($stack_height, $condition, $succeeded) = @_;
488              
489             # Build debugging message...
490 0         0 my $debug_context = _debug_context();
491 0         0 my $leader = $debug_context . q{| } x ($stack_height-1);
492 0 0       0 my $message1 = ($stack_height ? '|...' : q{})
493             . "Testing condition: $condition"
494             ;
495 0 0       0 my $message2 = ($stack_height ? '| ' : q{})
    0          
496             . " \\_____"
497             . ($succeeded ? 'Satisfied' : 'FAILED')
498             ;
499              
500             # Report if match required backtracking...
501 0         0 our $last_try_pos;
502 0         0 my $backtrack_distance = $last_try_pos - pos();
503 0 0       0 if ($backtrack_distance > 0) {
504 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
505             . q{| } x ($stack_height-1)
506             . qq{|...Backtracking $backtrack_distance char}
507             . ($backtrack_distance > 1 ? q{s} : q{})
508             . qq{ and rematching}
509             ;
510             }
511              
512             # Remember where the condition was tried...
513 0         0 $last_try_pos = pos();
514              
515             # Print match message...
516 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message1;
  0         0  
517 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message2;
  0         0  
518             }
519              
520             # Print a message indicating a successful store-result-of-code-block...
521             sub _debug_executed {
522 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $value) = @_;
523              
524             # Build message...
525 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
526 0         0 my $message = "|...Action $subrule\n";
527 0         0 my $message2 = "| saved value: '";
528 0         0 $message .= $leader . $message2;
529 0         0 my $filler = q{ } x length($message2);
530              
531             # Split multiline results over multiple lines (properly indented)...
532 0         0 $value = join "\n$leader$filler", split "\n", $value;
533              
534             # Report the action...
535 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $value . qq{'\t};
  0         0  
536              
537             # Check for user interaction...
538 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'match');
539             }
540              
541             # Create the code to be inserted into the regex to facilitate debugging...
542             sub _build_debugging_statements {
543 414     414   989 my ($debugging_active, $subrule, $extra_pre_indent) = @_;
544              
545 414 100       1684 return (q{}, q{}) if ! $debugging_active;;
546              
547 1   50     5 $extra_pre_indent //= 0;
548              
549 1         7 $subrule = "q{$subrule}";
550              
551             return (
552 1         11 qq{Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent, \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], $subrule)
553             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};},
554             qq{Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1, \$Regexp::Grammars::RESULT_STACK[-1], $subrule, \$^N)
555             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};},
556             );
557             }
558              
559             sub _build_raw_debugging_statements {
560 1790     1790   3427 my ($debugging_active, $subpattern, $extra_pre_indent) = @_;
561              
562 1790 50       5117 return (q{}, q{}) if ! $debugging_active;
563              
564 0   0     0 $extra_pre_indent //= 0;
565              
566 0 0       0 if ($subpattern eq '|') {
567             return (
568 0         0 q{},
569             qq{(?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
570             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], 'next alternative')
571             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
572             );
573             }
574             else {
575             return (
576 0         0 qq{(?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
577             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], q{subpattern /$subpattern/}, \$^N)
578             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
579             qq{(?{;Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1,
580             \$Regexp::Grammars::RESULT_STACK[-1], q{subpattern /$subpattern/}, \$^N)
581             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
582             );
583             }
584             }
585              
586              
587             #=====[ SUPPORT FOR AUTOMATIC TIMEOUTS ]=========================
588              
589             sub _test_timeout {
590 0     0   0 our ($DEBUG, $TIMEOUT);
591              
592 0 0       0 return q{} if time() < $TIMEOUT->{'limit'};
593              
594             my $duration = "$TIMEOUT->{duration} second"
595 0 0       0 . ( $TIMEOUT->{duration} == 1 ? q{} : q{s} );
596              
597 0 0 0     0 if (defined($DEBUG) && $DEBUG ne 'off') {
598 0         0 my $leader = _debug_context(q{ });
599 0         0 say {*LOGFILE} $leader . '|';
  0         0  
600 0         0 say {*LOGFILE} $leader . "|...Invoking {duration}>";
  0         0  
601 0         0 say {*LOGFILE} $leader . "| \\_____No match after $duration";
  0         0  
602 0         0 say {*LOGFILE} $leader . '|';
  0         0  
603 0         0 say {*LOGFILE} $leader . " \\FAIL ";
  0         0  
604             }
605              
606 0 0       0 if (! @!) {
607 0         0 @! = "Internal error: Timed out after $duration (as requested)";
608             }
609 0         0 return q{(*COMMIT)(*FAIL)};
610             }
611              
612              
613             #=====[ SUPPORT FOR UPDATING THE RESULT STACK ]=========================
614              
615             # Create a clone of the current result frame with an new key/value...
616             sub _extend_current_result_frame_with_scalar {
617 2008     2008   26352 my ($stack_ref, $key, $value) = @_;
618              
619             # Autovivify null stacks (only occur when grammar invokes no subrules)...
620 2008 50       3054 if (!@{$stack_ref}) {
  2008         4378  
621 0         0 $stack_ref = [{}];
622             }
623              
624             # Copy existing frame, appending new value so it overwrites any old value...
625             my $cloned_result_frame = {
626 2008         2744 %{$stack_ref->[-1]},
  2008         6423  
627             $key => $value,
628             };
629              
630             # Make the copy into an object, if the original was one...
631 2008 50       5572 if (my $class = blessed($stack_ref->[-1])) {
632 0         0 bless $cloned_result_frame, $class;
633             }
634              
635 2008         22565 return $cloned_result_frame;
636             }
637              
638             # Create a clone of the current result frame with an additional key/value
639             # (As above, but preserving the "listiness" of the key being added to)...
640             sub _extend_current_result_frame_with_list {
641 131     131   1917 my ($stack_ref, $key, $value) = @_;
642              
643             # Copy existing frame, appending new value to appropriate element's list...
644             my $cloned_result_frame = {
645 131         389 %{$stack_ref->[-1]},
646             $key => [
647 131   100     223 @{$stack_ref->[-1]{$key}//[]},
  131         757  
648             $value,
649             ],
650             };
651              
652             # Make the copy into an object, if the original was one...
653 131 50       502 if (my $class = blessed($stack_ref->[-1])) {
654 0         0 bless $cloned_result_frame, $class;
655             }
656              
657 131         2117 return $cloned_result_frame;
658             }
659              
660             # Pop current result frame and add it to a clone of previous result frame
661             # (flattening it if possible, and preserving any blessing)...
662             sub _pop_current_result_frame {
663 819     819   17173 my ($stack_ref, $key, $original_name, $value) = @_;
664              
665             # Where are we in the stack?
666 819         1670 my $curr_frame = $stack_ref->[-1];
667 819         1251 my $caller_frame = $stack_ref->[-2];
668              
669             # Track which frames are objects...
670 819         1934 my $is_blessed_curr = blessed($curr_frame);
671 819         1526 my $is_blessed_caller = blessed($caller_frame);
672              
673             # Remove "private" captures (i.e. those starting with _)...
674 819         1220 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  819         1508  
  1582         3958  
  819         2392  
675              
676             # Remove "nocontext" marker...
677 819         1629 my $nocontext = delete $curr_frame->{'~'};
678              
679             # Build a clone of the current frame...
680             my $cloned_result_frame
681             = exists $curr_frame->{'='} ? $curr_frame->{'='}
682 544         1692 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
683 819 50 100     2688 : keys %{$curr_frame} ? $curr_frame->{q{}}
  102 100       312  
    100          
684             : $value
685             ;
686              
687             # Apply any appropriate handler...
688 819 100       1987 if ($RULE_HANDLER) {
689 88 100 66     565 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
690 22         66 my $replacement_result_frame
691             = $RULE_HANDLER->$original_name($cloned_result_frame);
692 22 50       1153 if (defined $replacement_result_frame) {
693 22         42 $cloned_result_frame = $replacement_result_frame;
694             }
695             }
696             }
697              
698             # Remove capture if not requested...
699 819 100 100     2268 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  110   100     377  
700 28         54 delete $cloned_result_frame->{q{}};
701             }
702              
703             # Nest a clone of current frame inside a clone of the caller frame...
704             my $cloned_caller_frame = {
705 819   50     1195 %{$caller_frame//{}},
  819         3155  
706             $key => $cloned_result_frame,
707             };
708              
709             # Make the copies into objects, if the originals were...
710 819 100 66     2434 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
711 212         429 bless $cloned_caller_frame->{$key}, $is_blessed_curr;
712             }
713 819 50       1754 if ($is_blessed_caller) {
714 0         0 bless $cloned_caller_frame, $is_blessed_caller;
715             }
716              
717 819         15394 return $cloned_caller_frame;
718             }
719              
720             # Pop current result frame and add it to a clone of previous result frame
721             # (flattening it if possible, and preserving any blessing)
722             # (As above, but preserving listiness of key being added to)...
723             sub _pop_current_result_frame_with_list {
724 1396     1396   37886 my ($stack_ref, $key, $original_name, $value) = @_;
725              
726             # Where are we in the stack?
727 1396         2991 my $curr_frame = $stack_ref->[-1];
728 1396         2021 my $caller_frame = $stack_ref->[-2];
729              
730             # Track which frames are objects...
731 1396         2931 my $is_blessed_curr = blessed($curr_frame);
732 1396         2321 my $is_blessed_caller = blessed($caller_frame);
733              
734             # Remove "private" captures (i.e. those starting with _)...
735 1396         1998 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  1396         2311  
  1954         5171  
  1396         3946  
736              
737             # Remove "nocontext" marker...
738 1396         2628 my $nocontext = delete $curr_frame->{'~'};
739              
740             # Clone the current frame...
741             my $cloned_result_frame
742             = exists $curr_frame->{'='} ? $curr_frame->{'='}
743 231         690 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
744 1396 50 100     4124 : keys %{$curr_frame} ? $curr_frame->{q{}}
  947 100       2098  
    100          
745             : $value
746             ;
747              
748             # Apply any appropriate handler...
749 1396 100       3003 if ($RULE_HANDLER) {
750 174 100 66     785 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
751 106         272 my $replacement_result_frame
752             = $RULE_HANDLER->$original_name($cloned_result_frame);
753 106 50       2499 if (defined $replacement_result_frame) {
754 106         200 $cloned_result_frame = $replacement_result_frame;
755             }
756             }
757             }
758              
759             # Remove capture if not requested...
760 1396 100 100     3149 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  86   100     275  
761 82         153 delete $cloned_result_frame->{q{}};
762             }
763              
764             # Append a clone of current frame inside a clone of the caller frame...
765             my $cloned_caller_frame = {
766 1396         3341 %{$caller_frame},
767             $key => [
768 1396   100     1870 @{$caller_frame->{$key}//[]},
  1396         7053  
769             $cloned_result_frame,
770             ],
771             };
772              
773             # Make the copies into objects, if the originals were...
774 1396 100 66     3798 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
775 118         230 bless $cloned_caller_frame->{$key}[-1], $is_blessed_curr;
776             }
777 1396 50       2556 if ($is_blessed_caller) {
778 0         0 bless $cloned_caller_frame, $is_blessed_caller;
779             }
780              
781 1396         28464 return $cloned_caller_frame;
782             }
783              
784              
785             #=====[ MISCELLANEOUS CONSTANTS ]=========================
786              
787             # Namespace in which grammar inheritance occurs...
788             my $CACHE = 'Regexp::Grammars::_CACHE_::';
789             my $CACHE_LEN = length $CACHE;
790             my %CACHE; #...for subrule tracking
791              
792             # This code inserted at the start of every grammar regex
793             # (initializes the result stack cleanly and backtrackably, via local)...
794             my $PROLOGUE = q{((?{; @! = () if !pos;
795             local @Regexp::Grammars::RESULT_STACK
796             = (@Regexp::Grammars::RESULT_STACK, {});
797             local $Regexp::Grammars::TIMEOUT = { limit => -1>>1 };
798             local $Regexp::Grammars::DEBUG = 'off' }) };
799              
800             # This code inserted at the end of every grammar regex
801             # (puts final result in %/. Also defines default , , etc.)...
802             my $EPILOGUE = q{)(?{; $Regexp::Grammars::RESULT_STACK[-1]{q{}} //= $^N;;
803             local $Regexp::Grammars::match_frame = pop @Regexp::Grammars::RESULT_STACK;
804             delete @{$Regexp::Grammars::match_frame}{
805             '~', grep {substr($_,0,1) eq '_'} keys %{$Regexp::Grammars::match_frame}
806             };
807             if (exists $Regexp::Grammars::match_frame->{'='}) {
808             if (ref($Regexp::Grammars::match_frame->{'='}) eq 'HASH') {
809             $Regexp::Grammars::match_frame
810             = $Regexp::Grammars::match_frame->{'='};
811             }
812             }
813             if (@Regexp::Grammars::RESULT_STACK) {
814             $Regexp::Grammars::RESULT_STACK[-1]{'(?R)'}
815             = $Regexp::Grammars::match_frame;
816             }
817             Regexp::Grammars::clear_rule_handler();
818             */ = $Regexp::Grammars::match_frame;
819             })|\Z(?{Regexp::Grammars::clear_rule_handler();})(?!)(?(DEFINE)
820             (? \\s* )
821             (?
822             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
823             \\s*
824             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
825             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
826             })
827             )
828             (? \\S+ )
829             (?
830             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
831             \\S+
832             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
833             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
834             })
835             )
836             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = pos; }) )
837             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = 1 + substr($_,0,pos) =~ tr/\n/\n/; }) )
838             )
839             };
840             my $EPILOGUE_NC = $EPILOGUE;
841             $EPILOGUE_NC =~ s{ ; .* ;;}{;}xms;
842              
843              
844             #=====[ MISCELLANEOUS PATTERNS THAT MATCH USEFUL THINGS ]========
845              
846             # Match an identifier...
847             my $IDENT = qr{ [^\W\d] \w*+ }xms;
848             my $QUALIDENT = qr{ (?: $IDENT :: )*+ $IDENT }xms;
849              
850             # Match balanced parentheses, taking into account \-escapes and []-escapes...
851             my $PARENS = qr{
852             (?&VAR_PARENS)
853             (?(DEFINE)
854             (? \( (?: \\. | (?&VAR_PARENS) | (?&CHARSET) | [^][()\\]++)*+ \) )
855             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]])*+ \] )
856              
857             )
858             }xms;
859              
860             # Match a directive within rules...
861             my $WS_PATTERN = qr{]++ | $PARENS )*+) >}xms;
862              
863              
864             #=====[ UTILITY SUBS FOR ERROR AND WARNING MESSAGES ]========
865              
866             sub _uniq {
867 53     53   210 my %seen;
868 53 50       128 return grep { defined $_ && !$seen{$_}++ } @_;
  86         1445  
869             }
870              
871             # Default translator for error messages...
872             my $ERRORMSG_TRANSLATOR = sub {
873             my ($errormsg, $rulename, $context) = @_;
874              
875             $rulename = 'valid input' if $rulename eq q{};
876             $context //= '';
877              
878             # Unimplemented subrule when rulename starts with '-'...
879             if (substr($rulename,0,1) eq '-') {
880             $rulename = substr($rulename,1);
881             return "Can't match subrule <$rulename> (not implemented)";
882             }
883              
884             # Empty message converts to a "Expected...but found..." message...
885             if ($errormsg eq q{}) {
886             $rulename =~ tr/_/ /;
887             $rulename = lc($rulename);
888             return "Expected $rulename, but found '$context' instead";
889             }
890              
891             # "Expecting..." messages get "but found" added...
892             if (lc(substr($errormsg,0,6)) eq 'expect') {
893             return "$errormsg, but found '$context' instead";
894             }
895              
896             # Everything else stays "as is"...
897             return $errormsg;
898             };
899              
900             # Allow user to set translation...
901             sub set_error_translator {
902 0         0 { package Regexp::Grammars::TranslatorRestorer;
903             sub new {
904 1     1   4 my ($class, $old_translator) = @_;
905 1         4 bless \$old_translator, $class;
906             }
907             sub DESTROY {
908 1     1   593 my ($old_translator_ref) = @_;
909 1         3 $ERRORMSG_TRANSLATOR = ${$old_translator_ref};
  1         8  
910             }
911             }
912              
913 1     1 0 99 my ($translator_ref) = @_;
  1         4  
914 1 50       6 die "Usage: set_error_translator(\$subroutine_reference)\n"
915             if ref($translator_ref) ne 'CODE';
916              
917 1         3 my $old_translator_ref = $ERRORMSG_TRANSLATOR;
918 1         2 $ERRORMSG_TRANSLATOR = $translator_ref;
919              
920             return defined wantarray
921 1 50       10 ? Regexp::Grammars::TranslatorRestorer->new($old_translator_ref)
922             : ();
923             }
924              
925             # Dispatch to current translator for error messages...
926             sub _translate_errormsg {
927 53     53   2361 goto &{$ERRORMSG_TRANSLATOR};
  53         170  
928             }
929              
930             #=====[ SUPPORT FOR TRANSLATING GRAMMAR-ENHANCED REGEX TO NATIVE REGEX ]====
931              
932             # Store any specified grammars...
933             my %user_defined_grammar;
934              
935             my %REPETITION_DESCRIPTION_FOR = (
936             '+' => 'once or more',
937             '*' => 'any number of times',
938             '?' => 'if possible',
939             '+?' => 'as few times as possible',
940             '*?' => 'as few times as possible',
941             '??' => 'if necessary',
942             '++' => 'as many times as possible',
943             '*+' => 'as many times as possible',
944             '?+' => 'if possible',
945             );
946              
947             sub _translate_raw_regex {
948 1792     1792   4489 my ($regex, $debug_build, $debug_runtime) = @_;
949              
950 1792   66     6933 my $is_comment = substr($regex, 0, 1) eq q{#}
951             || substr($regex, 0, 3) eq q{(?#};
952 1792         3595 my $visible_regex = _squeeze_ws($regex);
953              
954             # Report how regex was interpreted, if requested to...
955 1792 0 33     4258 if ($debug_build && $visible_regex ne q{} && $visible_regex ne q{ }) {
      33        
956 0 0       0 _debug_notify( info =>
957             " |",
958             " |...Treating '$visible_regex' as:",
959             ($is_comment ? " | \\ a comment (which will be ignored)"
960             : " | \\ normal Perl regex syntax"
961             ),
962             );
963             }
964              
965 1792 100       3215 return q{} if $is_comment;
966              
967             # Generate run-time debugging code (if any)...
968 1790         3390 my ($debug_pre, $debug_post)
969             = _build_raw_debugging_statements($debug_runtime,$visible_regex, +1);
970              
971             # Replace negative lookahead with one that works under R::G...
972 1790         3529 $regex =~ s{\(\?!}{(?!(?!)|}gxms;
973             # ToDo: Also replace positive lookahead with one that works under R::G...
974             # This replacement should be of the form:
975             # $regex =~ s{\(\?!}{(?!(?!)|(?!(?!)|}gxms;
976             # but need to find a way to insert the extra ) at the other end
977              
978 1790 50 33     7751 return $debug_runtime && $regex eq '|' ? $regex . $debug_post
    50 33        
979             : $debug_runtime && $regex =~ /\S/ ? "(?#)(?:$debug_pre($regex)$debug_post(?#))"
980             : $regex;
981             }
982              
983             # Report and convert a debugging directive...
984             sub _translate_debug_directive {
985 0     0   0 my ($construct, $cmd, $debug_build) = @_;
986              
987             # Report how directive was interpreted, if requested to...
988 0 0       0 if ($debug_build) {
989 0         0 _debug_notify( info =>
990             " |",
991             " |...Treating $construct as:",
992             " | \\ Change run-time debugging mode to '$cmd'",
993             );
994             }
995              
996 0         0 return qq{(?{; local \$Regexp::Grammars::DEBUG = q{$cmd}; }) };
997             }
998              
999             # Report and convert a timeout directive...
1000             sub _translate_timeout_directive {
1001 0     0   0 my ($construct, $timeout, $debug_build) = @_;
1002              
1003             # Report how directive was interpreted, if requested to...
1004 0 0       0 if ($debug_build) {
1005 0 0       0 _debug_notify( info =>
    0          
1006             " |",
1007             " |...Treating $construct as:",
1008             ($timeout > 0
1009             ? " | \\ Cause the entire parse to fail after $timeout second" . ($timeout==1 ? q{} : q{s})
1010             : " | \\ Cause the entire parse to fail immediately"
1011             ),
1012             );
1013             }
1014              
1015 0 0       0 return $timeout > 0
1016             ? qq{(?{; local \$Regexp::Grammars::TIMEOUT = { duration => $timeout, limit => time() + $timeout }; }) }
1017             : qq{(*COMMIT)(*FAIL)};
1018             }
1019              
1020             # Report and convert a directive...
1021             sub _translate_require_directive {
1022 0     0   0 my ($construct, $condition, $debug_build) = @_;
1023              
1024 0         0 $condition = substr($condition, 3, -2);
1025              
1026             # Report how directive was interpreted, if requested to...
1027 0 0       0 if ($debug_build) {
1028 0         0 _debug_notify( info =>
1029             " |",
1030             " |...Treating $construct as:",
1031             " | \\ Require that {$condition} is true",
1032             );
1033             }
1034              
1035 0         0 my $quoted_condition = $condition;
1036 0         0 $quoted_condition =~ s{\$}{}xms;
1037              
1038 0         0 return qq{(?(?{;$condition})
1039             (?{;Regexp::Grammars::_debug_require(
1040             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 1)
1041             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})
1042             | (?{;Regexp::Grammars::_debug_require(
1043             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 0)
1044             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})(?!))
1045             };
1046             }
1047              
1048              
1049             # Report and convert a directive...
1050             sub _translate_minimize_directive {
1051 3     3   12 my ($construct, $debug_build) = @_;
1052              
1053             # Report how directive was interpreted, if requested to...
1054 3 50       9 if ($debug_build) {
1055 0         0 _debug_notify( info =>
1056             " |",
1057             " |...Treating $construct as:",
1058             " | \\ Minimize result value if possible",
1059             );
1060             }
1061              
1062 3         10 return q{(?{;
1063             if (1 == grep { $_ ne '!' && $_ ne '@' && $_ ne '~' } keys %MATCH) { # ...single alnum key
1064             local %Regexp::Grammars::matches = %MATCH;
1065             delete @Regexp::Grammars::matches{'!', '@', '~'};
1066             local ($Regexp::Grammars::only_key) = keys %Regexp::Grammars::matches;
1067             local $Regexp::Grammars::array_ref = $MATCH{$Regexp::Grammars::only_key};
1068             if (ref($Regexp::Grammars::array_ref) eq 'ARRAY' && 1 == @{$Regexp::Grammars::array_ref}) {
1069             $MATCH = $Regexp::Grammars::array_ref->[0];
1070             }
1071             }
1072             })};
1073             }
1074              
1075             # Report and convert a debugging directive...
1076             sub _translate_error_directive {
1077 19     19   88 my ($construct, $type, $msg, $debug_build, $subrule_name) = @_;
1078 19   50     50 $subrule_name //= 'undef';
1079              
1080             # Determine severity...
1081 19 100       46 my $severity = ($type eq 'error') ? 'fail' : 'non-fail';
1082              
1083             # Determine fatality (and build code to invoke it)...
1084 19 100       37 my $fatality = ($type eq 'fatal') ? '(*COMMIT)(*FAIL)' : q{};
1085              
1086             # Unpack message...
1087 19 100       44 if (substr($msg,0,3) eq '(?{') {
1088 4         13 $msg = 'do'. substr($msg,2,-1);
1089             }
1090             else {
1091 15         33 $msg = quotemeta $msg;
1092 15         32 $msg = qq{qq{$msg}};
1093             }
1094              
1095             # Report how directive was interpreted, if requested to...
1096 19 50       44 if ($debug_build) {
1097 0 0       0 _debug_notify( info => " |",
1098             " |...Treating $construct as:",
1099             ( $type eq 'log' ? " | \\ Log a message to the logfile"
1100             : " | \\ Append a $severity error message to \@!"
1101             ),
1102             );
1103             }
1104              
1105             # Generate the regex...
1106 19 100       135 return $type eq 'log'
    50          
1107             ? qq{(?{Regexp::Grammars::_debug_logmsg(scalar \@Regexp::Grammars::RESULT_STACK,$msg)
1108             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}
1109             })}
1110              
1111             : qq{(?:(?{;local \$Regexp::Grammar::_memopos=pos();})
1112             (?>\\s*+((?-s).{0,$MAX_CONTEXT_WIDTH}+))
1113             (?{; pos() = \$Regexp::Grammar::_memopos;
1114             @! = Regexp::Grammars::_uniq(
1115             @!,
1116             Regexp::Grammars::_translate_errormsg($msg,q{$subrule_name},\$CONTEXT)
1117             ) }) (?!)|}
1118             . ($severity eq 'fail' ? q{(?!)} : $fatality)
1119             . q{)}
1120             ;
1121             }
1122              
1123             sub _translate_subpattern {
1124 108     108   537 my ($construct, $alias, $subpattern, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout, $backref)
1125             = @_;
1126              
1127             # Determine save behaviour...
1128 108         263 my $is_noncapturing = $savemode eq 'noncapturing';
1129 108         182 my $is_listifying = $savemode eq 'list';
1130 108         210 my $is_codeblock = substr($subpattern,0,3) eq '(?{';
1131 108 100       286 my $value_saved = $is_codeblock ? '$^R' : '$^N';
1132 108 100       213 my $do_something_with = $is_codeblock ? 'execute the code block' : 'match the pattern';
1133 108 100       210 my $result = $is_codeblock ? 'result' : 'matched substring';
1134 108 100       294 my $description = $is_codeblock ? substr($subpattern,2,-1)
    100          
1135             : defined $backref ? $backref
1136             : $subpattern;
1137 108 100       299 my $debug_construct
1138             = $is_codeblock ? '<' . substr($alias,1,-1) . '= (?{;' . substr($subpattern,3,-2) . '})>'
1139             : $construct
1140             ;
1141              
1142             # Report how construct was interpreted, if requested to...
1143 108   50     428 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1144 108 50 66     645 my $results = $is_listifying && $postmodifier ? "each $result"
    50 33        
    50          
1145             : substr($postmodifier,0,1) eq '?' ? "any $result"
1146             : $postmodifier && !$is_noncapturing ? "only the final $result"
1147             : "the $result"
1148             ;
1149 108 50       266 if ($debug_build) {
1150 0 0       0 _debug_notify( info =>
    0          
1151             " |",
1152             " |...Treating $construct as:",
1153             " | | $do_something_with $description $repeatedly",
1154             ( $is_noncapturing ? " | \\ but don't save $results"
1155             : $is_listifying ? " | \\ appending $results to \@{\$MATCH{$alias}}"
1156             : " | \\ saving $results in \$MATCH{$alias}"
1157             )
1158             );
1159             }
1160              
1161             # Generate run-time debugging code (if any)...
1162 108         247 my ($debug_pre, $debug_post)
1163             = _build_debugging_statements($debug_runtime,$debug_construct, +1);
1164              
1165             # Generate post-match result-capturing code, if match captures...
1166 108 100       392 my $post_action = $is_noncapturing
1167             ? q{}
1168             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1169             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1170             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1171             \\\@Regexp::Grammars::RESULT_STACK, $alias, $value_saved
1172             ),
1173             );}
1174             ;
1175              
1176             # Generate timeout test...
1177 108 50       264 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1178              
1179             # Translate to standard regex code...
1180 108         540 return qq{$timeout_test(?{;local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;$debug_pre})(?:($subpattern)(?{;$post_action$debug_post}))$postmodifier};
1181             }
1182              
1183              
1184             sub _translate_hashmatch {
1185 14     14   103 my ($construct, $alias, $hashname, $keypat, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout)
1186             = @_;
1187              
1188             # Empty or missing keypattern defaults to <.hk>...
1189 14 100 66     76 if (!defined $keypat || $keypat !~ /\S/) {
1190 8         14 $keypat = '(?&hk__implicit__)'
1191             }
1192             else {
1193 6         15 $keypat = substr($keypat, 1, -1);
1194             }
1195              
1196             # Determine save behaviour...
1197 14         25 my $is_noncapturing = $savemode eq 'noncapturing';
1198 14         23 my $is_listifying = $savemode eq 'list';
1199              
1200             # Convert hash to hash lookup...
1201 14         32 my $hash_lookup = '$' . substr($hashname, 1). '{$^N}';
1202              
1203             # Report how construct was interpreted, if requested to...
1204 14   100     51 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1205 14 50 66     86 my $results = $is_listifying && $postmodifier ? 'each matched key'
    50 66        
    100          
1206             : substr($postmodifier,0,1) eq '?' ? 'any matched key'
1207             : $postmodifier && !$is_noncapturing ? 'only the final matched key'
1208             : 'the matched key'
1209             ;
1210 14 50       31 if ($debug_build) {
1211 0 0       0 _debug_notify( info =>
    0          
1212             " |",
1213             " |...Treating $construct as:",
1214             " | | match a key from the hash $hashname $repeatedly",
1215             ( $is_noncapturing ? " | \\ but don't save $results"
1216             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1217             : " | \\ saving $results in \$MATCH{$alias}"
1218             )
1219             );
1220             }
1221              
1222             # Generate run-time debugging code (if any)...
1223 14         32 my ($debug_pre, $debug_post)
1224             = _build_debugging_statements($debug_runtime,$construct, +1);
1225              
1226             # Generate post-match result-capturing code, if match captures...
1227 14 100       49 my $post_action = $is_noncapturing
1228             ? q{}
1229             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1230             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1231             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1232             \\\@Regexp::Grammars::RESULT_STACK, $alias, \$^N
1233             ),
1234             );}
1235             ;
1236              
1237             # Generate timeout test...
1238 14 50       30 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1239              
1240             # Translate to standard regex code...
1241 14         73 return qq{$timeout_test(?:(?{;local \@Regexp::Grammars::RESULT_STACK
1242             = \@Regexp::Grammars::RESULT_STACK;$debug_pre})(?:($keypat)(??{exists $hash_lookup ? q{} : q{(?!)}})(?{;$post_action$debug_post})))$postmodifier};
1243             }
1244              
1245              
1246             # Convert a " % " construct to pure Perl 5.10...
1247             sub _translate_separated_list {
1248 73     73   267 my ($term, $op, $separator, $term_trans, $sep_trans,
1249             $ws, $debug_build, $debug_runtime, $timeout) = @_;
1250              
1251             # This insertion ensures backtracking upwinds the stack correctly...
1252 73         144 state $CHECKPOINT = q{(?{;@Regexp::Grammars::RESULT_STACK = @Regexp::Grammars::RESULT_STACK;})};
1253              
1254             # Translate meaningful whitespace...
1255 73 100       219 $ws = length($ws) ? q{(?&ws__implicit__)} : q{};
1256              
1257             # Generate support for optional trailing separator...
1258 73 100       292 my $opt_trailing = substr($op,-2) eq '%%' ? qq{$ws$sep_trans?}
1259             : q{};
1260              
1261             # Generate timeout test...
1262 73 50       191 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1263              
1264             # Report how construct was interpreted, if requested to...
1265 73 50       199 if ($debug_build) {
1266 0 0       0 _debug_notify( info =>
1267             " |",
1268             " |...Treating $term $op $separator as:",
1269             " | | repeatedly match the subrule $term",
1270             " | \\ as long as the matches are separated by matches of $separator",
1271             (substr($op,-2) eq '%%' ?
1272             " | \\ and allowing an optional trailing $separator"
1273             : q{}
1274             )
1275             );
1276             }
1277              
1278             # One-or-more...
1279 73 100       717 return qq{$timeout_test(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+$opt_trailing}
1280             if $op =~ m{ [*][*]() | [+]([+?]?) \s* %%?+ | \{ 1, \}([+?]?) \s* %%?+ }xms;
1281              
1282             # Zero-or-more...
1283             return
1284 22 100       136 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+)?$+$opt_trailing}
1285             if $op =~ m{ [*]([+?]?) \s* %%? | \{ 0, \}([+?]?) \s* %%? }xms;
1286              
1287             # One-or-zero...
1288 18 100       94 return qq{?$+$opt_trailing}
1289             if $op =~ m{ [?]([+?]?) \s* %%? | \{ 0,1 \}([+?]?) \s* %%? }xms;
1290              
1291             # Zero exactly...
1292 14 100       58 return qq{{0}$ws$opt_trailing}
1293             if $op =~ m{ \{ 0 \}[+?]? \s* %%? }xms;
1294              
1295             # N exactly...
1296 12 100       70 if ($op =~ m{ \{ (\d+) \}([+?]?) \s* %%? }xms ) {
1297 2         9 my $min = $1-1;
1298             return
1299 2         25 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min}$+$opt_trailing)}
1300             }
1301              
1302             # Zero-to-N...
1303 10 100       41 if ($op =~ m{ \{ 0,(\d+) \}([+?]?) \s* %%? }xms ) {
1304 2         11 my $max = $1-1;
1305             return
1306 2         19 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){0,$max}$+)?$+$opt_trailing}
1307             }
1308              
1309             # M-to-N and M-to-whatever...
1310 8 50       45 if ($op =~ m{ \{ (\d+),(\d*) \} ([+?]?) \s* %%? }xms ) {
1311 8         31 my $min = $1-1;
1312 8 100       37 my $max = $2 ? $2-1 : q{};
1313             return
1314 8         71 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min,$max}$+$opt_trailing)}
1315             }
1316              
1317             # Somehow we missed a case (this should never happen)...
1318 0         0 die "Internal error: missing case in separated list handler";
1319             }
1320              
1321             sub _translate_subrule_call {
1322 292     292   2229 my ($source_line, $source_file, $rulename, $grammar_name, $construct, $alias,
1323             $subrule, $args, $savemode, $postmodifier,
1324             $debug_build, $debug_runtime, $timeout, $valid_subrule_names_ref) = @_;
1325              
1326             # Translate arg list, if provided...
1327 292         623 my $arg_desc;
1328 292 100       806 if ($args eq q{}) {
    100          
1329 282         530 $args = q{()};
1330             }
1331             elsif (substr($args,0,3) eq '(?{') {
1332             # Turn parencode into do block...
1333 1         3 $arg_desc = substr($args,3,-2);
1334 1         2 substr($args,1,1) = 'do';
1335             }
1336             else {
1337             # Turn abbreviated format into a key=>value list...
1338 9         48 $args =~ s{ [(,] \s* \K : (\w+) (?= \s* [,)] ) }{$1 => \$MATCH{'$1'}}gxms;
1339 9         22 $arg_desc = substr($args,1,-1);
1340             }
1341              
1342             # Transform qualified subrule names...
1343 292         507 my $simple_subrule = $subrule;
1344 292 100       917 my $start_grammar = (($simple_subrule =~ s{(.*)::}{}xms) ? $1 : "");
1345 292 100       919 if ($start_grammar !~ /^NEXT$|::/) {
1346 290         1066 $start_grammar = caller(3).'::'.$start_grammar;
1347             }
1348              
1349 292 100       2676 my @candidates = $start_grammar eq 'NEXT' ? _ancestry_of($grammar_name)
1350             : _ancestry_of($start_grammar);
1351              
1352             # Rename fully-qualified rule call, if to ancestor grammar...
1353             RESOLVING:
1354 292         732 for my $parent_class (@candidates) {
1355 295         663 my $inherited_subrule = $parent_class.'::'.$simple_subrule;
1356 295 100       959 if ($CACHE{$inherited_subrule}) {
1357 3         6 $subrule = $inherited_subrule;
1358 3         9 last RESOLVING;
1359             }
1360             }
1361              
1362             # Replace package separators, which regex engine can't handle...
1363 292         483 my $internal_subrule = $subrule;
1364 292         586 $internal_subrule =~ s{::}{_88_}gxms;
1365              
1366             # Shortcircuit if unknown subrule invoked...
1367 292 50       834 if (!$valid_subrule_names_ref->{$subrule}) {
1368 0         0 _debug_notify( error =>
1369             qq{Found call to $construct inside definition of $rulename},
1370             qq{near $source_file line $source_line.},
1371             qq{But no or was defined in the grammar},
1372             qq{(Did you misspell $construct? Or forget to define the rule?)},
1373             q{},
1374             );
1375 0         0 return "(?{Regexp::Grammars::_debug_fatal('$construct')})(*COMMIT)(*FAIL)";
1376             }
1377              
1378             # Determine save behaviour...
1379 292         565 my $is_noncapturing = $savemode =~ /noncapturing|lookahead/;
1380 292         499 my $is_listifying = $savemode eq 'list';
1381              
1382 292 100       1118 my $save_code =
    100          
1383             $is_noncapturing?
1384             q{ @Regexp::Grammars::RESULT_STACK[0..@Regexp::Grammars::RESULT_STACK-2] }
1385             : $is_listifying?
1386             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1387             Regexp::Grammars::_pop_current_result_frame_with_list(
1388             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1389             ),
1390             }
1391             :
1392             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1393             Regexp::Grammars::_pop_current_result_frame(
1394             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1395             ),
1396             }
1397             ;
1398              
1399             # Report how construct was interpreted, if requested to...
1400 292   100     1109 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1401 292 100 100     1252 my $results = $is_listifying && $postmodifier ? 'each match'
    100          
1402             : substr($postmodifier,0,1) eq '?' ? 'any match'
1403             : 'the match'
1404             ;
1405 292 100       829 my $do_something_with = $savemode eq 'neglookahead' ? 'lookahead for anything except'
    100          
1406             : $savemode eq 'poslookahead' ? 'lookahead for'
1407             : 'match'
1408             ;
1409 292 50       599 if ($debug_build) {
1410 0 0       0 _debug_notify( info =>
    0          
    0          
1411             " |",
1412             " |...Treating $construct as:",
1413             " | | $do_something_with the subrule <$subrule> $repeatedly",
1414             (defined $arg_desc ? " | | passing the args: ($arg_desc)"
1415             : ()
1416             ),
1417             ( $is_noncapturing ? " | \\ but don't save anything"
1418             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1419             : " | \\ saving $results in \$MATCH{$alias}"
1420             ),
1421             );
1422             }
1423              
1424             # Generate post-match result-capturing code, if match captures...
1425 292         705 my ($debug_pre, $debug_post)
1426             = _build_debugging_statements($debug_runtime, $construct);
1427              
1428             # Generate timeout test...
1429 292 50       740 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1430              
1431             # Translate to standard regex code...
1432 292         2152 return qq{(?:$timeout_test(?{;
1433             local \@Regexp::Grammars::RESULT_STACK = (\@Regexp::Grammars::RESULT_STACK, {'\@'=>{$args}});
1434             $debug_pre})((?&$internal_subrule))(?{;
1435             local \@Regexp::Grammars::RESULT_STACK = (
1436             $save_code
1437             );$debug_post
1438             }))$postmodifier};
1439             }
1440              
1441             sub _translate_rule_def {
1442 256     256   1004 my ($type, $qualifier, $name, $callname, $qualname, $body, $objectify, $local_ws, $nocontext)
1443             = @_;
1444 256         1011 $qualname =~ s{::}{_88_}gxms;
1445              
1446             # Return object if requested...
1447 256 100       847 my $objectification =
1448             $objectify ? qq{(??{; local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
1449             \$Regexp::Grammars::RESULT_STACK[-1] = '$qualifier$name'->can('new')
1450             ? '$qualifier$name'->new(\$Regexp::Grammars::RESULT_STACK[-1])
1451             : bless \$Regexp::Grammars::RESULT_STACK[-1], '$qualifier$name';
1452             Regexp::Grammars::_debug_non_hash(\$Regexp::Grammars::RESULT_STACK[-1],'$name');
1453             })}
1454             : q{};
1455              
1456             # Each rule or token becomes a DEFINE'd Perl 5.10 named capture...
1457 256 100 100     1115 my $implicit_version
1458             = ($callname eq 'ws' || $callname eq 'hk')
1459             ? qq{(?<${callname}__implicit__> $body) }
1460             : qq{};
1461 256         1836 return qq{
1462             (?(DEFINE) $local_ws
1463             (?<$qualname>
1464             (?<$callname>
1465             (?{\@{\$Regexp::Grammars::RESULT_STACK[-1]}{'!','~'}=(\$#{!}, $nocontext);})
1466             (?:$body) $objectification
1467             (?{;\$#{!}=delete(\$Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
1468             delete(\$Regexp::Grammars::RESULT_STACK[-1]{'\@'});
1469             })
1470             ))
1471             $implicit_version
1472             )
1473             };
1474             }
1475              
1476              
1477             # Locate any valid <...> sequences and replace with native regex code...
1478             sub _translate_subrule_calls {
1479 384     384   1368 my ($source_file, $source_line,
1480             $grammar_name,
1481             $grammar_spec,
1482             $compiletime_debugging_requested,
1483             $runtime_debugging_requested,
1484             $timeout_requested,
1485             $pre_match_debug,
1486             $post_match_debug,
1487             $rule_name,
1488             $subrule_names_ref,
1489             $magic_ws,
1490             ) = @_;
1491              
1492 384 100       1237 my $pretty_rule_name = $rule_name ? ($magic_ws ? '"
    100          
1493             : 'main regex (before first rule)';
1494              
1495             # Remember the preceding construct, so as to implement the +% etc. operators...
1496 384         609 my $prev_construct = q{};
1497 384         568 my $prev_translation = q{};
1498 384         540 my $curr_line_num = 1;
1499              
1500             # Translate all other calls (MAIN GRAMMAR FOR MODULE)...
1501 384         87359 $grammar_spec =~ s{
1502 2312         59837 (?{ $curr_line_num = substr($_, 0, pos) =~ tr/\n//; })
1503             (? (? \s*+) (? (?&SEPLIST_OP) ) (? \s*+) )?
1504             (?
1505             (?
1506             <
1507             (?:
1508             (?
1509             \. \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1510             )
1511             | (?
1512             (? \? | \! ) \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1513             )
1514             | (?
1515             \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1516              
1517             )
1518             | (?
1519             \[ \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1520             )
1521             | (?
1522             (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1523              
1524             )
1525             | (?
1526             \[ (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1527             )
1528              
1529             | (?
1530             \s* : (?(?&QUALIDENT)) \s*
1531             )
1532             | (?
1533             (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s*
1534             )
1535             | (?
1536             \[ (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s* \]
1537             )
1538              
1539             | (?
1540             \. (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1541             )
1542             | (?
1543             (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1544             )
1545             | (?
1546             \[ (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s* \]
1547             )
1548             | (?
1549             (?(?&HASH)) \s* (?(?&BRACES))? \s*
1550             )
1551             | (?
1552             (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s*
1553             )
1554             | (?
1555             \[ (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s* \]
1556             )
1557             | (?
1558             \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1559             | \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1560             )
1561             | (?
1562             (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1563             | (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1564             )
1565             | (?
1566             \[ (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s* \]
1567             | \[ (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s* \]
1568             )
1569             |
1570             (?
1571             minimize \s* : \s*
1572             )
1573             |
1574             (?
1575             require \s* : \s* (? (?&PARENCODE) ) \s*
1576             )
1577             |
1578             (?
1579             debug \s* : \s* (? run | match | step | try | off | on) \s*
1580             )
1581             |
1582             (?
1583             timeout \s* : \s* (? \d+) \s*
1584             )
1585             |
1586             (?
1587             context \s* : \s*
1588             )
1589             |
1590             (?
1591             nocontext \s* : \s*
1592             )
1593             |
1594             (?
1595             [.][.][.]
1596             | [!][!][!]
1597             | [?][?][?]
1598             )
1599             |
1600             (?
1601             (? error | fatal ) \s*+ : \s*+
1602             )
1603             |
1604             (?
1605             (? log | error | warning | fatal )
1606             \s*+ : \s*+
1607             (? (?&PARENCODE) | .+? )
1608             \s*+
1609             )
1610             )
1611             > (? \s* (?! (?&SEPLIST_OP) ) [?+*][?+]? | )
1612             |
1613             (?
1614             $WS_PATTERN
1615             )
1616             |
1617             (?
1618             (?&SEPLIST_OP) \s* (? \S* )
1619             )
1620             |
1621             (?
1622             \(\?\<\w+\>
1623             )
1624             |
1625             (?
1626             < [^>\n]* [>\n]
1627             )
1628             |
1629             (?
1630             (?
1631             | (?
1632             )
1633             |
1634             (?
1635             (?: \\[^shv]
1636             | (?! (?&PARENCODE) ) (?&PARENS)
1637             | (?&CHARSET)
1638             | \w++
1639             | \|
1640             )
1641             (?&QUANTIFIER)?
1642             )
1643             |
1644             (?
1645             \s++
1646             | \\. (?&QUANTIFIER)?
1647             | \(\?!
1648             | \(\?\# [^)]* \) # (?# -> old style inline comment)
1649             | (?&PARENCODE)
1650             | \# [^\n]*+
1651             | [^][\s()<>#\\]++
1652             )
1653             )
1654              
1655             (?(DEFINE)
1656             (? \*\* | [*+?] [+?]?+ \s* %%?+ | \{ \d+(,\d*)? \} [+?]?+ \s* %%?+ )
1657             (? \( (?:[?] (?: <[=!] | [:>] ))?
1658             (?: \\. | (?&PARENCODE) | (?&PARENS) | (?&CHARSET) | [^][()\\<>]++ )*+
1659             \)
1660             )
1661             (? \{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \} )
1662             (? \(\?\??\{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \}\) )
1663             (? \% (?&IDENT) (?: :: (?&IDENT) )* )
1664             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]] )*+ \] )
1665             (? [^\W\d]\w*+ )
1666             (? (?: [^\W\d]\w*+ :: )* [^\W\d]\w*+ )
1667             (? (?&NUMBER) | (?&STRING) | (?&VAR) )
1668             (? [+-]? \d++ (?:\. \d++)? (?:[eE] [+-]? \d++)? )
1669             (? ' [^\\']++ (?: \\. [^\\']++ )* ' )
1670             (? (?&PARENCODE) | \( \s* (?&ARGS)? \s* \) | (?# NOTHING ) )
1671             (? (?&ARG) \s* (?: , \s* (?&ARG) \s* )* ,? )
1672             (? (?&VAR) | (?&KEY) \s* => \s* (?&LITERAL) )
1673             (? : (?&IDENT) )
1674             (? (?&IDENT) | (?&LITERAL) )
1675             (? [*+?][+?]? | \{ \d+,?\d* \} [+?]? )
1676             )
1677             }{
1678 81     81   716184 my $curr_construct = $+{construct};
  81         33852  
  81         220585  
  2232         12380  
1679 2232   100     12101 my $list_marker = $+{list_marker} // q{};
1680 2232 100 100     12373 my $alias = ($+{alias}//'MATCH') eq 'MATCH' ? q{'='} : qq{'$+{alias}'};
1681              
1682             # Determine and remember the necessary translation...
1683 2232         4184 my $curr_translation = do{
1684              
1685             # Translate subrule calls of the form: ...
1686 2232 100 100     63127 if (defined $+{alias_parens_scalar}) {
    50 66        
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    0          
    0          
1687 82 100       582 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1688             _translate_subpattern(
1689             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1690 82         380 $compiletime_debugging_requested,
1691             $runtime_debugging_requested, $timeout_requested,
1692             );
1693             }
1694             elsif (defined $+{alias_parens_scalar_nocap}) {
1695 0 0       0 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1696             _translate_subpattern(
1697             $curr_construct, $alias, $pattern, 'noncapturing', $+{modifier},
1698 0         0 $compiletime_debugging_requested,
1699             $runtime_debugging_requested, $timeout_requested,
1700             );
1701             }
1702             elsif (defined $+{alias_parens_list}) {
1703 16 50       112 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1704             _translate_subpattern(
1705             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1706 16         76 $compiletime_debugging_requested,
1707             $runtime_debugging_requested, $timeout_requested,
1708             );
1709             }
1710              
1711             # Translate subrule calls of the form: ...
1712             elsif (defined $+{alias_hash_scalar}) {
1713             _translate_hashmatch(
1714             $curr_construct, $alias, $+{varname}, $+{keypat}, 'scalar', $+{modifier},
1715 7         34 $compiletime_debugging_requested,
1716             $runtime_debugging_requested,
1717             $timeout_requested,
1718             );
1719             }
1720             elsif (defined $+{alias_hash_scalar_nocap}) {
1721             _translate_hashmatch(
1722             $curr_construct, $alias, $+{varname}, $+{keypat}, 'noncapturing', $+{modifier},
1723 4         27 $compiletime_debugging_requested,
1724             $runtime_debugging_requested,
1725             $timeout_requested,
1726             );
1727             }
1728             elsif (defined $+{alias_hash_list}) {
1729             _translate_hashmatch(
1730             $curr_construct, $alias, $+{varname}, $+{keypat}, 'list', $+{modifier},
1731 3         26 $compiletime_debugging_requested,
1732             $runtime_debugging_requested,
1733             $timeout_requested,
1734             );
1735             }
1736              
1737             # Translate subrule calls of the form: ...
1738             elsif (defined $+{alias_subrule_scalar}) {
1739             _translate_subrule_call(
1740             $source_line, $source_file,
1741             $pretty_rule_name,
1742             $grammar_name,
1743             $curr_construct, $alias, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1744 22         111 $compiletime_debugging_requested,
1745             $runtime_debugging_requested,
1746             $timeout_requested,
1747             $subrule_names_ref,
1748             );
1749             }
1750             elsif (defined $+{alias_subrule_list}) {
1751             _translate_subrule_call(
1752             $source_line, $source_file,
1753             $pretty_rule_name,
1754             $grammar_name,
1755             $curr_construct, $alias, $+{subrule}, $+{args}, 'list', $+{modifier},
1756 26         133 $compiletime_debugging_requested,
1757             $runtime_debugging_requested,
1758             $timeout_requested,
1759             $subrule_names_ref,
1760             );
1761             }
1762              
1763             # Translate subrule calls of the form: and ...
1764             elsif (defined $+{self_subrule_lookahead}) {
1765              
1766             # Determine type of lookahead, and work around capture problem...
1767 2         6 my ($type, $pre, $post) = ( 'neglookahead', '(?!(?!)|', ')' );
1768 2 100       9 if ($+{sign} eq '?') {
1769 1         3 $type = 'poslookahead';
1770 1         2 $pre x= 2;
1771 1         2 $post x= 2;
1772             }
1773              
1774             $pre . _translate_subrule_call(
1775             $source_line, $source_file,
1776             $pretty_rule_name,
1777             $grammar_name,
1778 2         13 $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, $type, q{},
1779             $compiletime_debugging_requested,
1780             $runtime_debugging_requested,
1781             $timeout_requested,
1782             $subrule_names_ref,
1783             )
1784             . $post;
1785             }
1786             elsif (defined $+{self_subrule_scalar_nocap}) {
1787             _translate_subrule_call(
1788             $source_line, $source_file,
1789             $pretty_rule_name,
1790             $grammar_name,
1791             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'noncapturing', $+{modifier},
1792 6         51 $compiletime_debugging_requested,
1793             $runtime_debugging_requested,
1794             $timeout_requested,
1795             $subrule_names_ref,
1796             );
1797             }
1798             elsif (defined $+{self_subrule_scalar}) {
1799             _translate_subrule_call(
1800             $source_line, $source_file,
1801             $pretty_rule_name,
1802             $grammar_name,
1803             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1804 174         1355 $compiletime_debugging_requested,
1805             $runtime_debugging_requested,
1806             $timeout_requested,
1807             $subrule_names_ref,
1808             );
1809             }
1810             elsif (defined $+{self_subrule_list}) {
1811             _translate_subrule_call(
1812             $source_line, $source_file,
1813             $pretty_rule_name,
1814             $grammar_name,
1815             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'list', $+{modifier},
1816 62         456 $compiletime_debugging_requested,
1817             $runtime_debugging_requested,
1818             $timeout_requested,
1819             $subrule_names_ref,
1820             );
1821             }
1822              
1823             # Translate subrule calls of the form: ...
1824             elsif (defined $+{alias_argrule_scalar}) {
1825 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1826             _translate_subpattern(
1827             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1828 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1829             "in \$ARG{'$+{subrule}'}"
1830             );
1831             }
1832             elsif (defined $+{alias_argrule_list}) {
1833 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1834             _translate_subpattern(
1835             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1836 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1837             "in \$ARG{'$+{subrule}'}"
1838             );
1839             }
1840              
1841             # Translate subrule calls of the form: <:ARGNAME>...
1842             elsif (defined $+{self_argrule_scalar}) {
1843 1         5 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1844             _translate_subpattern(
1845             $curr_construct, qq{'$+{subrule}'}, $pattern, 'noncapturing', $+{modifier},
1846 1         9 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1847             "in \$ARG{'$+{subrule}'}"
1848             );
1849             }
1850              
1851             # Translate subrule calls of the form: <\IDENT> or ...
1852             elsif (defined $+{backref} || $+{alias_backref} || $+{alias_backref_list}) {
1853             # Use "%ARGS" if subrule names starts with a colon...
1854 9         34 my $subrule = $+{subrule};
1855 9 100       33 if (substr($subrule,0,1) eq ':') {
1856 3         8 substr($subrule,0,1,"\@'}{'");
1857             }
1858              
1859 9         22 my $backref = qq{\$Regexp::Grammars::RESULT_STACK[-1]{'$subrule'}};
1860 9 100 100     71 my $quoter = $+{slash} eq '\\' || $+{slash} eq '\\_'
1861             ? "quotemeta($backref)"
1862             : "Regexp::Grammars::_invert_delim($backref)"
1863             ;
1864 9         32 my $pattern = qq{ (??{ defined $backref ? $quoter : q{(?!)}})};
1865             my $type = $+{backref} ? 'noncapturing'
1866 9 100       55 : $+{alias_backref} ? 'scalar'
    100          
1867             : 'list'
1868             ;
1869             _translate_subpattern(
1870             $curr_construct, $alias, $pattern, $type, $+{modifier},
1871 9         45 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1872             "in \$MATCH{'$subrule'}"
1873             );
1874             }
1875              
1876             # Translate reportable raw regexes (add debugging support)...
1877             elsif (defined $+{reportable_raw_regex}) {
1878             _translate_raw_regex(
1879 474         1474 $+{reportable_raw_regex}, $compiletime_debugging_requested, $runtime_debugging_requested
1880             );
1881             }
1882              
1883             # Translate too-complex repetition specifications...
1884             elsif (defined $+{complex_repetition}) {
1885 0         0 my ($repetition, $separator) = @+{'complex_repetition', 'complex_separator'};
1886 0         0 my ($metaop) = $repetition =~ m{(%%?)};
1887 0         0 my $quotedop = quotemeta($metaop);
1888 0         0 $separator =~ s/\s+/ /g;
1889 0 0       0 my $problem = $separator =~ /\S/
1890             ? ["The $separator... separator you specified after the $metaop is too complex",
1891             "(Try refactoring it into a single subrule call)",
1892             ]
1893             : ["No separator was specified after the $metaop",
1894             "(Or did you need a $quotedop instead, to match a literal '$metaop'?)",
1895             ];
1896             _debug_notify( fatal =>
1897             "Invalid separation specifier: $metaop",
1898             "at line $curr_line_num of $pretty_rule_name",
1899 0         0 @{$problem},
  0         0  
1900             );
1901 0         0 exit(1);
1902             }
1903              
1904             # Translate non-reportable raw regexes (leave as is)...
1905             elsif (defined $+{raw_regex}) {
1906             # Handle raw % and %%
1907 1318         4684 my $raw_regex = $+{raw_regex};
1908 1318 50       4167 if ($raw_regex =~ / \A %%?+ /x) {
1909 0         0 _debug_notify( fatal =>
1910             "Invalid separation specifier: $&",
1911             "at line $curr_line_num of $pretty_rule_name",
1912             "(Did you forget to put a repetition quantifier before the $&",
1913             " or did you need a " . quotemeta($&) . " instead, to match a literal '$&'?)",
1914             );
1915 0         0 exit(1);
1916             }
1917              
1918             # Handle any other raw regex...
1919             _translate_raw_regex(
1920 1318         2846 $raw_regex, $compiletime_debugging_requested
1921             );
1922             }
1923              
1924             # Translate directives...
1925             elsif (defined $+{require_directive}) {
1926             _translate_require_directive(
1927 0         0 $curr_construct, $+{condition}, $compiletime_debugging_requested
1928             );
1929             }
1930             elsif (defined $+{minimize_directive}) {
1931             _translate_minimize_directive(
1932 3         16 $curr_construct, $+{condition}, $compiletime_debugging_requested
1933             );
1934             }
1935             elsif (defined $+{debug_directive}) {
1936             _translate_debug_directive(
1937 0         0 $curr_construct, $+{cmd}, $compiletime_debugging_requested
1938             );
1939             }
1940             elsif (defined $+{timeout_directive}) {
1941             _translate_timeout_directive(
1942 0         0 $curr_construct, $+{timeout}, $compiletime_debugging_requested
1943             );
1944             }
1945             elsif (defined $+{error_directive}) {
1946             _translate_error_directive(
1947             $curr_construct, $+{error_type}, $+{msg},
1948 8         40 $compiletime_debugging_requested, $rule_name
1949             );
1950             }
1951             elsif (defined $+{autoerror_directive}) {
1952             _translate_error_directive(
1953 7         29 $curr_construct, $+{error_type}, q{},
1954             $compiletime_debugging_requested, $rule_name
1955             );
1956             }
1957             elsif (defined $+{yadaerror_directive}) {
1958             _translate_error_directive(
1959             $curr_construct,
1960 4 50       33 ($+{yadaerror_directive} eq '???' ? 'warning' : 'error'),
1961             q{},
1962             $compiletime_debugging_requested, -$rule_name
1963             );
1964             }
1965             elsif (defined $+{context_directive}) {
1966 0 0       0 if ($compiletime_debugging_requested) {
1967 0         0 _debug_notify( info => " |",
1968             " |...Treating $curr_construct as:",
1969             " | \\ Turn on context-saving for the current rule"
1970             );
1971             }
1972 0         0 q{}; # Remove the directive
1973             }
1974             elsif (defined $+{nocontext_directive}) {
1975 0 0       0 if ($compiletime_debugging_requested) {
1976 0         0 _debug_notify( info => " |",
1977             " |...Treating $curr_construct as:",
1978             " | \\ Turn off context-saving for the current rule"
1979             );
1980             }
1981 0         0 q{}; # Remove the directive
1982             }
1983             elsif (defined $+{ws_directive}) {
1984 4 50       12 if ($compiletime_debugging_requested) {
1985 0         0 _debug_notify( info => " |",
1986             " |...Treating $curr_construct as:",
1987             " | \\ Change whitespace matching for the current rule"
1988             );
1989             }
1990 4         9 $curr_construct;
1991             }
1992              
1993             # Something that looks like a rule call or directive, but isn't...
1994             elsif (defined $+{incomplete_request}) {
1995 0         0 my $request = $+{incomplete_request};
1996 0         0 $request =~ s/\n//g;
1997 0 0       0 if ($request =~ /\A\s*<\s*\Z/) {
1998 0         0 _debug_notify( fatal =>
1999             qq{Invalid < metacharacter near line $curr_line_num of $pretty_rule_name},
2000             qq{If you meant to match a literal '<', use: \\<},
2001             );
2002             }
2003             else {
2004 0         0 _debug_notify( fatal =>
2005             qq{Possible failed attempt to specify},
2006             qq{a subrule call or directive: $request},
2007             qq{near line $curr_line_num of $pretty_rule_name},
2008             qq{If you meant to match literally, use: \\$request},
2009             );
2010             }
2011 0         0 exit(1);
2012             }
2013              
2014             # A quantifier that isn't quantifying anything...
2015             elsif (defined $+{loose_quantifier}) {
2016 0         0 my $quant = $+{loose_quantifier};
2017 0         0 $quant =~ s{^\s+}{};
2018 0         0 my $literal = quotemeta($quant);
2019 0         0 _debug_notify( fatal =>
2020             qq{Quantifier that doesn't quantify anything: $quant},
2021             qq{at line $curr_line_num in declaration of $pretty_rule_name},
2022             qq{(Did you mean to match literally? If so, try: $literal)},
2023             q{},
2024             );
2025 0         0 exit(1);
2026             }
2027              
2028             # There shouldn't be any other possibility...
2029             else {
2030 0         0 die qq{Internal error: this shouldn't happen!\n},
2031             qq{Near '$curr_construct' at $curr_line_num of $pretty_rule_name\n};
2032             }
2033             };
2034              
2035             # Handle the **/*%/*%%/+%/{n,m}%/etc operators...
2036 2232 100       10387 if ($list_marker) {
2037 73 100       527 my $ws = $magic_ws ? $+{ws1} . $+{ws2} : q{};
2038 73         317 my $op = $+{op};
2039              
2040 73         248 $curr_translation = _translate_separated_list(
2041             $prev_construct, $op, $curr_construct,
2042             $prev_translation, $curr_translation, $ws,
2043             $compiletime_debugging_requested,
2044             $runtime_debugging_requested, $timeout_requested,
2045             );
2046 73         269 $curr_construct = qq{$prev_construct $op $curr_construct};
2047             }
2048              
2049             # Finally, remember this latest translation, and return it...
2050 2232         3742 $prev_construct = $curr_construct;
2051 2232         13752 $prev_translation = $curr_translation;;
2052             }exmsg;
2053              
2054             # Translate magic hash accesses...
2055 384         2606 $grammar_spec =~ s{\$(?:\:\:)?MATCH (?= \s*\{) }
2056 384         1284 {\$Regexp::Grammars::RESULT_STACK[-1]}xmsg;
2057             $grammar_spec =~ s{\$(?:\:\:)?ARG (?= \s*\{) }
2058             {\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}}xmsg;
2059 384         1593  
2060             # Translate magic scalars and hashes...
2061             state $translate_scalar = {
2062             q{%$MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2063             q{@$MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2064             q{$MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2065             q{%MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2066             q{$CAPTURE} => q{$^N},
2067             q{$CONTEXT} => q{$^N},
2068             q{$DEBUG} => q{$Regexp::Grammars::DEBUG},
2069             q{$INDEX} => q{${\\pos()}},
2070             q{%ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2071              
2072             q{%$::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2073             q{@$::MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2074             q{$::MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2075             q{%::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2076             q{$::CAPTURE} => q{$^N},
2077             q{$::CONTEXT} => q{$^N},
2078             q{$::DEBUG} => q{$Regexp::Grammars::DEBUG},
2079             q{$::INDEX} => q{${\\pos()}},
2080             q{%::ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2081              
2082 1422         2568 };
2083 4325         5906 state $translatable_scalar
2084 384         578 = join '|', map {quotemeta $_}
  79         749  
2085             sort {length $b <=> length $a}
2086 384         12336 keys %{$translate_scalar};
2087              
2088 384         2048 $grammar_spec =~ s{ ($translatable_scalar) (?! \s* (?: \[ | \{) ) }
2089             {$translate_scalar->{$1}}oxmsg;
2090              
2091             return $grammar_spec;
2092             }
2093 0     0   0  
2094             # Generate a "decimal timestamp" and insert in a template...
2095             sub _timestamp {
2096 0 0       0 my ($template) = @_;
2097 0         0  
2098 0         0 # Generate and insert any timestamp...
  0         0  
2099 0         0 if ($template =~ /%t/) {
2100             my ($sec, $min, $hour, $day, $mon, $year) = localtime;
2101 0         0 $mon++; $year+=1900;
2102             my $timestamp = sprintf("%04d%02d%02d.%02d%02d%02d",
2103             $year, $mon, $day, $hour, $min, $sec);
2104 0         0 $template =~ s{%t}{$timestamp}xms;;
2105             }
2106              
2107             return $template;
2108             }
2109 0     0   0  
2110 0         0 # Open (or re-open) the requested log file...
2111 0         0 sub _autoflush {
2112 0         0 my ($fh) = @_;
2113             my $originally_selected = select $fh;
2114             $|=1;
2115             select $originally_selected;
2116 3777     3777   662719 }
2117 3777   50     8076  
2118             sub _open_log {
2119             my ($mode, $filename, $from_where) = @_;
2120 3777 50       7649 $from_where //= q{};
    0          
2121 3777         56822  
2122             # Special case: '-' --> STDERR
2123             if ($filename eq q{-}) {
2124             return *STDERR{IO};
2125 0         0 }
2126 0         0 # Otherwise, just open the named file...
2127             elsif (open my $fh, $mode, $filename) {
2128             _autoflush($fh);
2129             return $fh;
2130 0         0 }
2131 0 0       0 # Otherwise, generate a warning and default to STDERR...
2132             else {
2133             local *Regexp::Grammars::LOGFILE = *STDERR{IO};
2134             _debug_notify( warn =>
2135             qq{Unable to open log file '$filename'},
2136             ($from_where ? $from_where : ()),
2137             qq{($!)},
2138 0         0 qq{Defaulting to STDERR instead.},
2139             q{},
2140             );
2141             return *STDERR{IO};
2142             }
2143 1087     1087   8188 }
2144 1087         2116  
2145 1087         1631 sub _invert_delim {
2146 1087         19500 my ($delim) = @_;
2147             $delim = reverse $delim;
2148             $delim =~ tr/<>[]{}()??`'/><][}{)(??'`/;
2149             return quotemeta $delim;
2150             }
2151              
2152             # Regex to detect if other regexes contain a grammar specification...
2153             my $GRAMMAR_DIRECTIVE
2154             = qr{ < grammar: \s* (? $QUALIDENT ) \s* > }xms;
2155              
2156             # Regex to detect if other regexes contain a grammar inheritance...
2157             my $EXTENDS_DIRECTIVE
2158             = qr{ < extends: \s* (? $QUALIDENT ) \s* > }xms;
2159              
2160             # Cache of rule/token names within defined grammars...
2161             my %subrule_names_for;
2162 425     425   893  
2163             # Build list of ancestors for a given grammar...
2164 425 50       1044 sub _ancestry_of {
2165             my ($grammar_name) = @_;
2166 81     81   6335  
  81         2544  
  81         843  
2167 425         631 return () if !$grammar_name;
  446         1993  
  425         3076  
2168              
2169             use mro;
2170             return map { substr($_, $CACHE_LEN) } @{mro::get_linear_isa($CACHE.$grammar_name, 'c3')};
2171             }
2172 133     133   366  
2173             # Detect and translate any requested grammar inheritances...
2174             sub _extract_inheritances {
2175             my ($source_line, $source_file, $regex, $compiletime_debugging_requested, $derived_grammar_name) = @_;
2176 133         593  
2177              
2178 12         77 # Detect and remove inheritance requests...
2179 12         27 while ($regex =~ s{$EXTENDS_DIRECTIVE}{}xms) {
2180 12 100       37 # Normalize grammar name and report...
2181 3         10 my $orig_grammar_name = $+{base_grammar_name};
2182             my $grammar_name = $orig_grammar_name;
2183             if ($grammar_name !~ /::/) {
2184 12 50       28 $grammar_name = caller(2).'::'.$grammar_name;
2185 12 50       26 }
2186 0         0  
2187             if (exists $user_defined_grammar{$grammar_name}) {
2188             if ($compiletime_debugging_requested) {
2189             _debug_notify( info =>
2190             "Processing inheritance request for $grammar_name...",
2191             q{},
2192             );
2193 81     81   17969 }
  81         2490  
  81         252124  
2194 12         17  
  12         223  
2195             # Specify new relationship...
2196             no strict 'refs';
2197 0         0 push @{$CACHE.$derived_grammar_name.'::ISA'}, $CACHE.$grammar_name;
2198             }
2199             else {
2200             _debug_notify( fatal =>
2201             "Inheritance from unknown grammar requested",
2202             "by directive",
2203 0         0 "in regex grammar declared at $source_file line $source_line",
2204             q{},
2205             );
2206             exit(1);
2207             }
2208 133         349 }
2209              
2210             # Retrieve ancestors (but not self) in C3 dispatch order...
2211 133         325 my (undef, @ancestors) = _ancestry_of($derived_grammar_name);
  17         23  
  17         135  
2212 133         464  
2213             # Extract subrule names and implementations for ancestors...
2214 133         342 my %subrule_names = map { %{$subrule_names_for{$_}} } @ancestors;
  17         80  
2215             $_ = -1 for values %subrule_names;
2216 133         471 my $implementation
2217             = join "\n", map { $user_defined_grammar{$_} } @ancestors;
2218              
2219             return $implementation, \%subrule_names;
2220             }
2221 137     137   332  
2222 137         355 # Transform grammar-augmented regex into pure Perl 5.10 regex...
2223             sub _build_grammar {
2224             my ($grammar_spec) = @_;
2225 137 100       904 $grammar_spec .= q{};
2226 4         1710  
2227             # Check for lack of Regexp::Grammar-y constructs and short-circuit...
2228             if ($grammar_spec !~ m{ < (?: [.?![:%\\/]? [^\W\d]\w* [^>]* | [.?!]{3} ) > }xms) {
2229             return $grammar_spec;
2230 133         771 }
2231 133         672  
2232             # Remember where we parked...
2233             my ($source_file, $source_line) = (caller 1)[1,2];
2234 133         292 $source_line -= $grammar_spec =~ tr/\n//;
2235 133         6791  
2236             # Check for dubious repeated constructs that throw away captures...
2237             my $dubious_line = $source_line;
2238             while ($grammar_spec =~ m{
2239             (.*?)
2240             (
2241             < (?! \[ ) # not <[SUBRULE]>
2242             ( $IDENT (?: = [^>]*)? ) # but or
2243             > \s*
2244             ( # followed by a quantifier...
2245             [+*][?+]? # either symbolic
2246             | \{\d+(?:,\d*)?\}[?+]? # or numeric
2247 0         0 )
2248 0         0 )
2249 0         0 }gxms) {
2250             my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2251             $dubious_line += $prefix =~ tr/\n//;
2252             _debug_notify( warn =>
2253             qq{Repeated subrule <$rule>$qual},
2254             qq{at $source_file line $dubious_line},
2255             qq{will only capture its final match},
2256 0         0 qq{(Did you mean <[$rule]>$qual instead?)},
2257             q{},
2258             );
2259             $dubious_line += $match =~ tr/\n//;
2260 133         411 }
2261 133         1539  
2262             # Check for dubious non-backtracking constructs...
2263             $dubious_line = $source_line;
2264             while (
2265             $grammar_spec =~ m{
2266             (.*?)
2267             (
2268             <
2269             (?! (?:obj)? (?:rule: | token ) )
2270             ( [^>]+ )
2271             >
2272             \s*
2273 2         13 ( [?+*][+] | \{.*\}[+] )
2274 2         6 )
2275 2         6 }gxms) {
2276 2         18 my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2277             $dubious_line += $prefix =~ tr/\n//;
2278             my $safe_qual = substr($qual,0,-1);
2279             _debug_notify( warn =>
2280             qq{Non-backtracking subrule call <$rule>$qual},
2281             qq{at $source_file line $dubious_line},
2282             qq{may not revert correctly during backtracking.},
2283 2         25 qq{(If grammar does not work, try <$rule>$safe_qual instead)},
2284             q{},
2285             );
2286             $dubious_line += $match =~ tr/\n//;
2287 133         260 }
2288 133         282  
2289 133         255 # Check whether a log file was specified...
2290             my $compiletime_debugging_requested;
2291 133         440 local *Regexp::Grammars::LOGFILE = *Regexp::Grammars::LOGFILE;
2292 133         382 my $logfile = q{-};
2293 0         0  
2294             my $log_where = "for regex grammar defined at $source_file line $source_line";
2295             $grammar_spec =~ s{ ^ [^#]* < logfile: \s* ([^>]+?) \s* > }{
2296 0         0 $logfile = _timestamp($1);
2297 0         0  
2298             # Presence of implies compile-time logging...
2299             $compiletime_debugging_requested = 1;
2300 0         0 *Regexp::Grammars::LOGFILE = _open_log('>',$logfile, $log_where );
2301              
2302             # Delete directive...
2303             q{};
2304 133         5493 }gexms;
2305              
2306             # Look ahead for any run-time debugging or timeout requests...
2307             my $runtime_debugging_requested
2308             = $grammar_spec =~ m{
2309             ^ [^#]*
2310             < debug: \s* (run | match | step | try | on | same ) \s* >
2311 133         338 | \$DEBUG (?! \s* (?: \[ | \{) )
2312             }xms;
2313              
2314             my $timeout_requested
2315             = $grammar_spec =~ m{
2316             ^ [^#]*
2317             < timeout: \s* \d+ \s* >
2318             }xms;
2319              
2320 133 100       539  
2321             # Standard actions set up and clean up any regex debugging...
2322             # Before entire match, set up a stack of attempt records and report...
2323             my $pre_match_debug
2324             = $runtime_debugging_requested
2325             ? qq{(?{; *Regexp::Grammars::LOGFILE
2326             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where');
2327             Regexp::Grammars::_init_try_stack(); })}
2328             : qq{(?{; *Regexp::Grammars::LOGFILE
2329             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where'); })}
2330 133 100       347 ;
2331              
2332             # After entire match, report whether successful or not...
2333             my $post_match_debug
2334             = $runtime_debugging_requested
2335             ? qq{(?{;Regexp::Grammars::_debug_matched(0,\\%/,'',\$^N)})
2336             |(?>(?{;Regexp::Grammars::_debug_handle_failures(0,''); }) (?!))
2337             }
2338             : q{}
2339 133         412 ;
2340              
2341             # Remove comment lines...
2342             $grammar_spec =~ s{^ ([^#\n]*) \s \# [^\n]* }{$1}gxms;
2343              
2344             # Subdivide into rule and token definitions, preparing to process each...
2345             # REWRITE THIS, USING (PROBABLY NEED TO REFACTOR ALL GRAMMARS TO REUSe
2346             # THESE COMPONENTS:
2347             # (? \( \s* (?&PARAMS)? \s* \) | (?# NOTHING ) )
2348             # (? (?&PARAM) \s* (?: , \s* (?&PARAM) \s* )* ,? )
2349 133         12629 # (? (?&VAR) (?: \s* = \s* (?: (?&LITERAL) | (?&PARENCODE) ) )? )
2350             # (? (?&NUMBER) | (?&STRING) | (?&VAR) )
2351             # (? : (?&IDENT) )
2352             my @defns = split m{
2353             (< (obj|)(rule|token) \s*+ :
2354             \s*+ ((?:${IDENT}::)*+) (?: ($IDENT) \s*+ = \s*+ )?+
2355             ($IDENT)
2356             \s* >)
2357             }xms, $grammar_spec;
2358 133         1019  
  256         949  
2359 133         655 # Extract up list of names of defined rules/tokens...
2360 133         277 # (Name is every 6th item out of every seven, skipping the first item)
2361             my @subrule_names = @defns[ map { $_ * 7 + 6 } 0 .. ((@defns-1)/7-1) ];
2362             my @defns_copy = @defns[1..$#defns];
2363 133         327 my %subrule_names;
2364 133         228  
2365 133         315 # Build a look-up table of subrule names, checking for duplicates...
2366 256         834 my $defn_line = $source_line + $defns[0] =~ tr/\n//;
2367 256 50       987 my %first_decl_explanation;
2368             for my $subrule_name (@subrule_names) {
2369             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns_copy, 0, 7);
2370             if (++$subrule_names{$subrule_name} > 1) {
2371             _debug_notify( warn =>
2372 0         0 "Redeclaration of <$objectify$type: $subrule_name>",
  0         0  
2373             "at $source_file line $defn_line",
2374             "will be ignored.",
2375             @{ $first_decl_explanation{$subrule_name} },
2376             q{},
2377 256         1095 );
2378             }
2379             else {
2380             $first_decl_explanation{$subrule_name} = [
2381             "(Hidden by the earlier declaration of <$objectify$type: $subrule_name>",
2382 256         759 " at $source_file line $defn_line)"
2383             ];
2384             }
2385             $defn_line += ($full_decl.$body) =~ tr/\n//;
2386 133         604 }
2387              
2388             # Add the built-ins...
2389 133         281 @subrule_names{'ws', 'hk', 'matchpos', 'matchline'} = (1) x 4;
2390 133 50       845  
2391 0         0 # An empty main rule will never match anything...
2392             my $main_regex = shift @defns;
2393             if ($main_regex =~ m{\A (?: \s++ | \(\?\# [^)]* \) | \# [^\n]++ )* \z}xms) {
2394             _debug_notify( error =>
2395             "No main regex specified before rule definitions",
2396             "in regex grammar declared at $source_file line $source_line",
2397             "Grammar will never match anything.",
2398             "(Or did you forget a specification?)",
2399             q{},
2400             );
2401 133         302 }
2402 133         258  
2403             # Compile the regex or grammar...
2404             my $regex = q{};
2405             my $grammar_name;
2406 133 100       645 my $is_grammar;
2407              
2408 5         47 # Is this a grammar specification?
2409 5 100       22 if ($main_regex =~ $GRAMMAR_DIRECTIVE) {
2410 3         11 # Normalize grammar name and report...
2411             $grammar_name = $+{grammar_name};
2412 5         7 if ($grammar_name !~ /::/) {
2413             $grammar_name = caller(1) . "::$grammar_name";
2414             }
2415 5         10 $is_grammar = 1;
2416 7         20  
2417             # Add subrule definitions to namespace...
2418             for my $subrule_name (@subrule_names) {
2419             $CACHE{$grammar_name.'::'.$subrule_name} = 1;
2420 128         261 }
2421 128         313 }
2422             else {
2423             state $dummy_grammar_index = 0;
2424             $grammar_name = '______' . $dummy_grammar_index++;
2425 133         375 }
2426              
2427             # Extract any inheritance information...
2428             my ($inherited_rules, $inherited_subrule_names)
2429             = _extract_inheritances(
2430             $source_line, $source_file,
2431             $main_regex,
2432             $compiletime_debugging_requested,
2433             $grammar_name
2434 133         7643 );
2435              
2436             # Remove requests...
2437 133         363 $main_regex =~ s{ $EXTENDS_DIRECTIVE }{}gxms;
2438 133         436  
  133         334  
2439             # Add inherited subrule names to allowed subrule names;
2440             @subrule_names{ keys %{$inherited_subrule_names} }
2441 133         1071 = values %{$inherited_subrule_names};
2442              
2443             # Remove comments from top-level grammar...
2444             $main_regex =~ s{
2445             \(\?\# [^)]* \)
2446             | (?
2447             }{}gxms;
2448 133 0       295  
  0         0  
2449 133 50       626 # Remove any top-level nocontext directive...
    100          
2450             # 1 2 3 4
2451             $main_regex =~ s{^( (.*?) (\\*) (\# [^\n]*) )$}{length($3) % 2 ? $1 : $2.substr($3,0,-1)}gexms;
2452             my $nocontext = ($main_regex =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2453             : ($main_regex =~ s{ < context \s* : \s* > }{}gxms) ? 0
2454 133 100       392 : 0;
2455              
2456 5 50       17 # If so, set up to save the grammar...
2457 0         0 if ($is_grammar) {
2458             # Normalize grammar name and report...
2459 5 50       12 if ($grammar_name !~ /::/) {
2460 0         0 $grammar_name = caller(1) . "::$grammar_name";
2461             }
2462             if ($compiletime_debugging_requested) {
2463             _debug_notify( info =>
2464             "Processing definition of grammar $grammar_name...",
2465             q{},
2466             );
2467 5         293 }
2468              
2469             # Remove the grammar directive...
2470             $main_regex =~ s{
2471 5         21 ( $GRAMMAR_DIRECTIVE
  5         20  
2472             | < debug: \s* (run | match | step | try | on | off | same ) \s* >
2473             )
2474 5 50       24 }{$source_line += $1 =~ tr/\n//; q{}}gexms;
2475 0         0  
2476             # Check for anything else in the main regex...
2477             if ($main_regex =~ /\A(\s*)\S/) {
2478             $source_line += $1 =~ tr/\n//;
2479             _debug_notify( warn =>
2480 0         0 "Unexpected item before first subrule specification",
  0         0  
2481             "in definition of ",
2482             "at $source_file line $source_line:",
2483             map({ " $_"} grep /\S/, split "\n", $main_regex),
2484             "(this will be ignored when defining the grammar)",
2485             q{},
2486             );
2487             }
2488              
2489 48         82 # Remember set of valid subrule names...
2490 5         36 $subrule_names_for{$grammar_name}
  30         100  
  48         94  
2491             = {
2492             map({ ($_ => 1) } keys %subrule_names),
2493             map({ ($grammar_name.'::'.$_ => 1) } grep { !/::/ } keys %subrule_names),
2494             };
2495 128 50       362 }
2496 0         0 else { #...not a grammar specification
2497             # Report how main regex was interpreted, if requested to...
2498             if ($compiletime_debugging_requested) {
2499             _debug_notify( info =>
2500             "Processing the main regex before any rule definitions",
2501             );
2502 128         461 }
2503              
2504             # Any actual regex is processed first...
2505             $regex = _translate_subrule_calls(
2506             $source_file, $source_line,
2507             $grammar_name,
2508             $main_regex,
2509             $compiletime_debugging_requested,
2510             $runtime_debugging_requested,
2511             $timeout_requested,
2512             $pre_match_debug,
2513             $post_match_debug,
2514             q{}, # Expected...what?
2515             \%subrule_names,
2516             0, # Whitespace isn't magical
2517 128         531 );
2518              
2519             # Wrap the main regex (to ensure |'s don't segment pre and # post commands)...
2520 128 50       432 $regex = "(?:$regex)";
2521 0         0  
2522             # Report how construct was interpreted, if requested to...
2523             if ($compiletime_debugging_requested) {
2524             _debug_notify( q{} =>
2525             q{ |},
2526             q{ \\___End of main regex},
2527             q{},
2528             );
2529             }
2530 133         334 }
2531              
2532             # Update line number...
2533 133         417 $source_line += $main_regex =~ tr/\n//;
2534              
2535 256         992 # Then iterate any following rule definitions...
2536 256   66     1260 while (@defns) {
2537 256         581 # Grab details of each rule defn (as extracted by previous split)...
2538             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns, 0, 7);
2539             $name //= $callname;
2540 256 50       565 my $qualified_name = $grammar_name.'::'.$callname;
2541 0 0       0  
2542             # Report how construct was interpreted, if requested to...
2543             if ($compiletime_debugging_requested) {
2544             _debug_notify( info =>
2545             "Defining a $type: <$callname>",
2546             " |...Returns: " . ($objectify ? "an object of class '$qualifier$name'" : "a hash"),
2547 256 100       940 );
    100          
2548             }
2549              
2550             my $local_nocontext
2551             = ($body =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2552             : ($body =~ s{ < context \s* : \s* > }{}gxms) ? 0
2553 256         794 : $nocontext;
2554              
2555             # Translate any nested <...> constructs...
2556             my $trans_body = _translate_subrule_calls(
2557             $source_file, $source_line,
2558             $grammar_name,
2559             $body,
2560             $compiletime_debugging_requested,
2561             $runtime_debugging_requested,
2562             $timeout_requested,
2563             $pre_match_debug,
2564             $post_match_debug,
2565             $callname, # Expected...what?
2566             \%subrule_names,
2567             $type eq 'rule', # Is whitespace magical?
2568 256 50       732 );
2569 0         0  
2570             # Report how construct was interpreted, if requested to...
2571             if ($compiletime_debugging_requested) {
2572             _debug_notify( q{} =>
2573             q{ |},
2574             q{ \\___End of rule definition},
2575             q{},
2576             );
2577 256         424 }
2578 256         397  
2579             # Make allowance for possible local whitespace definitions...
2580             my $local_ws_defn = q{};
2581 256 100       588 my $local_ws_call = q{(?&ws__implicit__)};
2582              
2583 134         295 # Rules make non-code literal whitespace match textual whitespace...
2584             if ($type eq 'rule') {
2585 134         798 # Implement any local whitespace definition...
2586 4         14 my $first_ws = 1;
2587 4 50       16 WS_DIRECTIVE:
    50          
2588 0         0 while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2589             my $defn = $1;
2590             if ($defn !~ m{\S}xms) {
2591             _debug_notify( warn =>
2592             qq{Ignoring useless empty directive},
2593             qq{in definition of },
2594             qq{near $source_file line $source_line},
2595 0         0 qq{(Did you mean instead?)},
2596             q{},
2597             );
2598 0         0 next WS_DIRECTIVE;
2599             }
2600             elsif (!$first_ws) {
2601             _debug_notify( warn =>
2602             qq{Ignoring useless extra directive},
2603             qq{in definition of },
2604             qq{at $source_file line $source_line},
2605 0         0 qq{(No more than one is permitted per rule!)},
2606             q{},
2607             );
2608 4         8 next WS_DIRECTIVE;
2609             }
2610 4         5 else {
2611 4         6 $first_ws = 0;
2612 4         13 }
2613 4         17 state $ws_counter = 0;
2614             $ws_counter++;
2615             $local_ws_defn = qq{(?<__RG_ws_$ws_counter> $defn)};
2616             $local_ws_call = qq{(?&__RG_ws_$ws_counter)};
2617 134         335 }
2618              
2619             # Implement auto-whitespace...
2620             state $CODE_OR_SPACE = qr{
2621             (? # These are not magic...
2622             \( \?\?? (?&BRACED) \) # Embedded code blocks
2623             | \s++ # Whitespace not followed by...
2624             (?= \| # ...an OR
2625             | \(\?\#\) # ...a null comment
2626             | (?: \) \s* )? \z # ...the end of the rule
2627             | \(\(?\?\&ws\) # ...an explicit ws match
2628             | \(\?\??\{ # ...an embedded code block
2629             | \\[shv] # ...an explicit space match
2630             )
2631             )
2632             |
2633             (? \s++ ) # All other whitespace is magic
2634 134   66     5907  
  1168         65862  
2635             (?(DEFINE) (? \{ (?: \\. | (?&BRACED) | [^{}] )* \} ) )
2636             }xms;
2637 122         578 $trans_body =~ s{($CODE_OR_SPACE)}{ $+{ignorable_space} // $local_ws_call }exmsg;
2638 0         0 }
2639             else {
2640             while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2641             _debug_notify( warn =>
2642             qq{Ignoring useless directive},
2643             qq{in definition of },
2644             qq{at $source_file line $source_line},
2645             qq{(Did you need to define instead of ?)},
2646             q{},
2647             );
2648 256         1626 }
2649             }
2650              
2651             $regex
2652             .= "\n###############[ $source_file line $source_line ]###############\n"
2653             . _translate_rule_def(
2654             $type, $qualifier, $name, $callname, $qualified_name, $trans_body, $objectify,
2655             $local_ws_defn, $local_nocontext,
2656 256         1201 );
2657              
2658             # Update line number...
2659             $source_line += ($full_decl.$body) =~ tr/\n//;
2660 133         2457 }
2661              
2662             # Insert checkpoints into any user-defined code block...
2663             $regex =~ s{ \( \?\?? \{ \K (?!;) }{
2664             local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
2665 133         555 }xmsg;
2666              
2667             # Check for any suspicious left-overs from the start of the regex...
2668 133 100       461 pos $regex = 0;
2669 5         11  
2670 5         557 # If a grammar definition, save grammar and return a placeholder...
2671             if ($is_grammar) {
2672             $user_defined_grammar{$grammar_name} = $regex;
2673             return qq{(?{
2674             warn "Can't match directly against a pure grammar: \n";
2675             })(*COMMIT)(?!)};
2676 128         757 }
2677             # Otherwise, aggregrate the final grammar...
2678             else {
2679             return _complete_regex($regex.$inherited_rules, $pre_match_debug, $post_match_debug, $nocontext);
2680             }
2681 128     128   368 }
2682              
2683 128 100       147608 sub _complete_regex {
2684             my ($regex, $pre_match_debug, $post_match_debug, $nocontext) = @_;
2685              
2686             return $nocontext ? qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE_NC$post_match_debug}
2687             : qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE$post_match_debug};
2688             }
2689              
2690             1; # Magic true value required at end of module
2691              
2692             __END__