File Coverage

blib/lib/Regexp/Grammars.pm
Criterion Covered Total %
statement 499 772 64.6
branch 265 454 58.3
condition 81 154 52.6
subroutine 49 69 71.0
pod 0 3 0.0
total 894 1452 61.5


line stmt bran cond sub pod time code
1             =encoding ISO8859-1
2             =cut
3              
4             package Regexp::Grammars;
5 81     81   5673335 use re 'eval';
  81         893  
  81         5776  
6              
7 81     81   560 use warnings;
  81         185  
  81         2170  
8 81     81   403 use strict;
  81         169  
  81         1866  
9 81     81   2244 use 5.010;
  81         282  
10 81     81   533 use vars ();
  81         189  
  81         2122  
11              
12 81     81   558 use Scalar::Util qw< blessed reftype >;
  81         192  
  81         5360  
13 81     81   54431 use Data::Dumper qw< Dumper >;
  81         572335  
  81         9191  
14              
15             our $VERSION = '1.056';
16              
17             my $anon_scalar_ref = \do{my $var};
18             my $MAGIC_VARS = q{my ($CAPTURE, $CONTEXT, $DEBUG, $INDEX, $MATCH, %ARG, %MATCH);};
19              
20             my $PROBLEM_WITH_5_18 = <<'END_ERROR_MSG';
21             Warning: Regexp::Grammars is unsupported
22             under Perl 5.18.0 through 5.18.3 due to a bug
23             in regex parsing under those versions.
24              
25             Please upgrade to Perl 5.18.4 or later, or revert to
26             Perl 5.16 or earlier.
27             END_ERROR_MSG
28              
29             # Load the module...
30             sub import {
31             # Signal lexical scoping (active, unless something was exported)...
32 93     93   2784 $^H{'Regexp::Grammars::active'} = 1;
33              
34             # Process any regexes in module's active lexical scope...
35 81     81   100941 use overload;
  81         85266  
  81         534  
36             overload::constant(
37             qr => sub {
38 275     275   89727 my ($raw, $cooked, $type) = @_;
39             # In active scope and really a regex...
40 275 100 66     718 if (_module_is_active() && $type =~ /qq?/) {
41 173         5331 return bless \$cooked, 'Regexp::Grammars::Precursor';
42             }
43             # Ignore everything else...
44             else {
45 102         50684 return $cooked;
46             }
47             }
48 93         917 );
49              
50             # Deal with 5.18 issues...
51 93 50       2666 if ($] >= 5.018) {
52             # Issue warning...
53 93 50       491 if ($] < 5.018004) {
54 0         0 require Carp;
55 0         0 Carp::croak($PROBLEM_WITH_5_18);
56             }
57              
58             # Deal with cases where Perl 5.18+ complains about
59             # the injection of (??{...}) and (?{...})
60 93         597 require re;
61 93         1610 re->import('eval');
62              
63             # Sanctify the standard Regexp::Grammars pseudo-variables from
64             # Perl 5.18's early enforcement of strictures...
65 93         268 my $caller = caller;
66 93         1912 warnings->unimport('once');
67 93         411 @_ = ( 'vars', '$CAPTURE', '$CONTEXT', '$DEBUG', '$INDEX', '$MATCH', '%ARG', '%MATCH' );
68 93         10266 goto &vars::import;
69             }
70             }
71              
72             # Deactivate module's regex effect when it is "anti-imported" with 'no'...
73             sub unimport {
74             # Signal lexical (non-)scoping...
75 38     38   4945 $^H{'Regexp::Grammars::active'} = 0;
76 38         228 require re;
77 38         13929 re->unimport('eval');
78             }
79              
80             # Encapsulate the hoopy user-defined pragma interface...
81             sub _module_is_active {
82 275     275   3480 return (caller 1)[10]->{'Regexp::Grammars::active'};
83             }
84              
85             my $RULE_HANDLER;
86 743     743 0 23211 sub clear_rule_handler { undef $RULE_HANDLER; }
87              
88             sub Regexp::with_actions {
89 21     21   16247 my ($self, $handler) = @_;
90 21         43 $RULE_HANDLER = $handler;
91 21         466 return $self;
92             }
93              
94             #=====[ COMPILE-TIME INTERIM REPRESENTATION OF GRAMMARS ]===================
95             {
96             package Regexp::Grammars::Precursor;
97              
98             # Only translate precursors once...
99             state %grammar_cache;
100              
101             use overload (
102             # Concatenation/interpolation just concatenates to the precursor...
103             q{.} => sub {
104 92     92   216 my ($x, $y, $reversed) = @_;
105 92 50       200 if (ref $x) { $x = ${$x} }
  92         123  
  92         170  
106 92 100       197 if (ref $y) { $y = ${$y} }
  36         55  
  36         69  
107 92 100       173 if ($reversed) { ($y,$x) = ($x,$y); }
  20         81  
108 92   50     246 $x .= $y//q{};
109 92         282 return bless \$x, 'Regexp::Grammars::Precursor';
110             },
111              
112             # Using as a string (i.e. matching) preprocesses the precursor...
113             q{""} => sub {
114 137     137   393 my ($obj) = @_;
115             return $grammar_cache{ overload::StrVal($$obj) }
116 137   33     600 //= Regexp::Grammars::_build_grammar( ${$obj} );
  137         1391  
117             },
118              
119             # Everything else, as usual...
120 81         1003 fallback => 1,
121 81     81   44702 );
  81         203  
122             }
123              
124              
125             #=====[ SUPPORT FOR THE INTEGRATED DEBUGGER ]=========================
126              
127             # All messages go to STDERR by default...
128             *Regexp::Grammars::LOGFILE = *STDERR{IO};
129              
130             # Debugging levels indicate where to stop...
131             our %DEBUG_LEVEL = (
132             same => undef, # No change in debugging mode
133             off => 0, # No more debugging
134             run => 1, continue => 1, # Run to completion of regex match
135             match => 2, on => 2, # Run to next successful submatch
136             step => 3, try => 3, # Run to next reportable event
137             );
138              
139             # Debugging levels can be abbreviated to one character during interactions...
140             @DEBUG_LEVEL{ map {substr($_,0,1)} keys %DEBUG_LEVEL } = values %DEBUG_LEVEL;
141             $DEBUG_LEVEL{o} = $DEBUG_LEVEL{off}; # Not "on"
142             $DEBUG_LEVEL{s} = $DEBUG_LEVEL{step}; # Not "same"
143              
144             # Width of leading context field in debugging messages is constrained...
145             my $MAX_CONTEXT_WIDTH = 20;
146             my $MIN_CONTEXT_WIDTH = 6;
147              
148             sub set_context_width {
149 0         0 { package Regexp::Grammars::ContextRestorer;
150             sub new {
151 0     0   0 my ($class, $old_context_width) = @_;
152 0         0 bless \$old_context_width, $class;
153             }
154             sub DESTROY {
155 0     0   0 my ($old_context_width_ref) = @_;
156 0         0 $MAX_CONTEXT_WIDTH = ${$old_context_width_ref};
  0         0  
157             }
158             }
159              
160 0     0 0 0 my ($new_context_width) = @_;
  0         0  
161 0         0 my $old_context_width = $MAX_CONTEXT_WIDTH;
162 0         0 $MAX_CONTEXT_WIDTH = $new_context_width;
163 0 0       0 if (defined wantarray) {
164 0         0 return Regexp::Grammars::ContextRestorer->new($old_context_width);
165             }
166             }
167              
168             # Rewrite a string currently being matched, to make \n and \t visible
169             sub _show_metas {
170 0   0 0   0 my $context_str = shift // q{};
171              
172             # Quote newlines (\n -> \\n, without using a regex)...
173 0         0 my $index = index($context_str,"\n");
174 0         0 while ($index >= 0) {
175 0         0 substr($context_str, $index, 1, '\\n');
176 0         0 $index = index($context_str,"\n",$index+2);
177             }
178              
179             # Quote tabs (\t -> \\t, without using a regex)...
180 0         0 $index = index($context_str,"\t");
181 0         0 while ($index >= 0) {
182 0         0 substr($context_str, $index, 1, '\\t');
183 0         0 $index = index($context_str,"\t",$index+2);
184             }
185              
186 0         0 return $context_str;
187             }
188              
189             # Minimize whitespace in a string...
190             sub _squeeze_ws {
191 1792     1792   3027 my ($str) = @_;
192              
193 1792         2907 $str =~ tr/\n\t/ /;
194              
195 1792         3535 my $index = index($str,q{ });
196 1792         3928 while ($index >= 0) {
197 7089         9509 substr($str, $index, 2, q{ });
198 7089         11963 $index = index($str,q{ },$index);
199             }
200              
201 1792         3990 return $str;
202             }
203              
204             # Prepare for debugging...
205             sub _init_try_stack {
206 0     0   0 our (@try_stack, $last_try_pos, $last_context_str);
207              
208             # Start with a representation of the entire grammar match...
209 0         0 @try_stack = ({
210             subrule => '',
211             height => 0,
212             errmsg => ' \\FAIL ',
213             });
214              
215             # Initialize tracking of location and context...
216 0         0 $last_try_pos = -1;
217 0         0 $last_context_str = q{};
218              
219             # Report...
220 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context('=>')
  0         0  
221             . 'Trying from position ' . pos();
222             }
223              
224             # Create a "context string" showing where the regex is currently matching...
225             sub _debug_context {
226 0     0   0 my ($fill_chars) = @_;
227              
228             # Determine minimal sufficient width for context field...
229 0   0     0 my $field_width = length(_show_metas($_//q{}));
230 0 0       0 if ($field_width > $MAX_CONTEXT_WIDTH) {
    0          
231 0         0 $field_width = $MAX_CONTEXT_WIDTH;
232             }
233             elsif ($field_width < $MIN_CONTEXT_WIDTH) {
234 0         0 $field_width = $MIN_CONTEXT_WIDTH;
235             }
236              
237             # Get current matching position (and some additional trailing context)...
238 0   0     0 my $context_str
      0        
239             = substr(_show_metas(substr(($_//q{}).q{},pos()//0,$field_width)),0,$field_width);
240              
241             # Build the context string, handling special cases...
242 0   0     0 our $last_context_str //= q{};
243 0 0       0 if ($fill_chars) {
244             # If caller supplied a 1- or 2-char fill sequence, use that instead...
245 0 0       0 my $last_fill_char = length($fill_chars) > 1
246             ? substr($fill_chars,-1,1,q{})
247             : $fill_chars
248             ;
249 0         0 $context_str = $fill_chars x ($field_width-1) . $last_fill_char;
250             }
251             else {
252             # Make end-of-string visible in empty context string...
253 0 0       0 if ($context_str eq q{}) {
254 0         0 $context_str = '[eos]';
255             }
256              
257             # Don't repeat consecutive identical context strings...
258 0 0       0 if ($context_str eq $last_context_str) {
259 0         0 $context_str = q{ } x $field_width;
260             }
261             else {
262             # If not repeating, remember for next time...
263 0         0 $last_context_str = $context_str;
264             }
265             }
266              
267             # Left justify and return context string...
268 0         0 return sprintf("%-*s ",$field_width,$context_str);
269             }
270              
271             # Show a debugging message (mainly used for compile-time errors and info)...
272             sub _debug_notify {
273             # Single arg is a line to be printed with a null severity...
274 2 50   2   11 my ($severity, @lines) = @_==1 ? (q{},@_) : @_;
275 2         7 chomp @lines;
276              
277             # Formatting string for all lines...
278 2         4 my $format = qq{%*s | %s\n};
279              
280             # Track previous severity and avoid repeating the same level...
281 2         4 state $prev_severity = q{};
282 2 50 33     15 if ($severity !~ /\S/) {
    50          
283             # Do nothing
284             }
285             elsif ($severity eq 'info' && $prev_severity eq 'info' ) {
286 0         0 $severity = q{};
287             }
288             else {
289 2         4 $prev_severity = $severity;
290             }
291              
292             # Display first line with severity indicator (unless same as previous)...
293 2         4 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, $severity, shift @lines;
  2         161  
294              
295             # Display first line without severity indicator
296 2         13 for my $next_line (@lines) {
297 8         16 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, q{}, $next_line;
  8         95  
298             }
299             }
300              
301             # Handle user interactions during runtime debugging...
302             sub _debug_interact {
303 0     0   0 my ($stack_height, $leader, $curr_frame_ref, $min_debug_level) = @_;
304              
305 0         0 our $DEBUG; # ...stores current debug level within regex
306              
307             # Only interact with terminals, and if debug level is appropriate...
308 0 0 0     0 if (-t *Regexp::Grammars::LOGFILE
      0        
      0        
309             && defined $DEBUG
310             && ($DEBUG_LEVEL{$DEBUG}//0) >= $DEBUG_LEVEL{$min_debug_level}
311             ) {
312 0         0 local $/ = "\n"; # ...in case some caller is being clever
313             INPUT:
314 0         0 while (1) {
315 0   0     0 my $cmd = readline // q{};
316 0         0 chomp $cmd;
317              
318             # Input of 'd' means 'display current result frame'...
319 0 0       0 if ($cmd eq 'd') {
320 0         0 print {*Regexp::Grammars::LOGFILE} join "\n",
321 0 0       0 map { $leader . ($stack_height?'| ':q{})
  0         0  
322             . ' : ' . $_
323             }
324             split "\n", q{ }x8 . substr(Dumper($curr_frame_ref),8);
325 0         0 print "\t";
326             }
327             # Any other (valid) input changes debugging level and continues...
328             else {
329 0 0       0 if (defined $DEBUG_LEVEL{$cmd}) { $DEBUG = $cmd; }
  0         0  
330 0         0 last INPUT;
331             }
332             }
333             }
334             # When interaction not indicated, just complete the debugging line...
335             else {
336 0         0 print {*Regexp::Grammars::LOGFILE} "\n";
  0         0  
337             }
338             }
339              
340             # Handle reporting of unsuccessful match attempts...
341             sub _debug_handle_failures {
342 0     0   0 my ($stack_height, $subrule, $in_match) = @_;
343 0         0 our @try_stack;
344              
345             # Unsuccessful match attempts leave "leftovers" on the attempt stack...
346             CLEANUP:
347 0   0     0 while (@try_stack && $try_stack[-1]{height} >= $stack_height) {
348             # Grab record of (potentially) unsuccessful attempt...
349 0         0 my $error_ref = pop @try_stack;
350              
351             # If attempt was the one whose match is being reported, go and report...
352             last CLEANUP if $in_match
353             && $error_ref->{height} == $stack_height
354 0 0 0     0 && $error_ref->{subrule} eq $subrule;
      0        
355              
356             # Otherwise, report the match failure...
357 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context(q{ }) . $error_ref->{errmsg};
  0         0  
358             }
359             }
360              
361             # Handle attempts to call non-existent subrules...
362             sub _debug_fatal {
363 0     0   0 my ($naughty_construct) = @_;
364              
365 0         0 print {*Regexp::Grammars::LOGFILE}
  0         0  
366             "_________________________________________________________________\n",
367             "Fatal error: Entire parse terminated prematurely while attempting\n",
368             " to call non-existent rule: $naughty_construct\n",
369             "_________________________________________________________________\n";
370 0         0 $@ = "Entire parse terminated prematurely while attempting to call non-existent rule: $naughty_construct";
371             }
372              
373             # Handle objrules that don't return hashes...
374             sub _debug_non_hash {
375 331     331   15539 my ($obj, $name) = @_;
376              
377             # If the object is okay, no further action required...
378 331 100       7660 return q{} if reftype($obj) eq 'HASH';
379              
380             # Generate error messages...
381 1         3 print {*Regexp::Grammars::LOGFILE}
  1         15  
382             "_________________________________________________________________\n",
383             "Fatal error: returned a non-hash-based object\n",
384             "_________________________________________________________________\n";
385 1         6 $@ = " returned a non-hash-based object";
386              
387 1         22 return '(*COMMIT)(*FAIL)';
388             }
389              
390              
391             # Print a message in context...
392             sub _debug_logmsg {
393 0     0   0 my ($stack_height, @msg) = @_;
394              
395             # Determine indent for messages...
396 0         0 my $leader = _debug_context() . q{| } x ($stack_height-1) . '|';
397              
398             # Report the attempt...
399 0         0 print {*Regexp::Grammars::LOGFILE} map { "$leader$_\n" } @msg;
  0         0  
  0         0  
400             }
401              
402             # Print a message indicating a (sub)match attempt...
403             sub _debug_trying {
404 0     0   0 my ($stack_height, $curr_frame_ref, $subrule) = @_;
405              
406             # Clean up after any preceding unsuccessful attempts...
407 0         0 _debug_handle_failures($stack_height, $subrule);
408              
409             # Determine indent for messages...
410 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
411              
412             # Detect and report any backtracking prior to this attempt...
413 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
414 0         0 my $backtrack_distance = $last_try_pos - pos();
415 0 0       0 if ($backtrack_distance > 0) {
416 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
417             . q{| } x ($stack_height-2)
418             . qq{|...Backtracking $backtrack_distance char}
419             . ($backtrack_distance > 1 ? q{s} : q{})
420             . q{ and trying new match}
421             ;
422             }
423              
424             # Report the attempt...
425 0         0 print {*Regexp::Grammars::LOGFILE} $leader, "|...Trying $subrule\t";
  0         0  
426              
427             # Handle user interactions during debugging...
428 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'step');
429              
430             # Record the attempt, for later error handling in _debug_matched()...
431 0 0       0 if ($subrule ne 'next alternative') {
432 0         0 our @try_stack;
433 0         0 push @try_stack, {
434             height => $stack_height,
435             subrule => $subrule,
436             # errmsg should align under: |...Trying $subrule\t
437             errmsg => q{| } x ($stack_height-2) . "| \\FAIL $subrule",
438             };
439             }
440 0         0 $last_try_pos = pos();
441             }
442              
443             # Print a message indicating a successful (sub)match...
444             sub _debug_matched {
445 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $matched_text) = @_;
446              
447             # Clean up any intervening unsuccessful attempts...
448 0         0 _debug_handle_failures($stack_height, $subrule, 'in match');
449              
450             # Build debugging message...
451 0         0 my $debug_context = _debug_context();
452 0         0 my $leader = $debug_context . q{| } x ($stack_height-2);
453 0 0       0 my $message = ($stack_height ? '| ' : q{})
454             . " \\_____$subrule matched ";
455 0 0       0 my $filler = $stack_height
456             ? '| ' . q{ } x (length($message)-4)
457             : q{ } x length($message);
458              
459 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
460              
461             # Report if match required backtracking...
462 0   0     0 my $backtrack_distance = $last_try_pos - (pos()//0);
463 0 0       0 if ($backtrack_distance > 0) {
464 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
465             . q{| } x ($stack_height-2)
466             . qq{|...Backtracking $backtrack_distance char}
467             . ($backtrack_distance > 1 ? q{s} : q{})
468             . qq{ and rematching $subrule}
469             ;
470             }
471 0         0 $last_try_pos = pos();
472              
473             # Format match text (splitting multi-line texts and indent them correctly)...
474 0 0       0 $matched_text = defined($matched_text)
475             ? $matched_text = q{'} . join("\n$leader$filler", split "\n", $matched_text) . q{'}
476             : q{};
477              
478             # Print match message...
479 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $matched_text . qq{\t};
  0         0  
480              
481             # Check for user interaction...
482 0 0       0 _debug_interact($stack_height, $leader, $curr_frame_ref, $stack_height ? 'match' : 'run');
483             }
484              
485             # Print a message indicating a successful (sub)match...
486             sub _debug_require {
487 0     0   0 my ($stack_height, $condition, $succeeded) = @_;
488              
489             # Build debugging message...
490 0         0 my $debug_context = _debug_context();
491 0         0 my $leader = $debug_context . q{| } x ($stack_height-1);
492 0 0       0 my $message1 = ($stack_height ? '|...' : q{})
493             . "Testing condition: $condition"
494             ;
495 0 0       0 my $message2 = ($stack_height ? '| ' : q{})
    0          
496             . " \\_____"
497             . ($succeeded ? 'Satisfied' : 'FAILED')
498             ;
499              
500             # Report if match required backtracking...
501 0         0 our $last_try_pos;
502 0         0 my $backtrack_distance = $last_try_pos - pos();
503 0 0       0 if ($backtrack_distance > 0) {
504 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
505             . q{| } x ($stack_height-1)
506             . qq{|...Backtracking $backtrack_distance char}
507             . ($backtrack_distance > 1 ? q{s} : q{})
508             . qq{ and rematching}
509             ;
510             }
511              
512             # Remember where the condition was tried...
513 0         0 $last_try_pos = pos();
514              
515             # Print match message...
516 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message1;
  0         0  
517 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message2;
  0         0  
518             }
519              
520             # Print a message indicating a successful store-result-of-code-block...
521             sub _debug_executed {
522 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $value) = @_;
523              
524             # Build message...
525 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
526 0         0 my $message = "|...Action $subrule\n";
527 0         0 my $message2 = "| saved value: '";
528 0         0 $message .= $leader . $message2;
529 0         0 my $filler = q{ } x length($message2);
530              
531             # Split multiline results over multiple lines (properly indented)...
532 0         0 $value = join "\n$leader$filler", split "\n", $value;
533              
534             # Report the action...
535 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $value . qq{'\t};
  0         0  
536              
537             # Check for user interaction...
538 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'match');
539             }
540              
541             # Create the code to be inserted into the regex to facilitate debugging...
542             sub _build_debugging_statements {
543 414     414   910 my ($debugging_active, $subrule, $extra_pre_indent) = @_;
544              
545 414 100       1462 return (q{}, q{}) if ! $debugging_active;;
546              
547 1   50     4 $extra_pre_indent //= 0;
548              
549 1         3 $subrule = "q{$subrule}";
550              
551             return (
552 1         8 qq{ Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent, \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], $subrule)
553             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};
554             },
555             qq{ Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1, \$Regexp::Grammars::RESULT_STACK[-1], $subrule, \$^N)
556             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};
557             },
558             );
559             }
560              
561             sub _build_raw_debugging_statements {
562 1790     1790   3262 my ($debugging_active, $subpattern, $extra_pre_indent) = @_;
563              
564 1790 50       5038 return (q{}, q{}) if ! $debugging_active;
565              
566 0   0     0 $extra_pre_indent //= 0;
567              
568 0 0       0 if ($subpattern eq '|') {
569             return (
570 0         0 q{},
571             qq{
572             (?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
573             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], 'next alternative')
574             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})
575             },
576             );
577             }
578             else {
579             return (
580 0         0 qq{
581             (?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
582             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], q{subpattern /$subpattern/}, \$^N)
583             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})
584             },
585             qq{
586             (?{;Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1,
587             \$Regexp::Grammars::RESULT_STACK[-1], q{subpattern /$subpattern/}, \$^N)
588             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})
589             },
590             );
591             }
592             }
593              
594              
595             #=====[ SUPPORT FOR AUTOMATIC TIMEOUTS ]=========================
596              
597             sub _test_timeout {
598 0     0   0 our ($DEBUG, $TIMEOUT);
599              
600 0 0       0 return q{} if time() < $TIMEOUT->{'limit'};
601              
602             my $duration = "$TIMEOUT->{duration} second"
603 0 0       0 . ( $TIMEOUT->{duration} == 1 ? q{} : q{s} );
604              
605 0 0 0     0 if (defined($DEBUG) && $DEBUG ne 'off') {
606 0         0 my $leader = _debug_context(q{ });
607 0         0 say {*LOGFILE} $leader . '|';
  0         0  
608 0         0 say {*LOGFILE} $leader . "|...Invoking {duration}>";
  0         0  
609 0         0 say {*LOGFILE} $leader . "| \\_____No match after $duration";
  0         0  
610 0         0 say {*LOGFILE} $leader . '|';
  0         0  
611 0         0 say {*LOGFILE} $leader . " \\FAIL ";
  0         0  
612             }
613              
614 0 0       0 if (! @!) {
615 0         0 @! = "Internal error: Timed out after $duration (as requested)";
616             }
617 0         0 return q{(*COMMIT)(*FAIL)};
618             }
619              
620              
621             #=====[ SUPPORT FOR UPDATING THE RESULT STACK ]=========================
622              
623             # Create a clone of the current result frame with an new key/value...
624             sub _extend_current_result_frame_with_scalar {
625 2011     2011   25820 my ($stack_ref, $key, $value) = @_;
626              
627             # Autovivify null stacks (only occur when grammar invokes no subrules)...
628 2011 50       2927 if (!@{$stack_ref}) {
  2011         4200  
629 0         0 $stack_ref = [{}];
630             }
631              
632             # Copy existing frame, appending new value so it overwrites any old value...
633             my $cloned_result_frame = {
634 2011         2857 %{$stack_ref->[-1]},
  2011         6322  
635             $key => $value,
636             };
637              
638             # Make the copy into an object, if the original was one...
639 2011 50       5620 if (my $class = blessed($stack_ref->[-1])) {
640 0         0 bless $cloned_result_frame, $class;
641             }
642              
643 2011         22162 return $cloned_result_frame;
644             }
645              
646             # Create a clone of the current result frame with an additional key/value
647             # (As above, but preserving the "listiness" of the key being added to)...
648             sub _extend_current_result_frame_with_list {
649 131     131   2003 my ($stack_ref, $key, $value) = @_;
650              
651             # Copy existing frame, appending new value to appropriate element's list...
652             my $cloned_result_frame = {
653 131         383 %{$stack_ref->[-1]},
654             $key => [
655 131   100     220 @{$stack_ref->[-1]{$key}//[]},
  131         760  
656             $value,
657             ],
658             };
659              
660             # Make the copy into an object, if the original was one...
661 131 50       469 if (my $class = blessed($stack_ref->[-1])) {
662 0         0 bless $cloned_result_frame, $class;
663             }
664              
665 131         2018 return $cloned_result_frame;
666             }
667              
668             # Pop current result frame and add it to a clone of previous result frame
669             # (flattening it if possible, and preserving any blessing)...
670             sub _pop_current_result_frame {
671 819     819   17497 my ($stack_ref, $key, $original_name, $value) = @_;
672              
673             # Where are we in the stack?
674 819         1575 my $curr_frame = $stack_ref->[-1];
675 819         1257 my $caller_frame = $stack_ref->[-2];
676              
677             # Track which frames are objects...
678 819         1967 my $is_blessed_curr = blessed($curr_frame);
679 819         1474 my $is_blessed_caller = blessed($caller_frame);
680              
681             # Remove "private" captures (i.e. those starting with _)...
682 819         1280 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  819         1427  
  1582         3924  
  819         2417  
683              
684             # Remove "nocontext" marker...
685 819         1678 my $nocontext = delete $curr_frame->{'~'};
686              
687             # Build a clone of the current frame...
688             my $cloned_result_frame
689             = exists $curr_frame->{'='} ? $curr_frame->{'='}
690 544         1760 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
691 819 50 100     2770 : keys %{$curr_frame} ? $curr_frame->{q{}}
  102 100       307  
    100          
692             : $value
693             ;
694              
695             # Apply any appropriate handler...
696 819 100       1922 if ($RULE_HANDLER) {
697 88 100 66     590 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
698 22         67 my $replacement_result_frame
699             = $RULE_HANDLER->$original_name($cloned_result_frame);
700 22 50       1147 if (defined $replacement_result_frame) {
701 22         40 $cloned_result_frame = $replacement_result_frame;
702             }
703             }
704             }
705              
706             # Remove capture if not requested...
707 819 100 100     2139 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  110   100     361  
708 28         56 delete $cloned_result_frame->{q{}};
709             }
710              
711             # Nest a clone of current frame inside a clone of the caller frame...
712             my $cloned_caller_frame = {
713 819   50     1252 %{$caller_frame//{}},
  819         3195  
714             $key => $cloned_result_frame,
715             };
716              
717             # Make the copies into objects, if the originals were...
718 819 100 66     2327 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
719 212         416 bless $cloned_caller_frame->{$key}, $is_blessed_curr;
720             }
721 819 50       1740 if ($is_blessed_caller) {
722 0         0 bless $cloned_caller_frame, $is_blessed_caller;
723             }
724              
725 819         15277 return $cloned_caller_frame;
726             }
727              
728             # Pop current result frame and add it to a clone of previous result frame
729             # (flattening it if possible, and preserving any blessing)
730             # (As above, but preserving listiness of key being added to)...
731             sub _pop_current_result_frame_with_list {
732 1396     1396   38944 my ($stack_ref, $key, $original_name, $value) = @_;
733              
734             # Where are we in the stack?
735 1396         2605 my $curr_frame = $stack_ref->[-1];
736 1396         1989 my $caller_frame = $stack_ref->[-2];
737              
738             # Track which frames are objects...
739 1396         2911 my $is_blessed_curr = blessed($curr_frame);
740 1396         2252 my $is_blessed_caller = blessed($caller_frame);
741              
742             # Remove "private" captures (i.e. those starting with _)...
743 1396         1995 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  1396         2394  
  1954         5193  
  1396         3853  
744              
745             # Remove "nocontext" marker...
746 1396         2613 my $nocontext = delete $curr_frame->{'~'};
747              
748             # Clone the current frame...
749             my $cloned_result_frame
750             = exists $curr_frame->{'='} ? $curr_frame->{'='}
751 231         688 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
752 1396 50 100     3997 : keys %{$curr_frame} ? $curr_frame->{q{}}
  947 100       1993  
    100          
753             : $value
754             ;
755              
756             # Apply any appropriate handler...
757 1396 100       3314 if ($RULE_HANDLER) {
758 174 100 66     761 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
759 106         275 my $replacement_result_frame
760             = $RULE_HANDLER->$original_name($cloned_result_frame);
761 106 50       2526 if (defined $replacement_result_frame) {
762 106         192 $cloned_result_frame = $replacement_result_frame;
763             }
764             }
765             }
766              
767             # Remove capture if not requested...
768 1396 100 100     3208 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  86   100     267  
769 82         132 delete $cloned_result_frame->{q{}};
770             }
771              
772             # Append a clone of current frame inside a clone of the caller frame...
773             my $cloned_caller_frame = {
774 1396         3421 %{$caller_frame},
775             $key => [
776 1396   100     1814 @{$caller_frame->{$key}//[]},
  1396         6896  
777             $cloned_result_frame,
778             ],
779             };
780              
781             # Make the copies into objects, if the originals were...
782 1396 100 66     3859 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
783 118         223 bless $cloned_caller_frame->{$key}[-1], $is_blessed_curr;
784             }
785 1396 50       2472 if ($is_blessed_caller) {
786 0         0 bless $cloned_caller_frame, $is_blessed_caller;
787             }
788              
789 1396         28155 return $cloned_caller_frame;
790             }
791              
792              
793             #=====[ MISCELLANEOUS CONSTANTS ]=========================
794              
795             # Namespace in which grammar inheritance occurs...
796             my $CACHE = 'Regexp::Grammars::_CACHE_::';
797             my $CACHE_LEN = length $CACHE;
798             my %CACHE; #...for subrule tracking
799              
800             # This code inserted at the start of every grammar regex
801             # (initializes the result stack cleanly and backtrackably, via local)...
802             my $PROLOGUE = q{((?{; @! = () if !pos;
803             local @Regexp::Grammars::RESULT_STACK
804             = (@Regexp::Grammars::RESULT_STACK, {});
805             local $Regexp::Grammars::TIMEOUT = { limit => -1>>1 };
806             local $Regexp::Grammars::DEBUG = 'off' }) };
807              
808             # This code inserted at the end of every grammar regex
809             # (puts final result in %/. Also defines default , , etc.)...
810             my $EPILOGUE = q{)(?{; $Regexp::Grammars::RESULT_STACK[-1]{q{}} //= $^N;;
811             local $Regexp::Grammars::match_frame = pop @Regexp::Grammars::RESULT_STACK;
812             delete @{$Regexp::Grammars::match_frame}{
813             '~', grep {substr($_,0,1) eq '_'} keys %{$Regexp::Grammars::match_frame}
814             };
815             if (exists $Regexp::Grammars::match_frame->{'='}) {
816             if (ref($Regexp::Grammars::match_frame->{'='}) eq 'HASH') {
817             $Regexp::Grammars::match_frame
818             = $Regexp::Grammars::match_frame->{'='};
819             }
820             }
821             if (@Regexp::Grammars::RESULT_STACK) {
822             $Regexp::Grammars::RESULT_STACK[-1]{'(?R)'}
823             = $Regexp::Grammars::match_frame;
824             }
825             Regexp::Grammars::clear_rule_handler();
826             */ = $Regexp::Grammars::match_frame;
827             })|\Z(?{Regexp::Grammars::clear_rule_handler();})(?!)(?(DEFINE)
828             (? \\s* )
829             (?
830             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
831             \\s*
832             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
833             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
834             })
835             )
836             (? \\S+ )
837             (?
838             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
839             \\S+
840             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
841             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
842             })
843             )
844             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = pos; }) )
845             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = 1 + substr($_,0,pos) =~ tr/\n/\n/; }) )
846             )
847             };
848             my $EPILOGUE_NC = $EPILOGUE;
849             $EPILOGUE_NC =~ s{ ; .* ;;}{;}xms;
850              
851              
852             #=====[ MISCELLANEOUS PATTERNS THAT MATCH USEFUL THINGS ]========
853              
854             # Match an identifier...
855             my $IDENT = qr{ [^\W\d] \w*+ }xms;
856             my $QUALIDENT = qr{ (?: $IDENT :: )*+ $IDENT }xms;
857              
858             # Match balanced parentheses, taking into account \-escapes and []-escapes...
859             my $PARENS = qr{
860             (?&VAR_PARENS)
861             (?(DEFINE)
862             (? \( (?: \\. | (?&VAR_PARENS) | (?&CHARSET) | [^][()\\]++)*+ \) )
863             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]])*+ \] )
864              
865             )
866             }xms;
867              
868             # Match a directive within rules...
869             my $WS_PATTERN = qr{]++ | $PARENS )*+) >}xms;
870              
871              
872             #=====[ UTILITY SUBS FOR ERROR AND WARNING MESSAGES ]========
873              
874             sub _uniq {
875 53     53   217 my %seen;
876 53 50       100 return grep { defined $_ && !$seen{$_}++ } @_;
  86         1396  
877             }
878              
879             # Default translator for error messages...
880             my $ERRORMSG_TRANSLATOR = sub {
881             my ($errormsg, $rulename, $context) = @_;
882              
883             $rulename = 'valid input' if $rulename eq q{};
884             $context //= '';
885              
886             # Unimplemented subrule when rulename starts with '-'...
887             if (substr($rulename,0,1) eq '-') {
888             $rulename = substr($rulename,1);
889             return "Can't match subrule <$rulename> (not implemented)";
890             }
891              
892             # Empty message converts to a "Expected...but found..." message...
893             if ($errormsg eq q{}) {
894             $rulename =~ tr/_/ /;
895             $rulename = lc($rulename);
896             return "Expected $rulename, but found '$context' instead";
897             }
898              
899             # "Expecting..." messages get "but found" added...
900             if (lc(substr($errormsg,0,6)) eq 'expect') {
901             return "$errormsg, but found '$context' instead";
902             }
903              
904             # Everything else stays "as is"...
905             return $errormsg;
906             };
907              
908             # Allow user to set translation...
909             sub set_error_translator {
910 0         0 { package Regexp::Grammars::TranslatorRestorer;
911             sub new {
912 1     1   4 my ($class, $old_translator) = @_;
913 1         5 bless \$old_translator, $class;
914             }
915             sub DESTROY {
916 1     1   573 my ($old_translator_ref) = @_;
917 1         3 $ERRORMSG_TRANSLATOR = ${$old_translator_ref};
  1         8  
918             }
919             }
920              
921 1     1 0 110 my ($translator_ref) = @_;
  1         4  
922 1 50       9 die "Usage: set_error_translator(\$subroutine_reference)\n"
923             if ref($translator_ref) ne 'CODE';
924              
925 1         5 my $old_translator_ref = $ERRORMSG_TRANSLATOR;
926 1         2 $ERRORMSG_TRANSLATOR = $translator_ref;
927              
928             return defined wantarray
929 1 50       15 ? Regexp::Grammars::TranslatorRestorer->new($old_translator_ref)
930             : ();
931             }
932              
933             # Dispatch to current translator for error messages...
934             sub _translate_errormsg {
935 53     53   2406 goto &{$ERRORMSG_TRANSLATOR};
  53         184  
936             }
937              
938             #=====[ SUPPORT FOR TRANSLATING GRAMMAR-ENHANCED REGEX TO NATIVE REGEX ]====
939              
940             # Store any specified grammars...
941             my %user_defined_grammar;
942              
943             my %REPETITION_DESCRIPTION_FOR = (
944             '+' => 'once or more',
945             '*' => 'any number of times',
946             '?' => 'if possible',
947             '+?' => 'as few times as possible',
948             '*?' => 'as few times as possible',
949             '??' => 'if necessary',
950             '++' => 'as many times as possible',
951             '*+' => 'as many times as possible',
952             '?+' => 'if possible',
953             );
954              
955             sub _translate_raw_regex {
956 1792     1792   4416 my ($regex, $debug_build, $debug_runtime) = @_;
957              
958 1792   66     6772 my $is_comment = substr($regex, 0, 1) eq q{#}
959             || substr($regex, 0, 3) eq q{(?#};
960 1792         3436 my $visible_regex = _squeeze_ws($regex);
961              
962             # Report how regex was interpreted, if requested to...
963 1792 0 33     4237 if ($debug_build && $visible_regex ne q{} && $visible_regex ne q{ }) {
      33        
964 0 0       0 _debug_notify( info =>
965             " |",
966             " |...Treating '$visible_regex' as:",
967             ($is_comment ? " | \\ a comment (which will be ignored)"
968             : " | \\ normal Perl regex syntax"
969             ),
970             );
971             }
972              
973 1792 100       3174 return q{} if $is_comment;
974              
975             # Generate run-time debugging code (if any)...
976 1790         3341 my ($debug_pre, $debug_post)
977             = _build_raw_debugging_statements($debug_runtime,$visible_regex, +1);
978              
979             # Replace negative lookahead with one that works under R::G...
980 1790         3718 $regex =~ s{\(\?!}{(?!(?!)|}gxms;
981             # ToDo: Also replace positive lookahead with one that works under R::G...
982             # This replacement should be of the form:
983             # $regex =~ s{\(\?!}{(?!(?!)|(?!(?!)|}gxms;
984             # but need to find a way to insert the extra ) at the other end
985              
986 1790 50 33     7790 return $debug_runtime && $regex eq '|' ? $regex . $debug_post
    50 33        
987             : $debug_runtime && $regex =~ /\S/ ? "(?#)(?:$debug_pre($regex)$debug_post(?#))"
988             : $regex;
989             }
990              
991             # Report and convert a debugging directive...
992             sub _translate_debug_directive {
993 0     0   0 my ($construct, $cmd, $debug_build) = @_;
994              
995             # Report how directive was interpreted, if requested to...
996 0 0       0 if ($debug_build) {
997 0         0 _debug_notify( info =>
998             " |",
999             " |...Treating $construct as:",
1000             " | \\ Change run-time debugging mode to '$cmd'",
1001             );
1002             }
1003              
1004 0         0 return qq{(?{; local \$Regexp::Grammars::DEBUG = q{$cmd}; }) };
1005             }
1006              
1007             # Report and convert a timeout directive...
1008             sub _translate_timeout_directive {
1009 0     0   0 my ($construct, $timeout, $debug_build) = @_;
1010              
1011             # Report how directive was interpreted, if requested to...
1012 0 0       0 if ($debug_build) {
1013 0 0       0 _debug_notify( info =>
    0          
1014             " |",
1015             " |...Treating $construct as:",
1016             ($timeout > 0
1017             ? " | \\ Cause the entire parse to fail after $timeout second" . ($timeout==1 ? q{} : q{s})
1018             : " | \\ Cause the entire parse to fail immediately"
1019             ),
1020             );
1021             }
1022              
1023 0 0       0 return $timeout > 0
1024             ? qq{(?{; local \$Regexp::Grammars::TIMEOUT = { duration => $timeout, limit => time() + $timeout }; }) }
1025             : qq{(*COMMIT)(*FAIL)};
1026             }
1027              
1028             # Report and convert a directive...
1029             sub _translate_require_directive {
1030 0     0   0 my ($construct, $condition, $debug_build) = @_;
1031              
1032 0         0 $condition = substr($condition, 3, -2);
1033              
1034             # Report how directive was interpreted, if requested to...
1035 0 0       0 if ($debug_build) {
1036 0         0 _debug_notify( info =>
1037             " |",
1038             " |...Treating $construct as:",
1039             " | \\ Require that {$condition} is true",
1040             );
1041             }
1042              
1043 0         0 my $quoted_condition = $condition;
1044 0         0 $quoted_condition =~ s{\$}{}xms;
1045              
1046 0         0 return qq{(?(?{;$condition})
1047             (?{;Regexp::Grammars::_debug_require(
1048             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 1)
1049             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})
1050             | (?{;Regexp::Grammars::_debug_require(
1051             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 0)
1052             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})(?!))
1053             };
1054             }
1055              
1056              
1057             # Report and convert a directive...
1058             sub _translate_minimize_directive {
1059 3     3   11 my ($construct, $debug_build) = @_;
1060              
1061             # Report how directive was interpreted, if requested to...
1062 3 50       9 if ($debug_build) {
1063 0         0 _debug_notify( info =>
1064             " |",
1065             " |...Treating $construct as:",
1066             " | \\ Minimize result value if possible",
1067             );
1068             }
1069              
1070 3         9 return q{(?{;
1071             if (1 == grep { $_ ne '!' && $_ ne '@' && $_ ne '~' } keys %MATCH) { # ...single alnum key
1072             local %Regexp::Grammars::matches = %MATCH;
1073             delete @Regexp::Grammars::matches{'!', '@', '~'};
1074             local ($Regexp::Grammars::only_key) = keys %Regexp::Grammars::matches;
1075             local $Regexp::Grammars::array_ref = $MATCH{$Regexp::Grammars::only_key};
1076             if (ref($Regexp::Grammars::array_ref) eq 'ARRAY' && 1 == @{$Regexp::Grammars::array_ref}) {
1077             $MATCH = $Regexp::Grammars::array_ref->[0];
1078             }
1079             }
1080             })};
1081             }
1082              
1083             # Report and convert a debugging directive...
1084             sub _translate_error_directive {
1085 19     19   82 my ($construct, $type, $msg, $debug_build, $subrule_name) = @_;
1086 19   50     55 $subrule_name //= 'undef';
1087              
1088             # Determine severity...
1089 19 100       50 my $severity = ($type eq 'error') ? 'fail' : 'non-fail';
1090              
1091             # Determine fatality (and build code to invoke it)...
1092 19 100       37 my $fatality = ($type eq 'fatal') ? '(*COMMIT)(*FAIL)' : q{};
1093              
1094             # Unpack message...
1095 19 100       47 if (substr($msg,0,3) eq '(?{') {
1096 4         12 $msg = 'do'. substr($msg,2,-1);
1097             }
1098             else {
1099 15         29 $msg = quotemeta $msg;
1100 15         43 $msg = qq{qq{$msg}};
1101             }
1102              
1103             # Report how directive was interpreted, if requested to...
1104 19 50       42 if ($debug_build) {
1105 0 0       0 _debug_notify( info => " |",
1106             " |...Treating $construct as:",
1107             ( $type eq 'log' ? " | \\ Log a message to the logfile"
1108             : " | \\ Append a $severity error message to \@!"
1109             ),
1110             );
1111             }
1112              
1113             # Generate the regex...
1114 19 100       121 return $type eq 'log'
    50          
1115             ? qq{(?{Regexp::Grammars::_debug_logmsg(scalar \@Regexp::Grammars::RESULT_STACK,$msg)
1116             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}
1117             })}
1118              
1119             : qq{(?:(?{;local \$Regexp::Grammar::_memopos=pos();})
1120             (?>\\s*+((?-s).{0,$MAX_CONTEXT_WIDTH}+))
1121             (?{; pos() = \$Regexp::Grammar::_memopos;
1122             @! = Regexp::Grammars::_uniq(
1123             @!,
1124             Regexp::Grammars::_translate_errormsg($msg,q{$subrule_name},\$CONTEXT)
1125             ) }) (?!)|}
1126             . ($severity eq 'fail' ? q{(?!)} : $fatality)
1127             . q{)}
1128             ;
1129             }
1130              
1131             sub _translate_subpattern {
1132 108     108   507 my ($construct, $alias, $subpattern, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout, $backref)
1133             = @_;
1134              
1135             # Determine save behaviour...
1136 108         237 my $is_noncapturing = $savemode eq 'noncapturing';
1137 108         181 my $is_listifying = $savemode eq 'list';
1138 108         197 my $is_codeblock = substr($subpattern,0,3) eq '(?{';
1139 108 100       230 my $value_saved = $is_codeblock ? '$^R' : '$^N';
1140 108 100       199 my $do_something_with = $is_codeblock ? 'execute the code block' : 'match the pattern';
1141 108 100       213 my $result = $is_codeblock ? 'result' : 'matched substring';
1142 108 100       298 my $description = $is_codeblock ? substr($subpattern,2,-1)
    100          
1143             : defined $backref ? $backref
1144             : $subpattern;
1145 108 100       254 my $debug_construct
1146             = $is_codeblock ? '<' . substr($alias,1,-1) . '= (?{;' . substr($subpattern,3,-2) . '})>'
1147             : $construct
1148             ;
1149              
1150             # Report how construct was interpreted, if requested to...
1151 108   50     398 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1152 108 50 66     613 my $results = $is_listifying && $postmodifier ? "each $result"
    50 33        
    50          
1153             : substr($postmodifier,0,1) eq '?' ? "any $result"
1154             : $postmodifier && !$is_noncapturing ? "only the final $result"
1155             : "the $result"
1156             ;
1157 108 50       268 if ($debug_build) {
1158 0 0       0 _debug_notify( info =>
    0          
1159             " |",
1160             " |...Treating $construct as:",
1161             " | | $do_something_with $description $repeatedly",
1162             ( $is_noncapturing ? " | \\ but don't save $results"
1163             : $is_listifying ? " | \\ appending $results to \@{\$MATCH{$alias}}"
1164             : " | \\ saving $results in \$MATCH{$alias}"
1165             )
1166             );
1167             }
1168              
1169             # Generate run-time debugging code (if any)...
1170 108         245 my ($debug_pre, $debug_post)
1171             = _build_debugging_statements($debug_runtime,$debug_construct, +1);
1172              
1173             # Generate post-match result-capturing code, if match captures...
1174 108 100       365 my $post_action = $is_noncapturing
1175             ? q{}
1176             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1177             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1178             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1179             \\\@Regexp::Grammars::RESULT_STACK, $alias, $value_saved
1180             ),
1181             );}
1182             ;
1183              
1184             # Generate timeout test...
1185 108 50       224 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1186              
1187             # Translate to standard regex code...
1188 108         538 return qq{$timeout_test(?{;local \@Regexp::Grammars::RESULT_STACK
1189             = \@Regexp::Grammars::RESULT_STACK;$debug_pre})
1190             (?:($subpattern)(?{;$post_action$debug_post}))$postmodifier};
1191             }
1192              
1193              
1194             sub _translate_hashmatch {
1195 14     14   95 my ($construct, $alias, $hashname, $keypat, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout)
1196             = @_;
1197              
1198             # Empty or missing keypattern defaults to <.hk>...
1199 14 100 66     73 if (!defined $keypat || $keypat !~ /\S/) {
1200 8         16 $keypat = '(?&hk__implicit__)'
1201             }
1202             else {
1203 6         17 $keypat = substr($keypat, 1, -1);
1204             }
1205              
1206             # Determine save behaviour...
1207 14         28 my $is_noncapturing = $savemode eq 'noncapturing';
1208 14         21 my $is_listifying = $savemode eq 'list';
1209              
1210             # Convert hash to hash lookup...
1211 14         32 my $hash_lookup = '$' . substr($hashname, 1). '{$^N}';
1212              
1213             # Report how construct was interpreted, if requested to...
1214 14   100     46 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1215 14 50 66     90 my $results = $is_listifying && $postmodifier ? 'each matched key'
    50 66        
    100          
1216             : substr($postmodifier,0,1) eq '?' ? 'any matched key'
1217             : $postmodifier && !$is_noncapturing ? 'only the final matched key'
1218             : 'the matched key'
1219             ;
1220 14 50       36 if ($debug_build) {
1221 0 0       0 _debug_notify( info =>
    0          
1222             " |",
1223             " |...Treating $construct as:",
1224             " | | match a key from the hash $hashname $repeatedly",
1225             ( $is_noncapturing ? " | \\ but don't save $results"
1226             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1227             : " | \\ saving $results in \$MATCH{$alias}"
1228             )
1229             );
1230             }
1231              
1232             # Generate run-time debugging code (if any)...
1233 14         30 my ($debug_pre, $debug_post)
1234             = _build_debugging_statements($debug_runtime,$construct, +1);
1235              
1236             # Generate post-match result-capturing code, if match captures...
1237 14 100       40 my $post_action = $is_noncapturing
1238             ? q{}
1239             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1240             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1241             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1242             \\\@Regexp::Grammars::RESULT_STACK, $alias, \$^N
1243             ),
1244             );}
1245             ;
1246              
1247             # Generate timeout test...
1248 14 50       32 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1249              
1250             # Translate to standard regex code...
1251 14         75 return qq{$timeout_test(?:(?{;local \@Regexp::Grammars::RESULT_STACK
1252             = \@Regexp::Grammars::RESULT_STACK;$debug_pre})
1253             (?:($keypat)(??{exists $hash_lookup ? q{} : q{(?!)}})(?{;$post_action$debug_post})))$postmodifier};
1254             }
1255              
1256              
1257             # Convert a " % " construct to pure Perl 5.10...
1258             sub _translate_separated_list {
1259 73     73   305 my ($term, $op, $separator, $term_trans, $sep_trans,
1260             $ws, $debug_build, $debug_runtime, $timeout) = @_;
1261              
1262             # This insertion ensures backtracking upwinds the stack correctly...
1263 73         151 state $CHECKPOINT = q{(?{;@Regexp::Grammars::RESULT_STACK = @Regexp::Grammars::RESULT_STACK;})};
1264              
1265             # Translate meaningful whitespace...
1266 73 100       223 $ws = length($ws) ? q{(?&ws__implicit__)} : q{};
1267              
1268             # Generate support for optional trailing separator...
1269 73 100       260 my $opt_trailing = substr($op,-2) eq '%%' ? qq{$ws$sep_trans?}
1270             : q{};
1271              
1272             # Generate timeout test...
1273 73 50       191 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1274              
1275             # Report how construct was interpreted, if requested to...
1276 73 50       203 if ($debug_build) {
1277 0 0       0 _debug_notify( info =>
1278             " |",
1279             " |...Treating $term $op $separator as:",
1280             " | | repeatedly match the subrule $term",
1281             " | \\ as long as the matches are separated by matches of $separator",
1282             (substr($op,-2) eq '%%' ?
1283             " | \\ and allowing an optional trailing $separator"
1284             : q{}
1285             )
1286             );
1287             }
1288              
1289             # One-or-more...
1290 73 100       704 return qq{$timeout_test(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+$opt_trailing}
1291             if $op =~ m{ [*][*]() | [+]([+?]?) \s* %%?+ | \{ 1, \}([+?]?) \s* %%?+ }xms;
1292              
1293             # Zero-or-more...
1294             return
1295 22 100       120 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+)?$+$opt_trailing}
1296             if $op =~ m{ [*]([+?]?) \s* %%? | \{ 0, \}([+?]?) \s* %%? }xms;
1297              
1298             # One-or-zero...
1299 18 100       88 return qq{?$+$opt_trailing}
1300             if $op =~ m{ [?]([+?]?) \s* %%? | \{ 0,1 \}([+?]?) \s* %%? }xms;
1301              
1302             # Zero exactly...
1303 14 100       54 return qq{{0}$ws$opt_trailing}
1304             if $op =~ m{ \{ 0 \}[+?]? \s* %%? }xms;
1305              
1306             # N exactly...
1307 12 100       67 if ($op =~ m{ \{ (\d+) \}([+?]?) \s* %%? }xms ) {
1308 2         8 my $min = $1-1;
1309             return
1310 2         17 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min}$+$opt_trailing)}
1311             }
1312              
1313             # Zero-to-N...
1314 10 100       40 if ($op =~ m{ \{ 0,(\d+) \}([+?]?) \s* %%? }xms ) {
1315 2         8 my $max = $1-1;
1316             return
1317 2         17 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){0,$max}$+)?$+$opt_trailing}
1318             }
1319              
1320             # M-to-N and M-to-whatever...
1321 8 50       42 if ($op =~ m{ \{ (\d+),(\d*) \} ([+?]?) \s* %%? }xms ) {
1322 8         33 my $min = $1-1;
1323 8 100       38 my $max = $2 ? $2-1 : q{};
1324             return
1325 8         71 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min,$max}$+$opt_trailing)}
1326             }
1327              
1328             # Somehow we missed a case (this should never happen)...
1329 0         0 die "Internal error: missing case in separated list handler";
1330             }
1331              
1332             sub _translate_subrule_call {
1333 292     292   2299 my ($source_line, $source_file, $rulename, $grammar_name, $construct, $alias,
1334             $subrule, $args, $savemode, $postmodifier,
1335             $debug_build, $debug_runtime, $timeout, $valid_subrule_names_ref) = @_;
1336              
1337             # Translate arg list, if provided...
1338 292         628 my $arg_desc;
1339 292 100       832 if ($args eq q{}) {
    100          
1340 282         517 $args = q{()};
1341             }
1342             elsif (substr($args,0,3) eq '(?{') {
1343             # Turn parencode into do block...
1344 1         2 $arg_desc = substr($args,3,-2);
1345 1         3 substr($args,1,1) = 'do';
1346             }
1347             else {
1348             # Turn abbreviated format into a key=>value list...
1349 9         45 $args =~ s{ [(,] \s* \K : (\w+) (?= \s* [,)] ) }{$1 => \$MATCH{'$1'}}gxms;
1350 9         23 $arg_desc = substr($args,1,-1);
1351             }
1352              
1353             # Transform qualified subrule names...
1354 292         502 my $simple_subrule = $subrule;
1355 292 100       913 my $start_grammar = (($simple_subrule =~ s{(.*)::}{}xms) ? $1 : "");
1356 292 100       904 if ($start_grammar !~ /^NEXT$|::/) {
1357 290         1047 $start_grammar = caller(3).'::'.$start_grammar;
1358             }
1359              
1360 292 100       2670 my @candidates = $start_grammar eq 'NEXT' ? _ancestry_of($grammar_name)
1361             : _ancestry_of($start_grammar);
1362              
1363             # Rename fully-qualified rule call, if to ancestor grammar...
1364             RESOLVING:
1365 292         703 for my $parent_class (@candidates) {
1366 295         644 my $inherited_subrule = $parent_class.'::'.$simple_subrule;
1367 295 100       969 if ($CACHE{$inherited_subrule}) {
1368 3         6 $subrule = $inherited_subrule;
1369 3         9 last RESOLVING;
1370             }
1371             }
1372              
1373             # Replace package separators, which regex engine can't handle...
1374 292         552 my $internal_subrule = $subrule;
1375 292         585 $internal_subrule =~ s{::}{_88_}gxms;
1376              
1377             # Shortcircuit if unknown subrule invoked...
1378 292 50       757 if (!$valid_subrule_names_ref->{$subrule}) {
1379 0         0 _debug_notify( error =>
1380             qq{Found call to $construct inside definition of $rulename},
1381             qq{near $source_file line $source_line.},
1382             qq{But no or was defined in the grammar},
1383             qq{(Did you misspell $construct? Or forget to define the rule?)},
1384             q{},
1385             );
1386 0         0 return "(?{Regexp::Grammars::_debug_fatal('$construct')})(*COMMIT)(*FAIL)";
1387             }
1388              
1389             # Determine save behaviour...
1390 292         559 my $is_noncapturing = $savemode =~ /noncapturing|lookahead/;
1391 292         504 my $is_listifying = $savemode eq 'list';
1392              
1393 292 100       1090 my $save_code =
    100          
1394             $is_noncapturing?
1395             q{ @Regexp::Grammars::RESULT_STACK[0..@Regexp::Grammars::RESULT_STACK-2] }
1396             : $is_listifying?
1397             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1398             Regexp::Grammars::_pop_current_result_frame_with_list(
1399             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1400             ),
1401             }
1402             :
1403             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1404             Regexp::Grammars::_pop_current_result_frame(
1405             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1406             ),
1407             }
1408             ;
1409              
1410             # Report how construct was interpreted, if requested to...
1411 292   100     1117 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1412 292 100 100     1218 my $results = $is_listifying && $postmodifier ? 'each match'
    100          
1413             : substr($postmodifier,0,1) eq '?' ? 'any match'
1414             : 'the match'
1415             ;
1416 292 100       789 my $do_something_with = $savemode eq 'neglookahead' ? 'lookahead for anything except'
    100          
1417             : $savemode eq 'poslookahead' ? 'lookahead for'
1418             : 'match'
1419             ;
1420 292 50       609 if ($debug_build) {
1421 0 0       0 _debug_notify( info =>
    0          
    0          
1422             " |",
1423             " |...Treating $construct as:",
1424             " | | $do_something_with the subrule <$subrule> $repeatedly",
1425             (defined $arg_desc ? " | | passing the args: ($arg_desc)"
1426             : ()
1427             ),
1428             ( $is_noncapturing ? " | \\ but don't save anything"
1429             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1430             : " | \\ saving $results in \$MATCH{$alias}"
1431             ),
1432             );
1433             }
1434              
1435             # Generate post-match result-capturing code, if match captures...
1436 292         672 my ($debug_pre, $debug_post)
1437             = _build_debugging_statements($debug_runtime, $construct);
1438              
1439             # Generate timeout test...
1440 292 50       739 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1441              
1442             # Translate to standard regex code...
1443 292         2122 return qq{(?:$timeout_test(?{;
1444             local \@Regexp::Grammars::RESULT_STACK = (\@Regexp::Grammars::RESULT_STACK, {'\@'=>{$args}});
1445             $debug_pre})((?&$internal_subrule))(?{;
1446             local \@Regexp::Grammars::RESULT_STACK = (
1447             $save_code
1448             );$debug_post
1449             }))$postmodifier};
1450             }
1451              
1452             sub _translate_rule_def {
1453 256     256   957 my ($type, $qualifier, $name, $callname, $qualname, $body, $objectify, $local_ws, $nocontext)
1454             = @_;
1455 256         951 $qualname =~ s{::}{_88_}gxms;
1456              
1457             # Return object if requested...
1458 256 100       743 my $objectification =
1459             $objectify ? qq{(??{; local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
1460             \$Regexp::Grammars::RESULT_STACK[-1] = '$qualifier$name'->can('new')
1461             ? '$qualifier$name'->new(\$Regexp::Grammars::RESULT_STACK[-1])
1462             : bless \$Regexp::Grammars::RESULT_STACK[-1], '$qualifier$name';
1463             Regexp::Grammars::_debug_non_hash(\$Regexp::Grammars::RESULT_STACK[-1],'$name');
1464             })}
1465             : q{};
1466              
1467             # Each rule or token becomes a DEFINE'd Perl 5.10 named capture...
1468 256 100 100     1124 my $implicit_version
1469             = ($callname eq 'ws' || $callname eq 'hk')
1470             ? qq{(?<${callname}__implicit__> $body) }
1471             : qq{};
1472 256         1858 return qq{
1473             (?(DEFINE) $local_ws
1474             (?<$qualname>
1475             (?<$callname>
1476             (?{\@{\$Regexp::Grammars::RESULT_STACK[-1]}{'!','~'}=(\$#{!}, $nocontext);})
1477             (?:$body) $objectification
1478             (?{;\$#{!}=delete(\$Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
1479             delete(\$Regexp::Grammars::RESULT_STACK[-1]{'\@'});
1480             })
1481             ))
1482             $implicit_version
1483             )
1484             };
1485             }
1486              
1487              
1488             # Locate any valid <...> sequences and replace with native regex code...
1489             sub _translate_subrule_calls {
1490 384     384   1299 my ($source_file, $source_line,
1491             $grammar_name,
1492             $grammar_spec,
1493             $compiletime_debugging_requested,
1494             $runtime_debugging_requested,
1495             $timeout_requested,
1496             $pre_match_debug,
1497             $post_match_debug,
1498             $rule_name,
1499             $subrule_names_ref,
1500             $magic_ws,
1501             ) = @_;
1502              
1503 384 100       1201 my $pretty_rule_name = $rule_name ? ($magic_ws ? '"
    100          
1504             : 'main regex (before first rule)';
1505              
1506             # Remember the preceding construct, so as to implement the +% etc. operators...
1507 384         593 my $prev_construct = q{};
1508 384         537 my $prev_translation = q{};
1509 384         582 my $curr_line_num = 1;
1510              
1511             # Translate all other calls (MAIN GRAMMAR FOR MODULE)...
1512 384         88216 $grammar_spec =~ s{
1513 2312         59294 (?{ $curr_line_num = substr($_, 0, pos) =~ tr/\n//; })
1514             (? (? \s*+) (? (?&SEPLIST_OP) ) (? \s*+) )?
1515             (?
1516             (?
1517             <
1518             (?:
1519             (?
1520             \. \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1521             )
1522             | (?
1523             (? \? | \! ) \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1524             )
1525             | (?
1526             \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1527              
1528             )
1529             | (?
1530             \[ \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1531             )
1532             | (?
1533             (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1534              
1535             )
1536             | (?
1537             \[ (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1538             )
1539              
1540             | (?
1541             \s* : (?(?&QUALIDENT)) \s*
1542             )
1543             | (?
1544             (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s*
1545             )
1546             | (?
1547             \[ (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s* \]
1548             )
1549              
1550             | (?
1551             \. (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1552             )
1553             | (?
1554             (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1555             )
1556             | (?
1557             \[ (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s* \]
1558             )
1559             | (?
1560             (?(?&HASH)) \s* (?(?&BRACES))? \s*
1561             )
1562             | (?
1563             (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s*
1564             )
1565             | (?
1566             \[ (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s* \]
1567             )
1568             | (?
1569             \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1570             | \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1571             )
1572             | (?
1573             (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1574             | (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1575             )
1576             | (?
1577             \[ (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s* \]
1578             | \[ (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s* \]
1579             )
1580             |
1581             (?
1582             minimize \s* : \s*
1583             )
1584             |
1585             (?
1586             require \s* : \s* (? (?&PARENCODE) ) \s*
1587             )
1588             |
1589             (?
1590             debug \s* : \s* (? run | match | step | try | off | on) \s*
1591             )
1592             |
1593             (?
1594             timeout \s* : \s* (? \d+) \s*
1595             )
1596             |
1597             (?
1598             context \s* : \s*
1599             )
1600             |
1601             (?
1602             nocontext \s* : \s*
1603             )
1604             |
1605             (?
1606             [.][.][.]
1607             | [!][!][!]
1608             | [?][?][?]
1609             )
1610             |
1611             (?
1612             (? error | fatal ) \s*+ : \s*+
1613             )
1614             |
1615             (?
1616             (? log | error | warning | fatal )
1617             \s*+ : \s*+
1618             (? (?&PARENCODE) | .+? )
1619             \s*+
1620             )
1621             )
1622             > (? \s* (?! (?&SEPLIST_OP) ) [?+*][?+]? | )
1623             |
1624             (?
1625             $WS_PATTERN
1626             )
1627             |
1628             (?
1629             (?&SEPLIST_OP) \s* (? \S* )
1630             )
1631             |
1632             (?
1633             \(\?\<\w+\>
1634             )
1635             |
1636             (?
1637             < [^>\n]* [>\n]
1638             )
1639             |
1640             (?
1641             (?
1642             | (?
1643             )
1644             |
1645             (?
1646             (?: \\[^shv]
1647             | (?! (?&PARENCODE) ) (?&PARENS)
1648             | (?&CHARSET)
1649             | \w++
1650             | \|
1651             )
1652             (?&QUANTIFIER)?
1653             )
1654             |
1655             (?
1656             \s++
1657             | \\. (?&QUANTIFIER)?
1658             | \(\?!
1659             | \(\?\# [^)]* \) # (?# -> old style inline comment)
1660             | (?&PARENCODE)
1661             | \# [^\n]*+
1662             | [^][\s()<>#\\]++
1663             )
1664             )
1665              
1666             (?(DEFINE)
1667             (? \*\* | [*+?] [+?]?+ \s* %%?+ | \{ \d+(,\d*)? \} [+?]?+ \s* %%?+ )
1668             (? \( (?:[?] (?: <[=!] | [:>] ))?
1669             (?: \\. | (?&PARENCODE) | (?&PARENS) | (?&CHARSET) | [^][()\\<>]++ )*+
1670             \)
1671             )
1672             (? \{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \} )
1673             (? \(\?\??\{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \}\) )
1674             (? \% (?&IDENT) (?: :: (?&IDENT) )* )
1675             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]] )*+ \] )
1676             (? [^\W\d]\w*+ )
1677             (? (?: [^\W\d]\w*+ :: )* [^\W\d]\w*+ )
1678             (? (?&NUMBER) | (?&STRING) | (?&VAR) )
1679             (? [+-]? \d++ (?:\. \d++)? (?:[eE] [+-]? \d++)? )
1680             (? ' [^\\']++ (?: \\. [^\\']++ )* ' )
1681             (? (?&PARENCODE) | \( \s* (?&ARGS)? \s* \) | (?# NOTHING ) )
1682             (? (?&ARG) \s* (?: , \s* (?&ARG) \s* )* ,? )
1683             (? (?&VAR) | (?&KEY) \s* => \s* (?&LITERAL) )
1684             (? : (?&IDENT) )
1685             (? (?&IDENT) | (?&LITERAL) )
1686             (? [*+?][+?]? | \{ \d+,?\d* \} [+?]? )
1687             )
1688             }{
1689 81     81   715988 my $curr_construct = $+{construct};
  81         32094  
  81         219573  
  2232         12328  
1690 2232   100     11947 my $list_marker = $+{list_marker} // q{};
1691 2232 100 100     11920 my $alias = ($+{alias}//'MATCH') eq 'MATCH' ? q{'='} : qq{'$+{alias}'};
1692              
1693             # Determine and remember the necessary translation...
1694 2232         4150 my $curr_translation = do{
1695              
1696             # Translate subrule calls of the form: ...
1697 2232 100 100     62598 if (defined $+{alias_parens_scalar}) {
    50 66        
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    0          
    0          
1698 82 100       539 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1699             _translate_subpattern(
1700             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1701 82         357 $compiletime_debugging_requested,
1702             $runtime_debugging_requested, $timeout_requested,
1703             );
1704             }
1705             elsif (defined $+{alias_parens_scalar_nocap}) {
1706 0 0       0 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1707             _translate_subpattern(
1708             $curr_construct, $alias, $pattern, 'noncapturing', $+{modifier},
1709 0         0 $compiletime_debugging_requested,
1710             $runtime_debugging_requested, $timeout_requested,
1711             );
1712             }
1713             elsif (defined $+{alias_parens_list}) {
1714 16 50       104 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1715             _translate_subpattern(
1716             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1717 16         67 $compiletime_debugging_requested,
1718             $runtime_debugging_requested, $timeout_requested,
1719             );
1720             }
1721              
1722             # Translate subrule calls of the form: ...
1723             elsif (defined $+{alias_hash_scalar}) {
1724             _translate_hashmatch(
1725             $curr_construct, $alias, $+{varname}, $+{keypat}, 'scalar', $+{modifier},
1726 7         34 $compiletime_debugging_requested,
1727             $runtime_debugging_requested,
1728             $timeout_requested,
1729             );
1730             }
1731             elsif (defined $+{alias_hash_scalar_nocap}) {
1732             _translate_hashmatch(
1733             $curr_construct, $alias, $+{varname}, $+{keypat}, 'noncapturing', $+{modifier},
1734 4         23 $compiletime_debugging_requested,
1735             $runtime_debugging_requested,
1736             $timeout_requested,
1737             );
1738             }
1739             elsif (defined $+{alias_hash_list}) {
1740             _translate_hashmatch(
1741             $curr_construct, $alias, $+{varname}, $+{keypat}, 'list', $+{modifier},
1742 3         15 $compiletime_debugging_requested,
1743             $runtime_debugging_requested,
1744             $timeout_requested,
1745             );
1746             }
1747              
1748             # Translate subrule calls of the form: ...
1749             elsif (defined $+{alias_subrule_scalar}) {
1750             _translate_subrule_call(
1751             $source_line, $source_file,
1752             $pretty_rule_name,
1753             $grammar_name,
1754             $curr_construct, $alias, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1755 22         110 $compiletime_debugging_requested,
1756             $runtime_debugging_requested,
1757             $timeout_requested,
1758             $subrule_names_ref,
1759             );
1760             }
1761             elsif (defined $+{alias_subrule_list}) {
1762             _translate_subrule_call(
1763             $source_line, $source_file,
1764             $pretty_rule_name,
1765             $grammar_name,
1766             $curr_construct, $alias, $+{subrule}, $+{args}, 'list', $+{modifier},
1767 26         117 $compiletime_debugging_requested,
1768             $runtime_debugging_requested,
1769             $timeout_requested,
1770             $subrule_names_ref,
1771             );
1772             }
1773              
1774             # Translate subrule calls of the form: and ...
1775             elsif (defined $+{self_subrule_lookahead}) {
1776              
1777             # Determine type of lookahead, and work around capture problem...
1778 2         6 my ($type, $pre, $post) = ( 'neglookahead', '(?!(?!)|', ')' );
1779 2 100       11 if ($+{sign} eq '?') {
1780 1         2 $type = 'poslookahead';
1781 1         2 $pre x= 2;
1782 1         3 $post x= 2;
1783             }
1784              
1785             $pre . _translate_subrule_call(
1786             $source_line, $source_file,
1787             $pretty_rule_name,
1788             $grammar_name,
1789 2         14 $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, $type, q{},
1790             $compiletime_debugging_requested,
1791             $runtime_debugging_requested,
1792             $timeout_requested,
1793             $subrule_names_ref,
1794             )
1795             . $post;
1796             }
1797             elsif (defined $+{self_subrule_scalar_nocap}) {
1798             _translate_subrule_call(
1799             $source_line, $source_file,
1800             $pretty_rule_name,
1801             $grammar_name,
1802             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'noncapturing', $+{modifier},
1803 6         55 $compiletime_debugging_requested,
1804             $runtime_debugging_requested,
1805             $timeout_requested,
1806             $subrule_names_ref,
1807             );
1808             }
1809             elsif (defined $+{self_subrule_scalar}) {
1810             _translate_subrule_call(
1811             $source_line, $source_file,
1812             $pretty_rule_name,
1813             $grammar_name,
1814             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1815 174         1376 $compiletime_debugging_requested,
1816             $runtime_debugging_requested,
1817             $timeout_requested,
1818             $subrule_names_ref,
1819             );
1820             }
1821             elsif (defined $+{self_subrule_list}) {
1822             _translate_subrule_call(
1823             $source_line, $source_file,
1824             $pretty_rule_name,
1825             $grammar_name,
1826             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'list', $+{modifier},
1827 62         479 $compiletime_debugging_requested,
1828             $runtime_debugging_requested,
1829             $timeout_requested,
1830             $subrule_names_ref,
1831             );
1832             }
1833              
1834             # Translate subrule calls of the form: ...
1835             elsif (defined $+{alias_argrule_scalar}) {
1836 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1837             _translate_subpattern(
1838             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1839 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1840             "in \$ARG{'$+{subrule}'}"
1841             );
1842             }
1843             elsif (defined $+{alias_argrule_list}) {
1844 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1845             _translate_subpattern(
1846             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1847 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1848             "in \$ARG{'$+{subrule}'}"
1849             );
1850             }
1851              
1852             # Translate subrule calls of the form: <:ARGNAME>...
1853             elsif (defined $+{self_argrule_scalar}) {
1854 1         5 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1855             _translate_subpattern(
1856             $curr_construct, qq{'$+{subrule}'}, $pattern, 'noncapturing', $+{modifier},
1857 1         8 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1858             "in \$ARG{'$+{subrule}'}"
1859             );
1860             }
1861              
1862             # Translate subrule calls of the form: <\IDENT> or ...
1863             elsif (defined $+{backref} || $+{alias_backref} || $+{alias_backref_list}) {
1864             # Use "%ARGS" if subrule names starts with a colon...
1865 9         34 my $subrule = $+{subrule};
1866 9 100       31 if (substr($subrule,0,1) eq ':') {
1867 3         8 substr($subrule,0,1,"\@'}{'");
1868             }
1869              
1870 9         22 my $backref = qq{\$Regexp::Grammars::RESULT_STACK[-1]{'$subrule'}};
1871 9 100 100     68 my $quoter = $+{slash} eq '\\' || $+{slash} eq '\\_'
1872             ? "quotemeta($backref)"
1873             : "Regexp::Grammars::_invert_delim($backref)"
1874             ;
1875 9         31 my $pattern = qq{ (??{ defined $backref ? $quoter : q{(?!)}})};
1876             my $type = $+{backref} ? 'noncapturing'
1877 9 100       59 : $+{alias_backref} ? 'scalar'
    100          
1878             : 'list'
1879             ;
1880             _translate_subpattern(
1881             $curr_construct, $alias, $pattern, $type, $+{modifier},
1882 9         45 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1883             "in \$MATCH{'$subrule'}"
1884             );
1885             }
1886              
1887             # Translate reportable raw regexes (add debugging support)...
1888             elsif (defined $+{reportable_raw_regex}) {
1889             _translate_raw_regex(
1890 474         1483 $+{reportable_raw_regex}, $compiletime_debugging_requested, $runtime_debugging_requested
1891             );
1892             }
1893              
1894             # Translate too-complex repetition specifications...
1895             elsif (defined $+{complex_repetition}) {
1896 0         0 my ($repetition, $separator) = @+{'complex_repetition', 'complex_separator'};
1897 0         0 my ($metaop) = $repetition =~ m{(%%?)};
1898 0         0 my $quotedop = quotemeta($metaop);
1899 0         0 $separator =~ s/\s+/ /g;
1900 0 0       0 my $problem = $separator =~ /\S/
1901             ? ["The $separator... separator you specified after the $metaop is too complex",
1902             "(Try refactoring it into a single subrule call)",
1903             ]
1904             : ["No separator was specified after the $metaop",
1905             "(Or did you need a $quotedop instead, to match a literal '$metaop'?)",
1906             ];
1907             _debug_notify( fatal =>
1908             "Invalid separation specifier: $metaop",
1909             "at line $curr_line_num of $pretty_rule_name",
1910 0         0 @{$problem},
  0         0  
1911             );
1912 0         0 exit(1);
1913             }
1914              
1915             # Translate non-reportable raw regexes (leave as is)...
1916             elsif (defined $+{raw_regex}) {
1917             # Handle raw % and %%
1918 1318         4766 my $raw_regex = $+{raw_regex};
1919 1318 50       4275 if ($raw_regex =~ / \A %%?+ /x) {
1920 0         0 _debug_notify( fatal =>
1921             "Invalid separation specifier: $&",
1922             "at line $curr_line_num of $pretty_rule_name",
1923             "(Did you forget to put a repetition quantifier before the $&",
1924             " or did you need a " . quotemeta($&) . " instead, to match a literal '$&'?)",
1925             );
1926 0         0 exit(1);
1927             }
1928              
1929             # Handle any other raw regex...
1930             _translate_raw_regex(
1931 1318         2905 $raw_regex, $compiletime_debugging_requested
1932             );
1933             }
1934              
1935             # Translate directives...
1936             elsif (defined $+{require_directive}) {
1937             _translate_require_directive(
1938 0         0 $curr_construct, $+{condition}, $compiletime_debugging_requested
1939             );
1940             }
1941             elsif (defined $+{minimize_directive}) {
1942             _translate_minimize_directive(
1943 3         11 $curr_construct, $+{condition}, $compiletime_debugging_requested
1944             );
1945             }
1946             elsif (defined $+{debug_directive}) {
1947             _translate_debug_directive(
1948 0         0 $curr_construct, $+{cmd}, $compiletime_debugging_requested
1949             );
1950             }
1951             elsif (defined $+{timeout_directive}) {
1952             _translate_timeout_directive(
1953 0         0 $curr_construct, $+{timeout}, $compiletime_debugging_requested
1954             );
1955             }
1956             elsif (defined $+{error_directive}) {
1957             _translate_error_directive(
1958             $curr_construct, $+{error_type}, $+{msg},
1959 8         39 $compiletime_debugging_requested, $rule_name
1960             );
1961             }
1962             elsif (defined $+{autoerror_directive}) {
1963             _translate_error_directive(
1964 7         35 $curr_construct, $+{error_type}, q{},
1965             $compiletime_debugging_requested, $rule_name
1966             );
1967             }
1968             elsif (defined $+{yadaerror_directive}) {
1969             _translate_error_directive(
1970             $curr_construct,
1971 4 50       85 ($+{yadaerror_directive} eq '???' ? 'warning' : 'error'),
1972             q{},
1973             $compiletime_debugging_requested, -$rule_name
1974             );
1975             }
1976             elsif (defined $+{context_directive}) {
1977 0 0       0 if ($compiletime_debugging_requested) {
1978 0         0 _debug_notify( info => " |",
1979             " |...Treating $curr_construct as:",
1980             " | \\ Turn on context-saving for the current rule"
1981             );
1982             }
1983 0         0 q{}; # Remove the directive
1984             }
1985             elsif (defined $+{nocontext_directive}) {
1986 0 0       0 if ($compiletime_debugging_requested) {
1987 0         0 _debug_notify( info => " |",
1988             " |...Treating $curr_construct as:",
1989             " | \\ Turn off context-saving for the current rule"
1990             );
1991             }
1992 0         0 q{}; # Remove the directive
1993             }
1994             elsif (defined $+{ws_directive}) {
1995 4 50       10 if ($compiletime_debugging_requested) {
1996 0         0 _debug_notify( info => " |",
1997             " |...Treating $curr_construct as:",
1998             " | \\ Change whitespace matching for the current rule"
1999             );
2000             }
2001 4         8 $curr_construct;
2002             }
2003              
2004             # Something that looks like a rule call or directive, but isn't...
2005             elsif (defined $+{incomplete_request}) {
2006 0         0 my $request = $+{incomplete_request};
2007 0         0 $request =~ s/\n//g;
2008 0 0       0 if ($request =~ /\A\s*<\s*\Z/) {
2009 0         0 _debug_notify( fatal =>
2010             qq{Invalid < metacharacter near line $curr_line_num of $pretty_rule_name},
2011             qq{If you meant to match a literal '<', use: \\<},
2012             );
2013             }
2014             else {
2015 0         0 _debug_notify( fatal =>
2016             qq{Possible failed attempt to specify},
2017             qq{a subrule call or directive: $request},
2018             qq{near line $curr_line_num of $pretty_rule_name},
2019             qq{If you meant to match literally, use: \\$request},
2020             );
2021             }
2022 0         0 exit(1);
2023             }
2024              
2025             # A quantifier that isn't quantifying anything...
2026             elsif (defined $+{loose_quantifier}) {
2027 0         0 my $quant = $+{loose_quantifier};
2028 0         0 $quant =~ s{^\s+}{};
2029 0         0 my $literal = quotemeta($quant);
2030 0         0 _debug_notify( fatal =>
2031             qq{Quantifier that doesn't quantify anything: $quant},
2032             qq{at line $curr_line_num in declaration of $pretty_rule_name},
2033             qq{(Did you mean to match literally? If so, try: $literal)},
2034             q{},
2035             );
2036 0         0 exit(1);
2037             }
2038              
2039             # There shouldn't be any other possibility...
2040             else {
2041 0         0 die qq{Internal error: this shouldn't happen!\n},
2042             qq{Near '$curr_construct' at $curr_line_num of $pretty_rule_name\n};
2043             }
2044             };
2045              
2046             # Handle the **/*%/*%%/+%/{n,m}%/etc operators...
2047 2232 100       10135 if ($list_marker) {
2048 73 100       507 my $ws = $magic_ws ? $+{ws1} . $+{ws2} : q{};
2049 73         323 my $op = $+{op};
2050              
2051 73         252 $curr_translation = _translate_separated_list(
2052             $prev_construct, $op, $curr_construct,
2053             $prev_translation, $curr_translation, $ws,
2054             $compiletime_debugging_requested,
2055             $runtime_debugging_requested, $timeout_requested,
2056             );
2057 73         248 $curr_construct = qq{$prev_construct $op $curr_construct};
2058             }
2059              
2060             # Finally, remember this latest translation, and return it...
2061 2232         3684 $prev_construct = $curr_construct;
2062 2232         14182 $prev_translation = $curr_translation;;
2063             }exmsg;
2064              
2065             # Translate magic hash accesses...
2066 384         2602 $grammar_spec =~ s{\$(?:\:\:)?MATCH (?= \s*\{) }
2067 384         932 {\$Regexp::Grammars::RESULT_STACK[-1]}xmsg;
2068             $grammar_spec =~ s{\$(?:\:\:)?ARG (?= \s*\{) }
2069             {\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}}xmsg;
2070 384         1620  
2071             # Translate magic scalars and hashes...
2072             state $translate_scalar = {
2073             q{%$MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2074             q{@$MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2075             q{$MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2076             q{%MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2077             q{$CAPTURE} => q{$^N},
2078             q{$CONTEXT} => q{$^N},
2079             q{$DEBUG} => q{$Regexp::Grammars::DEBUG},
2080             q{$INDEX} => q{${\\pos()}},
2081             q{%ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2082              
2083             q{%$::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2084             q{@$::MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2085             q{$::MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2086             q{%::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2087             q{$::CAPTURE} => q{$^N},
2088             q{$::CONTEXT} => q{$^N},
2089             q{$::DEBUG} => q{$Regexp::Grammars::DEBUG},
2090             q{$::INDEX} => q{${\\pos()}},
2091             q{%::ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2092              
2093 1422         2571 };
2094 4317         5911 state $translatable_scalar
2095 384         589 = join '|', map {quotemeta $_}
  79         732  
2096             sort {length $b <=> length $a}
2097 384         12315 keys %{$translate_scalar};
2098              
2099 384         1988 $grammar_spec =~ s{ ($translatable_scalar) (?! \s* (?: \[ | \{) ) }
2100             {$translate_scalar->{$1}}oxmsg;
2101              
2102             return $grammar_spec;
2103             }
2104 0     0   0  
2105             # Generate a "decimal timestamp" and insert in a template...
2106             sub _timestamp {
2107 0 0       0 my ($template) = @_;
2108 0         0  
2109 0         0 # Generate and insert any timestamp...
  0         0  
2110 0         0 if ($template =~ /%t/) {
2111             my ($sec, $min, $hour, $day, $mon, $year) = localtime;
2112 0         0 $mon++; $year+=1900;
2113             my $timestamp = sprintf("%04d%02d%02d.%02d%02d%02d",
2114             $year, $mon, $day, $hour, $min, $sec);
2115 0         0 $template =~ s{%t}{$timestamp}xms;;
2116             }
2117              
2118             return $template;
2119             }
2120 0     0   0  
2121 0         0 # Open (or re-open) the requested log file...
2122 0         0 sub _autoflush {
2123 0         0 my ($fh) = @_;
2124             my $originally_selected = select $fh;
2125             $|=1;
2126             select $originally_selected;
2127 3777     3777   657452 }
2128 3777   50     7840  
2129             sub _open_log {
2130             my ($mode, $filename, $from_where) = @_;
2131 3777 50       7551 $from_where //= q{};
    0          
2132 3777         55980  
2133             # Special case: '-' --> STDERR
2134             if ($filename eq q{-}) {
2135             return *STDERR{IO};
2136 0         0 }
2137 0         0 # Otherwise, just open the named file...
2138             elsif (open my $fh, $mode, $filename) {
2139             _autoflush($fh);
2140             return $fh;
2141 0         0 }
2142 0 0       0 # Otherwise, generate a warning and default to STDERR...
2143             else {
2144             local *Regexp::Grammars::LOGFILE = *STDERR{IO};
2145             _debug_notify( warn =>
2146             qq{Unable to open log file '$filename'},
2147             ($from_where ? $from_where : ()),
2148             qq{($!)},
2149 0         0 qq{Defaulting to STDERR instead.},
2150             q{},
2151             );
2152             return *STDERR{IO};
2153             }
2154 1087     1087   7942 }
2155 1087         1705  
2156 1087         1648 sub _invert_delim {
2157 1087         18478 my ($delim) = @_;
2158             $delim = reverse $delim;
2159             $delim =~ tr/<>[]{}()??`'/><][}{)(??'`/;
2160             return quotemeta $delim;
2161             }
2162              
2163             # Regex to detect if other regexes contain a grammar specification...
2164             my $GRAMMAR_DIRECTIVE
2165             = qr{ < grammar: \s* (? $QUALIDENT ) \s* > }xms;
2166              
2167             # Regex to detect if other regexes contain a grammar inheritance...
2168             my $EXTENDS_DIRECTIVE
2169             = qr{ < extends: \s* (? $QUALIDENT ) \s* > }xms;
2170              
2171             # Cache of rule/token names within defined grammars...
2172             my %subrule_names_for;
2173 425     425   887  
2174             # Build list of ancestors for a given grammar...
2175 425 50       1017 sub _ancestry_of {
2176             my ($grammar_name) = @_;
2177 81     81   6428  
  81         2426  
  81         913  
2178 425         603 return () if !$grammar_name;
  446         1945  
  425         3071  
2179              
2180             use mro;
2181             return map { substr($_, $CACHE_LEN) } @{mro::get_linear_isa($CACHE.$grammar_name, 'c3')};
2182             }
2183 133     133   361  
2184             # Detect and translate any requested grammar inheritances...
2185             sub _extract_inheritances {
2186             my ($source_line, $source_file, $regex, $compiletime_debugging_requested, $derived_grammar_name) = @_;
2187 133         596  
2188              
2189 12         78 # Detect and remove inheritance requests...
2190 12         27 while ($regex =~ s{$EXTENDS_DIRECTIVE}{}xms) {
2191 12 100       35 # Normalize grammar name and report...
2192 3         9 my $orig_grammar_name = $+{base_grammar_name};
2193             my $grammar_name = $orig_grammar_name;
2194             if ($grammar_name !~ /::/) {
2195 12 50       28 $grammar_name = caller(2).'::'.$grammar_name;
2196 12 50       23 }
2197 0         0  
2198             if (exists $user_defined_grammar{$grammar_name}) {
2199             if ($compiletime_debugging_requested) {
2200             _debug_notify( info =>
2201             "Processing inheritance request for $grammar_name...",
2202             q{},
2203             );
2204 81     81   18315 }
  81         2066  
  81         253459  
2205 12         19  
  12         231  
2206             # Specify new relationship...
2207             no strict 'refs';
2208 0         0 push @{$CACHE.$derived_grammar_name.'::ISA'}, $CACHE.$grammar_name;
2209             }
2210             else {
2211             _debug_notify( fatal =>
2212             "Inheritance from unknown grammar requested",
2213             "by directive",
2214 0         0 "in regex grammar declared at $source_file line $source_line",
2215             q{},
2216             );
2217             exit(1);
2218             }
2219 133         379 }
2220              
2221             # Retrieve ancestors (but not self) in C3 dispatch order...
2222 133         304 my (undef, @ancestors) = _ancestry_of($derived_grammar_name);
  17         22  
  17         132  
2223 133         499  
2224             # Extract subrule names and implementations for ancestors...
2225 133         388 my %subrule_names = map { %{$subrule_names_for{$_}} } @ancestors;
  17         71  
2226             $_ = -1 for values %subrule_names;
2227 133         453 my $implementation
2228             = join "\n", map { $user_defined_grammar{$_} } @ancestors;
2229              
2230             return $implementation, \%subrule_names;
2231             }
2232 137     137   316  
2233 137         331 # Transform grammar-augmented regex into pure Perl 5.10 regex...
2234             sub _build_grammar {
2235             my ($grammar_spec) = @_;
2236 137 100       922 $grammar_spec .= q{};
2237 4         1695  
2238             # Check for lack of Regexp::Grammar-y constructs and short-circuit...
2239             if ($grammar_spec !~ m{ < (?: [.?![:%\\/]? [^\W\d]\w* [^>]* | [.?!]{3} ) > }xms) {
2240             return $grammar_spec;
2241 133         790 }
2242 133         749  
2243             # Remember where we parked...
2244             my ($source_file, $source_line) = (caller 1)[1,2];
2245 133         256 $source_line -= $grammar_spec =~ tr/\n//;
2246 133         6728  
2247             # Check for dubious repeated constructs that throw away captures...
2248             my $dubious_line = $source_line;
2249             while ($grammar_spec =~ m{
2250             (.*?)
2251             (
2252             < (?! \[ ) # not <[SUBRULE]>
2253             ( $IDENT (?: = [^>]*)? ) # but or
2254             > \s*
2255             ( # followed by a quantifier...
2256             [+*][?+]? # either symbolic
2257             | \{\d+(?:,\d*)?\}[?+]? # or numeric
2258 0         0 )
2259 0         0 )
2260 0         0 }gxms) {
2261             my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2262             $dubious_line += $prefix =~ tr/\n//;
2263             _debug_notify( warn =>
2264             qq{Repeated subrule <$rule>$qual},
2265             qq{at $source_file line $dubious_line},
2266             qq{will only capture its final match},
2267 0         0 qq{(Did you mean <[$rule]>$qual instead?)},
2268             q{},
2269             );
2270             $dubious_line += $match =~ tr/\n//;
2271 133         373 }
2272 133         1478  
2273             # Check for dubious non-backtracking constructs...
2274             $dubious_line = $source_line;
2275             while (
2276             $grammar_spec =~ m{
2277             (.*?)
2278             (
2279             <
2280             (?! (?:obj)? (?:rule: | token ) )
2281             ( [^>]+ )
2282             >
2283             \s*
2284 2         13 ( [?+*][+] | \{.*\}[+] )
2285 2         4 )
2286 2         6 }gxms) {
2287 2         14 my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2288             $dubious_line += $prefix =~ tr/\n//;
2289             my $safe_qual = substr($qual,0,-1);
2290             _debug_notify( warn =>
2291             qq{Non-backtracking subrule call <$rule>$qual},
2292             qq{at $source_file line $dubious_line},
2293             qq{may not revert correctly during backtracking.},
2294 2         23 qq{(If grammar does not work, try <$rule>$safe_qual instead)},
2295             q{},
2296             );
2297             $dubious_line += $match =~ tr/\n//;
2298 133         265 }
2299 133         270  
2300 133         249 # Check whether a log file was specified...
2301             my $compiletime_debugging_requested;
2302 133         428 local *Regexp::Grammars::LOGFILE = *Regexp::Grammars::LOGFILE;
2303 133         396 my $logfile = q{-};
2304 0         0  
2305             my $log_where = "for regex grammar defined at $source_file line $source_line";
2306             $grammar_spec =~ s{ ^ [^#]* < logfile: \s* ([^>]+?) \s* > }{
2307 0         0 $logfile = _timestamp($1);
2308 0         0  
2309             # Presence of implies compile-time logging...
2310             $compiletime_debugging_requested = 1;
2311 0         0 *Regexp::Grammars::LOGFILE = _open_log('>',$logfile, $log_where );
2312              
2313             # Delete directive...
2314             q{};
2315 133         5345 }gexms;
2316              
2317             # Look ahead for any run-time debugging or timeout requests...
2318             my $runtime_debugging_requested
2319             = $grammar_spec =~ m{
2320             ^ [^#]*
2321             < debug: \s* (run | match | step | try | on | same ) \s* >
2322 133         326 | \$DEBUG (?! \s* (?: \[ | \{) )
2323             }xms;
2324              
2325             my $timeout_requested
2326             = $grammar_spec =~ m{
2327             ^ [^#]*
2328             < timeout: \s* \d+ \s* >
2329             }xms;
2330              
2331 133 100       547  
2332             # Standard actions set up and clean up any regex debugging...
2333             # Before entire match, set up a stack of attempt records and report...
2334             my $pre_match_debug
2335             = $runtime_debugging_requested
2336             ? qq{(?{; *Regexp::Grammars::LOGFILE
2337             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where');
2338             Regexp::Grammars::_init_try_stack(); })}
2339             : qq{(?{; *Regexp::Grammars::LOGFILE
2340             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where'); })}
2341 133 100       378 ;
2342              
2343             # After entire match, report whether successful or not...
2344             my $post_match_debug
2345             = $runtime_debugging_requested
2346             ? qq{(?{;Regexp::Grammars::_debug_matched(0,\\%/,'',\$^N)})
2347             |(?>(?{;Regexp::Grammars::_debug_handle_failures(0,''); }) (?!))
2348             }
2349             : q{}
2350 133         423 ;
2351              
2352             # Remove comment lines...
2353             $grammar_spec =~ s{^ ([^#\n]*) \s \# [^\n]* }{$1}gxms;
2354              
2355             # Subdivide into rule and token definitions, preparing to process each...
2356             # REWRITE THIS, USING (PROBABLY NEED TO REFACTOR ALL GRAMMARS TO REUSe
2357             # THESE COMPONENTS:
2358             # (? \( \s* (?&PARAMS)? \s* \) | (?# NOTHING ) )
2359             # (? (?&PARAM) \s* (?: , \s* (?&PARAM) \s* )* ,? )
2360 133         13243 # (? (?&VAR) (?: \s* = \s* (?: (?&LITERAL) | (?&PARENCODE) ) )? )
2361             # (? (?&NUMBER) | (?&STRING) | (?&VAR) )
2362             # (? : (?&IDENT) )
2363             my @defns = split m{
2364             (< (obj|)(rule|token) \s*+ :
2365             \s*+ ((?:${IDENT}::)*+) (?: ($IDENT) \s*+ = \s*+ )?+
2366             ($IDENT)
2367             \s* >)
2368             }xms, $grammar_spec;
2369 133         934  
  256         984  
2370 133         625 # Extract up list of names of defined rules/tokens...
2371 133         280 # (Name is every 6th item out of every seven, skipping the first item)
2372             my @subrule_names = @defns[ map { $_ * 7 + 6 } 0 .. ((@defns-1)/7-1) ];
2373             my @defns_copy = @defns[1..$#defns];
2374 133         334 my %subrule_names;
2375 133         225  
2376 133         296 # Build a look-up table of subrule names, checking for duplicates...
2377 256         911 my $defn_line = $source_line + $defns[0] =~ tr/\n//;
2378 256 50       926 my %first_decl_explanation;
2379             for my $subrule_name (@subrule_names) {
2380             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns_copy, 0, 7);
2381             if (++$subrule_names{$subrule_name} > 1) {
2382             _debug_notify( warn =>
2383 0         0 "Redeclaration of <$objectify$type: $subrule_name>",
  0         0  
2384             "at $source_file line $defn_line",
2385             "will be ignored.",
2386             @{ $first_decl_explanation{$subrule_name} },
2387             q{},
2388 256         1053 );
2389             }
2390             else {
2391             $first_decl_explanation{$subrule_name} = [
2392             "(Hidden by the earlier declaration of <$objectify$type: $subrule_name>",
2393 256         752 " at $source_file line $defn_line)"
2394             ];
2395             }
2396             $defn_line += ($full_decl.$body) =~ tr/\n//;
2397 133         531 }
2398              
2399             # Add the built-ins...
2400 133         285 @subrule_names{'ws', 'hk', 'matchpos', 'matchline'} = (1) x 4;
2401 133 50       820  
2402 0         0 # An empty main rule will never match anything...
2403             my $main_regex = shift @defns;
2404             if ($main_regex =~ m{\A (?: \s++ | \(\?\# [^)]* \) | \# [^\n]++ )* \z}xms) {
2405             _debug_notify( error =>
2406             "No main regex specified before rule definitions",
2407             "in regex grammar declared at $source_file line $source_line",
2408             "Grammar will never match anything.",
2409             "(Or did you forget a specification?)",
2410             q{},
2411             );
2412 133         275 }
2413 133         246  
2414             # Compile the regex or grammar...
2415             my $regex = q{};
2416             my $grammar_name;
2417 133 100       602 my $is_grammar;
2418              
2419 5         39 # Is this a grammar specification?
2420 5 100       21 if ($main_regex =~ $GRAMMAR_DIRECTIVE) {
2421 3         9 # Normalize grammar name and report...
2422             $grammar_name = $+{grammar_name};
2423 5         10 if ($grammar_name !~ /::/) {
2424             $grammar_name = caller(1) . "::$grammar_name";
2425             }
2426 5         9 $is_grammar = 1;
2427 7         18  
2428             # Add subrule definitions to namespace...
2429             for my $subrule_name (@subrule_names) {
2430             $CACHE{$grammar_name.'::'.$subrule_name} = 1;
2431 128         243 }
2432 128         326 }
2433             else {
2434             state $dummy_grammar_index = 0;
2435             $grammar_name = '______' . $dummy_grammar_index++;
2436 133         358 }
2437              
2438             # Extract any inheritance information...
2439             my ($inherited_rules, $inherited_subrule_names)
2440             = _extract_inheritances(
2441             $source_line, $source_file,
2442             $main_regex,
2443             $compiletime_debugging_requested,
2444             $grammar_name
2445 133         7709 );
2446              
2447             # Remove requests...
2448 133         348 $main_regex =~ s{ $EXTENDS_DIRECTIVE }{}gxms;
2449 133         414  
  133         330  
2450             # Add inherited subrule names to allowed subrule names;
2451             @subrule_names{ keys %{$inherited_subrule_names} }
2452 133         1150 = values %{$inherited_subrule_names};
2453              
2454             # Remove comments from top-level grammar...
2455             $main_regex =~ s{
2456             \(\?\# [^)]* \)
2457             | (?
2458             }{}gxms;
2459 133 0       308  
  0         0  
2460 133 50       594 # Remove any top-level nocontext directive...
    100          
2461             # 1 2 3 4
2462             $main_regex =~ s{^( (.*?) (\\*) (\# [^\n]*) )$}{length($3) % 2 ? $1 : $2.substr($3,0,-1)}gexms;
2463             my $nocontext = ($main_regex =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2464             : ($main_regex =~ s{ < context \s* : \s* > }{}gxms) ? 0
2465 133 100       392 : 0;
2466              
2467 5 50       17 # If so, set up to save the grammar...
2468 0         0 if ($is_grammar) {
2469             # Normalize grammar name and report...
2470 5 50       11 if ($grammar_name !~ /::/) {
2471 0         0 $grammar_name = caller(1) . "::$grammar_name";
2472             }
2473             if ($compiletime_debugging_requested) {
2474             _debug_notify( info =>
2475             "Processing definition of grammar $grammar_name...",
2476             q{},
2477             );
2478 5         273 }
2479              
2480             # Remove the grammar directive...
2481             $main_regex =~ s{
2482 5         20 ( $GRAMMAR_DIRECTIVE
  5         19  
2483             | < debug: \s* (run | match | step | try | on | off | same ) \s* >
2484             )
2485 5 50       23 }{$source_line += $1 =~ tr/\n//; q{}}gexms;
2486 0         0  
2487             # Check for anything else in the main regex...
2488             if ($main_regex =~ /\A(\s*)\S/) {
2489             $source_line += $1 =~ tr/\n//;
2490             _debug_notify( warn =>
2491 0         0 "Unexpected item before first subrule specification",
  0         0  
2492             "in definition of ",
2493             "at $source_file line $source_line:",
2494             map({ " $_"} grep /\S/, split "\n", $main_regex),
2495             "(this will be ignored when defining the grammar)",
2496             q{},
2497             );
2498             }
2499              
2500 48         77 # Remember set of valid subrule names...
2501 5         17 $subrule_names_for{$grammar_name}
  30         95  
  48         94  
2502             = {
2503             map({ ($_ => 1) } keys %subrule_names),
2504             map({ ($grammar_name.'::'.$_ => 1) } grep { !/::/ } keys %subrule_names),
2505             };
2506 128 50       353 }
2507 0         0 else { #...not a grammar specification
2508             # Report how main regex was interpreted, if requested to...
2509             if ($compiletime_debugging_requested) {
2510             _debug_notify( info =>
2511             "Processing the main regex before any rule definitions",
2512             );
2513 128         498 }
2514              
2515             # Any actual regex is processed first...
2516             $regex = _translate_subrule_calls(
2517             $source_file, $source_line,
2518             $grammar_name,
2519             $main_regex,
2520             $compiletime_debugging_requested,
2521             $runtime_debugging_requested,
2522             $timeout_requested,
2523             $pre_match_debug,
2524             $post_match_debug,
2525             q{}, # Expected...what?
2526             \%subrule_names,
2527             0, # Whitespace isn't magical
2528 128         522 );
2529              
2530             # Wrap the main regex (to ensure |'s don't segment pre and # post commands)...
2531 128 50       419 $regex = "(?:$regex)";
2532 0         0  
2533             # Report how construct was interpreted, if requested to...
2534             if ($compiletime_debugging_requested) {
2535             _debug_notify( q{} =>
2536             q{ |},
2537             q{ \\___End of main regex},
2538             q{},
2539             );
2540             }
2541 133         318 }
2542              
2543             # Update line number...
2544 133         418 $source_line += $main_regex =~ tr/\n//;
2545              
2546 256         933 # Then iterate any following rule definitions...
2547 256   66     1283 while (@defns) {
2548 256         535 # Grab details of each rule defn (as extracted by previous split)...
2549             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns, 0, 7);
2550             $name //= $callname;
2551 256 50       564 my $qualified_name = $grammar_name.'::'.$callname;
2552 0 0       0  
2553             # Report how construct was interpreted, if requested to...
2554             if ($compiletime_debugging_requested) {
2555             _debug_notify( info =>
2556             "Defining a $type: <$callname>",
2557             " |...Returns: " . ($objectify ? "an object of class '$qualifier$name'" : "a hash"),
2558 256 100       913 );
    100          
2559             }
2560              
2561             my $local_nocontext
2562             = ($body =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2563             : ($body =~ s{ < context \s* : \s* > }{}gxms) ? 0
2564 256         802 : $nocontext;
2565              
2566             # Translate any nested <...> constructs...
2567             my $trans_body = _translate_subrule_calls(
2568             $source_file, $source_line,
2569             $grammar_name,
2570             $body,
2571             $compiletime_debugging_requested,
2572             $runtime_debugging_requested,
2573             $timeout_requested,
2574             $pre_match_debug,
2575             $post_match_debug,
2576             $callname, # Expected...what?
2577             \%subrule_names,
2578             $type eq 'rule', # Is whitespace magical?
2579 256 50       753 );
2580 0         0  
2581             # Report how construct was interpreted, if requested to...
2582             if ($compiletime_debugging_requested) {
2583             _debug_notify( q{} =>
2584             q{ |},
2585             q{ \\___End of rule definition},
2586             q{},
2587             );
2588 256         461 }
2589 256         381  
2590             # Make allowance for possible local whitespace definitions...
2591             my $local_ws_defn = q{};
2592 256 100       619 my $local_ws_call = q{(?&ws__implicit__)};
2593              
2594 134         267 # Rules make non-code literal whitespace match textual whitespace...
2595             if ($type eq 'rule') {
2596 134         779 # Implement any local whitespace definition...
2597 4         13 my $first_ws = 1;
2598 4 50       18 WS_DIRECTIVE:
    50          
2599 0         0 while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2600             my $defn = $1;
2601             if ($defn !~ m{\S}xms) {
2602             _debug_notify( warn =>
2603             qq{Ignoring useless empty directive},
2604             qq{in definition of },
2605             qq{near $source_file line $source_line},
2606 0         0 qq{(Did you mean instead?)},
2607             q{},
2608             );
2609 0         0 next WS_DIRECTIVE;
2610             }
2611             elsif (!$first_ws) {
2612             _debug_notify( warn =>
2613             qq{Ignoring useless extra directive},
2614             qq{in definition of },
2615             qq{at $source_file line $source_line},
2616 0         0 qq{(No more than one is permitted per rule!)},
2617             q{},
2618             );
2619 4         6 next WS_DIRECTIVE;
2620             }
2621 4         6 else {
2622 4         6 $first_ws = 0;
2623 4         10 }
2624 4         17 state $ws_counter = 0;
2625             $ws_counter++;
2626             $local_ws_defn = qq{(?<__RG_ws_$ws_counter> $defn)};
2627             $local_ws_call = qq{(?&__RG_ws_$ws_counter)};
2628 134         350 }
2629              
2630             # Implement auto-whitespace...
2631             state $CODE_OR_SPACE = qr{
2632             (? # These are not magic...
2633             \( \?\?? (?&BRACED) \) # Embedded code blocks
2634             | \s++ # Whitespace not followed by...
2635             (?= \| # ...an OR
2636             | \(\?\#\) # ...a null comment
2637             | (?: \) \s* )? \z # ...the end of the rule
2638             | \(\(?\?\&ws\) # ...an explicit ws match
2639             | \(\?\??\{ # ...an embedded code block
2640             | \\[shv] # ...an explicit space match
2641             )
2642             )
2643             |
2644             (? \s++ ) # All other whitespace is magic
2645 134   66     6336  
  1205         65956  
2646             (?(DEFINE) (? \{ (?: \\. | (?&BRACED) | [^{}] )* \} ) )
2647             }xms;
2648 122         593 $trans_body =~ s{($CODE_OR_SPACE)}{ $+{ignorable_space} // $local_ws_call }exmsg;
2649 0         0 }
2650             else {
2651             while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2652             _debug_notify( warn =>
2653             qq{Ignoring useless directive},
2654             qq{in definition of },
2655             qq{at $source_file line $source_line},
2656             qq{(Did you need to define instead of ?)},
2657             q{},
2658             );
2659 256         1683 }
2660             }
2661              
2662             $regex
2663             .= "\n###############[ $source_file line $source_line ]###############\n"
2664             . _translate_rule_def(
2665             $type, $qualifier, $name, $callname, $qualified_name, $trans_body, $objectify,
2666             $local_ws_defn, $local_nocontext,
2667 256         1234 );
2668              
2669             # Update line number...
2670             $source_line += ($full_decl.$body) =~ tr/\n//;
2671 133         2472 }
2672              
2673             # Insert checkpoints into any user-defined code block...
2674             $regex =~ s{ \( \?\?? \{ \K (?!;) }{
2675             local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
2676 133         523 }xmsg;
2677              
2678             # Check for any suspicious left-overs from the start of the regex...
2679 133 100       485 pos $regex = 0;
2680 5         11  
2681 5         526 # If a grammar definition, save grammar and return a placeholder...
2682             if ($is_grammar) {
2683             $user_defined_grammar{$grammar_name} = $regex;
2684             return qq{(?{
2685             warn "Can't match directly against a pure grammar: \n";
2686             })(*COMMIT)(?!)};
2687 128         706 }
2688             # Otherwise, aggregrate the final grammar...
2689             else {
2690             return _complete_regex($regex.$inherited_rules, $pre_match_debug, $post_match_debug, $nocontext);
2691             }
2692 128     128   356 }
2693              
2694 128 100       146777 sub _complete_regex {
2695             my ($regex, $pre_match_debug, $post_match_debug, $nocontext) = @_;
2696              
2697             return $nocontext ? qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE_NC$post_match_debug}
2698             : qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE$post_match_debug};
2699             }
2700              
2701             1; # Magic true value required at end of module
2702              
2703             __END__