File Coverage

blib/lib/Regexp/Grammars.pm
Criterion Covered Total %
statement 499 772 64.6
branch 265 454 58.3
condition 81 154 52.6
subroutine 49 69 71.0
pod 0 3 0.0
total 894 1452 61.5


line stmt bran cond sub pod time code
1             =encoding ISO8859-1
2             =cut
3              
4             package Regexp::Grammars;
5 82     82   4761897 use re 'eval';
  82         800  
  82         4615  
6              
7 82     82   453 use warnings;
  82         140  
  82         1871  
8 82     82   361 use strict;
  82         139  
  82         1531  
9 82     82   1944 use 5.010;
  82         274  
10 82     82   484 use vars ();
  82         155  
  82         1936  
11              
12 82     82   422 use Scalar::Util qw< blessed reftype >;
  82         161  
  82         4721  
13 82     82   43958 use Data::Dumper qw< Dumper >;
  82         485749  
  82         8496  
14              
15             our $VERSION = '1.058';
16              
17             my $anon_scalar_ref = \do{my $var};
18             my $MAGIC_VARS = q{my ($CAPTURE, $CONTEXT, $DEBUG, $INDEX, $MATCH, %ARG, %MATCH);};
19              
20             my $PROBLEM_WITH_5_18 = <<'END_ERROR_MSG';
21             Warning: Regexp::Grammars is unsupported
22             under Perl 5.18.0 through 5.18.3 due to a bug
23             in regex parsing under those versions.
24              
25             Please upgrade to Perl 5.18.4 or later, or revert to
26             Perl 5.16 or earlier.
27             END_ERROR_MSG
28              
29             # Load the module...
30             sub import {
31             # Signal lexical scoping (active, unless something was exported)...
32 94     94   2437 $^H{'Regexp::Grammars::active'} = 1;
33              
34             # Process any regexes in module's active lexical scope...
35 82     82   83433 use overload;
  82         72699  
  82         468  
36             overload::constant(
37             qr => sub {
38 279     279   79849 my ($raw, $cooked, $type) = @_;
39             # In active scope and really a regex...
40 279 100 66     649 if (_module_is_active() && $type =~ /qq?/) {
41 177         4645 return bless \$cooked, 'Regexp::Grammars::Precursor';
42             }
43             # Ignore everything else...
44             else {
45 102         44063 return $cooked;
46             }
47             }
48 94         711 );
49              
50             # Deal with 5.18 issues...
51 94 50       2419 if ($] >= 5.018) {
52             # Issue warning...
53 94 50       399 if ($] < 5.018004) {
54 0         0 require Carp;
55 0         0 Carp::croak($PROBLEM_WITH_5_18);
56             }
57              
58             # Deal with cases where Perl 5.18+ complains about
59             # the injection of (??{...}) and (?{...})
60 94         602 require re;
61 94         1584 re->import('eval');
62              
63             # Sanctify the standard Regexp::Grammars pseudo-variables from
64             # Perl 5.18's early enforcement of strictures...
65 94         245 my $caller = caller;
66 94         1880 warnings->unimport('once');
67 94         382 @_ = ( 'vars', '$CAPTURE', '$CONTEXT', '$DEBUG', '$INDEX', '$MATCH', '%ARG', '%MATCH' );
68 94         9021 goto &vars::import;
69             }
70             }
71              
72             # Deactivate module's regex effect when it is "anti-imported" with 'no'...
73             sub unimport {
74             # Signal lexical (non-)scoping...
75 38     38   4212 $^H{'Regexp::Grammars::active'} = 0;
76 38         199 require re;
77 38         11624 re->unimport('eval');
78             }
79              
80             # Encapsulate the hoopy user-defined pragma interface...
81             sub _module_is_active {
82 279     279   2984 return (caller 1)[10]->{'Regexp::Grammars::active'};
83             }
84              
85             my $RULE_HANDLER;
86 744     744 0 18694 sub clear_rule_handler { undef $RULE_HANDLER; }
87              
88             sub Regexp::with_actions {
89 21     21   15376 my ($self, $handler) = @_;
90 21         31 $RULE_HANDLER = $handler;
91 21         378 return $self;
92             }
93              
94             #=====[ COMPILE-TIME INTERIM REPRESENTATION OF GRAMMARS ]===================
95             {
96             package Regexp::Grammars::Precursor;
97              
98             # Only translate precursors once...
99             state %grammar_cache;
100              
101             use overload (
102             # Concatenation/interpolation just concatenates to the precursor...
103             q{.} => sub {
104 97     97   186 my ($x, $y, $reversed) = @_;
105 97 50       166 if (ref $x) { $x = ${$x} }
  97         109  
  97         167  
106 97 100       169 if (ref $y) { $y = ${$y} }
  38         55  
  38         57  
107 97 100       157 if ($reversed) { ($y,$x) = ($x,$y); }
  21         54  
108 97   50     232 $x .= $y//q{};
109 97         221 return bless \$x, 'Regexp::Grammars::Precursor';
110             },
111              
112             # Using as a string (i.e. matching) preprocesses the precursor...
113             q{""} => sub {
114 139     139   350 my ($obj) = @_;
115             return $grammar_cache{ overload::StrVal($$obj) }
116 139   33     550 //= Regexp::Grammars::_build_grammar( ${$obj} );
  139         1231  
117             },
118              
119             # Everything else, as usual...
120 82         953 fallback => 1,
121 82     82   37058 );
  82         176  
122             }
123              
124              
125             #=====[ SUPPORT FOR THE INTEGRATED DEBUGGER ]=========================
126              
127             # All messages go to STDERR by default...
128             *Regexp::Grammars::LOGFILE = *STDERR{IO};
129              
130             # Debugging levels indicate where to stop...
131             our %DEBUG_LEVEL = (
132             same => undef, # No change in debugging mode
133             off => 0, # No more debugging
134             run => 1, continue => 1, # Run to completion of regex match
135             match => 2, on => 2, # Run to next successful submatch
136             step => 3, try => 3, # Run to next reportable event
137             );
138              
139             # Debugging levels can be abbreviated to one character during interactions...
140             @DEBUG_LEVEL{ map {substr($_,0,1)} keys %DEBUG_LEVEL } = values %DEBUG_LEVEL;
141             $DEBUG_LEVEL{o} = $DEBUG_LEVEL{off}; # Not "on"
142             $DEBUG_LEVEL{s} = $DEBUG_LEVEL{step}; # Not "same"
143              
144             # Width of leading context field in debugging messages is constrained...
145             my $MAX_CONTEXT_WIDTH = 20;
146             my $MIN_CONTEXT_WIDTH = 6;
147              
148             sub set_context_width {
149 0         0 { package Regexp::Grammars::ContextRestorer;
150             sub new {
151 0     0   0 my ($class, $old_context_width) = @_;
152 0         0 bless \$old_context_width, $class;
153             }
154             sub DESTROY {
155 0     0   0 my ($old_context_width_ref) = @_;
156 0         0 $MAX_CONTEXT_WIDTH = ${$old_context_width_ref};
  0         0  
157             }
158             }
159              
160 0     0 0 0 my ($new_context_width) = @_;
  0         0  
161 0         0 my $old_context_width = $MAX_CONTEXT_WIDTH;
162 0         0 $MAX_CONTEXT_WIDTH = $new_context_width;
163 0 0       0 if (defined wantarray) {
164 0         0 return Regexp::Grammars::ContextRestorer->new($old_context_width);
165             }
166             }
167              
168             # Rewrite a string currently being matched, to make \n and \t visible
169             sub _show_metas {
170 0   0 0   0 my $context_str = shift // q{};
171              
172             # Quote newlines (\n -> \\n, without using a regex)...
173 0         0 my $index = index($context_str,"\n");
174 0         0 while ($index >= 0) {
175 0         0 substr($context_str, $index, 1, '\\n');
176 0         0 $index = index($context_str,"\n",$index+2);
177             }
178              
179             # Quote tabs (\t -> \\t, without using a regex)...
180 0         0 $index = index($context_str,"\t");
181 0         0 while ($index >= 0) {
182 0         0 substr($context_str, $index, 1, '\\t');
183 0         0 $index = index($context_str,"\t",$index+2);
184             }
185              
186 0         0 return $context_str;
187             }
188              
189             # Minimize whitespace in a string...
190             sub _squeeze_ws {
191 1826     1826   2513 my ($str) = @_;
192              
193 1826         2508 $str =~ tr/\n\t/ /;
194              
195 1826         2970 my $index = index($str,q{ });
196 1826         3126 while ($index >= 0) {
197 7254         8058 substr($str, $index, 2, q{ });
198 7254         9960 $index = index($str,q{ },$index);
199             }
200              
201 1826         3359 return $str;
202             }
203              
204             # Prepare for debugging...
205             sub _init_try_stack {
206 0     0   0 our (@try_stack, $last_try_pos, $last_context_str);
207              
208             # Start with a representation of the entire grammar match...
209 0         0 @try_stack = ({
210             subrule => '',
211             height => 0,
212             errmsg => ' \\FAIL ',
213             });
214              
215             # Initialize tracking of location and context...
216 0         0 $last_try_pos = -1;
217 0         0 $last_context_str = q{};
218              
219             # Report...
220 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context('=>')
  0         0  
221             . 'Trying from position ' . pos();
222             }
223              
224             # Create a "context string" showing where the regex is currently matching...
225             sub _debug_context {
226 0     0   0 my ($fill_chars) = @_;
227              
228             # Determine minimal sufficient width for context field...
229 0   0     0 my $field_width = length(_show_metas($_//q{}));
230 0 0       0 if ($field_width > $MAX_CONTEXT_WIDTH) {
    0          
231 0         0 $field_width = $MAX_CONTEXT_WIDTH;
232             }
233             elsif ($field_width < $MIN_CONTEXT_WIDTH) {
234 0         0 $field_width = $MIN_CONTEXT_WIDTH;
235             }
236              
237             # Get current matching position (and some additional trailing context)...
238 0   0     0 my $context_str
      0        
239             = substr(_show_metas(substr(($_//q{}).q{},pos()//0,$field_width)),0,$field_width);
240              
241             # Build the context string, handling special cases...
242 0   0     0 our $last_context_str //= q{};
243 0 0       0 if ($fill_chars) {
244             # If caller supplied a 1- or 2-char fill sequence, use that instead...
245 0 0       0 my $last_fill_char = length($fill_chars) > 1
246             ? substr($fill_chars,-1,1,q{})
247             : $fill_chars
248             ;
249 0         0 $context_str = $fill_chars x ($field_width-1) . $last_fill_char;
250             }
251             else {
252             # Make end-of-string visible in empty context string...
253 0 0       0 if ($context_str eq q{}) {
254 0         0 $context_str = '[eos]';
255             }
256              
257             # Don't repeat consecutive identical context strings...
258 0 0       0 if ($context_str eq $last_context_str) {
259 0         0 $context_str = q{ } x $field_width;
260             }
261             else {
262             # If not repeating, remember for next time...
263 0         0 $last_context_str = $context_str;
264             }
265             }
266              
267             # Left justify and return context string...
268 0         0 return sprintf("%-*s ",$field_width,$context_str);
269             }
270              
271             # Show a debugging message (mainly used for compile-time errors and info)...
272             sub _debug_notify {
273             # Single arg is a line to be printed with a null severity...
274 2 50   2   12 my ($severity, @lines) = @_==1 ? (q{},@_) : @_;
275 2         6 chomp @lines;
276              
277             # Formatting string for all lines...
278 2         3 my $format = qq{%*s | %s\n};
279              
280             # Track previous severity and avoid repeating the same level...
281 2         4 state $prev_severity = q{};
282 2 50 33     12 if ($severity !~ /\S/) {
    50          
283             # Do nothing
284             }
285             elsif ($severity eq 'info' && $prev_severity eq 'info' ) {
286 0         0 $severity = q{};
287             }
288             else {
289 2         4 $prev_severity = $severity;
290             }
291              
292             # Display first line with severity indicator (unless same as previous)...
293 2         2 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, $severity, shift @lines;
  2         213  
294              
295             # Display first line without severity indicator
296 2         12 for my $next_line (@lines) {
297 8         15 printf {*Regexp::Grammars::LOGFILE} $format, $MIN_CONTEXT_WIDTH, q{}, $next_line;
  8         237  
298             }
299             }
300              
301             # Handle user interactions during runtime debugging...
302             sub _debug_interact {
303 0     0   0 my ($stack_height, $leader, $curr_frame_ref, $min_debug_level) = @_;
304              
305 0         0 our $DEBUG; # ...stores current debug level within regex
306              
307             # Only interact with terminals, and if debug level is appropriate...
308 0 0 0     0 if (-t *Regexp::Grammars::LOGFILE
      0        
      0        
309             && defined $DEBUG
310             && ($DEBUG_LEVEL{$DEBUG}//0) >= $DEBUG_LEVEL{$min_debug_level}
311             ) {
312 0         0 local $/ = "\n"; # ...in case some caller is being clever
313             INPUT:
314 0         0 while (1) {
315 0   0     0 my $cmd = readline // q{};
316 0         0 chomp $cmd;
317              
318             # Input of 'd' means 'display current result frame'...
319 0 0       0 if ($cmd eq 'd') {
320 0         0 print {*Regexp::Grammars::LOGFILE} join "\n",
321 0 0       0 map { $leader . ($stack_height?'| ':q{})
  0         0  
322             . ' : ' . $_
323             }
324             split "\n", q{ }x8 . substr(Dumper($curr_frame_ref),8);
325 0         0 print "\t";
326             }
327             # Any other (valid) input changes debugging level and continues...
328             else {
329 0 0       0 if (defined $DEBUG_LEVEL{$cmd}) { $DEBUG = $cmd; }
  0         0  
330 0         0 last INPUT;
331             }
332             }
333             }
334             # When interaction not indicated, just complete the debugging line...
335             else {
336 0         0 print {*Regexp::Grammars::LOGFILE} "\n";
  0         0  
337             }
338             }
339              
340             # Handle reporting of unsuccessful match attempts...
341             sub _debug_handle_failures {
342 0     0   0 my ($stack_height, $subrule, $in_match) = @_;
343 0         0 our @try_stack;
344              
345             # Unsuccessful match attempts leave "leftovers" on the attempt stack...
346             CLEANUP:
347 0   0     0 while (@try_stack && $try_stack[-1]{height} >= $stack_height) {
348             # Grab record of (potentially) unsuccessful attempt...
349 0         0 my $error_ref = pop @try_stack;
350              
351             # If attempt was the one whose match is being reported, go and report...
352             last CLEANUP if $in_match
353             && $error_ref->{height} == $stack_height
354 0 0 0     0 && $error_ref->{subrule} eq $subrule;
      0        
355              
356             # Otherwise, report the match failure...
357 0         0 say {*Regexp::Grammars::LOGFILE} _debug_context(q{ }) . $error_ref->{errmsg};
  0         0  
358             }
359             }
360              
361             # Handle attempts to call non-existent subrules...
362             sub _debug_fatal {
363 0     0   0 my ($naughty_construct) = @_;
364              
365 0         0 print {*Regexp::Grammars::LOGFILE}
  0         0  
366             "_________________________________________________________________\n",
367             "Fatal error: Entire parse terminated prematurely while attempting\n",
368             " to call non-existent rule: $naughty_construct\n",
369             "_________________________________________________________________\n";
370 0         0 $@ = "Entire parse terminated prematurely while attempting to call non-existent rule: $naughty_construct";
371             }
372              
373             # Handle objrules that don't return hashes...
374             sub _debug_non_hash {
375 334     334   13652 my ($obj, $name) = @_;
376              
377             # If the object is okay, no further action required...
378 334 100       6586 return q{} if reftype($obj) eq 'HASH';
379              
380             # Generate error messages...
381 1         3 print {*Regexp::Grammars::LOGFILE}
  1         20  
382             "_________________________________________________________________\n",
383             "Fatal error: returned a non-hash-based object\n",
384             "_________________________________________________________________\n";
385 1         6 $@ = " returned a non-hash-based object";
386              
387 1         21 return '(*COMMIT)(*FAIL)';
388             }
389              
390              
391             # Print a message in context...
392             sub _debug_logmsg {
393 0     0   0 my ($stack_height, @msg) = @_;
394              
395             # Determine indent for messages...
396 0         0 my $leader = _debug_context() . q{| } x ($stack_height-1) . '|';
397              
398             # Report the attempt...
399 0         0 print {*Regexp::Grammars::LOGFILE} map { "$leader$_\n" } @msg;
  0         0  
  0         0  
400             }
401              
402             # Print a message indicating a (sub)match attempt...
403             sub _debug_trying {
404 0     0   0 my ($stack_height, $curr_frame_ref, $subrule) = @_;
405              
406             # Clean up after any preceding unsuccessful attempts...
407 0         0 _debug_handle_failures($stack_height, $subrule);
408              
409             # Determine indent for messages...
410 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
411              
412             # Detect and report any backtracking prior to this attempt...
413 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
414 0         0 my $backtrack_distance = $last_try_pos - pos();
415 0 0       0 if ($backtrack_distance > 0) {
416 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
417             . q{| } x ($stack_height-2)
418             . qq{|...Backtracking $backtrack_distance char}
419             . ($backtrack_distance > 1 ? q{s} : q{})
420             . q{ and trying new match}
421             ;
422             }
423              
424             # Report the attempt...
425 0         0 print {*Regexp::Grammars::LOGFILE} $leader, "|...Trying $subrule\t";
  0         0  
426              
427             # Handle user interactions during debugging...
428 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'step');
429              
430             # Record the attempt, for later error handling in _debug_matched()...
431 0 0       0 if ($subrule ne 'next alternative') {
432 0         0 our @try_stack;
433 0         0 push @try_stack, {
434             height => $stack_height,
435             subrule => $subrule,
436             # errmsg should align under: |...Trying $subrule\t
437             errmsg => q{| } x ($stack_height-2) . "| \\FAIL $subrule",
438             };
439             }
440 0         0 $last_try_pos = pos();
441             }
442              
443             # Print a message indicating a successful (sub)match...
444             sub _debug_matched {
445 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $matched_text) = @_;
446              
447             # Clean up any intervening unsuccessful attempts...
448 0         0 _debug_handle_failures($stack_height, $subrule, 'in match');
449              
450             # Build debugging message...
451 0         0 my $debug_context = _debug_context();
452 0         0 my $leader = $debug_context . q{| } x ($stack_height-2);
453 0 0       0 my $message = ($stack_height ? '| ' : q{})
454             . " \\_____$subrule matched ";
455 0 0       0 my $filler = $stack_height
456             ? '| ' . q{ } x (length($message)-4)
457             : q{ } x length($message);
458              
459 0   0     0 our $last_try_pos //= 0; #...Stores the pos() of the most recent match attempt?
460              
461             # Report if match required backtracking...
462 0   0     0 my $backtrack_distance = $last_try_pos - (pos()//0);
463 0 0       0 if ($backtrack_distance > 0) {
464 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
465             . q{| } x ($stack_height-2)
466             . qq{|...Backtracking $backtrack_distance char}
467             . ($backtrack_distance > 1 ? q{s} : q{})
468             . qq{ and rematching $subrule}
469             ;
470             }
471 0         0 $last_try_pos = pos();
472              
473             # Format match text (splitting multi-line texts and indent them correctly)...
474 0 0       0 $matched_text = defined($matched_text)
475             ? $matched_text = q{'} . join("\n$leader$filler", split "\n", $matched_text) . q{'}
476             : q{};
477              
478             # Print match message...
479 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $matched_text . qq{\t};
  0         0  
480              
481             # Check for user interaction...
482 0 0       0 _debug_interact($stack_height, $leader, $curr_frame_ref, $stack_height ? 'match' : 'run');
483             }
484              
485             # Print a message indicating a successful (sub)match...
486             sub _debug_require {
487 0     0   0 my ($stack_height, $condition, $succeeded) = @_;
488              
489             # Build debugging message...
490 0         0 my $debug_context = _debug_context();
491 0         0 my $leader = $debug_context . q{| } x ($stack_height-1);
492 0 0       0 my $message1 = ($stack_height ? '|...' : q{})
493             . "Testing condition: $condition"
494             ;
495 0 0       0 my $message2 = ($stack_height ? '| ' : q{})
    0          
496             . " \\_____"
497             . ($succeeded ? 'Satisfied' : 'FAILED')
498             ;
499              
500             # Report if match required backtracking...
501 0         0 our $last_try_pos;
502 0         0 my $backtrack_distance = $last_try_pos - pos();
503 0 0       0 if ($backtrack_distance > 0) {
504 0 0       0 say {*Regexp::Grammars::LOGFILE} ' <' . q{~} x (length(_debug_context(q{ }))-3) . q{ }
  0         0  
505             . q{| } x ($stack_height-1)
506             . qq{|...Backtracking $backtrack_distance char}
507             . ($backtrack_distance > 1 ? q{s} : q{})
508             . qq{ and rematching}
509             ;
510             }
511              
512             # Remember where the condition was tried...
513 0         0 $last_try_pos = pos();
514              
515             # Print match message...
516 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message1;
  0         0  
517 0         0 say {*Regexp::Grammars::LOGFILE} $leader . $message2;
  0         0  
518             }
519              
520             # Print a message indicating a successful store-result-of-code-block...
521             sub _debug_executed {
522 0     0   0 my ($stack_height, $curr_frame_ref, $subrule, $value) = @_;
523              
524             # Build message...
525 0         0 my $leader = _debug_context() . q{| } x ($stack_height-2);
526 0         0 my $message = "|...Action $subrule\n";
527 0         0 my $message2 = "| saved value: '";
528 0         0 $message .= $leader . $message2;
529 0         0 my $filler = q{ } x length($message2);
530              
531             # Split multiline results over multiple lines (properly indented)...
532 0         0 $value = join "\n$leader$filler", split "\n", $value;
533              
534             # Report the action...
535 0         0 print {*Regexp::Grammars::LOGFILE} $leader . $message . $value . qq{'\t};
  0         0  
536              
537             # Check for user interaction...
538 0         0 _debug_interact($stack_height, $leader, $curr_frame_ref, 'match');
539             }
540              
541             # Create the code to be inserted into the regex to facilitate debugging...
542             sub _build_debugging_statements {
543 423     423   827 my ($debugging_active, $subrule, $extra_pre_indent) = @_;
544              
545 423 100       1244 return (q{}, q{}) if ! $debugging_active;;
546              
547 1   50     4 $extra_pre_indent //= 0;
548              
549 1         3 $subrule = "q{$subrule}";
550              
551             return (
552 1         6 qq{Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent, \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], $subrule)
553             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};},
554             qq{Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1, \$Regexp::Grammars::RESULT_STACK[-1], $subrule, \$^N)
555             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};},
556             );
557             }
558              
559             sub _build_raw_debugging_statements {
560 1824     1824   2693 my ($debugging_active, $subpattern, $extra_pre_indent) = @_;
561              
562 1824 50       4060 return (q{}, q{}) if ! $debugging_active;
563              
564 0   0     0 $extra_pre_indent //= 0;
565              
566 0 0       0 if ($subpattern eq '|') {
567             return (
568 0         0 q{},
569             qq{(?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
570             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], 'next alternative')
571             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
572             );
573             }
574             else {
575             return (
576 0         0 qq{(?{;Regexp::Grammars::_debug_trying(\@Regexp::Grammars::RESULT_STACK+$extra_pre_indent,
577             \$Regexp::Grammars::RESULT_STACK[-2+$extra_pre_indent], q{subpattern /$subpattern/}, \$^N)
578             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
579             qq{(?{;Regexp::Grammars::_debug_matched(\@Regexp::Grammars::RESULT_STACK+1,
580             \$Regexp::Grammars::RESULT_STACK[-1], q{subpattern /$subpattern/}, \$^N)
581             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG};})},
582             );
583             }
584             }
585              
586              
587             #=====[ SUPPORT FOR AUTOMATIC TIMEOUTS ]=========================
588              
589             sub _test_timeout {
590 0     0   0 our ($DEBUG, $TIMEOUT);
591              
592 0 0       0 return q{} if time() < $TIMEOUT->{'limit'};
593              
594             my $duration = "$TIMEOUT->{duration} second"
595 0 0       0 . ( $TIMEOUT->{duration} == 1 ? q{} : q{s} );
596              
597 0 0 0     0 if (defined($DEBUG) && $DEBUG ne 'off') {
598 0         0 my $leader = _debug_context(q{ });
599 0         0 say {*LOGFILE} $leader . '|';
  0         0  
600 0         0 say {*LOGFILE} $leader . "|...Invoking {duration}>";
  0         0  
601 0         0 say {*LOGFILE} $leader . "| \\_____No match after $duration";
  0         0  
602 0         0 say {*LOGFILE} $leader . '|';
  0         0  
603 0         0 say {*LOGFILE} $leader . " \\FAIL ";
  0         0  
604             }
605              
606 0 0       0 if (! @!) {
607 0         0 @! = "Internal error: Timed out after $duration (as requested)";
608             }
609 0         0 return q{(*COMMIT)(*FAIL)};
610             }
611              
612              
613             #=====[ SUPPORT FOR UPDATING THE RESULT STACK ]=========================
614              
615             # Create a clone of the current result frame with an new key/value...
616             sub _extend_current_result_frame_with_scalar {
617 2011     2011   21691 my ($stack_ref, $key, $value) = @_;
618              
619             # Autovivify null stacks (only occur when grammar invokes no subrules)...
620 2011 50       2522 if (!@{$stack_ref}) {
  2011         3634  
621 0         0 $stack_ref = [{}];
622             }
623              
624             # Copy existing frame, appending new value so it overwrites any old value...
625             my $cloned_result_frame = {
626 2011         2387 %{$stack_ref->[-1]},
  2011         5333  
627             $key => $value,
628             };
629              
630             # Make the copy into an object, if the original was one...
631 2011 50       4733 if (my $class = blessed($stack_ref->[-1])) {
632 0         0 bless $cloned_result_frame, $class;
633             }
634              
635 2011         18025 return $cloned_result_frame;
636             }
637              
638             # Create a clone of the current result frame with an additional key/value
639             # (As above, but preserving the "listiness" of the key being added to)...
640             sub _extend_current_result_frame_with_list {
641 131     131   1615 my ($stack_ref, $key, $value) = @_;
642              
643             # Copy existing frame, appending new value to appropriate element's list...
644             my $cloned_result_frame = {
645 131         353 %{$stack_ref->[-1]},
646             $key => [
647 131   100     216 @{$stack_ref->[-1]{$key}//[]},
  131         691  
648             $value,
649             ],
650             };
651              
652             # Make the copy into an object, if the original was one...
653 131 50       1118 if (my $class = blessed($stack_ref->[-1])) {
654 0         0 bless $cloned_result_frame, $class;
655             }
656              
657 131         1780 return $cloned_result_frame;
658             }
659              
660             # Pop current result frame and add it to a clone of previous result frame
661             # (flattening it if possible, and preserving any blessing)...
662             sub _pop_current_result_frame {
663 820     820   14120 my ($stack_ref, $key, $original_name, $value) = @_;
664              
665             # Where are we in the stack?
666 820         1334 my $curr_frame = $stack_ref->[-1];
667 820         1046 my $caller_frame = $stack_ref->[-2];
668              
669             # Track which frames are objects...
670 820         1574 my $is_blessed_curr = blessed($curr_frame);
671 820         1234 my $is_blessed_caller = blessed($caller_frame);
672              
673             # Remove "private" captures (i.e. those starting with _)...
674 820         1023 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  820         1232  
  1585         3306  
  820         1982  
675              
676             # Remove "nocontext" marker...
677 820         1315 my $nocontext = delete $curr_frame->{'~'};
678              
679             # Build a clone of the current frame...
680             my $cloned_result_frame
681             = exists $curr_frame->{'='} ? $curr_frame->{'='}
682 545         1479 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
683 820 50 100     2262 : keys %{$curr_frame} ? $curr_frame->{q{}}
  102 100       241  
    100          
684             : $value
685             ;
686              
687             # Apply any appropriate handler...
688 820 100       1635 if ($RULE_HANDLER) {
689 88 100 66     447 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
690 22         53 my $replacement_result_frame
691             = $RULE_HANDLER->$original_name($cloned_result_frame);
692 22 50       1003 if (defined $replacement_result_frame) {
693 22         31 $cloned_result_frame = $replacement_result_frame;
694             }
695             }
696             }
697              
698             # Remove capture if not requested...
699 820 100 100     1833 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  110   100     333  
700 28         41 delete $cloned_result_frame->{q{}};
701             }
702              
703             # Nest a clone of current frame inside a clone of the caller frame...
704             my $cloned_caller_frame = {
705 820   50     1002 %{$caller_frame//{}},
  820         2685  
706             $key => $cloned_result_frame,
707             };
708              
709             # Make the copies into objects, if the originals were...
710 820 100 66     2029 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
711 213         378 bless $cloned_caller_frame->{$key}, $is_blessed_curr;
712             }
713 820 50       1356 if ($is_blessed_caller) {
714 0         0 bless $cloned_caller_frame, $is_blessed_caller;
715             }
716              
717 820         12599 return $cloned_caller_frame;
718             }
719              
720             # Pop current result frame and add it to a clone of previous result frame
721             # (flattening it if possible, and preserving any blessing)
722             # (As above, but preserving listiness of key being added to)...
723             sub _pop_current_result_frame_with_list {
724 1402     1402   32822 my ($stack_ref, $key, $original_name, $value) = @_;
725              
726             # Where are we in the stack?
727 1402         2143 my $curr_frame = $stack_ref->[-1];
728 1402         1709 my $caller_frame = $stack_ref->[-2];
729              
730             # Track which frames are objects...
731 1402         2470 my $is_blessed_curr = blessed($curr_frame);
732 1402         1927 my $is_blessed_caller = blessed($caller_frame);
733              
734             # Remove "private" captures (i.e. those starting with _)...
735 1402         1622 delete @{$curr_frame}{grep {substr($_,0,1) eq '_'} keys %{$curr_frame} };
  1402         1955  
  1964         4178  
  1402         3242  
736              
737             # Remove "nocontext" marker...
738 1402         2140 my $nocontext = delete $curr_frame->{'~'};
739              
740             # Clone the current frame...
741             my $cloned_result_frame
742             = exists $curr_frame->{'='} ? $curr_frame->{'='}
743 233         581 : $is_blessed_curr || length(join(q{}, keys %{$curr_frame})) ? { q{} => $value, %{$curr_frame} }
744 1402 50 100     3274 : keys %{$curr_frame} ? $curr_frame->{q{}}
  951 100       1708  
    100          
745             : $value
746             ;
747              
748             # Apply any appropriate handler...
749 1402 100       2491 if ($RULE_HANDLER) {
750 174 100 66     608 if ($RULE_HANDLER->can($original_name) || $RULE_HANDLER->can('AUTOLOAD')) {
751 106         214 my $replacement_result_frame
752             = $RULE_HANDLER->$original_name($cloned_result_frame);
753 106 50       1989 if (defined $replacement_result_frame) {
754 106         150 $cloned_result_frame = $replacement_result_frame;
755             }
756             }
757             }
758              
759             # Remove capture if not requested...
760 1402 100 100     2585 if ($nocontext && ref $cloned_result_frame eq 'HASH' && keys %{$cloned_result_frame} > 1) {
  86   100     253  
761 82         121 delete $cloned_result_frame->{q{}};
762             }
763              
764             # Append a clone of current frame inside a clone of the caller frame...
765             my $cloned_caller_frame = {
766 1402         2729 %{$caller_frame},
767             $key => [
768 1402   100     1529 @{$caller_frame->{$key}//[]},
  1402         5649  
769             $cloned_result_frame,
770             ],
771             };
772              
773             # Make the copies into objects, if the originals were...
774 1402 100 66     3517 if ($is_blessed_curr && !exists $curr_frame->{'='} ) {
775 120         204 bless $cloned_caller_frame->{$key}[-1], $is_blessed_curr;
776             }
777 1402 50       2073 if ($is_blessed_caller) {
778 0         0 bless $cloned_caller_frame, $is_blessed_caller;
779             }
780              
781 1402         23472 return $cloned_caller_frame;
782             }
783              
784              
785             #=====[ MISCELLANEOUS CONSTANTS ]=========================
786              
787             # Namespace in which grammar inheritance occurs...
788             my $CACHE = 'Regexp::Grammars::_CACHE_::';
789             my $CACHE_LEN = length $CACHE;
790             my %CACHE; #...for subrule tracking
791              
792             # This code inserted at the start of every grammar regex
793             # (initializes the result stack cleanly and backtrackably, via local)...
794             my $PROLOGUE = q{((?{; @! = () if !pos;
795             local @Regexp::Grammars::RESULT_STACK
796             = (@Regexp::Grammars::RESULT_STACK, {});
797             local $Regexp::Grammars::TIMEOUT = { limit => -1>>1 };
798             local $Regexp::Grammars::DEBUG = 'off' }) };
799              
800             # This code inserted at the end of every grammar regex
801             # (puts final result in %/. Also defines default , , etc.)...
802             my $EPILOGUE = q{)(?{; $Regexp::Grammars::RESULT_STACK[-1]{q{}} //= $^N;;
803             local $Regexp::Grammars::match_frame = pop @Regexp::Grammars::RESULT_STACK;
804             delete @{$Regexp::Grammars::match_frame}{
805             '~', grep {substr($_,0,1) eq '_'} keys %{$Regexp::Grammars::match_frame}
806             };
807             if (exists $Regexp::Grammars::match_frame->{'='}) {
808             if (ref($Regexp::Grammars::match_frame->{'='}) eq 'HASH') {
809             $Regexp::Grammars::match_frame
810             = $Regexp::Grammars::match_frame->{'='};
811             }
812             }
813             if (@Regexp::Grammars::RESULT_STACK) {
814             $Regexp::Grammars::RESULT_STACK[-1]{'(?R)'}
815             = $Regexp::Grammars::match_frame;
816             }
817             Regexp::Grammars::clear_rule_handler();
818             */ = $Regexp::Grammars::match_frame;
819             })|\Z(?{Regexp::Grammars::clear_rule_handler();})(?!)(?(DEFINE)
820             (? \\s* )
821             (?
822             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
823             \\s*
824             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
825             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
826             })
827             )
828             (? \\S+ )
829             (?
830             (?{$Regexp::Grammars::RESULT_STACK[-1]{'!'}=$#{!};})
831             \\S+
832             (?{;$#{!}=delete($Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
833             delete($Regexp::Grammars::RESULT_STACK[-1]{'@'});
834             })
835             )
836             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = pos; }) )
837             (? (?{; $Regexp::Grammars::RESULT_STACK[-1]{"="} = 1 + substr($_,0,pos) =~ tr/\n/\n/; }) )
838             )
839             };
840             my $EPILOGUE_NC = $EPILOGUE;
841             $EPILOGUE_NC =~ s{ ; .* ;;}{;}xms;
842              
843              
844             #=====[ MISCELLANEOUS PATTERNS THAT MATCH USEFUL THINGS ]========
845              
846             # Match an identifier...
847             my $IDENT = qr{ [^\W\d] \w*+ }xms;
848             my $QUALIDENT = qr{ (?: $IDENT :: )*+ $IDENT }xms;
849              
850             # Match balanced parentheses, taking into account \-escapes and []-escapes...
851             my $PARENS = qr{
852             (?&VAR_PARENS)
853             (?(DEFINE)
854             (? \( (?: \\. | (?&VAR_PARENS) | (?&CHARSET) | [^][()\\]++)*+ \) )
855             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]])*+ \] )
856              
857             )
858             }xms;
859              
860             # Match a directive within rules...
861             my $WS_PATTERN = qr{]++ | $PARENS )*+) >}xms;
862              
863              
864             #=====[ UTILITY SUBS FOR ERROR AND WARNING MESSAGES ]========
865              
866             sub _uniq {
867 53     53   190 my %seen;
868 53 50       81 return grep { defined $_ && !$seen{$_}++ } @_;
  86         1235  
869             }
870              
871             # Default translator for error messages...
872             my $ERRORMSG_TRANSLATOR = sub {
873             my ($errormsg, $rulename, $context) = @_;
874              
875             $rulename = 'valid input' if $rulename eq q{};
876             $context //= '';
877              
878             # Unimplemented subrule when rulename starts with '-'...
879             if (substr($rulename,0,1) eq '-') {
880             $rulename = substr($rulename,1);
881             return "Can't match subrule <$rulename> (not implemented)";
882             }
883              
884             # Empty message converts to a "Expected...but found..." message...
885             if ($errormsg eq q{}) {
886             $rulename =~ tr/_/ /;
887             $rulename = lc($rulename);
888             return "Expected $rulename, but found '$context' instead";
889             }
890              
891             # "Expecting..." messages get "but found" added...
892             if (lc(substr($errormsg,0,6)) eq 'expect') {
893             return "$errormsg, but found '$context' instead";
894             }
895              
896             # Everything else stays "as is"...
897             return $errormsg;
898             };
899              
900             # Allow user to set translation...
901             sub set_error_translator {
902 0         0 { package Regexp::Grammars::TranslatorRestorer;
903             sub new {
904 1     1   4 my ($class, $old_translator) = @_;
905 1         5 bless \$old_translator, $class;
906             }
907             sub DESTROY {
908 1     1   642 my ($old_translator_ref) = @_;
909 1         2 $ERRORMSG_TRANSLATOR = ${$old_translator_ref};
  1         12  
910             }
911             }
912              
913 1     1 0 105 my ($translator_ref) = @_;
  1         3  
914 1 50       6 die "Usage: set_error_translator(\$subroutine_reference)\n"
915             if ref($translator_ref) ne 'CODE';
916              
917 1         3 my $old_translator_ref = $ERRORMSG_TRANSLATOR;
918 1         2 $ERRORMSG_TRANSLATOR = $translator_ref;
919              
920             return defined wantarray
921 1 50       11 ? Regexp::Grammars::TranslatorRestorer->new($old_translator_ref)
922             : ();
923             }
924              
925             # Dispatch to current translator for error messages...
926             sub _translate_errormsg {
927 53     53   2105 goto &{$ERRORMSG_TRANSLATOR};
  53         153  
928             }
929              
930             #=====[ SUPPORT FOR TRANSLATING GRAMMAR-ENHANCED REGEX TO NATIVE REGEX ]====
931              
932             # Store any specified grammars...
933             my %user_defined_grammar;
934              
935             my %REPETITION_DESCRIPTION_FOR = (
936             '+' => 'once or more',
937             '*' => 'any number of times',
938             '?' => 'if possible',
939             '+?' => 'as few times as possible',
940             '*?' => 'as few times as possible',
941             '??' => 'if necessary',
942             '++' => 'as many times as possible',
943             '*+' => 'as many times as possible',
944             '?+' => 'if possible',
945             );
946              
947             sub _translate_raw_regex {
948 1826     1826   3858 my ($regex, $debug_build, $debug_runtime) = @_;
949              
950 1826   66     5707 my $is_comment = substr($regex, 0, 1) eq q{#}
951             || substr($regex, 0, 3) eq q{(?#};
952 1826         2917 my $visible_regex = _squeeze_ws($regex);
953              
954             # Report how regex was interpreted, if requested to...
955 1826 0 33     3305 if ($debug_build && $visible_regex ne q{} && $visible_regex ne q{ }) {
      33        
956 0 0       0 _debug_notify( info =>
957             " |",
958             " |...Treating '$visible_regex' as:",
959             ($is_comment ? " | \\ a comment (which will be ignored)"
960             : " | \\ normal Perl regex syntax"
961             ),
962             );
963             }
964              
965 1826 100       2563 return q{} if $is_comment;
966              
967             # Generate run-time debugging code (if any)...
968 1824         2875 my ($debug_pre, $debug_post)
969             = _build_raw_debugging_statements($debug_runtime,$visible_regex, +1);
970              
971             # Replace negative lookahead with one that works under R::G...
972 1824         3209 $regex =~ s{\(\?!}{(?!(?!)|}gxms;
973             # TODO: Also replace positive lookahead with one that works under R::G...
974             # This replacement should be of the form:
975             # $regex =~ s{\(\?!}{(?!(?!)|(?!(?!)|}gxms;
976             # but need to find a way to insert the extra ) at the other end
977              
978 1824 50 33     6382 return $debug_runtime && $regex eq '|' ? $regex . $debug_post
    50 33        
979             : $debug_runtime && $regex =~ /\S/ ? "(?#)(?:$debug_pre($regex)$debug_post(?#))"
980              
981             # TODO: REWORK THIS INSUFFICENT FIX FOR t/grammar_autospace.t...
982             # : $regex !~ /\S/ ? "(?:$regex)"
983              
984             : $regex;
985             }
986              
987             # Report and convert a debugging directive...
988             sub _translate_debug_directive {
989 0     0   0 my ($construct, $cmd, $debug_build) = @_;
990              
991             # Report how directive was interpreted, if requested to...
992 0 0       0 if ($debug_build) {
993 0         0 _debug_notify( info =>
994             " |",
995             " |...Treating $construct as:",
996             " | \\ Change run-time debugging mode to '$cmd'",
997             );
998             }
999              
1000 0         0 return qq{(?{; local \$Regexp::Grammars::DEBUG = q{$cmd}; }) };
1001             }
1002              
1003             # Report and convert a timeout directive...
1004             sub _translate_timeout_directive {
1005 0     0   0 my ($construct, $timeout, $debug_build) = @_;
1006              
1007             # Report how directive was interpreted, if requested to...
1008 0 0       0 if ($debug_build) {
1009 0 0       0 _debug_notify( info =>
    0          
1010             " |",
1011             " |...Treating $construct as:",
1012             ($timeout > 0
1013             ? " | \\ Cause the entire parse to fail after $timeout second" . ($timeout==1 ? q{} : q{s})
1014             : " | \\ Cause the entire parse to fail immediately"
1015             ),
1016             );
1017             }
1018              
1019 0 0       0 return $timeout > 0
1020             ? qq{(?{; local \$Regexp::Grammars::TIMEOUT = { duration => $timeout, limit => time() + $timeout }; }) }
1021             : qq{(*COMMIT)(*FAIL)};
1022             }
1023              
1024             # Report and convert a directive...
1025             sub _translate_require_directive {
1026 0     0   0 my ($construct, $condition, $debug_build) = @_;
1027              
1028 0         0 $condition = substr($condition, 3, -2);
1029              
1030             # Report how directive was interpreted, if requested to...
1031 0 0       0 if ($debug_build) {
1032 0         0 _debug_notify( info =>
1033             " |",
1034             " |...Treating $construct as:",
1035             " | \\ Require that {$condition} is true",
1036             );
1037             }
1038              
1039 0         0 my $quoted_condition = $condition;
1040 0         0 $quoted_condition =~ s{\$}{}xms;
1041              
1042 0         0 return qq{(?(?{;$condition})
1043             (?{;Regexp::Grammars::_debug_require(
1044             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 1)
1045             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})
1046             | (?{;Regexp::Grammars::_debug_require(
1047             scalar \@Regexp::Grammars::RESULT_STACK, q{$quoted_condition}, 0)
1048             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}})(?!))
1049             };
1050             }
1051              
1052              
1053             # Report and convert a directive...
1054             sub _translate_minimize_directive {
1055 3     3   11 my ($construct, $debug_build) = @_;
1056              
1057             # Report how directive was interpreted, if requested to...
1058 3 50       8 if ($debug_build) {
1059 0         0 _debug_notify( info =>
1060             " |",
1061             " |...Treating $construct as:",
1062             " | \\ Minimize result value if possible",
1063             );
1064             }
1065              
1066 3         7 return q{(?{;
1067             if (1 == grep { $_ ne '!' && $_ ne '@' && $_ ne '~' } keys %MATCH) { # ...single alnum key
1068             local %Regexp::Grammars::matches = %MATCH;
1069             delete @Regexp::Grammars::matches{'!', '@', '~'};
1070             local ($Regexp::Grammars::only_key) = keys %Regexp::Grammars::matches;
1071             local $Regexp::Grammars::array_ref = $MATCH{$Regexp::Grammars::only_key};
1072             if (ref($Regexp::Grammars::array_ref) eq 'ARRAY' && 1 == @{$Regexp::Grammars::array_ref}) {
1073             $MATCH = $Regexp::Grammars::array_ref->[0];
1074             }
1075             }
1076             })};
1077             }
1078              
1079             # Report and convert a debugging directive...
1080             sub _translate_error_directive {
1081 19     19   83 my ($construct, $type, $msg, $debug_build, $subrule_name) = @_;
1082 19   50     68 $subrule_name //= 'undef';
1083              
1084             # Determine severity...
1085 19 100       45 my $severity = ($type eq 'error') ? 'fail' : 'non-fail';
1086              
1087             # Determine fatality (and build code to invoke it)...
1088 19 100       41 my $fatality = ($type eq 'fatal') ? '(*COMMIT)(*FAIL)' : q{};
1089              
1090             # Unpack message...
1091 19 100       45 if (substr($msg,0,3) eq '(?{') {
1092 4         12 $msg = 'do'. substr($msg,2,-1);
1093             }
1094             else {
1095 15         30 $msg = quotemeta $msg;
1096 15         35 $msg = qq{qq{$msg}};
1097             }
1098              
1099             # Report how directive was interpreted, if requested to...
1100 19 50       43 if ($debug_build) {
1101 0 0       0 _debug_notify( info => " |",
1102             " |...Treating $construct as:",
1103             ( $type eq 'log' ? " | \\ Log a message to the logfile"
1104             : " | \\ Append a $severity error message to \@!"
1105             ),
1106             );
1107             }
1108              
1109             # Generate the regex...
1110 19 100       118 return $type eq 'log'
    50          
1111             ? qq{(?{Regexp::Grammars::_debug_logmsg(scalar \@Regexp::Grammars::RESULT_STACK,$msg)
1112             if \$Regexp::Grammars::DEBUG_LEVEL{\$Regexp::Grammars::DEBUG}
1113             })}
1114              
1115             : qq{(?:(?{;local \$Regexp::Grammar::_memopos=pos();})
1116             (?>\\s*+((?-s).{0,$MAX_CONTEXT_WIDTH}+))
1117             (?{; pos() = \$Regexp::Grammar::_memopos;
1118             @! = Regexp::Grammars::_uniq(
1119             @!,
1120             Regexp::Grammars::_translate_errormsg($msg,q{$subrule_name},\$CONTEXT)
1121             ) }) (?!)|}
1122             . ($severity eq 'fail' ? q{(?!)} : $fatality)
1123             . q{)}
1124             ;
1125             }
1126              
1127             sub _translate_subpattern {
1128 110     110   459 my ($construct, $alias, $subpattern, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout, $backref)
1129             = @_;
1130              
1131             # Determine save behaviour...
1132 110         213 my $is_noncapturing = $savemode eq 'noncapturing';
1133 110         156 my $is_listifying = $savemode eq 'list';
1134 110         188 my $is_codeblock = substr($subpattern,0,3) eq '(?{';
1135 110 100       201 my $value_saved = $is_codeblock ? '$^R' : '$^N';
1136 110 100       190 my $do_something_with = $is_codeblock ? 'execute the code block' : 'match the pattern';
1137 110 100       184 my $result = $is_codeblock ? 'result' : 'matched substring';
1138 110 100       244 my $description = $is_codeblock ? substr($subpattern,2,-1)
    100          
1139             : defined $backref ? $backref
1140             : $subpattern;
1141 110 100       208 my $debug_construct
1142             = $is_codeblock ? '<' . substr($alias,1,-1) . '= (?{;' . substr($subpattern,3,-2) . '})>'
1143             : $construct
1144             ;
1145              
1146             # Report how construct was interpreted, if requested to...
1147 110   50     359 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1148 110 50 66     527 my $results = $is_listifying && $postmodifier ? "each $result"
    50 33        
    50          
1149             : substr($postmodifier,0,1) eq '?' ? "any $result"
1150             : $postmodifier && !$is_noncapturing ? "only the final $result"
1151             : "the $result"
1152             ;
1153 110 50       214 if ($debug_build) {
1154 0 0       0 _debug_notify( info =>
    0          
1155             " |",
1156             " |...Treating $construct as:",
1157             " | | $do_something_with $description $repeatedly",
1158             ( $is_noncapturing ? " | \\ but don't save $results"
1159             : $is_listifying ? " | \\ appending $results to \@{\$MATCH{$alias}}"
1160             : " | \\ saving $results in \$MATCH{$alias}"
1161             )
1162             );
1163             }
1164              
1165             # Generate run-time debugging code (if any)...
1166 110         226 my ($debug_pre, $debug_post)
1167             = _build_debugging_statements($debug_runtime,$debug_construct, +1);
1168              
1169             # Generate post-match result-capturing code, if match captures...
1170 110 100       354 my $post_action = $is_noncapturing
1171             ? q{}
1172             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1173             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1174             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1175             \\\@Regexp::Grammars::RESULT_STACK, $alias, $value_saved
1176             ),
1177             );}
1178             ;
1179              
1180             # Generate timeout test...
1181 110 50       203 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1182              
1183             # Translate to standard regex code...
1184 110         465 return qq{$timeout_test(?{;local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;$debug_pre})(?:($subpattern)(?{;$post_action$debug_post}))$postmodifier};
1185             }
1186              
1187              
1188             sub _translate_hashmatch {
1189 14     14   78 my ($construct, $alias, $hashname, $keypat, $savemode, $postmodifier, $debug_build, $debug_runtime, $timeout)
1190             = @_;
1191              
1192             # Empty or missing keypattern defaults to <.hk>...
1193 14 100 66     66 if (!defined $keypat || $keypat !~ /\S/) {
1194 8         12 $keypat = '(?&hk__implicit__)'
1195             }
1196             else {
1197 6         13 $keypat = substr($keypat, 1, -1);
1198             }
1199              
1200             # Determine save behaviour...
1201 14         19 my $is_noncapturing = $savemode eq 'noncapturing';
1202 14         19 my $is_listifying = $savemode eq 'list';
1203              
1204             # Convert hash to hash lookup...
1205 14         30 my $hash_lookup = '$' . substr($hashname, 1). '{$^N}';
1206              
1207             # Report how construct was interpreted, if requested to...
1208 14   100     41 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1209 14 50 66     67 my $results = $is_listifying && $postmodifier ? 'each matched key'
    50 66        
    100          
1210             : substr($postmodifier,0,1) eq '?' ? 'any matched key'
1211             : $postmodifier && !$is_noncapturing ? 'only the final matched key'
1212             : 'the matched key'
1213             ;
1214 14 50       31 if ($debug_build) {
1215 0 0       0 _debug_notify( info =>
    0          
1216             " |",
1217             " |...Treating $construct as:",
1218             " | | match a key from the hash $hashname $repeatedly",
1219             ( $is_noncapturing ? " | \\ but don't save $results"
1220             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1221             : " | \\ saving $results in \$MATCH{$alias}"
1222             )
1223             );
1224             }
1225              
1226             # Generate run-time debugging code (if any)...
1227 14         30 my ($debug_pre, $debug_post)
1228             = _build_debugging_statements($debug_runtime,$construct, +1);
1229              
1230             # Generate post-match result-capturing code, if match captures...
1231 14 100       37 my $post_action = $is_noncapturing
1232             ? q{}
1233             : qq{local \@Regexp::Grammars::RESULT_STACK = (
1234             \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-2],
1235             Regexp::Grammars::_extend_current_result_frame_with_$savemode(
1236             \\\@Regexp::Grammars::RESULT_STACK, $alias, \$^N
1237             ),
1238             );}
1239             ;
1240              
1241             # Generate timeout test...
1242 14 50       25 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1243              
1244             # Translate to standard regex code...
1245 14         62 return qq{$timeout_test(?:(?{;local \@Regexp::Grammars::RESULT_STACK
1246             = \@Regexp::Grammars::RESULT_STACK;$debug_pre})(?:($keypat)(??{exists $hash_lookup ? q{} : q{(?!)}})(?{;$post_action$debug_post})))$postmodifier};
1247             }
1248              
1249              
1250             # Convert a " % " construct to pure Perl 5.10...
1251             sub _translate_separated_list {
1252 73     73   242 my ($term, $op, $separator, $term_trans, $sep_trans,
1253             $ws, $debug_build, $debug_runtime, $timeout) = @_;
1254              
1255             # This insertion ensures backtracking upwinds the stack correctly...
1256 73         131 state $CHECKPOINT = q{(?{;@Regexp::Grammars::RESULT_STACK = @Regexp::Grammars::RESULT_STACK;})};
1257              
1258             # Translate meaningful whitespace...
1259 73 100       196 $ws = length($ws) ? q{(?&ws__implicit__)} : q{};
1260              
1261             # Generate support for optional trailing separator...
1262 73 100       238 my $opt_trailing = substr($op,-2) eq '%%' ? qq{$ws$sep_trans?}
1263             : q{};
1264              
1265             # Generate timeout test...
1266 73 50       161 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1267              
1268             # Report how construct was interpreted, if requested to...
1269 73 50       191 if ($debug_build) {
1270 0 0       0 _debug_notify( info =>
1271             " |",
1272             " |...Treating $term $op $separator as:",
1273             " | | repeatedly match the subrule $term",
1274             " | \\ as long as the matches are separated by matches of $separator",
1275             (substr($op,-2) eq '%%' ?
1276             " | \\ and allowing an optional trailing $separator"
1277             : q{}
1278             )
1279             );
1280             }
1281              
1282             # One-or-more...
1283 73 100       750 return qq{$timeout_test(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+$opt_trailing}
1284             if $op =~ m{ [*][*]() | [+]([+?]?) \s* %%?+ | \{ 1, \}([+?]?) \s* %%?+ }xms;
1285              
1286             # Zero-or-more...
1287             return
1288 22 100       103 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans)*$+)?$+$opt_trailing}
1289             if $op =~ m{ [*]([+?]?) \s* %%? | \{ 0, \}([+?]?) \s* %%? }xms;
1290              
1291             # One-or-zero...
1292 18 100       78 return qq{?$+$opt_trailing}
1293             if $op =~ m{ [?]([+?]?) \s* %%? | \{ 0,1 \}([+?]?) \s* %%? }xms;
1294              
1295             # Zero exactly...
1296 14 100       56 return qq{{0}$ws$opt_trailing}
1297             if $op =~ m{ \{ 0 \}[+?]? \s* %%? }xms;
1298              
1299             # N exactly...
1300 12 100       55 if ($op =~ m{ \{ (\d+) \}([+?]?) \s* %%? }xms ) {
1301 2         8 my $min = $1-1;
1302             return
1303 2         15 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min}$+$opt_trailing)}
1304             }
1305              
1306             # Zero-to-N...
1307 10 100       43 if ($op =~ m{ \{ 0,(\d+) \}([+?]?) \s* %%? }xms ) {
1308 2         6 my $max = $1-1;
1309             return
1310 2         15 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){0,$max}$+)?$+$opt_trailing}
1311             }
1312              
1313             # M-to-N and M-to-whatever...
1314 8 50       38 if ($op =~ m{ \{ (\d+),(\d*) \} ([+?]?) \s* %%? }xms ) {
1315 8         31 my $min = $1-1;
1316 8 100       31 my $max = $2 ? $2-1 : q{};
1317             return
1318 8         63 qq{{0}$timeout_test$ws(?:$term_trans(?:$ws$CHECKPOINT$sep_trans$ws$term_trans){$min,$max}$+$opt_trailing)}
1319             }
1320              
1321             # Somehow we missed a case (this should never happen)...
1322 0         0 die "Internal error: missing case in separated list handler";
1323             }
1324              
1325             sub _translate_subrule_call {
1326 299     299   2368 my ($source_line, $source_file, $rulename, $grammar_name, $construct, $alias,
1327             $subrule, $args, $savemode, $postmodifier,
1328             $debug_build, $debug_runtime, $timeout, $valid_subrule_names_ref) = @_;
1329              
1330             # Translate arg list, if provided...
1331 299         552 my $arg_desc;
1332 299 100       725 if ($args eq q{}) {
    100          
1333 289         490 $args = q{()};
1334             }
1335             elsif (substr($args,0,3) eq '(?{') {
1336             # Turn parencode into do block...
1337 1         2 $arg_desc = substr($args,3,-2);
1338 1         3 substr($args,1,1) = 'do';
1339             }
1340             else {
1341             # Turn abbreviated format into a key=>value list...
1342 9         39 $args =~ s{ [(,] \s* \K : (\w+) (?= \s* [,)] ) }{$1 => \$MATCH{'$1'}}gxms;
1343 9         49 $arg_desc = substr($args,1,-1);
1344             }
1345              
1346             # Transform qualified subrule names...
1347 299         440 my $simple_subrule = $subrule;
1348 299 100       835 my $start_grammar = (($simple_subrule =~ s{(.*)::}{}xms) ? $1 : "");
1349 299 100       794 if ($start_grammar !~ /^NEXT$|::/) {
1350 297         881 $start_grammar = caller(3).'::'.$start_grammar;
1351             }
1352              
1353 299 100       2243 my @candidates = $start_grammar eq 'NEXT' ? _ancestry_of($grammar_name)
1354             : _ancestry_of($start_grammar);
1355              
1356             # Rename fully-qualified rule call, if to ancestor grammar...
1357             RESOLVING:
1358 299         635 for my $parent_class (@candidates) {
1359 302         525 my $inherited_subrule = $parent_class.'::'.$simple_subrule;
1360 302 100       787 if ($CACHE{$inherited_subrule}) {
1361 3         4 $subrule = $inherited_subrule;
1362 3         8 last RESOLVING;
1363             }
1364             }
1365              
1366             # Replace package separators, which regex engine can't handle...
1367 299         453 my $internal_subrule = $subrule;
1368 299         489 $internal_subrule =~ s{::}{_88_}gxms;
1369              
1370             # Shortcircuit if unknown subrule invoked...
1371 299 50       709 if (!$valid_subrule_names_ref->{$subrule}) {
1372 0         0 _debug_notify( error =>
1373             qq{Found call to $construct inside definition of $rulename},
1374             qq{near $source_file line $source_line.},
1375             qq{But no or was defined in the grammar},
1376             qq{(Did you misspell $construct? Or forget to define the rule?)},
1377             q{},
1378             );
1379 0         0 return "(?{Regexp::Grammars::_debug_fatal('$construct')})(*COMMIT)(*FAIL)";
1380             }
1381              
1382             # Determine save behaviour...
1383 299         521 my $is_noncapturing = $savemode =~ /noncapturing|lookahead/;
1384 299         435 my $is_listifying = $savemode eq 'list';
1385              
1386 299 100       1009 my $save_code =
    100          
1387             $is_noncapturing?
1388             q{ @Regexp::Grammars::RESULT_STACK[0..@Regexp::Grammars::RESULT_STACK-2] }
1389             : $is_listifying?
1390             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1391             Regexp::Grammars::_pop_current_result_frame_with_list(
1392             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1393             ),
1394             }
1395             :
1396             qq{ \@Regexp::Grammars::RESULT_STACK[0..\@Regexp::Grammars::RESULT_STACK-3],
1397             Regexp::Grammars::_pop_current_result_frame(
1398             \\\@Regexp::Grammars::RESULT_STACK, $alias, '$simple_subrule', \$^N
1399             ),
1400             }
1401             ;
1402              
1403             # Report how construct was interpreted, if requested to...
1404 299   100     952 my $repeatedly = $REPETITION_DESCRIPTION_FOR{$postmodifier} // q{};
1405 299 100 100     1061 my $results = $is_listifying && $postmodifier ? 'each match'
    100          
1406             : substr($postmodifier,0,1) eq '?' ? 'any match'
1407             : 'the match'
1408             ;
1409 299 100       749 my $do_something_with = $savemode eq 'neglookahead' ? 'lookahead for anything except'
    100          
1410             : $savemode eq 'poslookahead' ? 'lookahead for'
1411             : 'match'
1412             ;
1413 299 50       523 if ($debug_build) {
1414 0 0       0 _debug_notify( info =>
    0          
    0          
1415             " |",
1416             " |...Treating $construct as:",
1417             " | | $do_something_with the subrule <$subrule> $repeatedly",
1418             (defined $arg_desc ? " | | passing the args: ($arg_desc)"
1419             : ()
1420             ),
1421             ( $is_noncapturing ? " | \\ but don't save anything"
1422             : $is_listifying ? " | \\ appending $results to \$MATCH{$alias}"
1423             : " | \\ saving $results in \$MATCH{$alias}"
1424             ),
1425             );
1426             }
1427              
1428             # Generate post-match result-capturing code, if match captures...
1429 299         602 my ($debug_pre, $debug_post)
1430             = _build_debugging_statements($debug_runtime, $construct);
1431              
1432             # Generate timeout test...
1433 299 50       576 my $timeout_test = $timeout ? q{(??{;Regexp::Grammars::_test_timeout()})} : q{};
1434              
1435             # Translate to standard regex code...
1436 299         1781 return qq{(?:$timeout_test(?{;
1437             local \@Regexp::Grammars::RESULT_STACK = (\@Regexp::Grammars::RESULT_STACK, {'\@'=>{$args}});
1438             $debug_pre})((?&$internal_subrule))(?{;
1439             local \@Regexp::Grammars::RESULT_STACK = (
1440             $save_code
1441             );$debug_post
1442             }))$postmodifier};
1443             }
1444              
1445             sub _translate_rule_def {
1446 260     260   949 my ($type, $qualifier, $name, $callname, $qualname, $body, $objectify, $local_ws, $nocontext)
1447             = @_;
1448 260         846 $qualname =~ s{::}{_88_}gxms;
1449              
1450             # Return object if requested...
1451 260 100       671 my $objectification =
1452             $objectify ? qq{(??{; local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
1453             \$Regexp::Grammars::RESULT_STACK[-1] = '$qualifier$name'->can('new')
1454             ? '$qualifier$name'->new(\$Regexp::Grammars::RESULT_STACK[-1])
1455             : bless \$Regexp::Grammars::RESULT_STACK[-1], '$qualifier$name';
1456             Regexp::Grammars::_debug_non_hash(\$Regexp::Grammars::RESULT_STACK[-1],'$name');
1457             })}
1458             : q{};
1459              
1460             # Each rule or token becomes a DEFINE'd Perl 5.10 named capture...
1461 260 100 100     980 my $implicit_version
1462             = ($callname eq 'ws' || $callname eq 'hk')
1463             ? qq{(?<${callname}__implicit__> $body) }
1464             : qq{};
1465 260         1640 return qq{
1466             (?(DEFINE) $local_ws
1467             (?<$qualname>
1468             (?<$callname>
1469             (?{\@{\$Regexp::Grammars::RESULT_STACK[-1]}{'!','~'}=(\$#{!}, $nocontext);})
1470             (?:$body) $objectification
1471             (?{;\$#{!}=delete(\$Regexp::Grammars::RESULT_STACK[-1]{'!'})//0;
1472             delete(\$Regexp::Grammars::RESULT_STACK[-1]{'\@'});
1473             })
1474             ))
1475             $implicit_version
1476             )
1477             };
1478             }
1479              
1480              
1481             # Locate any valid <...> sequences and replace with native regex code...
1482             sub _translate_subrule_calls {
1483 389     389   1140 my ($source_file, $source_line,
1484             $grammar_name,
1485             $grammar_spec,
1486             $compiletime_debugging_requested,
1487             $runtime_debugging_requested,
1488             $timeout_requested,
1489             $pre_match_debug,
1490             $post_match_debug,
1491             $rule_name,
1492             $subrule_names_ref,
1493             $magic_ws,
1494             ) = @_;
1495              
1496 389 100       1052 my $pretty_rule_name = $rule_name ? ($magic_ws ? '"
    100          
1497             : 'main regex (before first rule)';
1498              
1499             # Remember the preceding construct, so as to implement the +% etc. operators...
1500 389         526 my $prev_construct = q{};
1501 389         481 my $prev_translation = q{};
1502 389         471 my $curr_line_num = 1;
1503              
1504             # Translate all other calls (MAIN GRAMMAR FOR MODULE)...
1505 389         71786 $grammar_spec =~ s{
1506 2357         50338 (?{ $curr_line_num = substr($_, 0, pos) =~ tr/\n//; })
1507             (? (? \s*+) (? (?&SEPLIST_OP) ) (? \s*+) )?
1508             (?
1509             (?
1510             <
1511             (?:
1512             (?
1513             \. \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1514             )
1515             | (?
1516             (? \? | \! ) \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1517             )
1518             | (?
1519             \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1520              
1521             )
1522             | (?
1523             \[ \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1524             )
1525             | (?
1526             (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s*
1527              
1528             )
1529             | (?
1530             \[ (?(?&IDENT)) \s* = \s* (?(?&QUALIDENT)) \s* (?(?&ARGLIST)) \s* \]
1531             )
1532              
1533             | (?
1534             \s* : (?(?&QUALIDENT)) \s*
1535             )
1536             | (?
1537             (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s*
1538             )
1539             | (?
1540             \[ (?(?&IDENT)) \s* = \s* : (?(?&QUALIDENT)) \s* \]
1541             )
1542              
1543             | (?
1544             \. (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1545             )
1546             | (?
1547             (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s*
1548             )
1549             | (?
1550             \[ (?(?&IDENT)) \s* = \s* (?(?&PARENCODE)|(?&PARENS)|(?&LITERAL)) \s* \]
1551             )
1552             | (?
1553             (?(?&HASH)) \s* (?(?&BRACES))? \s*
1554             )
1555             | (?
1556             (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s*
1557             )
1558             | (?
1559             \[ (?(?&IDENT)) \s* = \s* (?(?&HASH)) \s* (?(?&BRACES))? \s* \]
1560             )
1561             | (?
1562             \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1563             | \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1564             )
1565             | (?
1566             (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s*
1567             | (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s*
1568             )
1569             | (?
1570             \[ (?(?&IDENT)) \s* = \s* (? \\ | /) (? [:] (?&QUALIDENT)) \s* \]
1571             | \[ (?(?&IDENT)) \s* = \s* (? \\_ | /) (? (?&QUALIDENT)) \s* \]
1572             )
1573             |
1574             (?
1575             minimize \s* : \s*
1576             )
1577             |
1578             (?
1579             require \s* : \s* (? (?&PARENCODE) ) \s*
1580             )
1581             |
1582             (?
1583             debug \s* : \s* (? run | match | step | try | off | on) \s*
1584             )
1585             |
1586             (?
1587             timeout \s* : \s* (? \d+) \s*
1588             )
1589             |
1590             (?
1591             context \s* : \s*
1592             )
1593             |
1594             (?
1595             nocontext \s* : \s*
1596             )
1597             |
1598             (?
1599             [.][.][.]
1600             | [!][!][!]
1601             | [?][?][?]
1602             )
1603             |
1604             (?
1605             (? error | fatal ) \s*+ : \s*+
1606             )
1607             |
1608             (?
1609             (? log | error | warning | fatal )
1610             \s*+ : \s*+
1611             (? (?&PARENCODE) | .+? )
1612             \s*+
1613             )
1614             )
1615             > (? \s* (?! (?&SEPLIST_OP) ) [?+*][?+]? | )
1616             |
1617             (?
1618             $WS_PATTERN
1619             )
1620             |
1621             (?
1622             (?&SEPLIST_OP) \s* (? \S* )
1623             )
1624             |
1625             (?
1626             \(\?\<\w+\>
1627             )
1628             |
1629             (?
1630             < [^>\n]* [>\n]
1631             )
1632             |
1633             (?
1634             (?
1635             | (?
1636             )
1637             |
1638             (?
1639             (?: \\[^shv]
1640             | (?! (?&PARENCODE) ) (?&PARENS)
1641             | (?&CHARSET)
1642             | \w++
1643             | \|
1644             )
1645             (?&QUANTIFIER)?
1646             )
1647             |
1648             (?
1649             \s++
1650             | \\. (?&QUANTIFIER)?
1651             | \(\?!
1652             | \(\?\# [^)]* \) # (?# -> old style inline comment)
1653             | (?&PARENCODE)
1654             | \# [^\n]*+
1655             | [^][\s()<>#\\]++
1656             )
1657             )
1658              
1659             (?(DEFINE)
1660             (? \*\* | [*+?] [+?]?+ \s* %%?+ | \{ \d+(,\d*)? \} [+?]?+ \s* %%?+ )
1661             (? \( (?:[?] (?: <[=!] | [:>] ))?
1662             (?: \\. | (?&PARENCODE) | (?&PARENS) | (?&CHARSET) | [^][()\\<>]++ )*+
1663             \)
1664             )
1665             (? \{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \} )
1666             (? \(\?\??\{ (?: \\. | (?&BRACES) | [^{}\\]++ )*+ \}\) )
1667             (? \% (?&IDENT) (?: :: (?&IDENT) )* )
1668             (? \[ \^?+ \]?+ (?: \[:\w+:\] | \\. | [^]] )*+ \] )
1669             (? [^\W\d]\w*+ )
1670             (? (?: [^\W\d]\w*+ :: )* [^\W\d]\w*+ )
1671             (? (?&NUMBER) | (?&STRING) | (?&VAR) )
1672             (? [+-]? \d++ (?:\. \d++)? (?:[eE] [+-]? \d++)? )
1673             (? ' [^\\']++ (?: \\. [^\\']++ )* ' )
1674             (? (?&PARENCODE) | \( \s* (?&ARGS)? \s* \) | (?# NOTHING ) )
1675             (? (?&ARG) \s* (?: , \s* (?&ARG) \s* )* ,? )
1676             (? (?&VAR) | (?&KEY) \s* => \s* (?&LITERAL) )
1677             (? : (?&IDENT) )
1678             (? (?&IDENT) | (?&LITERAL) )
1679             (? [*+?][+?]? | \{ \d+,?\d* \} [+?]? )
1680             )
1681             }{
1682 82     82   601997 my $curr_construct = $+{construct};
  82         29536  
  82         187387  
  2275         10337  
1683 2275   100     10195 my $list_marker = $+{list_marker} // q{};
1684 2275 100 100     10114 my $alias = ($+{alias}//'MATCH') eq 'MATCH' ? q{'='} : qq{'$+{alias}'};
1685              
1686             # Determine and remember the necessary translation...
1687 2275         3447 my $curr_translation = do{
1688              
1689             # Translate subrule calls of the form: ...
1690 2275 100 100     54151 if (defined $+{alias_parens_scalar}) {
    50 66        
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    0          
    0          
1691 84 100       476 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1692             _translate_subpattern(
1693             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1694 84         308 $compiletime_debugging_requested,
1695             $runtime_debugging_requested, $timeout_requested,
1696             );
1697             }
1698             elsif (defined $+{alias_parens_scalar_nocap}) {
1699 0 0       0 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1700             _translate_subpattern(
1701             $curr_construct, $alias, $pattern, 'noncapturing', $+{modifier},
1702 0         0 $compiletime_debugging_requested,
1703             $runtime_debugging_requested, $timeout_requested,
1704             );
1705             }
1706             elsif (defined $+{alias_parens_list}) {
1707 16 50       94 my $pattern = substr($+{pattern},0,1) eq '(' ? $+{pattern} : "(?{$+{pattern}})";
1708             _translate_subpattern(
1709             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1710 16         73 $compiletime_debugging_requested,
1711             $runtime_debugging_requested, $timeout_requested,
1712             );
1713             }
1714              
1715             # Translate subrule calls of the form: ...
1716             elsif (defined $+{alias_hash_scalar}) {
1717             _translate_hashmatch(
1718             $curr_construct, $alias, $+{varname}, $+{keypat}, 'scalar', $+{modifier},
1719 7         29 $compiletime_debugging_requested,
1720             $runtime_debugging_requested,
1721             $timeout_requested,
1722             );
1723             }
1724             elsif (defined $+{alias_hash_scalar_nocap}) {
1725             _translate_hashmatch(
1726             $curr_construct, $alias, $+{varname}, $+{keypat}, 'noncapturing', $+{modifier},
1727 4         20 $compiletime_debugging_requested,
1728             $runtime_debugging_requested,
1729             $timeout_requested,
1730             );
1731             }
1732             elsif (defined $+{alias_hash_list}) {
1733             _translate_hashmatch(
1734             $curr_construct, $alias, $+{varname}, $+{keypat}, 'list', $+{modifier},
1735 3         16 $compiletime_debugging_requested,
1736             $runtime_debugging_requested,
1737             $timeout_requested,
1738             );
1739             }
1740              
1741             # Translate subrule calls of the form: ...
1742             elsif (defined $+{alias_subrule_scalar}) {
1743             _translate_subrule_call(
1744             $source_line, $source_file,
1745             $pretty_rule_name,
1746             $grammar_name,
1747             $curr_construct, $alias, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1748 22         106 $compiletime_debugging_requested,
1749             $runtime_debugging_requested,
1750             $timeout_requested,
1751             $subrule_names_ref,
1752             );
1753             }
1754             elsif (defined $+{alias_subrule_list}) {
1755             _translate_subrule_call(
1756             $source_line, $source_file,
1757             $pretty_rule_name,
1758             $grammar_name,
1759             $curr_construct, $alias, $+{subrule}, $+{args}, 'list', $+{modifier},
1760 30         138 $compiletime_debugging_requested,
1761             $runtime_debugging_requested,
1762             $timeout_requested,
1763             $subrule_names_ref,
1764             );
1765             }
1766              
1767             # Translate subrule calls of the form: and ...
1768             elsif (defined $+{self_subrule_lookahead}) {
1769              
1770             # Determine type of lookahead, and work around capture problem...
1771 2         5 my ($type, $pre, $post) = ( 'neglookahead', '(?!(?!)|', ')' );
1772 2 100       7 if ($+{sign} eq '?') {
1773 1         2 $type = 'poslookahead';
1774 1         2 $pre x= 2;
1775 1         2 $post x= 2;
1776             }
1777              
1778             $pre . _translate_subrule_call(
1779             $source_line, $source_file,
1780             $pretty_rule_name,
1781             $grammar_name,
1782 2         11 $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, $type, q{},
1783             $compiletime_debugging_requested,
1784             $runtime_debugging_requested,
1785             $timeout_requested,
1786             $subrule_names_ref,
1787             )
1788             . $post;
1789             }
1790             elsif (defined $+{self_subrule_scalar_nocap}) {
1791             _translate_subrule_call(
1792             $source_line, $source_file,
1793             $pretty_rule_name,
1794             $grammar_name,
1795             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'noncapturing', $+{modifier},
1796 8         54 $compiletime_debugging_requested,
1797             $runtime_debugging_requested,
1798             $timeout_requested,
1799             $subrule_names_ref,
1800             );
1801             }
1802             elsif (defined $+{self_subrule_scalar}) {
1803             _translate_subrule_call(
1804             $source_line, $source_file,
1805             $pretty_rule_name,
1806             $grammar_name,
1807             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'scalar', $+{modifier},
1808 175         1210 $compiletime_debugging_requested,
1809             $runtime_debugging_requested,
1810             $timeout_requested,
1811             $subrule_names_ref,
1812             );
1813             }
1814             elsif (defined $+{self_subrule_list}) {
1815             _translate_subrule_call(
1816             $source_line, $source_file,
1817             $pretty_rule_name,
1818             $grammar_name,
1819             $curr_construct, qq{'$+{subrule}'}, $+{subrule}, $+{args}, 'list', $+{modifier},
1820 62         415 $compiletime_debugging_requested,
1821             $runtime_debugging_requested,
1822             $timeout_requested,
1823             $subrule_names_ref,
1824             );
1825             }
1826              
1827             # Translate subrule calls of the form: ...
1828             elsif (defined $+{alias_argrule_scalar}) {
1829 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1830             _translate_subpattern(
1831             $curr_construct, $alias, $pattern, 'scalar', $+{modifier},
1832 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1833             "in \$ARG{'$+{subrule}'}"
1834             );
1835             }
1836             elsif (defined $+{alias_argrule_list}) {
1837 0         0 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1838             _translate_subpattern(
1839             $curr_construct, $alias, $pattern, 'list', $+{modifier},
1840 0         0 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1841             "in \$ARG{'$+{subrule}'}"
1842             );
1843             }
1844              
1845             # Translate subrule calls of the form: <:ARGNAME>...
1846             elsif (defined $+{self_argrule_scalar}) {
1847 1         4 my $pattern = qq{(??{;\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}{'$+{subrule}'} // '(?!)'})};
1848             _translate_subpattern(
1849             $curr_construct, qq{'$+{subrule}'}, $pattern, 'noncapturing', $+{modifier},
1850 1         7 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1851             "in \$ARG{'$+{subrule}'}"
1852             );
1853             }
1854              
1855             # Translate subrule calls of the form: <\IDENT> or ...
1856             elsif (defined $+{backref} || $+{alias_backref} || $+{alias_backref_list}) {
1857             # Use "%ARGS" if subrule names starts with a colon...
1858 9         33 my $subrule = $+{subrule};
1859 9 100       27 if (substr($subrule,0,1) eq ':') {
1860 3         6 substr($subrule,0,1,"\@'}{'");
1861             }
1862              
1863 9         25 my $backref = qq{\$Regexp::Grammars::RESULT_STACK[-1]{'$subrule'}};
1864 9 100 100     65 my $quoter = $+{slash} eq '\\' || $+{slash} eq '\\_'
1865             ? "quotemeta($backref)"
1866             : "Regexp::Grammars::_invert_delim($backref)"
1867             ;
1868 9         31 my $pattern = qq{ (??{ defined $backref ? $quoter : q{(?!)}})};
1869             my $type = $+{backref} ? 'noncapturing'
1870 9 100       40 : $+{alias_backref} ? 'scalar'
    100          
1871             : 'list'
1872             ;
1873             _translate_subpattern(
1874             $curr_construct, $alias, $pattern, $type, $+{modifier},
1875 9         37 $compiletime_debugging_requested, $runtime_debugging_requested, $timeout_requested,
1876             "in \$MATCH{'$subrule'}"
1877             );
1878             }
1879              
1880             # Translate reportable raw regexes (add debugging support)...
1881             elsif (defined $+{reportable_raw_regex}) {
1882             _translate_raw_regex(
1883 481         1257 $+{reportable_raw_regex}, $compiletime_debugging_requested, $runtime_debugging_requested
1884             );
1885             }
1886              
1887             # Translate too-complex repetition specifications...
1888             elsif (defined $+{complex_repetition}) {
1889 0         0 my ($repetition, $separator) = @+{'complex_repetition', 'complex_separator'};
1890 0         0 my ($metaop) = $repetition =~ m{(%%?)};
1891 0         0 my $quotedop = quotemeta($metaop);
1892 0         0 $separator =~ s/\s+/ /g;
1893 0 0       0 my $problem = $separator =~ /\S/
1894             ? ["The $separator... separator you specified after the $metaop is too complex",
1895             "(Try refactoring it into a single subrule call)",
1896             ]
1897             : ["No separator was specified after the $metaop",
1898             "(Or did you need a $quotedop instead, to match a literal '$metaop'?)",
1899             ];
1900             _debug_notify( fatal =>
1901             "Invalid separation specifier: $metaop",
1902             "at line $curr_line_num of $pretty_rule_name",
1903 0         0 @{$problem},
  0         0  
1904             );
1905 0         0 exit(1);
1906             }
1907              
1908             # Translate non-reportable raw regexes (leave as is)...
1909             elsif (defined $+{raw_regex}) {
1910             # Handle raw % and %%
1911 1345         3859 my $raw_regex = $+{raw_regex};
1912 1345 50       3507 if ($raw_regex =~ / \A %%?+ /x) {
1913 0         0 _debug_notify( fatal =>
1914             "Invalid separation specifier: $&",
1915             "at line $curr_line_num of $pretty_rule_name",
1916             "(Did you forget to put a repetition quantifier before the $&",
1917             " or did you need a " . quotemeta($&) . " instead, to match a literal '$&'?)",
1918             );
1919 0         0 exit(1);
1920             }
1921              
1922             # Handle any other raw regex...
1923             _translate_raw_regex(
1924 1345         2404 $raw_regex, $compiletime_debugging_requested
1925             );
1926             }
1927              
1928             # Translate directives...
1929             elsif (defined $+{require_directive}) {
1930             _translate_require_directive(
1931 0         0 $curr_construct, $+{condition}, $compiletime_debugging_requested
1932             );
1933             }
1934             elsif (defined $+{minimize_directive}) {
1935             _translate_minimize_directive(
1936 3         14 $curr_construct, $+{condition}, $compiletime_debugging_requested
1937             );
1938             }
1939             elsif (defined $+{debug_directive}) {
1940             _translate_debug_directive(
1941 0         0 $curr_construct, $+{cmd}, $compiletime_debugging_requested
1942             );
1943             }
1944             elsif (defined $+{timeout_directive}) {
1945             _translate_timeout_directive(
1946 0         0 $curr_construct, $+{timeout}, $compiletime_debugging_requested
1947             );
1948             }
1949             elsif (defined $+{error_directive}) {
1950             _translate_error_directive(
1951             $curr_construct, $+{error_type}, $+{msg},
1952 8         40 $compiletime_debugging_requested, $rule_name
1953             );
1954             }
1955             elsif (defined $+{autoerror_directive}) {
1956             _translate_error_directive(
1957 7         28 $curr_construct, $+{error_type}, q{},
1958             $compiletime_debugging_requested, $rule_name
1959             );
1960             }
1961             elsif (defined $+{yadaerror_directive}) {
1962             _translate_error_directive(
1963             $curr_construct,
1964 4 50       51 ($+{yadaerror_directive} eq '???' ? 'warning' : 'error'),
1965             q{},
1966             $compiletime_debugging_requested, -$rule_name
1967             );
1968             }
1969             elsif (defined $+{context_directive}) {
1970 0 0       0 if ($compiletime_debugging_requested) {
1971 0         0 _debug_notify( info => " |",
1972             " |...Treating $curr_construct as:",
1973             " | \\ Turn on context-saving for the current rule"
1974             );
1975             }
1976 0         0 q{}; # Remove the directive
1977             }
1978             elsif (defined $+{nocontext_directive}) {
1979 0 0       0 if ($compiletime_debugging_requested) {
1980 0         0 _debug_notify( info => " |",
1981             " |...Treating $curr_construct as:",
1982             " | \\ Turn off context-saving for the current rule"
1983             );
1984             }
1985 0         0 q{}; # Remove the directive
1986             }
1987             elsif (defined $+{ws_directive}) {
1988 4 50       9 if ($compiletime_debugging_requested) {
1989 0         0 _debug_notify( info => " |",
1990             " |...Treating $curr_construct as:",
1991             " | \\ Change whitespace matching for the current rule"
1992             );
1993             }
1994 4         8 $curr_construct;
1995             }
1996              
1997             # Something that looks like a rule call or directive, but isn't...
1998             elsif (defined $+{incomplete_request}) {
1999 0         0 my $request = $+{incomplete_request};
2000 0         0 $request =~ s/\n//g;
2001 0 0       0 if ($request =~ /\A\s*<\s*\Z/) {
2002 0         0 _debug_notify( fatal =>
2003             qq{Invalid < metacharacter near line $curr_line_num of $pretty_rule_name},
2004             qq{If you meant to match a literal '<', use: \\<},
2005             );
2006             }
2007             else {
2008 0         0 _debug_notify( fatal =>
2009             qq{Possible failed attempt to specify},
2010             qq{a subrule call or directive: $request},
2011             qq{near line $curr_line_num of $pretty_rule_name},
2012             qq{If you meant to match literally, use: \\$request},
2013             );
2014             }
2015 0         0 exit(1);
2016             }
2017              
2018             # A quantifier that isn't quantifying anything...
2019             elsif (defined $+{loose_quantifier}) {
2020 0         0 my $quant = $+{loose_quantifier};
2021 0         0 $quant =~ s{^\s+}{};
2022 0         0 my $literal = quotemeta($quant);
2023 0         0 _debug_notify( fatal =>
2024             qq{Quantifier that doesn't quantify anything: $quant},
2025             qq{at line $curr_line_num in declaration of $pretty_rule_name},
2026             qq{(Did you mean to match literally? If so, try: $literal)},
2027             q{},
2028             );
2029 0         0 exit(1);
2030             }
2031              
2032             # There shouldn't be any other possibility...
2033             else {
2034 0         0 die qq{Internal error: this shouldn't happen!\n},
2035             qq{Near '$curr_construct' at $curr_line_num of $pretty_rule_name\n};
2036             }
2037             };
2038              
2039             # Handle the **/*%/*%%/+%/{n,m}%/etc operators...
2040 2275 100       8657 if ($list_marker) {
2041 73 100       416 my $ws = $magic_ws ? $+{ws1} . $+{ws2} : q{};
2042 73         271 my $op = $+{op};
2043              
2044 73         239 $curr_translation = _translate_separated_list(
2045             $prev_construct, $op, $curr_construct,
2046             $prev_translation, $curr_translation, $ws,
2047             $compiletime_debugging_requested,
2048             $runtime_debugging_requested, $timeout_requested,
2049             );
2050 73         209 $curr_construct = qq{$prev_construct $op $curr_construct};
2051             }
2052              
2053             # Finally, remember this latest translation, and return it...
2054 2275         3134 $prev_construct = $curr_construct;
2055 2275         11495 $prev_translation = $curr_translation;;
2056             }exmsg;
2057              
2058             # Translate magic hash accesses...
2059 389         2278 $grammar_spec =~ s{\$(?:\:\:)?MATCH (?= \s*\{) }
2060 389         816 {\$Regexp::Grammars::RESULT_STACK[-1]}xmsg;
2061             $grammar_spec =~ s{\$(?:\:\:)?ARG (?= \s*\{) }
2062             {\$Regexp::Grammars::RESULT_STACK[-1]{'\@'}}xmsg;
2063 389         1389  
2064             # Translate magic scalars and hashes...
2065             state $translate_scalar = {
2066             q{%$MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2067             q{@$MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2068             q{$MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2069             q{%MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2070             q{$CAPTURE} => q{$^N},
2071             q{$CONTEXT} => q{$^N},
2072             q{$DEBUG} => q{$Regexp::Grammars::DEBUG},
2073             q{$INDEX} => q{${\\pos()}},
2074             q{%ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2075              
2076             q{%$::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2077             q{@$::MATCH} => q{@{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}}},
2078             q{$::MATCH} => q{$Regexp::Grammars::RESULT_STACK[-1]{q{=}}},
2079             q{%::MATCH} => q{%{$Regexp::Grammars::RESULT_STACK[-1]}},
2080             q{$::CAPTURE} => q{$^N},
2081             q{$::CONTEXT} => q{$^N},
2082             q{$::DEBUG} => q{$Regexp::Grammars::DEBUG},
2083             q{$::INDEX} => q{${\\pos()}},
2084             q{%::ARG} => q{%{$Regexp::Grammars::RESULT_STACK[-1]{'@'}}},
2085              
2086 1440         2129 };
2087 4377         4859 state $translatable_scalar
2088 389         527 = join '|', map {quotemeta $_}
  80         645  
2089             sort {length $b <=> length $a}
2090 389         9988 keys %{$translate_scalar};
2091              
2092 389         1710 $grammar_spec =~ s{ ($translatable_scalar) (?! \s* (?: \[ | \{) ) }
2093             {$translate_scalar->{$1}}oxmsg;
2094              
2095             return $grammar_spec;
2096             }
2097 0     0   0  
2098             # Generate a "decimal timestamp" and insert in a template...
2099             sub _timestamp {
2100 0 0       0 my ($template) = @_;
2101 0         0  
2102 0         0 # Generate and insert any timestamp...
  0         0  
2103 0         0 if ($template =~ /%t/) {
2104             my ($sec, $min, $hour, $day, $mon, $year) = localtime;
2105 0         0 $mon++; $year+=1900;
2106             my $timestamp = sprintf("%04d%02d%02d.%02d%02d%02d",
2107             $year, $mon, $day, $hour, $min, $sec);
2108 0         0 $template =~ s{%t}{$timestamp}xms;;
2109             }
2110              
2111             return $template;
2112             }
2113 0     0   0  
2114 0         0 # Open (or re-open) the requested log file...
2115 0         0 sub _autoflush {
2116 0         0 my ($fh) = @_;
2117             my $originally_selected = select $fh;
2118             $|=1;
2119             select $originally_selected;
2120 3778     3778   620606 }
2121 3778   50     6478  
2122             sub _open_log {
2123             my ($mode, $filename, $from_where) = @_;
2124 3778 50       6123 $from_where //= q{};
    0          
2125 3778         47114  
2126             # Special case: '-' --> STDERR
2127             if ($filename eq q{-}) {
2128             return *STDERR{IO};
2129 0         0 }
2130 0         0 # Otherwise, just open the named file...
2131             elsif (open my $fh, $mode, $filename) {
2132             _autoflush($fh);
2133             return $fh;
2134 0         0 }
2135 0 0       0 # Otherwise, generate a warning and default to STDERR...
2136             else {
2137             local *Regexp::Grammars::LOGFILE = *STDERR{IO};
2138             _debug_notify( warn =>
2139             qq{Unable to open log file '$filename'},
2140             ($from_where ? $from_where : ()),
2141             qq{($!)},
2142 0         0 qq{Defaulting to STDERR instead.},
2143             q{},
2144             );
2145             return *STDERR{IO};
2146             }
2147 1087     1087   7175 }
2148 1087         1456  
2149 1087         1375 sub _invert_delim {
2150 1087         15269 my ($delim) = @_;
2151             $delim = reverse $delim;
2152             $delim =~ tr/<>[]{}()??`'/><][}{)(??'`/;
2153             return quotemeta $delim;
2154             }
2155              
2156             # Regex to detect if other regexes contain a grammar specification...
2157             my $GRAMMAR_DIRECTIVE
2158             = qr{ < grammar: \s* (? $QUALIDENT ) \s* > }xms;
2159              
2160             # Regex to detect if other regexes contain a grammar inheritance...
2161             my $EXTENDS_DIRECTIVE
2162             = qr{ < extends: \s* (? $QUALIDENT ) \s* > }xms;
2163              
2164             # Cache of rule/token names within defined grammars...
2165             my %subrule_names_for;
2166 434     434   770  
2167             # Build list of ancestors for a given grammar...
2168 434 50       878 sub _ancestry_of {
2169             my ($grammar_name) = @_;
2170 82     82   2507  
  82         3786  
  82         741  
2171 434         554 return () if !$grammar_name;
  456         1773  
  434         2869  
2172              
2173             use mro;
2174             return map { substr($_, $CACHE_LEN) } @{mro::get_linear_isa($CACHE.$grammar_name, 'c3')};
2175             }
2176 135     135   370  
2177             # Detect and translate any requested grammar inheritances...
2178             sub _extract_inheritances {
2179             my ($source_line, $source_file, $regex, $compiletime_debugging_requested, $derived_grammar_name) = @_;
2180 135         547  
2181              
2182 13         71 # Detect and remove inheritance requests...
2183 13         28 while ($regex =~ s{$EXTENDS_DIRECTIVE}{}xms) {
2184 13 100       34 # Normalize grammar name and report...
2185 4         11 my $orig_grammar_name = $+{base_grammar_name};
2186             my $grammar_name = $orig_grammar_name;
2187             if ($grammar_name !~ /::/) {
2188 13 50       28 $grammar_name = caller(2).'::'.$grammar_name;
2189 13 50       24 }
2190 0         0  
2191             if (exists $user_defined_grammar{$grammar_name}) {
2192             if ($compiletime_debugging_requested) {
2193             _debug_notify( info =>
2194             "Processing inheritance request for $grammar_name...",
2195             q{},
2196             );
2197 82     82   15850 }
  82         182  
  82         211671  
2198 13         17  
  13         197  
2199             # Specify new relationship...
2200             no strict 'refs';
2201 0         0 push @{$CACHE.$derived_grammar_name.'::ISA'}, $CACHE.$grammar_name;
2202             }
2203             else {
2204             _debug_notify( fatal =>
2205             "Inheritance from unknown grammar requested",
2206             "by directive",
2207 0         0 "in regex grammar declared at $source_file line $source_line",
2208             q{},
2209             );
2210             exit(1);
2211             }
2212 135         344 }
2213              
2214             # Retrieve ancestors (but not self) in C3 dispatch order...
2215 135         312 my (undef, @ancestors) = _ancestry_of($derived_grammar_name);
  18         19  
  18         127  
2216 135         444  
2217             # Extract subrule names and implementations for ancestors...
2218 135         315 my %subrule_names = map { %{$subrule_names_for{$_}} } @ancestors;
  18         57  
2219             $_ = -1 for values %subrule_names;
2220 135         401 my $implementation
2221             = join "\n", map { $user_defined_grammar{$_} } @ancestors;
2222              
2223             return $implementation, \%subrule_names;
2224             }
2225 139     139   268  
2226 139         313 # Transform grammar-augmented regex into pure Perl 5.10 regex...
2227             sub _build_grammar {
2228             my ($grammar_spec) = @_;
2229 139 100       761 $grammar_spec .= q{};
2230 4         1199  
2231             # Check for lack of Regexp::Grammar-y constructs and short-circuit...
2232             if ($grammar_spec !~ m{ < (?: [.?![:%\\/]? [^\W\d]\w* [^>]* | [.?!]{3} ) > }xms) {
2233             return $grammar_spec;
2234 135         688 }
2235 135         549  
2236             # Remember where we parked...
2237             my ($source_file, $source_line) = (caller 1)[1,2];
2238 135         548 $source_line -= $grammar_spec =~ tr/\n//;
2239 135         5953  
2240             # Check for dubious repeated constructs that throw away captures...
2241             my $dubious_line = $source_line;
2242             while ($grammar_spec =~ m{
2243             (.*?)
2244             (
2245             < (?! \[ ) # not <[SUBRULE]>
2246             ( $IDENT (?: = [^>]*)? ) # but or
2247             > \s*
2248             ( # followed by a quantifier...
2249             [+*][?+]? # either symbolic
2250             | \{\d+(?:,\d*)?\}[?+]? # or numeric
2251 0         0 )
2252 0         0 )
2253 0         0 }gxms) {
2254             my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2255             $dubious_line += $prefix =~ tr/\n//;
2256             _debug_notify( warn =>
2257             qq{Repeated subrule <$rule>$qual},
2258             qq{at $source_file line $dubious_line},
2259             qq{will only capture its final match},
2260 0         0 qq{(Did you mean <[$rule]>$qual instead?)},
2261             q{},
2262             );
2263             $dubious_line += $match =~ tr/\n//;
2264 135         352 }
2265 135         1309  
2266             # Check for dubious non-backtracking constructs...
2267             $dubious_line = $source_line;
2268             while (
2269             $grammar_spec =~ m{
2270             (.*?)
2271             (
2272             <
2273             (?! (?:obj)? (?:rule: | token ) )
2274             ( [^>]+ )
2275             >
2276             \s*
2277 2         18 ( [?+*][+] | \{.*\}[+] )
2278 2         4 )
2279 2         5 }gxms) {
2280 2         12 my ($prefix, $match, $rule, $qual) = ($1, $2, $3, $4);
2281             $dubious_line += $prefix =~ tr/\n//;
2282             my $safe_qual = substr($qual,0,-1);
2283             _debug_notify( warn =>
2284             qq{Non-backtracking subrule call <$rule>$qual},
2285             qq{at $source_file line $dubious_line},
2286             qq{may not revert correctly during backtracking.},
2287 2         21 qq{(If grammar does not work, try <$rule>$safe_qual instead)},
2288             q{},
2289             );
2290             $dubious_line += $match =~ tr/\n//;
2291 135         222 }
2292 135         240  
2293 135         219 # Check whether a log file was specified...
2294             my $compiletime_debugging_requested;
2295 135         377 local *Regexp::Grammars::LOGFILE = *Regexp::Grammars::LOGFILE;
2296 135         302 my $logfile = q{-};
2297 0         0  
2298             my $log_where = "for regex grammar defined at $source_file line $source_line";
2299             $grammar_spec =~ s{ ^ [^#]* < logfile: \s* ([^>]+?) \s* > }{
2300 0         0 $logfile = _timestamp($1);
2301 0         0  
2302             # Presence of implies compile-time logging...
2303             $compiletime_debugging_requested = 1;
2304 0         0 *Regexp::Grammars::LOGFILE = _open_log('>',$logfile, $log_where );
2305              
2306             # Delete directive...
2307             q{};
2308 135         4590 }gexms;
2309              
2310             # Look ahead for any run-time debugging or timeout requests...
2311             my $runtime_debugging_requested
2312             = $grammar_spec =~ m{
2313             ^ [^#]*
2314             < debug: \s* (run | match | step | try | on | same ) \s* >
2315 135         286 | \$DEBUG (?! \s* (?: \[ | \{) )
2316             }xms;
2317              
2318             my $timeout_requested
2319             = $grammar_spec =~ m{
2320             ^ [^#]*
2321             < timeout: \s* \d+ \s* >
2322             }xms;
2323              
2324 135 100       479  
2325             # Standard actions set up and clean up any regex debugging...
2326             # Before entire match, set up a stack of attempt records and report...
2327             my $pre_match_debug
2328             = $runtime_debugging_requested
2329             ? qq{(?{; *Regexp::Grammars::LOGFILE
2330             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where');
2331             Regexp::Grammars::_init_try_stack(); })}
2332             : qq{(?{; *Regexp::Grammars::LOGFILE
2333             = Regexp::Grammars::_open_log('>>','$logfile', '$log_where'); })}
2334 135 100       296 ;
2335              
2336             # After entire match, report whether successful or not...
2337             my $post_match_debug
2338             = $runtime_debugging_requested
2339             ? qq{(?{;Regexp::Grammars::_debug_matched(0,\\%/,'',\$^N)})
2340             |(?>(?{;Regexp::Grammars::_debug_handle_failures(0,''); }) (?!))
2341             }
2342             : q{}
2343 135         361 ;
2344              
2345             # Remove comment lines...
2346             $grammar_spec =~ s{^ ([^#\n]*) \s \# [^\n]* }{$1}gxms;
2347              
2348             # Subdivide into rule and token definitions, preparing to process each...
2349             # REWRITE THIS, USING (PROBABLY NEED TO REFACTOR ALL GRAMMARS TO REUSe
2350             # THESE COMPONENTS:
2351             # (? \( \s* (?&PARAMS)? \s* \) | (?# NOTHING ) )
2352             # (? (?&PARAM) \s* (?: , \s* (?&PARAM) \s* )* ,? )
2353 135         10302 # (? (?&VAR) (?: \s* = \s* (?: (?&LITERAL) | (?&PARENCODE) ) )? )
2354             # (? (?&NUMBER) | (?&STRING) | (?&VAR) )
2355             # (? : (?&IDENT) )
2356             my @defns = split m{
2357             (< (obj|)(rule|token) \s*+ :
2358             \s*+ ((?:${IDENT}::)*+) (?: ($IDENT) \s*+ = \s*+ )?+
2359             ($IDENT)
2360             \s* >)
2361             }xms, $grammar_spec;
2362 135         857  
  260         781  
2363 135         582 # Extract up list of names of defined rules/tokens...
2364 135         240 # (Name is every 6th item out of every seven, skipping the first item)
2365             my @subrule_names = @defns[ map { $_ * 7 + 6 } 0 .. ((@defns-1)/7-1) ];
2366             my @defns_copy = @defns[1..$#defns];
2367 135         282 my %subrule_names;
2368 135         188  
2369 135         278 # Build a look-up table of subrule names, checking for duplicates...
2370 260         762 my $defn_line = $source_line + $defns[0] =~ tr/\n//;
2371 260 50       824 my %first_decl_explanation;
2372             for my $subrule_name (@subrule_names) {
2373             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns_copy, 0, 7);
2374             if (++$subrule_names{$subrule_name} > 1) {
2375             _debug_notify( warn =>
2376 0         0 "Redeclaration of <$objectify$type: $subrule_name>",
  0         0  
2377             "at $source_file line $defn_line",
2378             "will be ignored.",
2379             @{ $first_decl_explanation{$subrule_name} },
2380             q{},
2381 260         996 );
2382             }
2383             else {
2384             $first_decl_explanation{$subrule_name} = [
2385             "(Hidden by the earlier declaration of <$objectify$type: $subrule_name>",
2386 260         667 " at $source_file line $defn_line)"
2387             ];
2388             }
2389             $defn_line += ($full_decl.$body) =~ tr/\n//;
2390 135         526 }
2391              
2392             # Add the built-ins...
2393 135         250 @subrule_names{'ws', 'hk', 'matchpos', 'matchline'} = (1) x 4;
2394 135 50       679  
2395 0         0 # An empty main rule will never match anything...
2396             my $main_regex = shift @defns;
2397             if ($main_regex =~ m{\A (?: \s++ | \(\?\# [^)]* \) | \# [^\n]++ )* \z}xms) {
2398             _debug_notify( error =>
2399             "No main regex specified before rule definitions",
2400             "in regex grammar declared at $source_file line $source_line",
2401             "Grammar will never match anything.",
2402             "(Or did you forget a specification?)",
2403             q{},
2404             );
2405 135         241 }
2406 135         226  
2407             # Compile the regex or grammar...
2408             my $regex = q{};
2409             my $grammar_name;
2410 135 100       536 my $is_grammar;
2411              
2412 6         45 # Is this a grammar specification?
2413 6 100       21 if ($main_regex =~ $GRAMMAR_DIRECTIVE) {
2414 4         11 # Normalize grammar name and report...
2415             $grammar_name = $+{grammar_name};
2416 6         10 if ($grammar_name !~ /::/) {
2417             $grammar_name = caller(1) . "::$grammar_name";
2418             }
2419 6         10 $is_grammar = 1;
2420 11         23  
2421             # Add subrule definitions to namespace...
2422             for my $subrule_name (@subrule_names) {
2423             $CACHE{$grammar_name.'::'.$subrule_name} = 1;
2424 129         215 }
2425 129         267 }
2426             else {
2427             state $dummy_grammar_index = 0;
2428             $grammar_name = '______' . $dummy_grammar_index++;
2429 135         356 }
2430              
2431             # Extract any inheritance information...
2432             my ($inherited_rules, $inherited_subrule_names)
2433             = _extract_inheritances(
2434             $source_line, $source_file,
2435             $main_regex,
2436             $compiletime_debugging_requested,
2437             $grammar_name
2438 135         6746 );
2439              
2440             # Remove requests...
2441 135         298 $main_regex =~ s{ $EXTENDS_DIRECTIVE }{}gxms;
2442 135         364  
  135         290  
2443             # Add inherited subrule names to allowed subrule names;
2444             @subrule_names{ keys %{$inherited_subrule_names} }
2445 135         940 = values %{$inherited_subrule_names};
2446              
2447             # Remove comments from top-level grammar...
2448             $main_regex =~ s{
2449             \(\?\# [^)]* \)
2450             | (?
2451             }{}gxms;
2452 135 0       257  
  0         0  
2453 135 50       495 # Remove any top-level nocontext directive...
    100          
2454             # 1 2 3 4
2455             $main_regex =~ s{^( (.*?) (\\*) (\# [^\n]*) )$}{length($3) % 2 ? $1 : $2.substr($3,0,-1)}gexms;
2456             my $nocontext = ($main_regex =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2457             : ($main_regex =~ s{ < context \s* : \s* > }{}gxms) ? 0
2458 135 100       343 : 0;
2459              
2460 6 50       20 # If so, set up to save the grammar...
2461 0         0 if ($is_grammar) {
2462             # Normalize grammar name and report...
2463 6 50       70 if ($grammar_name !~ /::/) {
2464 0         0 $grammar_name = caller(1) . "::$grammar_name";
2465             }
2466             if ($compiletime_debugging_requested) {
2467             _debug_notify( info =>
2468             "Processing definition of grammar $grammar_name...",
2469             q{},
2470             );
2471 6         355 }
2472              
2473             # Remove the grammar directive...
2474             $main_regex =~ s{
2475 6         23 ( $GRAMMAR_DIRECTIVE
  6         20  
2476             | < debug: \s* (run | match | step | try | on | off | same ) \s* >
2477             )
2478 6 50       27 }{$source_line += $1 =~ tr/\n//; q{}}gexms;
2479 0         0  
2480             # Check for anything else in the main regex...
2481             if ($main_regex =~ /\A(\s*)\S/) {
2482             $source_line += $1 =~ tr/\n//;
2483             _debug_notify( warn =>
2484 0         0 "Unexpected item before first subrule specification",
  0         0  
2485             "in definition of ",
2486             "at $source_file line $source_line:",
2487             map({ " $_"} grep /\S/, split "\n", $main_regex),
2488             "(this will be ignored when defining the grammar)",
2489             q{},
2490             );
2491             }
2492              
2493 56         79 # Remember set of valid subrule names...
2494 6         20 $subrule_names_for{$grammar_name}
  38         100  
  56         92  
2495             = {
2496             map({ ($_ => 1) } keys %subrule_names),
2497             map({ ($grammar_name.'::'.$_ => 1) } grep { !/::/ } keys %subrule_names),
2498             };
2499 129 50       303 }
2500 0         0 else { #...not a grammar specification
2501             # Report how main regex was interpreted, if requested to...
2502             if ($compiletime_debugging_requested) {
2503             _debug_notify( info =>
2504             "Processing the main regex before any rule definitions",
2505             );
2506 129         417 }
2507              
2508             # Any actual regex is processed first...
2509             $regex = _translate_subrule_calls(
2510             $source_file, $source_line,
2511             $grammar_name,
2512             $main_regex,
2513             $compiletime_debugging_requested,
2514             $runtime_debugging_requested,
2515             $timeout_requested,
2516             $pre_match_debug,
2517             $post_match_debug,
2518             q{}, # Expected...what?
2519             \%subrule_names,
2520             0, # Whitespace isn't magical
2521 129         482 );
2522              
2523             # Wrap the main regex (to ensure |'s don't segment pre and post commands)...
2524 129 50       372 $regex = "(?:$regex)";
2525 0         0  
2526             # Report how construct was interpreted, if requested to...
2527             if ($compiletime_debugging_requested) {
2528             _debug_notify( q{} =>
2529             q{ |},
2530             q{ \\___End of main regex},
2531             q{},
2532             );
2533             }
2534 135         296 }
2535              
2536             # Update line number...
2537 135         359 $source_line += $main_regex =~ tr/\n//;
2538              
2539 260         864 # Then iterate any following rule definitions...
2540 260   66     1095 while (@defns) {
2541 260         446 # Grab details of each rule defn (as extracted by previous split)...
2542             my ($full_decl, $objectify, $type, $qualifier, $name, $callname, $body) = splice(@defns, 0, 7);
2543             $name //= $callname;
2544 260 50       540 my $qualified_name = $grammar_name.'::'.$callname;
2545 0 0       0  
2546             # Report how construct was interpreted, if requested to...
2547             if ($compiletime_debugging_requested) {
2548             _debug_notify( info =>
2549             "Defining a $type: <$callname>",
2550             " |...Returns: " . ($objectify ? "an object of class '$qualifier$name'" : "a hash"),
2551 260 100       824 );
    100          
2552             }
2553              
2554             my $local_nocontext
2555             = ($body =~ s{ < nocontext \s* : \s* > }{}gxms) ? 1
2556             : ($body =~ s{ < context \s* : \s* > }{}gxms) ? 0
2557 260         762 : $nocontext;
2558              
2559             # Translate any nested <...> constructs...
2560             my $trans_body = _translate_subrule_calls(
2561             $source_file, $source_line,
2562             $grammar_name,
2563             $body,
2564             $compiletime_debugging_requested,
2565             $runtime_debugging_requested,
2566             $timeout_requested,
2567             $pre_match_debug,
2568             $post_match_debug,
2569             $callname, # Expected...what?
2570             \%subrule_names,
2571             $type eq 'rule', # Is whitespace magical?
2572 260 50       637 );
2573 0         0  
2574             # Report how construct was interpreted, if requested to...
2575             if ($compiletime_debugging_requested) {
2576             _debug_notify( q{} =>
2577             q{ |},
2578             q{ \\___End of rule definition},
2579             q{},
2580             );
2581 260         361 }
2582 260         359  
2583             # Make allowance for possible local whitespace definitions...
2584             my $local_ws_defn = q{};
2585 260 100       551 my $local_ws_call = q{(?&ws__implicit__)};
2586              
2587 135         197 # Rules make non-code literal whitespace match textual whitespace...
2588             if ($type eq 'rule') {
2589 135         691 # Implement any local whitespace definition...
2590 4         11 my $first_ws = 1;
2591 4 50       12 WS_DIRECTIVE:
    50          
2592 0         0 while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2593             my $defn = $1;
2594             if ($defn !~ m{\S}xms) {
2595             _debug_notify( warn =>
2596             qq{Ignoring useless empty directive},
2597             qq{in definition of },
2598             qq{near $source_file line $source_line},
2599 0         0 qq{(Did you mean instead?)},
2600             q{},
2601             );
2602 0         0 next WS_DIRECTIVE;
2603             }
2604             elsif (!$first_ws) {
2605             _debug_notify( warn =>
2606             qq{Ignoring useless extra directive},
2607             qq{in definition of },
2608             qq{at $source_file line $source_line},
2609 0         0 qq{(No more than one is permitted per rule!)},
2610             q{},
2611             );
2612 4         6 next WS_DIRECTIVE;
2613             }
2614 4         5 else {
2615 4         5 $first_ws = 0;
2616 4         9 }
2617 4         12 state $ws_counter = 0;
2618             $ws_counter++;
2619             $local_ws_defn = qq{(?<__RG_ws_$ws_counter> $defn)};
2620             $local_ws_call = qq{(?&__RG_ws_$ws_counter)};
2621 135         332 }
2622              
2623             # Implement auto-whitespace...
2624             state $CODE_OR_SPACE = qr{
2625              
2626             # TODO: REWORK THIS INSUFFICENT FIX FOR t/grammar_autospace.t...
2627             #
2628             # (? \(\?: \s++ \) ) # Explicitly walled off space is magic
2629             # |
2630              
2631             (? # These are not magic...
2632             \( \?\?? (?&BRACED) \) # Embedded code blocks
2633             | \s++ # Whitespace followed by...
2634             (?= \| # ...an OR
2635             | \(\?\#\) # ...a null comment
2636             | (?: \) \s* )? \z # ...the end of the rule
2637             | \(\(?\?\&ws\) # ...an explicit ws match
2638             | \(\?\??\{ # ...an embedded code block
2639             | \\[shv] # ...an explicit space match
2640             )
2641             )
2642             |
2643             (? \s++ ) # All other whitespace is magic
2644 135   66     5299  
  1202         55432  
2645             (?(DEFINE) (? \{ (?: \\. | (?&BRACED) | [^{}] )* \} ) )
2646             }xms;
2647 125         504 $trans_body =~ s{($CODE_OR_SPACE)}{ $+{ignorable_space} // $local_ws_call }exmsg;
2648 0         0 }
2649             else {
2650             while ($trans_body =~ s{$WS_PATTERN}{}oxms) {
2651             _debug_notify( warn =>
2652             qq{Ignoring useless directive},
2653             qq{in definition of },
2654             qq{at $source_file line $source_line},
2655             qq{(Did you need to define instead of ?)},
2656             q{},
2657             );
2658 260         1437 }
2659             }
2660              
2661             $regex
2662             .= "\n###############[ $source_file line $source_line ]###############\n"
2663             . _translate_rule_def(
2664             $type, $qualifier, $name, $callname, $qualified_name, $trans_body, $objectify,
2665             $local_ws_defn, $local_nocontext,
2666 260         1020 );
2667              
2668             # Update line number...
2669             $source_line += ($full_decl.$body) =~ tr/\n//;
2670 135         2019 }
2671              
2672             # Insert checkpoints into any user-defined code block...
2673             $regex =~ s{ \( \?\?? \{ \K (?!;) }{
2674             local \@Regexp::Grammars::RESULT_STACK = \@Regexp::Grammars::RESULT_STACK;
2675 135         444 }xmsg;
2676              
2677             # Check for any suspicious left-overs from the start of the regex...
2678 135 100       389 pos $regex = 0;
2679 6         12  
2680 6         583 # If a grammar definition, save grammar and return a placeholder...
2681             if ($is_grammar) {
2682             $user_defined_grammar{$grammar_name} = $regex;
2683             return qq{(?{
2684             warn "Can't match directly against a pure grammar: \n";
2685             })(*COMMIT)(?!)};
2686 129         663 }
2687             # Otherwise, aggregrate the final grammar...
2688             else {
2689             return _complete_regex($regex.$inherited_rules, $pre_match_debug, $post_match_debug, $nocontext);
2690             }
2691 129     129   325 }
2692              
2693 129 100       125282 sub _complete_regex {
2694             my ($regex, $pre_match_debug, $post_match_debug, $nocontext) = @_;
2695              
2696             return $nocontext ? qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE_NC$post_match_debug}
2697             : qq{(?x)$pre_match_debug$PROLOGUE$regex$EPILOGUE$post_match_debug};
2698             }
2699              
2700             1; # Magic true value required at end of module
2701              
2702             __END__