File Coverage

blib/lib/MarpaX/G4/MarpaGen.pm
Criterion Covered Total %
statement 680 978 69.5
branch 301 536 56.1
condition 154 303 50.8
subroutine 60 77 77.9
pod 0 71 0.0
total 1195 1965 60.8


line stmt bran cond sub pod time code
1             # ----------------------------------------------------------------------------------------------------- #
2             # MarpaX::MarpaGen #
3             # #
4             # translate the parsed antlr4 rules to Marpa::R2 syntax. #
5             # #
6             # ----------------------------------------------------------------------------------------------------- #
7              
8             package MarpaX::G4::MarpaGen;
9              
10 2     2   13 use strict;
  2         3  
  2         53  
11 2     2   8 use warnings FATAL => 'all';
  2         5  
  2         52  
12 2     2   8 no warnings 'recursion';
  2         3  
  2         56  
13              
14 2     2   9 use Data::Dumper;
  2         3  
  2         67  
15              
16 2     2   10 use MarpaX::G4::Parser;
  2         2  
  2         63  
17 2     2   10 use MarpaX::G4::Symboltable;
  2         3  
  2         19826  
18              
19             # --------------------------------------------------- #
20             # exposed class methods #
21             # --------------------------------------------------- #
22              
23             sub new
24             {
25 1     1 0 2 my $invocant = shift;
26 1   33     4 my $class = ref($invocant) || $invocant; # Object or class name
27 1         3 my $self = {}; # initiate our handy hashref
28 1         3 bless($self,$class); # make it usable
29              
30 1         5 $self->{symboltable} = undef;
31 1         2 $self->{verbosity} = 0;
32 1         3 $self->{level} = -1;
33 1         3 $self->{outputfilename} = '-';
34 1         3 $self->{outf} = undef;
35 1         2 $self->{rules} = {};
36 1         3 $self->{subrules} = {};
37 1         3 $self->{generationoptions} = {};
38              
39 1         3 return $self;
40             }
41              
42 0 0   0 0 0 sub setVerbosity { my ($self, $verbosity) = @_; $self->{verbosity} = defined($verbosity) ? $verbosity : 0; }
  0         0  
43 48     48 0 63 sub symboltable { my ($self) = @_; return $self->{symboltable}; }
  48         65  
44 1     1 0 4 sub setoutputfile { my ($self, $outputfilename) = @_; $self->{outputfilename} = $outputfilename; }
  1         2  
45              
46 1     1 0 3 sub fragment2class { my ($self) = @_; $self->{generationoptions}{fragment2class} = 'true'; }
  1         3  
47 0     0 0 0 sub embedactions { my ($self) = @_; $self->{generationoptions}{embedactions} = 'true'; }
  0         0  
48 0     0 0 0 sub stripactions { my ($self) = @_; $self->{generationoptions}{stripactions} = 'true'; }
  0         0  
49 0     0 0 0 sub stripallcomments { my ($self) = @_; $self->{generationoptions}{stripallcomments} = 'true'; }
  0         0  
50 1     1 0 2 sub matchcaseinsensitive { my ($self) = @_; $self->{generationoptions}{matchcaseinsensitive} = 'true'; }
  1         3  
51 0     0 0 0 sub shiftlazytogreedy { my ($self) = @_; $self->{generationoptions}{shiftlazytogreedy} = 'true'; }
  0         0  
52 1     1 0 2 sub buildkeywords { my ($self) = @_; $self->{generationoptions}{buildkeywords} = 'true'; }
  1         3  
53              
54             sub testoption
55             {
56 189     189 0 266 my ( $self, $option ) = @_;
57              
58 189 50       278 return undef if !defined $option;
59 189         246 $option = lc $option;
60 189 50       294 return undef if !exists $self->{generationoptions}{$option};
61              
62 189         555 return $self->{generationoptions}{$option};
63             }
64              
65             sub dumpStructure
66             {
67 0     0 0 0 my ( $self, $title, $structure ) = @_;
68              
69 0         0 printf "=== %s\n", $title;
70 0         0 $Data::Dumper::Indent = 1;
71 0         0 print Dumper($structure);
72             }
73              
74             sub abortWithError
75             {
76 0     0 0 0 my ( $self, $msg, $structure ) = @_;
77              
78 0         0 $self->dumpStructure($msg, $structure);
79 0         0 die "aborting on unrecoverable error";
80             }
81              
82             sub mydie
83             {
84 0     0 0 0 my ( $msg ) = @_;
85 0         0 die "${msg}";
86             }
87              
88             sub addMarpaRule
89             {
90 32     32 0 58 my ( $self, $rulename, $marparule ) = @_;
91              
92 32         35 my $rules = $self->{rules};
93 32 50       47 mydie "INTERNAL: rule $rulename defined more than once" if exists $rules->{$rulename};
94 32         66 $rules->{$rulename} = $marparule;
95             }
96              
97             ##
98             # deleteMarpaRule : delete '$rulename'
99             ##
100             sub deleteMarpaRule
101             {
102 0     0 0 0 my ( $self, $rulename ) = @_;
103              
104 0         0 my $rules = $self->{rules};
105              
106 0 0       0 mydie "INTERNAL: can't delete undefined rule $rulename" if !exists $rules->{$rulename};
107              
108 0         0 $self->deleteAllSubrules($rulename);
109 0         0 $rules->{$rulename} = { deletedafterconversiontoclass => 'true' };
110             }
111              
112             ##
113             # deleteAllSubrules : delete all subrules of '$rulename'
114             ##
115             sub deleteAllSubrules
116             {
117 1     1 0 3 my ($self, $rulename ) = @_;
118              
119 1         3 my $subrules = $self->{subrules};
120 1         2 my $rules = $self->{rules};
121              
122 1 50       3 return if !exists $subrules->{$rulename};
123              
124 0         0 for my $subrulename (@{$subrules->{$rulename}})
  0         0  
125             {
126 0         0 $rules->{$subrulename} = { deletedafterconversiontoclass => 'true' };
127             }
128             }
129              
130             ##
131             # deleteSingleSubrule : delete 'subrulename' if it is a subrule of 'rulename'
132             ##
133             sub deleteSingleSubrule
134             {
135 0     0 0 0 my ( $self, $rulename, $subrulename ) = @_;
136              
137 0 0       0 return if !exists $self->{subrules}{$rulename};
138              
139 0         0 my $subrules = $self->{subrules}{$rulename};
140              
141             ##
142             # remove the subrule by creating a new list of subrules
143             ##
144 0         0 my $status = 0;
145 0         0 my @newrules;
146              
147             map {
148 0 0       0 if ( $_ eq $subrulename )
  0         0  
149             {
150 0         0 $status = 1;
151             }
152             else
153             {
154 0         0 push @newrules, $_;
155             }
156             } @$subrules;
157              
158 0         0 $self->{subrules}{$rulename} = \@newrules;
159 0 0       0 if ($status)
160             {
161 0         0 my $rules = $self->{rules};
162 0 0       0 mydie "INTERNAL: can't delete undefined rule $subrulename" if !exists $rules->{$subrulename};
163 0         0 $rules->{$subrulename} = { deletedafterconversiontoclass => 'true' }
164             }
165             }
166              
167             sub checkMarpaRuleExists
168             {
169 5     5 0 8 my ( $self, $rulename ) = @_;
170              
171 5         7 my $rules = $self->{rules};
172              
173 5 50       19 return undef if !exists $rules->{$rulename};
174              
175 5         9 return $rules->{$rulename};
176             }
177              
178             sub getMarpaRule
179             {
180 96     96 0 136 my ( $self, $parent, $rulename ) = @_;
181              
182 96         110 my $rules = $self->{rules};
183              
184 96 50       152 if (!exists $rules->{$rulename})
185             {
186 0 0       0 $parent = "" if !defined $parent;
187 0         0 mydie "INTERNAL: rule $rulename is undefined under parent $parent";
188             }
189              
190 96         128 return $rules->{$rulename};
191             }
192              
193             sub tagMarpaRule
194             {
195 32     32 0 46 my ( $self, $rulename ) = @_;
196              
197 32         40 my $rules = $self->{rules};
198 32 50       54 mydie "INTERNAL: rule $rulename is undefined" if !exists $rules->{$rulename};
199 32         49 $rules->{$rulename}{status} = 'done';
200             }
201              
202             sub isSubrule
203             {
204 5     5 0 9 my ( $self, $rulename, $subrulename ) = @_;
205              
206 5         7 my $subrules = $self->{subrules};
207              
208 5 50       10 return 0 if !exists $subrules->{$rulename};
209              
210 5         6 for my $sr (@{$subrules->{$rulename}})
  5         12  
211             {
212 9 100       21 return 1 if $sr eq $subrulename;
213             }
214              
215 0         0 return 0;
216             }
217              
218             # --------------------------------------------------- #
219             # grammar generator #
220             # --------------------------------------------------- #
221              
222             sub enterLevel
223             {
224 14     14 0 17 my ($self, $rulename, $context) = @_;
225              
226 14 50       29 my $level = $context->{level} if exists $context->{level};
227 14 50       22 $level = -1 if !defined $level;
228 14         16 ++$level;
229 14         18 $context->{level} = $level;
230             }
231              
232             sub exitLevel
233             {
234 14     14 0 26 my ($self, $rulename, $context) = @_;
235              
236 14         21 my $level = $context->{level};
237 14         15 --$level;
238 14         19 $context->{level} = $level;
239             }
240              
241             sub retrieveModifier
242             {
243 360     360 0 436 my ($token, $tag) = @_;
244              
245 360 50       556 return undef if ref $token ne 'HASH';
246              
247 360         368 my $modifier = undef;
248 360 100       501 $modifier = $token->{$tag} if exists $token->{$tag};
249              
250 360         487 return $modifier;
251             }
252              
253             sub negateElement
254             {
255 1     1 0 4 my ($self, $rulename, $mxtoken, $context) = @_;
256              
257             # if ($rulename =~ /NATIONAL_CHAR_STRING_LIT/)
258             # {
259             # printf "found!\n";
260             # }
261              
262 1 50       4 mydie "INTERNAL: negated element is not a hash in rule $rulename" if ref $mxtoken ne 'HASH';
263              
264 1         2 my $mxelement;
265              
266 1 50       4 if (! exists $mxtoken->{class4list} )
267             {
268 0         0 mydie "INTERNAL: can't negate missing class4list in rule $rulename";
269             ##
270             # TODO: verify if we could continue by mapping this error to 'unsupported'
271             # so that we could process the complete input
272             ##
273 0         0 $mxelement = { type => 'unsupported', msg => "can't negate non-class token in $rulename" };
274             }
275              
276 1         3 my $class4list = $mxtoken->{class4list};
277 1         5 $mxelement = { type => 'negatedclass', value => "^${class4list}" };
278              
279 1 50       4 $mxelement->{grammarstate} = $mxtoken->{grammarstate} if exists $mxtoken->{grammarstate};
280 1 50       4 $mxelement->{isFragmentOrChild} = $mxtoken->{isFragmentOrChild} if exists $mxtoken->{isFragmentOrChild};
281              
282             ##
283             # delete the negated subrule since it will be replaced with a new subrule.
284             ##
285 1 0 33     4 if (exists $mxtoken->{rhs} && exists $mxtoken->{rhs}{token})
286             {
287 0         0 my $token = $mxtoken->{rhs}{token};
288 0 0       0 if (ref $token eq "")
289             {
290 0         0 $self->deleteSingleSubrule($rulename, $token);
291             }
292             }
293              
294 1         3 return $mxelement;
295             }
296              
297             sub translateLiteralCase
298             {
299 0     0 0 0 my ($string, $case) = @_;
300              
301 0 0       0 return $case =~ /^U/i ? uc $string : lc $string;
302             }
303              
304             ##
305             # generatematchcaseinsensitive : make a literal (somewhat) case-insensitive by creating a subrule with 2 match alternatives :
306             #
307             # - all uppercase
308             # - characterclass with the 1st character in upper/lower case, the rest all lowercase
309             #
310             # if the literal is only a single letter, we only retain the characterclass
311             # if the first character is not a letter, we don't generate a class.
312             #
313             ## --------------------------------------------------------------------------------------------------------- ##
314             ## CAVEAT : introduction of the ':i' and ':ic' literal and class modifiers has made this function obsolete. ##
315             ## we retain it as a demo of generated subrules. ##
316             ## --------------------------------------------------------------------------------------------------------- ##
317             sub generateMatchCaseInsensitive
318             {
319 0     0 0 0 my ($tokenvalue) = @_;
320              
321 0         0 my $lctoken = translateLiteralCase($tokenvalue, 'L');
322 0         0 my $uctoken = translateLiteralCase($tokenvalue, 'U');
323 0         0 my $prefix = substr($lctoken, 0, 1);
324 0         0 my $rest = substr($lctoken, 1);
325 0         0 my $lcp = lc $prefix;
326 0         0 my $ucp = uc $prefix;
327 0         0 my $mixedcase = "${lcp}${ucp}";
328              
329 0         0 my $literaltoken;
330              
331 0 0       0 if ($lcp =~ /[A-Z]/i)
332             {
333 0         0 $literaltoken = { type => 'tokengroup',
334             definition => [{token => {type => 'class', value => $mixedcase}}]
335             };
336 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $rest }} if length($rest) > 0;
  0         0  
337 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $uctoken}, alternative=> 'true'} if length($uctoken) > 1;
  0         0  
338             }
339             else
340             {
341 0         0 $literaltoken = { type => 'tokengroup',
342             definition => [{token => {type => 'literal', value => $lctoken}}]
343             };
344 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $uctoken}, alternative=> 'true'} if length($uctoken) > 1;
  0         0  
345             }
346              
347 0         0 return $literaltoken;
348             }
349              
350             sub isSingleChar
351             {
352 21     21 0 36 my ($string) = @_;
353              
354 21 100       73 return 1 if $string =~ /^.$/;
355 4 100       11 return 1 if $string =~ /^\\[^u]$/i;
356 3 50       16 return 1 if $string =~ /^\\u[0-9A-F]{4,4}$/i;
357 3 50       6 return 1 if $string =~ /^\\u\{[0-9A-F]{5,5}\}$/i;
358 3         7 return 0;
359             }
360              
361             sub isKeywordLetter
362             {
363 71     71 0 107 my ($self, $rulename, $rule ) = @_;
364              
365 71 100       188 return 0 if !exists $rule->{class4list};
366 9         17 my $s = $rule->{class4list};
367 9 0 33     29 return 1 if $rulename =~ /^[a-z]$/i && $s =~ /([a-z])([a-z])/i && ( uc "$1" eq "$2" || lc "$1" eq "$2");
      0        
      33        
368 9         21 return 0;
369             }
370              
371             ##
372             # isKeywordFragment : return 1 if '$string' is a characterclass with a pair of lower/upper case letters
373             ##
374             sub isKeywordFragment
375             {
376 60     60 0 96 my ($self, $rulename, $rule ) = @_;
377              
378 60 100       104 return undef if !exists $rule->{class4list};
379 30         41 my $string = $rule->{class4list};
380              
381 30 100 66     100 return lc substr($string, 0, 1) if $string =~ /([a-z])([a-z])/i && ( uc "$1" eq "$2" || lc "$1" eq "$2");
      100        
382 29 100       77 return substr($string, 0, 1) if $string !~ /[a-z]/i;
383 8 100       28 return substr($string, 0, 1) if $string =~ /[0-9]/i;
384              
385 3         4 return undef;
386             }
387              
388             sub convertLiteralToClass
389             {
390 21     21 0 28 my ($literal) = @_;
391              
392 21 100       30 return undef if !isSingleChar($literal);
393              
394 18         33 return "${literal}";
395             }
396              
397             sub convertRangeToClass
398             {
399 0     0 0 0 my ($token) = @_;
400              
401 0         0 my $begr = $token->{begr};
402 0         0 my $endr = $token->{endr};
403              
404 0 0 0     0 return { type => 'unsupported', msg => "can't convert non-literal beg/end range to class" } if $begr->{type} ne 'literal' || $endr->{type} ne 'literal';
405              
406 0         0 $begr = $begr->{value};
407 0         0 $endr = $endr->{value};
408              
409 0 0 0     0 return { type => 'unsupported', msg => "can't convert range (${begr} .. ${endr}) to class" } if !isSingleChar($begr) || !isSingleChar($endr);
410              
411 0         0 my $classtext = "${begr}-${endr}";
412              
413 0         0 return { type => 'class', value => $classtext, isLexeme => 1, class4list => $classtext };
414             }
415              
416             sub mergeClass4List
417             {
418 120     120 0 146 my ($tracker, $mxelement) = @_;
419              
420 120 50       177 mydie "tracker has no 'status'" if !exists $tracker->{status};
421              
422 120 100 100     239 if ( !exists $mxelement->{class4list} || $tracker->{status} == -1 )
423             {
424 90         110 $tracker->{status} = -1;
425             }
426             else
427             {
428 30         54 $tracker->{value} .= $mxelement->{class4list};
429             }
430             }
431              
432             sub mergeKeywordFragment
433             {
434 60     60 0 83 my ($tracker, $mxelement) = @_;
435              
436 60 50       91 mydie "tracker has no 'status'" if !exists $tracker->{status};
437              
438 60 100 100     129 if ( !exists $mxelement->{keywordfragment} || $tracker->{status} == -1 )
439             {
440 45         52 $tracker->{status} = -1;
441             }
442             else
443             {
444 15         23 $tracker->{value} .= $mxelement->{keywordfragment};
445             }
446             }
447              
448             sub generateSubruleName
449             {
450 18     18 0 26 my ($self, $rulename, $cardinality, $context) = @_;
451              
452 18 100       38 $context->{subruleindex}{$rulename}{index} = 0 if !exists $context->{subruleindex}{$rulename}{index};
453              
454 18         23 my $subruleindex = ++$context->{subruleindex}{$rulename}{index};
455 18         51 my $subrulename = sprintf "%s_%03d", $rulename, $subruleindex;
456              
457 18 100 100     46 if ( defined $cardinality && $cardinality eq "?" )
458             {
459 4         9 $subrulename = "opt_" . $subrulename;
460             }
461              
462 18         19 push @{$self->{subrules}{$rulename}}, $subrulename;
  18         40  
463              
464 18         29 return $subrulename;
465             }
466              
467             ##
468             # createSubRule : create a (non-)anonymous subrule to 'rulename'.
469             # create a negated class if the class or token group was negated.
470             #
471             # CAVEAT: remember to reset the 'negation' flag in the caller
472             # to avoid negating twice.
473             ##
474             sub createSubRule
475             {
476 18     18 0 33 my ($self, $rulename, $mxelement, $negation, $cardinality, $context ) = @_;
477              
478 18 50       30 if ( defined $negation )
479             {
480 0         0 $mxelement = $self->negateElement($rulename, $mxelement, $context);
481             }
482              
483 18         29 my $subrulename = $self->generateSubruleName($rulename, $cardinality, $context);
484              
485             ##
486             # propagate grammar fragment states to subrules (i.e. children)
487             ##
488 18 100       46 $mxelement->{grammarstate} = $context->{subruleindex}{$rulename}{grammarstate} if exists $context->{subruleindex}{$rulename}{grammarstate};
489 18 100       37 $mxelement->{isFragmentOrChild} = $context->{subruleindex}{$rulename}{isFragmentOrChild} if exists $context->{subruleindex}{$rulename}{isFragmentOrChild};
490              
491             ##
492             # wrap cardinality "?" into an 'opt_' rule
493             ##
494 18 100 100     39 $mxelement->{cardinality} = $cardinality if defined $cardinality && $cardinality eq "?";
495              
496 18         35 $self->addMarpaRule($subrulename, $mxelement);
497              
498 18         41 my $mxalias = { rule => $subrulename, rhs => { token => $subrulename } };
499              
500             ##
501             # CAVEAT: don't propagate the lexeme tag upwards.
502             # this avoids confusion in G1 rules.
503             ##
504 18 50 66     42 $mxalias->{class4list} = $mxelement->{class4list} if exists $mxelement->{class4list} && !defined $cardinality;
505 18 100       36 $mxalias->{grammarstate} = $context->{subruleindex}{$rulename}{grammarstate} if exists $context->{subruleindex}{$rulename}{grammarstate};
506 18 100       43 $mxalias->{isFragmentOrChild} = $context->{subruleindex}{$rulename}{isFragmentOrChild} if exists $context->{subruleindex}{$rulename}{isFragmentOrChild};
507              
508 18         26 return $mxalias;
509             }
510              
511             ## ------------------------------------------------------
512             # 'walktoken' : process all token flavours
513             ## ------------------------------------------------------
514             sub walktoken
515             {
516 40     40 0 64 my ($self, $rulename, $token, $context) = @_;
517              
518             # if ( $rulename eq "EOF")
519             # {
520             # printf "found!\n";
521             # }
522              
523 40         61 my $alternative = retrieveModifier( $token, 'alternative' );
524 40         54 my $cardinality = retrieveModifier( $token, 'cardinality' );
525              
526 40         52 my $mxToken = {};
527 40         55 my $comments = [];
528              
529 40 50 33     115 if (ref $token eq 'HASH' && exists $token->{comment} && !$self->testoption('stripallcomments'))
      33        
530             {
531 0 0       0 mydie "comment not a plain string in rule $rulename" if ref $token->{comment} ne "";
532 0         0 my $cl = $self->renderComment($rulename, [$token->{comment}]);
533 0         0 push (@$comments, @$cl);
534             }
535              
536 40         53 my $negation = retrieveModifier($token, 'negation');
537              
538             SWITCH:
539             {
540 40 50       44 (ref $token eq "") && do {
  40         67  
541 0         0 $mxToken = $self->processSubRule($token, $context);
542 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
543             # reset 'negation' since it was processed by 'createSubRule'
544 0         0 $negation = undef;
545 0         0 last SWITCH;
546             };
547 40 100 66     149 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'tokengroup') && do {
      100        
548 9         32 $mxToken = $self->walkgroup($rulename, $token, $context);
549 9         18 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
550             # reset 'negation' since it was processed by 'createSubRule'
551 9         12 $negation = undef;
552 9         12 last SWITCH;
553             };
554 31 50 66     115 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'rulegroup') && do {
      66        
555 0         0 $mxToken = $self->walkgroup($rulename, $token, $context);
556             # strip lexeme status off rule groups
557 0         0 delete $mxToken->{isLexeme};
558 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
559             # reset 'negation' since it was processed by 'createSubRule'
560 0         0 $negation = undef;
561 0         0 last SWITCH;
562             };
563 31 100 66     112 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'literal') && do {
      100        
564 21         26 my $tokenvalue = $token->{value};
565 21         70 $mxToken = { type => 'literal', value => $tokenvalue, isLexeme => 1 };
566 21         68 my $class4list = convertLiteralToClass($tokenvalue);
567 21 100       45 $mxToken->{class4list} = $class4list if defined $class4list;
568 21         32 last SWITCH;
569             };
570 10 100 66     40 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'class') && do {
      66        
571 9         27 $mxToken = { type => 'class', value => $token->{value}, isLexeme => 1, class4list => $token->{value} };
572 9         15 last SWITCH;
573             };
574 1 50 33     11 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'regex') && do {
      33        
575 0         0 $mxToken = { type => 'unsupported', msg => "can't convert arbitrary regex to Marpa in $rulename" };
576 0         0 last SWITCH;
577             };
578 1 50 33     9 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'range') && do {
      33        
579 0         0 $mxToken = convertRangeToClass($token);
580 0         0 last SWITCH;
581             };
582 1 50 33     8 (ref $token eq 'HASH' && exists $token->{type} && $token->{type} eq 'value') && do {
      33        
583 0         0 $mxToken = { type => 'unsupported', msg => "can't convert 'value' to Marpa in $rulename" };
584 0         0 last SWITCH;
585             };
586 1 50 33     6 (ref $token eq 'HASH' && exists $token->{token}) && do {
587 1         4 $mxToken = $self->walktoken($rulename, $token->{token}, $context);
588 1         3 last SWITCH;
589             };
590 0 0 0     0 (ref $token eq 'HASH' && exists $token->{action}) && do {
591 0         0 $mxToken = { type => 'ignore', msg => "embedded action in $rulename" };
592 0         0 last SWITCH;
593             };
594 0 0 0     0 (ref $token eq 'HASH' && exists $token->{comment}) && do {
595 0         0 $mxToken = { type => 'ignore', msg => "comment in $rulename" };
596 0         0 last SWITCH;
597             };
598 0         0 do {
599 0         0 $self->abortWithError( "can't process token for rule $rulename", $token );
600 0         0 last SWITCH;
601             };
602             }
603              
604             ##
605             # apply 'negation' to every token type but groups and subrules
606             ##
607 40 100       57 if ( defined $negation )
608             {
609 1         4 $mxToken = $self->negateElement($rulename, $mxToken, $context);
610 1         1 $negation = undef;
611             }
612              
613             ###
614             # create explicit subrule for
615             # - nonterminals
616             # - subgroups
617             # annotated for cardinality
618             ##
619 40 50       85 if ( defined $cardinality )
620             {
621 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, undef, $cardinality, $context );
622              
623             # strip lexeme status off annotated subrules
624 0         0 delete $mxToken->{isLexeme};
625             }
626              
627 40 50       60 $mxToken->{alternative} = 'A' if defined $alternative;
628 40 50 33     72 $mxToken->{comments} = $comments if scalar @$comments > 0 && !$self->testoption('stripallcomments');
629              
630 40         66 return $mxToken;
631             }
632              
633             sub walknonterminal
634             {
635 0     0 0 0 my ( $self, $rulename, $nonterminal, $context ) = @_;
636              
637 0 0       0 mydie "nonterminal is not a hash for rule $rulename" if ref $nonterminal ne 'HASH';
638              
639 0         0 my $mxToken = {};
640              
641             SWITCH:
642             {
643 0 0       0 (exists $nonterminal->{rhs}) && do {
  0         0  
644 0         0 my $rhs = $nonterminal->{rhs};
645 0         0 $mxToken = $self->walktoken($rulename, $rhs, $context);
646 0         0 last SWITCH;
647             };
648 0         0 do {
649 0         0 $self->abortWithError( "can't process nonterminal for rule $rulename", $nonterminal );
650 0         0 last SWITCH;
651             };
652             }
653              
654 0         0 return $mxToken;
655             }
656              
657             ## -------------------------------------------------------------------------------
658             # 'retagSimpleRule' : check if the cardinality can be added to 'mxtoken'.
659             # Conditions :
660             # - '?' is consumed by 'opt_' subrules, so we have to reject it here
661             # - 'mxtoken' must be a subrule of 'rule'
662             # - the parent rule must consist of a single element
663             # (responsibility of the caller to verify)
664             ## -------------------------------------------------------------------------------
665             sub retagSimpleRule
666             {
667 6     6 0 11 my ( $self, $rulename, $mxtoken, $cardinality, $context, $options ) = @_;
668              
669 6 50       15 mydie "retagSimpleRule : token must be a hash in $rulename" if ref $mxtoken ne 'HASH';
670 6 50       10 return 0 if $cardinality eq "?";
671              
672 6 50 33     14 $cardinality =~ s/([+*])\?/$1/ if $cardinality =~ /[+*]\?/ && $self->testoption('shiftlazytogreedy');
673 6 50       10 mydie sprintf "lazy quantifier %s in rule %s not supported by Marpa. use option -g to map to greedy.", $cardinality, $rulename if $cardinality =~ /[+*]\?/;
674              
675 6 100       11 $options = {} if !defined $options;
676              
677 6         9 my $abortatfirstlevel = exists $options->{abortatfirstlevel};
678              
679             SWITCH: {
680 6 100       7 (exists $mxtoken->{rhs}) && do {
  6         14  
681 5         11 my $rhs = $mxtoken->{rhs};
682              
683 5 50       8 mydie "'rhs' must be a hash in $rulename" if ref $rhs ne 'HASH';
684 5 50 33     19 mydie "'token' must be a scalar in 'rhs' in rule $rulename" if !exists $rhs->{token} || ref $rhs->{token} ne "";
685              
686 5 50       12 return 0 if !defined $self->checkMarpaRuleExists($rhs->{token});
687              
688 5         25 my $rc = 0;
689              
690 5 50       11 if ($abortatfirstlevel)
691             {
692 5         6 my $subrulename = $rhs->{token};
693              
694             # don't push cardinality to a rule unless it is a subrule of the parent
695 5 50       12 return 0 if !$self->isSubrule($rulename, $subrulename);
696              
697 5         9 my $rule = $self->getMarpaRule($rulename, $subrulename);
698 5 50       9 if ($cardinality ne "?")
699             {
700 5         11 $rule->{cardinality} = $cardinality;
701             # cardinality-tagged tokens loose 'class4list' status
702 5         6 delete $rule->{class4list};
703 5         9 $rc = 1;
704             }
705             }
706             else
707             {
708 0         0 my $subrulename = $rhs->{token};
709              
710             # don't push cardinality to a rule unless it is a subrule of the parent
711 0 0       0 return 0 if !$self->isSubrule($rulename, $subrulename);
712              
713 0         0 my $rule = $self->getMarpaRule($rulename, $subrulename);
714 0         0 $rc = $self->retagSimpleRule($rulename, $rule, $cardinality, $context, $options);
715 0 0       0 if ($rc)
716             {
717 0 0       0 mydie "can't retag subrule $subrulename twice in rule $rulename" if exists $rule->{cardinality};
718 0         0 $rule->{cardinality} = $cardinality;
719             # cardinality-tagged tokens loose 'class4list' status
720 0         0 delete $rule->{class4list};
721             }
722             }
723 5         15 return $rc;
724 0         0 last SWITCH;
725             };
726 1 50 33     13 (exists $mxtoken->{type} && $mxtoken->{type} eq "group") && do {
727 0 0       0 mydie "group rhs must contain 'list' in $rulename" if !exists $mxtoken->{list};
728 0         0 my $ec = 0;
729 0         0 for my $al (@{$mxtoken->{list}})
  0         0  
730             {
731 0 0 0     0 mydie "alternative list is not an array in rule $rulename" if ref $al ne 'HASH' || !exists $al->{list};
732 0         0 for my $le (@{$al->{list}})
  0         0  
733             {
734 0 0 0     0 mydie "alternative element is not simple in rule $rulename" if exists $le->{type} && $le->{type} !~ /literal|class/;
735 0         0 $ec += 1;
736             }
737             }
738 0         0 return $ec <= 1;
739 0         0 last SWITCH;
740             };
741 1 50 33     7 (exists $mxtoken->{type} && $mxtoken->{type} eq 'literal') && do {
742 0         0 $mxtoken->{cardinality} = $cardinality;
743 0         0 return 1;
744 0         0 last SWITCH;
745             };
746 1 50 33     12 (exists $mxtoken->{type} && $mxtoken->{type} eq "class") && do {
747 1 50       4 mydie "'class' can't be multiplied twice in $rulename" if exists $mxtoken->{cardinality};
748 1         2 $mxtoken->{cardinality} = $cardinality;
749 1         4 return 1;
750 0         0 last SWITCH;
751             };
752 0 0 0     0 (exists $mxtoken->{type} && $mxtoken->{type} eq "negatedclass") && do {
753 0 0       0 mydie "'negatedclass' can't be multiplied twice in $rulename" if exists $mxtoken->{cardinality};
754 0         0 $mxtoken->{cardinality} = $cardinality;
755 0         0 return 1;
756 0         0 last SWITCH;
757             };
758 0         0 do {
759 0         0 $self->abortWithError( "don't know how to analyze element in $rulename", $mxtoken );
760 0         0 last SWITCH;
761             };
762             }
763              
764 0         0 return 0;
765             }
766              
767             ## ------------------------------------------------------
768             # removeCaseEquivalentBranches :
769             # go over the branches from the alternative lists.
770             # if 2 branches
771             # - consist of exactly 1 symbol
772             # - are case-equivalent
773             # then remove one of them.
774             ## ------------------------------------------------------
775             sub removeCaseEquivalentBranches
776             {
777 23     23 0 31 my ($self, $alternativelists) = @_;
778              
779 23 100       44 return $alternativelists if scalar @$alternativelists < 2;
780              
781 6         10 my $filteredlist = [];
782 6         10 my $filtered = 0;
783 6         7 my $literal;
784              
785 6         11 for my $branch (@$alternativelists)
786             {
787 17 50       29 if ( !exists $branch->{list})
788             {
789 0         0 push @$filteredlist, $branch;
790             }
791             else
792             {
793 17         18 my $branchlist = $branch->{list};
794 17 100       26 if (scalar @$branchlist > 1)
795             {
796 5         9 push @$filteredlist, $branch;
797             }
798             else
799             {
800 12         16 my $rhs = $branchlist->[0];
801 12         13 my $matchfound = 0;
802              
803 12 100 100     25 if ( exists $rhs->{type} && $rhs->{type} eq 'literal' )
804             {
805 4         6 my $newliteral = $rhs->{value};
806              
807 4 100       9 if (defined $literal)
808             {
809 2         9 my $tmp1 = uc $literal;
810 2         3 my $tmp2 = uc $newliteral;
811 2   33     8 $matchfound = $tmp1 ne "" && $tmp1 eq $tmp2;
812 2 50       5 $filtered = 1 if $matchfound;
813             }
814             else
815             {
816 2         4 $literal = $newliteral;
817             }
818             }
819              
820 12 50       27 push @$filteredlist, $branch if !$matchfound;
821             }
822             }
823             }
824              
825 6 50       13 return $alternativelists if !$filtered;
826              
827 0         0 return $filteredlist;
828             }
829              
830             ## ------------------------------------------------------
831             # 'processRightSides' converts a single list of tokens some of which
832             # are tagged as 'alternative' into (possibly) multiple lists
833             # each of which is an alternative for a group or rule
834             ## ------------------------------------------------------
835             sub processRightSides
836             {
837 23     23 0 39 my ( $self, $rulename, $rhslist, $context ) = @_;
838              
839             # symbol lists of alternative branches
840 23         28 my $alternativelists = [];
841             # current alternative branch
842 23         31 my $currentlist = [];
843             # number of symbols in current alternative branch
844 23         24 my $alternativelength = 0;
845              
846 23         50 my $metalist = { comments => [], actions => [] };
847              
848             # status tracking for conversion to keyword, lexeme or class
849 23         50 my $class4list = { status => 0, value => "" };
850 23         44 my $class4group = { status => 0, value => "" };
851 23         42 my $groupisLexeme = { status => 0 };
852 23         33 my $keywordfragment = { status => 0 };
853              
854             # if ( $rulename eq "outer_join_sign")
855             # {
856             # printf "found!\n";
857             # }
858              
859 23         38 for my $e (@$rhslist)
860             {
861 60 100 66     249 $e = $e->{rhs} if ref $e eq 'HASH' && exists $e->{rhs} && !exists $e->{token};
      66        
862 60 50 66     177 $e = { token => $e } if ref $e eq 'HASH' && exists $e->{type} && exists $e->{definition};
      66        
863              
864 60         80 my $negation = retrieveModifier($e, 'negation');
865 60         77 my $cardinality = retrieveModifier($e, 'cardinality');
866              
867 60 50 33     184 if (exists $e->{comment} || exists $e->{action})
868             {
869 0 0 0     0 if (exists $e->{comment} && !$self->testoption('stripallcomments'))
870             {
871 0         0 push @{$metalist->{comments}}, $self->renderComment($rulename, [$e->{comment}]);
  0         0  
872 0         0 $metalist->{state} = 1;
873             }
874              
875 0 0 0     0 if (exists $e->{action} && !$self->testoption('stripactions'))
876             {
877 0 0       0 push @{$metalist->{actions}}, $self->testoption('embedactions') ? $e->{action} : $self->renderComment($rulename, [$e->{action}]);
  0         0  
878 0         0 $metalist->{state} = 1;
879             }
880              
881             ##
882             # skip the rest of processing for comment/action tokens
883             ##
884 0         0 next;
885             }
886              
887 60 50 33     154 if (ref $e eq 'HASH' && !exists $e->{token})
888             {
889 0 0 0     0 $self->abortWithError( "generic regex rules not supported in rule $rulename", $e ) if exists $e->{type} && $e->{type} eq "regex";
890 0         0 $self->abortWithError( "INTERNAL: 'e' must be a hash and contain 'token' in rule $rulename", $e );
891             }
892              
893 60         77 my $alternative = retrieveModifier($e, 'alternative');
894              
895 60 100       89 if (defined $alternative)
896             {
897 11 50       21 if (scalar @$currentlist > 0)
898             {
899             ##
900             # close the current alternative list
901             ##
902 11         12 my $newlist;
903              
904             ##
905             # replace a keyword built from lower/upper case character classes by a literal
906             ##
907 11         16 my $keyword = $keywordfragment->{value};
908 11 50 66     22 if ($self->testoption("buildkeywords") && $keywordfragment->{status} == 0 && $keyword =~ /^[a-z]/i)
      66        
909             {
910 0         0 $currentlist = [
911             { type => 'literal', value => $keyword, isLexeme => 1 }
912             ];
913 0         0 $newlist = { list => $currentlist, isLexeme => 1 };
914             }
915             else
916             {
917 11         23 $newlist = { list => $currentlist };
918 11 100       31 $newlist->{class4list} = $class4list->{value} if $class4list->{status} != -1;
919 11 50       22 $newlist->{metalist} = $metalist if exists $metalist->{state};
920             }
921              
922 11         16 push @$alternativelists, $newlist;
923             ##
924             # re-initialize the list state
925             ##
926 11         16 $currentlist = [];
927 11         25 $class4list = { status => 0, value => "" };
928 11         26 $metalist = { comments => [], actions => [] };
929 11         20 $keywordfragment = { status => 0 };
930 11         14 $alternativelength = 0;
931             }
932             }
933             else
934             {
935             ##
936             # a sequence of literals can't be mapped to a class, so we reset class4list
937             ##
938 49 100       72 if ($alternativelength > 1)
939             {
940 12         20 $class4group->{status} = -1;
941 12         16 delete $class4group->{value};
942             }
943             }
944              
945 60         63 my $mxelement;
946              
947 60 100       90 if (ref $e->{token} eq "")
948             {
949             ##
950             # translate a G4 subrule into a Marpa subrule
951             ##
952 21         28 my $symbol = $e->{token};
953 21         42 $mxelement = $self->processSubRule($symbol, $context);
954             }
955             else
956             {
957 39 50 33     127 $self->abortWithError( "can't process group for rule $rulename", $rhslist ) if ref $e ne 'HASH' || !exists $e->{token};
958 39         89 $mxelement = $self->walktoken($rulename, $e->{token}, $context);
959             # ignore embedded comments/actions
960 39 50 66     104 next if exists $mxelement->{type} && $mxelement->{type} eq "ignore";
961              
962 39         76 my $keywordfragment = $self->isKeywordFragment($rulename, $mxelement);
963 39 100       73 $mxelement->{keywordfragment} = $keywordfragment if defined $keywordfragment;
964             }
965              
966             ##
967             # collect embedded comments in the alternative branch state
968             ##
969 60 50       94 if (exists $mxelement->{comments})
970             {
971 0         0 push (@{$metalist->{comments}}, @{$mxelement->{comments}});
  0         0  
  0         0  
972 0         0 $metalist->{state} = 1;
973 0         0 delete $mxelement->{comments};
974 0 0       0 next if scalar keys %$mxelement == 0;
975             }
976              
977             ##
978             # CAVEAT: retrieve 'isLexeme' from $mxelement
979             ##
980 60         78 my $il = retrieveModifier($mxelement, 'isLexeme');
981 60 100       97 $groupisLexeme->{status} = -1 if !defined $il;
982              
983             ##
984             # negation should not come from the token
985             ##
986 60 50       96 mydie "processRightsides : unexpected negation in 'mxelement'" if exists $mxelement->{negation};
987              
988 60 50       83 if ( defined $negation )
989             {
990 0         0 $mxelement = $self->negateElement($rulename, $mxelement, $context);
991             }
992              
993             ##
994             # CAVEAT: use 'cardinality' from $e since $mxtoken
995             # was computed without metainformation.
996             ##
997 60 100       125 if (defined $cardinality)
998             {
999             ##
1000             # if both the parent and the sub rules are simple, we can propagate the cardinality
1001             # to the subrule.
1002             # in all other cases we create a new subrule.
1003             ##
1004 10 100 66     27 if ( scalar @$rhslist <= 1 && $self->retagSimpleRule($rulename, $mxelement, $cardinality, $context) )
1005             {
1006             ##
1007             # if the tagged element is part of an alternative branch (i.e. a branch that has more than one element),
1008             # we have to make it into a subrule
1009             ##
1010 1 50 33     4 if (defined $alternative && scalar @$rhslist > 1)
1011             {
1012 0         0 $mxelement = $self->createSubRule( $rulename, $mxelement, undef, undef, $context );
1013             }
1014             }
1015             else
1016             {
1017             ##
1018             # create a subrule for cardinality-tagged element lists
1019             ##
1020 9         17 $mxelement = $self->createSubRule( $rulename, $mxelement, undef, $cardinality, $context );
1021              
1022             ##
1023             # propagate the cardinality to the new subrule
1024             # ("?" is wrapped into an 'opt_' rule in 'createSubRule', so it was already consumed).
1025             #
1026             # TODO : check if the '!defined $alternative' clause below should be reinstated
1027             ##
1028             # if ( !defined $alternative && $cardinality ne "?" && !$self->retagSimpleRule($rulename, $mxelement, $cardinality, $context, {abortatfirstlevel => 'true'}) )
1029 9 50 66     33 if ( $cardinality ne "?" && !$self->retagSimpleRule($rulename, $mxelement, $cardinality, $context, {abortatfirstlevel => 'true'}) )
1030             {
1031 0 0       0 mydie "INTERNAL: no destination for cardinality found in rule '$rulename'" if scalar @$rhslist > 1;
1032 0 0       0 $mxelement->{cardinality} = $cardinality if $cardinality ne "?";
1033             }
1034             }
1035              
1036             # cardinality-annotated subrules loose lexeme status
1037 10         16 delete $mxelement->{class4list};
1038 10         12 delete $mxelement->{isLexeme};
1039 10         13 $groupisLexeme->{status} = -1;
1040             }
1041              
1042 60         121 mergeClass4List( \%$class4list, $mxelement );
1043 60         108 mergeClass4List( \%$class4group, $mxelement );
1044 60         113 mergeKeywordFragment( \%$keywordfragment, $mxelement );
1045              
1046 60         83 push @$currentlist, $mxelement;
1047 60         64 ++$alternativelength;
1048              
1049             ##
1050             # a list of lexemes is not itself a lexeme (unless declared in the 'lexer' grammar).
1051             ##
1052 60 100       122 $groupisLexeme->{status} = -1 if $alternativelength > 1;
1053             }
1054              
1055             ##
1056             # replace a keyword built from lower/upper case character classes by a literal.
1057             # keyword literals must start with a letter.
1058             ##
1059 23 50 66     37 if ($self->testoption("buildkeywords") && $keywordfragment->{status} == 0 && $keywordfragment->{value} =~ /^[a-z]/i)
      66        
1060             {
1061             $currentlist = [
1062 0         0 { type => 'literal', value => $keywordfragment->{value}, isLexeme => 1 }
1063             ];
1064 0         0 $groupisLexeme->{status} = 0;
1065             }
1066              
1067             ##
1068             # append the current list to the alternative list if it has entries.
1069             ##
1070 23 50       46 if ( scalar @$currentlist > 0)
1071             {
1072 23         39 my $newlist = { list => $currentlist };
1073 23 100       42 $newlist->{class4list} = $class4list->{value} if $class4list->{status} != -1;
1074 23 100       42 $newlist->{isLexeme} = 1 if $groupisLexeme->{status} == 0;
1075 23 50       33 $newlist->{metalist} = $metalist if exists $metalist->{state};
1076 23         39 push @$alternativelists, $newlist;
1077             }
1078              
1079             ##
1080             # remove branches that are case-equivalent
1081             ##
1082 23 50       35 $alternativelists = $self->removeCaseEquivalentBranches($alternativelists) if $self->testoption('matchcaseinsensitive');
1083              
1084 23         53 my $grouplist = { type => 'group', list => $alternativelists };
1085             # single lists of literals can be tagged as lexemes
1086             # regular rules with alternative right sides will be tagged as G1
1087 23 100       43 $grouplist->{class4list} = $class4group->{value} if $class4group->{status} != -1;
1088 23 100       38 $grouplist->{isLexeme} = 1 if $groupisLexeme->{status} == 0;
1089              
1090 23         63 return $grouplist;
1091             }
1092              
1093             ## ------------------------------------------------------
1094             # 'walkgroup' processes the tokens from a parenthesized group
1095             # or from a complete rule
1096             ## ------------------------------------------------------
1097             sub walkgroup
1098             {
1099 9     9 0 18 my ($self, $rulename, $tokengroup, $context) = @_;
1100              
1101 9 50 33     28 $self->abortWithError( "INTERNAL: group is missing 'definition' in rule $rulename", $tokengroup ) if ref $tokengroup ne 'HASH' || !exists $tokengroup->{definition};
1102 9         15 my $definition = $tokengroup->{definition};
1103 9         27 my $grouplist = $self->processRightSides( $rulename, $definition, $context );
1104              
1105 9         14 return $grouplist;
1106             }
1107              
1108             ## ------------------------------------------------------
1109             # 'walkrule' processes the 'rightsides' list of a parsed rule
1110             ## ------------------------------------------------------
1111             sub walkrule
1112             {
1113 23     23 0 37 my ($self, $rulename, $rule, $context) = @_;
1114              
1115             # if ( $rulename eq "outer_join_sign")
1116             # {
1117             # printf "found!\n";
1118             # }
1119              
1120 23         36 my $symboltable = $self->symboltable;
1121 23         46 my $rulestatus = $symboltable->rulestatus($rulename);
1122              
1123             # test if we traversed this node already
1124 23 100       42 if ( defined $rulestatus )
1125             {
1126 9         13 my $mxrule;
1127              
1128             SWITCH: {
1129 9 100       9 ($rulestatus eq "inprogress") && do {$mxrule = $symboltable->rule( $rulename ); last SWITCH; };
  9         14  
  3         7  
  3         6  
1130 6 50       13 ($rulestatus eq "done") && do {$mxrule = $self->getMarpaRule( undef, $rulename ); last SWITCH; };
  6         11  
  6         8  
1131             ($rulestatus eq "synthetic" && !$self->checkMarpaRuleExists($rulename)) && do
1132 0 0 0     0 {
1133 0         0 my $mxrightside = $self->processRightSides( $rulename, $rule->{rightsides}, $context );
1134 0         0 $self->addMarpaRule($rulename, $mxrightside);
1135 0         0 $symboltable->tagrule($rulename, 'done');
1136 0         0 $rulestatus = $symboltable->rulestatus($rulename);
1137 0         0 $mxrule = {};
1138 0         0 last SWITCH;
1139             };
1140 0         0 do { mydie "unexpected rule status $rulestatus in rule $rulename" };
  0         0  
1141             }
1142              
1143 9         27 my $result = { mxrule => $rulename, rhs => { token => $rulename } };
1144 9 100       17 $result->{class4list} = $mxrule->{class4list} if exists $mxrule->{class4list};
1145              
1146 9         17 return $result;
1147             }
1148              
1149 14         31 $self->enterLevel($rulename, $context);
1150              
1151 14         17 my $mxrightside = {};
1152 14         22 my $rightside = $rule->{rightsides};
1153 14 50       23 if (defined $rightside)
1154             {
1155 14 50       28 $self->abortWithError( "rhs is not an array ref in $rulename", $rightside ) if ref $rightside ne 'ARRAY';
1156              
1157 14 100 66     52 $context->{subruleindex}{$rulename}{grammarstate} = $rule->{grammarstate} if exists $rule->{grammarstate} && $rule->{grammarstate} eq "lexer";
1158 14 100 66     36 $context->{subruleindex}{$rulename}{isFragmentOrChild} = 'true' if exists $rule->{type} && $rule->{type} eq "fragment";
1159              
1160             ##
1161             # prevent infinite recursion by tagging any symbol
1162             # before processing begins with 'inprogress'.
1163             # the 'defined($rulestatus)' clause at the top ensures
1164             # that we don't pick up a symbol that is already
1165             # being processed.
1166             ##
1167 14         38 $symboltable->tagrule($rulename, 'inprogress');
1168 14         47 $mxrightside = $self->processRightSides( $rulename, $rightside, $context );
1169 14         31 $symboltable->tagrule($rulename, 'done');
1170             }
1171              
1172 14 100 66     57 $mxrightside->{grammarstate} = $rule->{grammarstate} if exists $rule->{grammarstate} && $rule->{grammarstate} eq "lexer";
1173 14 100 66     40 $mxrightside->{isFragmentOrChild} = 'true' if exists $rule->{type} && $rule->{type} eq "fragment";
1174 14 100       24 if (exists $rule->{redirect})
1175             {
1176 1         2 $mxrightside->{redirected} = 'true';
1177 1         7 push @{$self->{discarded}}, $rulename;
  1         3  
1178             }
1179              
1180 14         33 $self->addMarpaRule($rulename, $mxrightside);
1181              
1182 14         34 $self->exitLevel($rulename, $context);
1183              
1184 14         18 return $mxrightside;
1185             }
1186              
1187             ## ------------------------------------------------------
1188             # 'processSub' processes a reference to a G4 subrule
1189             ## ------------------------------------------------------
1190             sub processSubRule
1191             {
1192 21     21 0 51 my ($self, $rulename, $context) = @_;
1193              
1194 21         30 my $symboltable = $self->symboltable;
1195 21         47 my $rule = $symboltable->rule($rulename);
1196              
1197 21         46 my $mxrule = $self->walkrule( $rulename, $rule, $context );
1198              
1199 21         58 my $result = { rule => $rulename, rhs => { token => $rulename } };
1200              
1201 21         37 my $keywordfragment = $self->isKeywordFragment($rulename, $mxrule);
1202 21 100       37 $result->{keywordfragment} = $keywordfragment if defined $keywordfragment;
1203              
1204 21         39 return $result;
1205             }
1206              
1207             ## ------------------------------------------------------
1208             # 'reportUnusedRules' report all symbol table entries
1209             # that are not referred to by
1210             # parent rules.
1211             ## ------------------------------------------------------
1212             sub printHeaderUnusedRulesReport
1213             {
1214 0     0 0 0 printf "\n";
1215 0         0 printf "WARNING: the rules listed below are orphaned. They can't be reached from the start rule:\n";
1216 0         0 printf "=======:\n\n";
1217 0         0 printf <<'END_OF_SOURCE';
1218             +----------------------------- rule name
1219             +--!----------------------------- Lexical (L) or parser rule
1220             +--!--!----------------------------- Fragment (F) or regular rule
1221             ! ! !
1222             V V V
1223             END_OF_SOURCE
1224             }
1225              
1226             sub reportUnusedRules
1227             {
1228 1     1 0 2 my ($self) = @_;
1229              
1230 1         4 my $symboltable = $self->symboltable;
1231              
1232 1         2 my $status = 0;
1233 1         4 my @symbols = $symboltable->symbols;
1234              
1235 1         7 for my $rulename (sort @symbols)
1236             {
1237 14         21 my $rule = $symboltable->rule($rulename);
1238 14 50       25 if (!exists $rule->{generationstatus})
1239             {
1240 0 0       0 if (!$status)
1241             {
1242 0         0 printHeaderUnusedRulesReport();
1243 0         0 $status = 1;
1244             }
1245              
1246             printf "[%1s][%1s] %s\n",
1247             (exists $rule->{grammarstate} && $rule->{grammarstate} =~ /lexer/i) ? "L" : "",
1248 0 0 0     0 (exists $rule->{isFragmentOrChild}) ? "F" : "",
    0          
1249             $rulename;
1250             }
1251             }
1252              
1253 1 50       4 printf "\n\n" if $status;
1254             }
1255              
1256             ## ------------------------------------------------------
1257             # 'processRedirectRules' define Marpa rules for all
1258             # G4 rules that are tagged 'redirect'
1259             # and are not referred in parent rules
1260             ## ------------------------------------------------------
1261             sub processRedirectRules
1262             {
1263 1     1 0 3 my ($self) = @_;
1264              
1265 1         3 my $symboltable = $self->symboltable;
1266              
1267 1         5 my @symbols = $symboltable->symbols;
1268              
1269 1         10 for my $rulename (sort @symbols)
1270             {
1271 14         26 my $rule = $symboltable->rule($rulename);
1272 14 50 66     37 if (!exists $rule->{generationstatus} && exists $rule->{redirect})
1273             {
1274 1         3 $self->walkrule( $rulename, $rule, {level => 0} );
1275             }
1276             }
1277             }
1278              
1279             ## -------------------------------------------------------------------------------------
1280             # 'translateG4Grammar' create nested Marpa rules for the G4 rules from the symbol table
1281             # by doing a depth-first traversal starting with the grammar's
1282             # start symbol.
1283             # this creates the convex hull of the start symbol collecting all
1284             # symbols that can be reached from the start symbol.
1285             # 'reportUnusedRules' creates a report of all symbols from the
1286             # parse tree that are not part of the convex hull.
1287             ## -------------------------------------------------------------------------------------
1288             sub translateG4Grammar
1289             {
1290 1     1 0 3 my ($self) = @_;
1291              
1292 1         4 my $symboltable = $self->symboltable;
1293              
1294 1         5 my $startrule = $symboltable->startrule;
1295 1         3 my $startrulename = $startrule->{name};
1296 1         5 $startrule = $symboltable->rule($startrulename);
1297              
1298 1 50       3 mydie "INTERNAL: start rule $startrulename is missing 'rightsides'" if !exists $startrule->{rightsides};
1299              
1300 1         8 $self->walkrule( $startrulename, $startrule, {level => 0} );
1301              
1302 1         10 $self->processRedirectRules;
1303 1         6 $self->reportUnusedRules;
1304             }
1305              
1306             ## ================================================== #
1307             # write the generated Marpa rules to the output file #
1308             ## ================================================== #
1309              
1310             sub openOutputFile
1311             {
1312 1     1 0 4 my ($self) = @_;
1313              
1314 1         2 my $outputfile = $self->{outputfilename};
1315              
1316 1 50       4 if ( $outputfile ne "-" )
1317             {
1318 1         2 my $outf;
1319 1 50       166 mydie("cannot open output file $outputfile") unless open( $outf, ">$outputfile" );
1320 1         5 $self->{outf} = $outf;
1321 1         5 $self->{is_stdout} = 0;
1322             }
1323             else
1324             {
1325 0         0 $self->{outf} = *STDOUT;
1326 0         0 $self->{is_stdout} = 1;
1327             }
1328             }
1329              
1330             sub closeOutputFile
1331             {
1332 1     1 0 3 my ($self) = @_;
1333 1 50       693 close($self->{outf}) if $self->{is_stdout};
1334             }
1335              
1336             sub pad
1337             {
1338 50     50 0 98 my ($self, $s ) = @_;
1339              
1340 50 50       78 return undef if !defined $s;
1341              
1342 50         59 my $len = length($s);
1343 50         69 my $pad = $self->{indent} - $len;
1344              
1345 50 100       79 return $s if $pad < 0;
1346              
1347 49         188 return $s . (" " x $pad);
1348             }
1349              
1350             sub renderComment
1351             {
1352 0     0 0 0 my ($self, $rulename, $comments) = @_;
1353              
1354 0 0       0 mydie "INTERNAL : 'comments' is not an array in rule $rulename" if ref $comments ne 'ARRAY';
1355              
1356 0         0 my $result = [];
1357 0         0 for my $comment (@$comments)
1358             {
1359 0         0 $comment =~ s/\/\*(.*)\*\//$1/;
1360 0         0 my @commentlist = split '\n', $comment;
1361              
1362 0         0 for my $cl (@commentlist)
1363             {
1364 0         0 $cl =~ s/^\/\/\s*//;
1365 0         0 $cl =~ s/^#\s*//;
1366 0         0 push @$result, "# ${cl}";
1367             }
1368             }
1369              
1370 0         0 return $result;
1371             }
1372              
1373             sub processRedirected
1374             {
1375 39     39 0 61 my ($self, $token) = @_;
1376              
1377 39 50       54 mydie "processRedirected : 'token' must be a scalar" if ref $token ne "";
1378              
1379 39         54 my $rule = $self->getMarpaRule(undef, $token);
1380              
1381 39 50 33     62 if ($self->testoption('matchcaseinsensitive') && $self->isKeywordLetter($token, $rule))
1382             {
1383 0         0 my $value = $rule->{class4list};
1384 0         0 $value = substr($value, 0, 1);
1385 0         0 return { value => "'${value}':i" };
1386             }
1387              
1388 39 50       59 return { discard => 'true' } if exists $rule->{redirected};
1389              
1390 39         76 return { value => $token };
1391             }
1392              
1393             sub fragmentEligible2Convert
1394             {
1395 28     28 0 36 my ($rule) = @_;
1396              
1397 28 100       74 return 0 if !exists $rule->{class4list};
1398 1         2 my $class4list = $rule->{class4list};
1399              
1400             # replace escaped characters and unicode codepoints with a single character
1401 1         3 $class4list =~ s/\\([^u])/$1/g;
1402 1         2 $class4list =~ s/\\u([0-9a-f]{4,4})/u/ig;
1403              
1404 1         4 return length($class4list) > 1;
1405             }
1406              
1407             sub writeFragmentAsClass
1408             {
1409 1     1 0 3 my ($self, $rulename, $rule, $options ) = @_;
1410              
1411 1         4 my $synthclass = { type => "class", value => $rule->{class4list} };
1412 1         5 my $outputline = $self->printRhs( $rulename, $synthclass, {status => 0, delimiter => '|', assignop => '~'} );
1413 1         7 $self->deleteAllSubrules($rulename);
1414              
1415 1         2 my $outf = $self->{outf};
1416 1         4 printf $outf "%s\n", $outputline;
1417             }
1418              
1419             #
1420             # translateunicode : translate unicodes embedded in literals or classes.
1421             # die if we are in codepages beyond 00
1422             ##
1423             sub translateunicode
1424             {
1425 30     30 0 51 my ($self, $rulename, $string ) = @_;
1426              
1427 30         54 while ( $string =~ /(\\u([0-9A-F]{4,4}))|(\\u\{([0-9A-F]+)\})/i )
1428             {
1429 2         5 my $fourmatch = $2;
1430 2         4 my $bracedmatch = $4;
1431              
1432             SWITCH: {
1433 2 50       3 (defined $fourmatch) && do {
  2         5  
1434 2 50       8 mydie "translateunicode : class string $string not in codepage 00 in $rulename" if $fourmatch !~ /00([0-9A-Z][0-9A-Z])/i;
1435 2         4 my $translatedcode = $1;
1436 2         24 $string =~ s/\\u${fourmatch}/\\x${translatedcode}/i;
1437 2         11 last SWITCH;
1438             };
1439 0 0       0 (defined $bracedmatch) && do {
1440 0 0       0 mydie "translateunicode : class string $string not in codepage 00 in $rulename" if $bracedmatch !~ /0*([0-9A-Z]{2,2})/i;
1441 0         0 my $translatedcode = $1;
1442 0         0 $string =~ s/\\u\{${bracedmatch}\}/\\x${translatedcode}/i;
1443 0         0 last SWITCH;
1444             };
1445 0         0 do {
1446 0         0 mydie "translateunicode : unexpected match";
1447 0         0 last SWITCH;
1448             };
1449             }
1450             }
1451              
1452 30         56 return $string;
1453             }
1454              
1455             #
1456             # normalizeClassString : remove class elements that are redundant when the class is made case-insensitive
1457             # return the modified class text as well as an indicator if the class contains letters
1458             ##
1459             sub normalizeClassString
1460             {
1461 3     3 0 43 my ($self, $classstring ) = @_;
1462              
1463 3         7 my $classhash = {};
1464 3         5 my $characterhash = {};
1465              
1466             # strip [/] from classtring
1467 3         5 $classstring =~ s/^\[([^\]]*)\]$/$1/;
1468              
1469 3         4 my $result = "";
1470 3         5 my $isalphaclass = 0;
1471 3         7 while ( $classstring ne "" )
1472             {
1473             SWITCH: {
1474 12 50       14 ($classstring =~ /(\\[ux][0-9a-f]+)-(\\[ux][0-9a-f]+)(.*)$/i) && do {
  12         20  
1475 0         0 my $beg = $1;
1476 0         0 my $end = $2;
1477 0         0 $result .= "${1}-${2}";
1478 0         0 $classstring = $3;
1479 0         0 last SWITCH;
1480             };
1481 12 50       20 ($classstring =~ /(\\[ux][0-9a-f]{2,4})(.*)$/i) && do {
1482 0         0 my $c = $1;
1483 0         0 $result .= $c;
1484 0         0 $classstring = $2;
1485 0         0 last SWITCH;
1486             };
1487 12 100       24 ($classstring =~ /([^-])-([^-])(.*)$/) && do {
1488 3         6 my $beg = $1;
1489 3         4 my $end = $2;
1490 3         8 my $key = "${beg}-${end}";
1491 3 50 66     12 if (!exists $classhash->{lc $key} && !exists $classhash->{uc $key})
1492             {
1493 2         5 $classhash->{$key} = 1;
1494 2         3 $result .= $key;
1495 2 100 66     11 $isalphaclass = 1 if $beg =~ /[a-z]/i || $end =~ /[a-z]/i;
1496             }
1497 3         6 $classstring = $3;
1498 3         6 last SWITCH;
1499             };
1500 9 100       20 ($classstring =~ /(\\.)(.*)$/) && do {
1501 1         3 my $c = $1;
1502 1 50       3 if (!exists $characterhash->{$c})
1503             {
1504 1         3 $characterhash->{$c} = 1;
1505 1         2 $result .= $c;
1506             }
1507 1         3 $classstring = $2;
1508 1         3 last SWITCH;
1509             };
1510 8 50       21 ($classstring =~ /(.)(.*)$/) && do {
1511 8         11 my $c = $1;
1512 8 100 66     58 if (!exists $characterhash->{lc $c} && !exists $characterhash->{uc $c})
1513             {
1514 7         17 $characterhash->{$c} = 1;
1515 7         9 $result .= $c;
1516 7 100       17 $isalphaclass = 1 if $c =~ /^[a-z]$/i;
1517             }
1518 8         10 $classstring = $2;
1519 8         15 last SWITCH;
1520             };
1521             }
1522             }
1523              
1524 3         11 return { isalphaclass => $isalphaclass, classstring => $result };
1525             }
1526              
1527             sub isAlphaLiteral
1528             {
1529 29     29 0 43 my ($self, $literal ) = @_;
1530              
1531 29 100       94 return 0 if $literal !~ /[a-z]/i;
1532              
1533 8         14 my $isAlpha = 0;
1534              
1535 8         15 while ( $literal ne "" )
1536             {
1537             SWITCH: {
1538 43 100       41 ($literal =~ /(\\.)(.*)$/) && do {
  43         72  
1539 4         9 my $c = $1;
1540 4         6 $literal = $2;
1541 4         7 last SWITCH;
1542             };
1543 39 50       82 ($literal =~ /(.)(.*)$/) && do {
1544 39         57 my $c = $1;
1545 39         49 $literal = $2;
1546 39 100       65 $isAlpha = 1 if $c =~ /^[a-z]$/i;
1547 39         61 last SWITCH;
1548             };
1549             }
1550             }
1551              
1552 8         20 return $isAlpha;
1553             }
1554              
1555             ##
1556             # computeRhs :
1557             ##
1558             sub computeRhs
1559             {
1560 69     69 0 90 my ($self, $rulename, $rhs, $options ) = @_;
1561              
1562             # if ($rulename eq "HEXNUMBER_003")
1563             # {
1564             # printf "found!\n";
1565             # }
1566              
1567             SWITCH:
1568             {
1569 69 100 100     72 (exists $rhs->{type} && $rhs->{type} eq 'negatedclass') && do {
  69         138  
1570 1 50       3 mydie "computeRhs : 'value' missing in negatedclass $rulename" if !exists $rhs->{value};
1571 1         3 my $value = $rhs->{value};
1572 1         3 $value = $self->translateunicode($rulename, $value);
1573 1         4 $rhs = { value => "[${value}]" };
1574 1         2 last SWITCH;
1575             };
1576 68 50       91 (exists $rhs->{deletedafterconversiontoclass}) && do {
1577 0         0 $rhs = { discard => 'true' };
1578 0         0 last SWITCH;
1579             };
1580 68 50 66     127 (exists $rhs->{type} && $rhs->{type} eq 'unsupported') && do {
1581 0         0 mydie $rhs->{msg};
1582 0         0 last SWITCH;
1583             };
1584 68 100 100     157 (exists $rhs->{type} && $rhs->{type} eq 'class') && do {
1585 8         12 my $value = $rhs->{value};
1586 8         13 $value = $self->translateunicode($rulename, $value);
1587 8         11 my $modifier = "";
1588 8 100 66     13 if ($self->testoption('matchcaseinsensitive') && $self->isAlphaLiteral($value))
1589             {
1590 3         9 my $normclass = $self->normalizeClassString($value);
1591 3         6 $value = $normclass->{classstring};
1592 3 50       8 $modifier = ':ic' if $normclass->{isalphaclass};
1593             }
1594 8         24 $rhs = { value => "[${value}]${modifier}" };
1595 8         11 last SWITCH;
1596             };
1597 60 100 66     106 (exists $rhs->{type} && $rhs->{type} eq 'literal') && do {
1598 21         31 my $literal = $rhs->{value};
1599 21         33 $literal = $self->translateunicode($rulename, $literal);
1600 21 50       42 $literal = ($literal eq "\\'") ? "[']" : "'${literal}'";
1601 21 100 66     35 if ($self->testoption('matchcaseinsensitive') && $self->isAlphaLiteral($literal))
1602             {
1603 4         10 $literal = lc $literal;
1604 4         5 $literal .= ':i';
1605             }
1606 21         46 $rhs = { value => $literal };
1607 21         29 last SWITCH;
1608             };
1609 39 50 33     69 (exists $rhs->{type} && $rhs->{type} eq 'action') && do {
1610 0         0 push @{$options->{comments}}, " # Action: " . $rhs->{token}{action};
  0         0  
1611 0         0 $rhs = { discard => 'true' };
1612 0         0 last SWITCH;
1613             };
1614 39 50       60 (exists $rhs->{rhs}) && do {
1615 39         45 $rhs = $rhs->{rhs};
1616 39 50 33     109 mydie "computeRhs: scalar 'token' missing in rhs" if !exists $rhs->{token} || ref $rhs->{token} ne "";
1617 39         57 $rhs = $self->processRedirected($rhs->{token});
1618 39         55 last SWITCH;
1619             };
1620 0 0       0 (exists $rhs->{token}) && do {
1621 0         0 $rhs = $self->processRedirected($rhs->{token});
1622 0         0 last SWITCH;
1623             };
1624 0 0       0 (exists $rhs->{comments}) && do {
1625 0         0 my $cl = $self->renderComment($rulename, $rhs->{comments});
1626 0         0 push (@{$options->{comments}}, @$cl );
  0         0  
1627 0         0 $rhs = { discard => 'true' };
1628 0         0 last SWITCH;
1629             };
1630 0         0 do {
1631 0         0 $self->abortWithError( "computeRhs : don't know how to process rhs for $rulename, modify this rule to make it convertible", $rhs );
1632             };
1633             }
1634              
1635 69         86 return $rhs;
1636             }
1637              
1638             ##
1639             # printRhs : print a single right hand clause (alternative) of a rule.
1640             # depending on $option->{status} format the clause :
1641             # 0 : prefix the rule name and use '::=' or '~' as operator
1642             # 1 : start an alternative branch
1643             # 2 : add a clause to a branch
1644             ##
1645             sub printRhs
1646             {
1647 69     69 0 100 my ( $self, $rulename, $rule, $options ) = @_;
1648              
1649 69 50       99 mydie "printRhs: 'status' not defined for rule $rulename" if !exists $options->{status};
1650              
1651 69         77 my $status = $options->{status};
1652 69         99 my $rhs = $self->computeRhs($rulename, $rule, $options);
1653              
1654 69 50       108 return "" if exists $rhs->{discard};
1655              
1656 69 50 33     182 mydie "'options' not well-defined" if ref $options ne 'HASH' || !exists $options->{delimiter};
1657              
1658 69         81 my $delimiter = $options->{delimiter};
1659              
1660 69         100 my $cardinality = "";
1661 69 100       100 $cardinality = $options->{cardinality} if exists $options->{cardinality};
1662 69 50 33     110 $cardinality =~ s/([+*])\?/$1/ if $cardinality =~ /[+*]\?/ && $self->testoption('shiftlazytogreedy');
1663 69 50       94 mydie sprintf "lazy quantifier %s in rule %s not supported by Marpa", $cardinality, $rulename if $cardinality =~ /[+*]\?/;
1664              
1665             # if ($rulename eq "local_xmlindex_clause_001")
1666             # {
1667             # printf "found!\n";
1668             # }
1669              
1670 69         72 my $definitionop = "::=";
1671 69 100       100 $definitionop = $options->{assignop} if exists $options->{assignop};
1672              
1673 69         70 my $result;
1674              
1675             # process optional rules
1676 69 100 66     159 if ( defined $cardinality && $cardinality eq "?")
1677             {
1678 4         18 $result = sprintf "%s %-3s %s%s\n", $self->pad($rulename), $definitionop, "", "";
1679 4         7 $cardinality = "";
1680             }
1681              
1682             SWITCH:
1683             {
1684 69 100       75 ($status == 0) && do {
  69         97  
1685 32         53 $result .= sprintf "%s %-3s %s%s", $self->pad($rulename), $definitionop, $rhs->{value}, $cardinality;
1686 32         43 last SWITCH;
1687             };
1688 37 100       56 ($status == 1) && do {
1689 11         25 $result .= sprintf "%s %-3s %s%s", $self->pad(""), $delimiter, $rhs->{value}, $cardinality;
1690 11         19 last SWITCH;
1691             };
1692 26 50       38 ($status == 2) && do {
1693 26         60 $result .= sprintf " %s%s", $rhs->{value}, $cardinality;
1694 26         29 last SWITCH;
1695             };
1696 0         0 do {
1697 0         0 mydie "printRhs : illegal status";
1698             };
1699             }
1700              
1701 69         145 return $result;
1702             }
1703              
1704             ##
1705             # writeMarpaRuleList : print all alternative branches of a rule
1706             ##
1707             sub writeMarpaRuleList
1708             {
1709 22     22 0 38 my ( $self, $rulename, $rule, $options ) = @_;
1710              
1711             # if ($rulename eq "SAFECODEPOINT")
1712             # {
1713             # printf "found!\n";
1714             # }
1715              
1716 22         27 $options->{status} = 0;
1717              
1718 22         26 my $outf = $self->{outf};
1719              
1720 22 50       34 mydie "writeMarpaRuleList : 'list' missing from 'rule' in $rulename" if !exists $rule->{list};
1721              
1722 22         26 my $list = $rule->{list};
1723 22         24 my $actiontext;
1724              
1725 22         31 for my $rulelist (@$list)
1726             {
1727 33 50       55 mydie "writeMarpaRuleList : 'list' missing from 'rulelist' in $rulename" if !exists $rulelist->{list};
1728              
1729 33 50       49 if (exists $rulelist->{metalist})
1730             {
1731 0         0 my $metalist = $rulelist->{metalist};
1732              
1733 0         0 for my $cl (@{$metalist->{comments}})
  0         0  
1734             {
1735 0 0       0 $cl = $cl->[0] if ref $cl eq 'ARRAY';
1736 0         0 printf $outf "%s\n", $cl;
1737             }
1738 0 0       0 if (exists $metalist->{actions})
1739             {
1740 0 0       0 if (!$self->testoption('embedactions'))
1741             {
1742 0         0 for my $cl (@{$metalist->{actions}})
  0         0  
1743             {
1744 0 0       0 $cl = $cl->[0] if ref $cl eq 'ARRAY';
1745 0         0 printf $outf "%s\n", $cl;
1746             }
1747             }
1748             else
1749             {
1750 0         0 $actiontext = "";
1751 0         0 for my $cl (@{$metalist->{actions}})
  0         0  
1752             {
1753 0 0       0 $cl = $cl->[0] if ref $cl eq 'ARRAY';
1754 0         0 $actiontext .= $cl;
1755             }
1756             }
1757             }
1758             }
1759              
1760 33         37 my $outputline = "";
1761 33         36 my $linelen = 0;
1762              
1763 33         33 for my $rhs (@{$rulelist->{list}})
  33         52  
1764             {
1765 59 100       94 my $cardinality = $rhs->{cardinality} if exists $rhs->{cardinality};
1766              
1767 59         69 $options->{delimiter} = "|";
1768 59 100       86 $options->{cardinality} = $cardinality if defined $cardinality;
1769              
1770 59 50       86 if (exists $rhs->{comments})
1771             {
1772 0         0 my $cl = $rhs->{comments};
1773 0         0 mydie "TODO : comments";
1774             }
1775              
1776 59         60 my $nextclause;
1777              
1778             SWITCH:
1779             {
1780 59 0 66     60 (exists $rhs->{type} && $rhs->{type} eq "group" && exists $rhs->{list}) && do {
  59   33     128  
1781 0 0       0 if ( exists $rhs->{class4list})
1782             {
1783 0         0 $nextclause = $self->printRhs( $rulename, $rhs, $options );
1784 0         0 $options->{status} = 2;
1785             }
1786             else
1787             {
1788 0         0 mydie "writeMarpaRuleList : unexpected embedded 'list'";
1789             }
1790 0         0 last SWITCH;
1791             };
1792 59         62 do {
1793 59         92 $nextclause = $self->printRhs( $rulename, $rhs, $options );
1794 59         81 $options->{status} = 2;
1795 59         66 last SWITCH;
1796             };
1797             }
1798              
1799 59         73 $options->{delimiter}= "|";
1800              
1801 59 50       97 if ($linelen + length($nextclause) > $self->{indent} + 3 + 125)
1802             {
1803 0         0 $outputline .= "\n" . (' ' x $self->{indent}) . (' ' x 3) . ' ' . $nextclause;
1804 0         0 $linelen = $self->{indent} + 3 + length($nextclause);
1805             }
1806             else
1807             {
1808 59         71 $outputline .= $nextclause;
1809 59         86 $linelen += length($nextclause);
1810             }
1811             }
1812              
1813 33 50       52 if (defined $actiontext)
1814             {
1815 0         0 my $padlen = $self->{indent} + 40 - length($outputline);
1816 0 0       0 $padlen = 1 if $padlen < 0;
1817 0         0 $outputline .= ' ' x $padlen . "action => ${actiontext}";
1818 0         0 $actiontext = undef;
1819             }
1820              
1821 33         40 $options->{status} = 1;
1822 33 50       86 printf $outf "%s\n", $outputline if length($outputline) > 0;
1823 33         54 $linelen = 0;
1824             }
1825              
1826 22 50       45 if (exists $options->{comments})
1827             {
1828 0         0 for my $cl (@{$options->{comments}})
  0         0  
1829             {
1830 0         0 printf $outf "%s\n", $cl;
1831             }
1832 0         0 delete $options->{comments};
1833             }
1834             }
1835              
1836             ##
1837             # writeMarpaRule : print a complute rule :
1838             # - rule name + 1st branch
1839             # - all subsequent branches
1840             ##
1841             sub writeMarpaRule
1842             {
1843 32     32 0 52 my ($self, $rulename ) = @_;
1844              
1845 32 50       50 $self->dumpMarpaRule($rulename) if $self->{verbosity};
1846              
1847 32         65 $self->tagMarpaRule($rulename);
1848              
1849             # if ( $rulename eq "IDENTIFIER")
1850             # {
1851             # printf "found!\n";
1852             # }
1853              
1854 32         50 my $rule = $self->getMarpaRule(undef, $rulename);
1855              
1856 32 100       61 my $cardinality = $rule->{cardinality} if exists $rule->{cardinality};
1857              
1858 32         52 my $options = { delimiter => "", status => 0 };
1859 32 100       113 $options->{cardinality} = $cardinality if defined $cardinality;
1860 32 100 100     115 $options->{assignop} = '~' if exists $rule->{isLexeme} || exists $rule->{isFragmentOrChild} || exists $rule->{grammarstate};
      100        
1861              
1862 32         41 my $outputline = "";
1863 32         41 my $outf = $self->{outf};
1864              
1865 32         34 my $printoutputcreated = 1;
1866              
1867             SWITCH:
1868             {
1869             ##
1870             # discard rules that implement a case-insensitive single-letter literal
1871             # if we are building case-insensitive keyword literals.
1872             ##
1873 32 50 33     35 ($self->testoption('buildkeywords') && $self->isKeywordLetter($rulename, $rule)) && do {
  32         47  
1874 0         0 $self->deleteMarpaRule($rulename);
1875 0         0 $printoutputcreated = 0;
1876 0         0 last SWITCH;
1877             };
1878             #(exists $rule->{isFragmentOrChild} && $self->testoption('fragment2class') && fragmentEligible2Convert($rule)) && do {
1879             ##
1880             # conditionally translate eligible fragments to classes if the fragment is not tagged with a '?' quantifier
1881             ##
1882 32 100 66     63 ($self->testoption('fragment2class') && $rulename !~ /^opt_/ && fragmentEligible2Convert($rule)) && do {
      100        
1883 1         5 $self->writeFragmentAsClass($rulename, $rule, $options );
1884 1         2 last SWITCH;
1885             };
1886             ##
1887             # print a rule with alternative branches
1888             ##
1889 31 100       52 (exists $rule->{list}) && do {
1890 22         42 $self->writeMarpaRuleList($rulename, $rule, $options );
1891 22         29 last SWITCH;
1892             };
1893             ##
1894             # print a simple rule
1895             ##
1896 9         9 do {
1897 9         15 $outputline = $self->printRhs( $rulename, $rule, $options );
1898 9 50       22 $printoutputcreated = 0 if !length($outputline);
1899 9         10 last SWITCH;
1900             };
1901             }
1902              
1903 32 100       57 printf $outf "%s\n", $outputline if length($outputline) > 0;
1904              
1905 32         67 return $printoutputcreated;
1906             }
1907              
1908             sub dumpMarpaRule
1909             {
1910 0     0 0 0 my ( $self, $rulename ) = @_;
1911              
1912 0         0 my $marparule = $self->getMarpaRule(undef, $rulename);
1913 0         0 $self->dumpStructure( sprintf("=== %s\n", $rulename), $marparule);
1914             }
1915              
1916             ##
1917             # 'computeIndentation' : compute the maximum rulename length
1918             ##
1919             sub computeIndentation
1920             {
1921 1     1 0 2 my ($self) = @_;
1922              
1923 1         2 my $symboltable = $self->{symboltable};
1924 1         2 my $ruletable = $symboltable->ruletable;
1925 1         3 my $subrules = $self->{subrules};
1926              
1927 1         3 my $indent = -1;
1928 1         4 for my $rule (@$ruletable)
1929             {
1930 16 50 66     53 next if !defined $rule || !exists $rule->{name} || !exists $rule->{generationstatus};
      33        
1931 14         19 my $rulename = $rule->{name};
1932 14 100       22 $indent = length($rulename) if length($rulename) > $indent;
1933 14 100       26 next if !exists $subrules->{$rulename};
1934 8         11 for my $subrulename (@{$subrules->{$rulename}})
  8         12  
1935             {
1936 18 100       29 $indent = length($subrulename) if length($subrulename) > $indent;
1937             }
1938             }
1939              
1940 1 50       6 mydie "INTERNAL: couldn't compute rhs indentation" if $indent == -1;
1941              
1942 1         3 $self->{indent} = $indent;
1943             }
1944              
1945             sub generateGenericOptions
1946             {
1947 1     1 0 2 my ($self) = @_;
1948 1         3 my $outf = $self->{outf};
1949 1         10 printf $outf "%s\n\n", "lexeme default = latm => 1";
1950             }
1951              
1952             sub generateStartClause
1953             {
1954 1     1 0 3 my ($self) = @_;
1955              
1956 1         4 my $startrule = $self->symboltable->startrule;
1957 1 50       3 return if !defined $startrule;
1958              
1959 1         3 my $outf = $self->{outf};
1960              
1961 1         10 printf $outf "%s %-3s %s\n\n", $self->pad(":start"), "::=", $startrule->{name};
1962             }
1963              
1964             sub convertRedirectToDiscard
1965             {
1966 1     1 0 4 my ($self) = @_;
1967              
1968 1 50 33     11 return if !exists $self->{discarded} || ref $self->{discarded} ne 'ARRAY' || scalar @{$self->{discarded}} < 1;
  1   33     5  
1969              
1970 1         3 my $outf = $self->{outf};
1971              
1972 1         3 printf $outf "# ---\n";
1973 1         3 printf $outf "# Discard rule from redirect options :\n";
1974 1         3 printf $outf "%s %-3s %s\n", $self->pad(":discard"), "~", "";
1975              
1976 1         3 my $lhs = "";
1977 1         2 my $delimiter = "~";
1978 1         2 for my $rulename (@{$self->{discarded}})
  1         3  
1979             {
1980 1         4 printf $outf "%s %-3s %s\n", $self->pad($lhs), $delimiter, $rulename;
1981 1         3 $lhs = "";
1982 1         3 $delimiter = "|";
1983             }
1984              
1985 1         3 printf $outf "# ---\n";
1986             }
1987              
1988             ## -------------------------------------------------------------------------------------
1989             # 'writeMarpaGrammar' create an output file with the collected rules in Marpa syntax.
1990             # the sequence of rules is identical to that from the original
1991             # input files. Subrules are aligned with parent rules.
1992             ## -------------------------------------------------------------------------------------
1993             sub writeMarpaGrammar
1994             {
1995 1     1 0 2 my ($self) = @_;
1996              
1997 1         3 my $symboltable = $self->{symboltable};
1998 1         4 my $startrule = $symboltable->startrule;
1999 1         3 my $startrulename = $startrule->{name};
2000              
2001 1         4 my $ruletable = $symboltable->ruletable;
2002 1         2 my $subrules = $self->{subrules};
2003              
2004 1         7 $self->computeIndentation;
2005              
2006 1         4 $self->openOutputFile;
2007              
2008 1         10 $self->generateGenericOptions;
2009 1         5 $self->generateStartClause;
2010              
2011             # create a ':discard' rule at the top of the output file
2012             # from the rules tagged 'redirect'
2013 1         4 $self->convertRedirectToDiscard;
2014              
2015 1         2 my $outf = $self->{outf};
2016              
2017             ##
2018             # print rules in the order of the input file
2019             ##
2020 1         2 my $status = 0;
2021 1         3 my $subrulestatus = 0;
2022 1         2 my $rulegeneratedoutput = 1;
2023              
2024 1         3 for my $rule (@$ruletable)
2025             {
2026 16 50 33     36 if (exists $rule->{comment} && !$self->testoption('stripallcomments'))
2027             {
2028 0         0 my $comment = $rule->{comment};
2029 0 0       0 $comment = [$comment] if ref $comment ne 'ARRAY';
2030 0         0 my $cl = $self->renderComment("grammar", $comment);
2031 0         0 for my $s (@$cl)
2032             {
2033 0         0 printf $outf "%s\n", $s;
2034             }
2035             }
2036              
2037 16 100 66     66 next if !defined $rule || !exists $rule->{name} || !exists $rule->{generationstatus};
      66        
2038              
2039 14         18 my $rulename = $rule->{name};
2040              
2041 14 100       23 if ( !$status )
2042             {
2043 1 50       4 printf "WARNING : first rule '%s' is not startrule '%s'\n", $rulename, $startrulename if $rulename ne $startrulename;
2044 1         2 $status = 1;
2045             }
2046              
2047             ##
2048             # create a block of text for a rule with its subrules
2049             ##
2050 14 100 100     67 printf $outf "\n" if exists $subrules->{$rulename} && !$subrulestatus && $rulegeneratedoutput;
      66        
2051 14         18 $subrulestatus = 0;
2052              
2053 14         25 $rulegeneratedoutput = $self->writeMarpaRule($rulename);
2054              
2055             ##
2056             # print the generated subrules immediately after the parent rule
2057             ##
2058 14 100       32 if (exists $subrules->{$rulename})
2059             {
2060 8         9 my $subrulegeneratedoutput = 0;
2061              
2062 8         9 for my $subrulename (@{$subrules->{$rulename}})
  8         17  
2063             {
2064 18         29 $subrulegeneratedoutput += $self->writeMarpaRule($subrulename);
2065             }
2066              
2067 8 50       20 printf $outf "\n" if $subrulegeneratedoutput;
2068 8         13 $subrulestatus = 1;
2069             }
2070             }
2071              
2072             ##
2073             # print the rest (if any)
2074             ##
2075 1         4 for my $rulename (sort keys %{$self->{rules}})
  1         21  
2076             {
2077 32         49 my $g4rule = $symboltable->rule($rulename);
2078 32 100       60 next if !exists $g4rule->{generationstatus};
2079              
2080 14         22 my $marparule = $self->getMarpaRule(undef, $rulename);
2081 14 50       27 next if exists $marparule->{status};
2082              
2083 0 0       0 printf "%s\n", $rulename if $self->{verbosity};
2084 0         0 $self->writeMarpaRule($rulename);
2085             }
2086              
2087 1         4 $self->closeOutputFile;
2088             }
2089              
2090             # --------------------------------------------------- #
2091             # driver #
2092             # --------------------------------------------------- #
2093              
2094             sub generate
2095             {
2096 1     1 0 3 my ($self, $symboltable) = @_;
2097              
2098 1         2 $self->{symboltable} = $symboltable;
2099              
2100 1         5 $self->translateG4Grammar;
2101 1 50       4 $self->dumpStructure("Rules", $self->{rules}) if $self->{verbosity} > 1;
2102 1         4 $self->writeMarpaGrammar;
2103             }
2104              
2105             1;
2106              
2107             # ABSTRACT: translate parsed antlr4 rules to Marpa2 syntax
2108              
2109             =head1 SYNOPSIS
2110             use MarpaX::G4::MarpaGen;
2111             my $generator = new MarpaX::MarpaGen;
2112             $generator->generate($symboltable);
2113             =head1 DESCRIPTION
2114             Translate the rules from the symbol table created from the imported ANTLR4 grammar
2115             into Marpa syntax and write them to output.
2116              
2117             use MarpaX::Symboltable;
2118             my $symboltable = new MarpaX::Symboltable;
2119              
2120             my $grammartext = readFile($infile);
2121             my $data = MarpaX::G4::parse_rules($grammartext);
2122             $symboltable->importParseTree($data);
2123             $symboltable->validateSymbolTable();
2124              
2125             my $generator = new MarpaX::MarpaGen;
2126              
2127             $generator->stripallcomments if exists $options->{c};
2128             $generator->embedactions if exists $options->{e};
2129             $generator->fragment2class if exists $options->{f};
2130             $generator->shiftlazytogreedy if exists $options->{g};
2131             $generator->buildkeywords if exists $options->{k};
2132             $generator->stripactions if exists $options->{p};
2133             $generator->setVerbosity(2) if exists $options->{t};
2134             $generator->matchcaseinsensitive if exists $options->{u} || exists $options->{k};
2135              
2136             my $outputfile = '-';
2137             $outputfile = $options->{o} if exists $options->{o};
2138             $generator->setoutputfile($outputfile);
2139              
2140             $generator->generate($symboltable);
2141             =cut