File Coverage

blib/lib/MarpaX/G4/MarpaGen.pm
Criterion Covered Total %
statement 666 953 69.8
branch 300 530 56.6
condition 156 303 51.4
subroutine 59 76 77.6
pod 0 70 0.0
total 1181 1932 61.1


line stmt bran cond sub pod time code
1             # ----------------------------------------------------------------------------------------------------- #
2             # MarpaX::MarpaGen #
3             # #
4             # translate the parsed antlr4 rules to Marpa::R2 syntax. #
5             # #
6             # ----------------------------------------------------------------------------------------------------- #
7              
8             package MarpaX::G4::MarpaGen;
9              
10 2     2   11 use strict;
  2         3  
  2         56  
11 2     2   9 use warnings FATAL => 'all';
  2         3  
  2         58  
12 2     2   16 no warnings 'recursion';
  2         3  
  2         61  
13              
14 2     2   9 use Data::Dumper;
  2         3  
  2         84  
15              
16 2     2   10 use MarpaX::G4::Parser;
  2         2  
  2         43  
17 2     2   8 use MarpaX::G4::Symboltable;
  2         3  
  2         17471  
18              
19             # --------------------------------------------------- #
20             # exposed class methods #
21             # --------------------------------------------------- #
22              
23             sub new
24             {
25 1     1 0 2 my $invocant = shift;
26 1   33     5 my $class = ref($invocant) || $invocant; # Object or class name
27 1         2 my $self = {}; # initiate our handy hashref
28 1         3 bless($self,$class); # make it usable
29              
30 1         6 $self->{symboltable} = undef;
31 1         2 $self->{verbosity} = 0;
32 1         3 $self->{level} = -1;
33 1         1 $self->{outputfilename} = "-";
34 1         2 $self->{outf} = undef;
35 1         3 $self->{rules} = {};
36 1         2 $self->{subrules} = {};
37 1         3 $self->{generationoptions} = {};
38              
39 1         2 return $self;
40             }
41              
42 0 0   0 0 0 sub setVerbosity { my ($self, $verbosity) = @_; $self->{verbosity} = defined($verbosity) ? $verbosity : 0; }
  0         0  
43 48     48 0 52 sub symboltable { my ($self) = @_; return $self->{symboltable}; }
  48         61  
44 1     1 0 2 sub setoutputfile { my ($self, $outputfilename) = @_; $self->{outputfilename} = $outputfilename; }
  1         2  
45              
46 1     1 0 3 sub fragment2class { my ($self) = @_; $self->{generationoptions}{fragment2class} = 'true'; }
  1         3  
47 0     0 0 0 sub embedactions { my ($self) = @_; $self->{generationoptions}{embedactions} = 'true'; }
  0         0  
48 0     0 0 0 sub stripactions { my ($self) = @_; $self->{generationoptions}{stripactions} = 'true'; }
  0         0  
49 0     0 0 0 sub stripallcomments { my ($self) = @_; $self->{generationoptions}{stripallcomments} = 'true'; }
  0         0  
50 1     1 0 3 sub matchcaseinsensitive { my ($self) = @_; $self->{generationoptions}{matchcaseinsensitive} = 'true'; }
  1         3  
51 0     0 0 0 sub shiftlazytogreedy { my ($self) = @_; $self->{generationoptions}{shiftlazytogreedy} = 'true'; }
  0         0  
52 1     1 0 2 sub buildkeywords { my ($self) = @_; $self->{generationoptions}{buildkeywords} = 'true'; }
  1         3  
53              
54             sub testoption
55             {
56 189     189 0 227 my ( $self, $option ) = @_;
57              
58 189 50       230 return undef if !defined $option;
59 189         209 $option = lc $option;
60 189 50       239 return undef if !exists $self->{generationoptions}{$option};
61              
62 189         485 return $self->{generationoptions}{$option};
63             }
64              
65             sub dumpStructure
66             {
67 0     0 0 0 my ( $self, $title, $structure ) = @_;
68              
69 0         0 printf "=== %s\n", $title;
70 0         0 $Data::Dumper::Indent = 1;
71 0         0 print Dumper($structure);
72             }
73              
74             sub abortWithError
75             {
76 0     0 0 0 my ( $self, $msg, $structure ) = @_;
77              
78 0         0 $self->dumpStructure($msg, $structure);
79 0         0 die "aborting on unrecoverable error";
80             }
81              
82             sub mydie
83             {
84 0     0 0 0 my ( $msg ) = @_;
85 0         0 die "${msg}";
86             }
87              
88             sub addMarpaRule
89             {
90 32     32 0 41 my ( $self, $rulename, $marparule ) = @_;
91              
92 32         36 my $rules = $self->{rules};
93 32 50       63 mydie "INTERNAL: rule $rulename defined more than once" if exists $rules->{$rulename};
94 32         55 $rules->{$rulename} = $marparule;
95             }
96              
97             ##
98             # deleteMarpaRule : delete 'subrulename' if it is a subrule of 'rulename'
99             ##
100             sub deleteMarpaRule
101             {
102 0     0 0 0 my ( $self, $rulename ) = @_;
103              
104 0         0 my $rules = $self->{rules};
105              
106 0 0       0 mydie "INTERNAL: can't delete undefined rule $rulename" if !exists $rules->{$rulename};
107              
108 0         0 $rules->{$rulename} = { deletedafterconversiontoclass => "true" };
109 0         0 $self->deleteAllSubrules($rulename);
110             }
111              
112             ##
113             # deleteSingleSubrule : delete 'subrulename' if it is a subrule of 'rulename'
114             ##
115             sub deleteSingleSubrule
116             {
117 0     0 0 0 my ( $self, $rulename, $subrulename ) = @_;
118              
119 0 0       0 return if !exists $self->{subrules}{$rulename};
120              
121 0         0 my $subrules = $self->{subrules}{$rulename};
122              
123 0         0 my $status = 0;
124 0         0 my @newrules;
125              
126             map {
127 0 0       0 if ( $_ eq $subrulename )
  0         0  
128             {
129 0         0 $status = 1;
130             }
131             else
132             {
133 0         0 push @newrules, $_;
134             }
135             } @$subrules;
136              
137 0         0 $self->{subrules}{$rulename} = \@newrules;
138 0 0       0 if ($status)
139             {
140 0         0 my $rules = $self->{rules};
141 0 0       0 mydie "INTERNAL: can't delete undefined rule $subrulename" if !exists $rules->{$subrulename};
142 0         0 $rules->{$subrulename} = { deletedafterconversiontoclass => "true" }
143             }
144             }
145              
146             sub checkMarpaRuleExists
147             {
148 5     5 0 9 my ( $self, $rulename ) = @_;
149              
150 5         6 my $rules = $self->{rules};
151              
152 5 50       10 return undef if !exists $rules->{$rulename};
153              
154 5         10 return $rules->{$rulename};
155             }
156              
157             sub getMarpaRule
158             {
159 96     96 0 110 my ( $self, $parent, $rulename ) = @_;
160              
161 96         125 my $rules = $self->{rules};
162              
163 96 50       128 if (!exists $rules->{$rulename})
164             {
165 0 0       0 $parent = "" if !defined $parent;
166 0         0 mydie "INTERNAL: rule $rulename is undefined under parent $parent";
167             }
168              
169 96         114 return $rules->{$rulename};
170             }
171              
172             sub tagMarpaRule
173             {
174 32     32 0 38 my ( $self, $rulename ) = @_;
175              
176 32         33 my $rules = $self->{rules};
177 32 50       42 mydie "INTERNAL: rule $rulename is undefined" if !exists $rules->{$rulename};
178 32         50 $rules->{$rulename}{status} = "done";
179             }
180              
181             sub isSubrule
182             {
183 5     5 0 9 my ( $self, $rulename, $subrulename ) = @_;
184              
185 5         6 my $subrules = $self->{subrules};
186              
187 5 50       9 return 0 if !exists $subrules->{$rulename};
188              
189 5         6 for my $sr (@{$subrules->{$rulename}})
  5         9  
190             {
191 9 100       19 return 1 if $sr eq $subrulename;
192             }
193              
194 0         0 return 0;
195             }
196              
197             # --------------------------------------------------- #
198             # grammar generator #
199             # --------------------------------------------------- #
200              
201             sub enterLevel
202             {
203 14     14 0 15 my ($self, $rulename, $context) = @_;
204              
205 14 50       23 my $level = $context->{level} if exists $context->{level};
206 14 50       22 $level = -1 if !defined $level;
207 14         15 ++$level;
208 14         16 $context->{level} = $level;
209             }
210              
211             sub exitLevel
212             {
213 14     14 0 20 my ($self, $rulename, $context) = @_;
214              
215 14         16 my $level = $context->{level};
216 14         15 --$level;
217 14         15 $context->{level} = $level;
218             }
219              
220             sub generateSubruleName
221             {
222 18     18 0 29 my ($self, $rulename, $cardinality, $context) = @_;
223              
224 18 100       33 $context->{subruleindex}{$rulename}{index} = 0 if !exists $context->{subruleindex}{$rulename}{index};
225              
226 18         21 my $subruleindex = ++$context->{subruleindex}{$rulename}{index};
227 18         48 my $subrulename = sprintf "%s_%03d", $rulename, $subruleindex;
228              
229 18 100 100     41 if ( defined $cardinality && $cardinality eq "?" )
230             {
231 4         7 $subrulename = "opt_" . $subrulename;
232             }
233              
234 18         17 push @{$self->{subrules}{$rulename}}, $subrulename;
  18         35  
235              
236 18         27 return $subrulename;
237             }
238              
239             sub retrieveModifier
240             {
241 360     360 0 397 my ($token, $tag) = @_;
242              
243 360 50       457 return undef if ref $token ne "HASH";
244              
245 360         322 my $modifier = undef;
246 360 100       477 $modifier = $token->{$tag} if exists $token->{$tag};
247              
248 360         407 return $modifier;
249             }
250              
251             sub negateElement
252             {
253 1     1 0 3 my ($self, $rulename, $mxtoken, $context) = @_;
254              
255             # if ($rulename =~ /NATIONAL_CHAR_STRING_LIT/)
256             # {
257             # printf "found!\n";
258             # }
259              
260 1 50       3 mydie "INTERNAL: negated element is not a hash in rule $rulename" if ref $mxtoken ne "HASH";
261              
262 1         2 my $mxelement;
263              
264 1 50       4 if (! exists $mxtoken->{class4list} )
265             {
266 0         0 mydie "INTERNAL: can't negate missing class4list in rule $rulename";
267             ##
268             # TODO: verify if we could continue by mapping this error to 'unsupported'
269             # so that we could process the complete input
270             ##
271 0         0 $mxelement = { type => 'unsupported', msg => "can't negate non-class token in $rulename" };
272             }
273              
274 1         3 my $class4list = $mxtoken->{class4list};
275 1         4 $mxelement = { type => 'negatedclass', value => "^${class4list}" };
276              
277 1 50       3 $mxelement->{grammarstate} = $mxtoken->{grammarstate} if exists $mxtoken->{grammarstate};
278 1 50       4 $mxelement->{isFragmentOrChild} = $mxtoken->{isFragmentOrChild} if exists $mxtoken->{isFragmentOrChild};
279              
280             ##
281             # delete the negated subrule since it will be replaced with a new subrule.
282             ##
283 1 0 33     3 if (exists $mxtoken->{rhs} && exists $mxtoken->{rhs}{token})
284             {
285 0         0 my $token = $mxtoken->{rhs}{token};
286 0 0       0 if (ref $token eq "")
287             {
288 0         0 $self->deleteSingleSubrule($rulename, $token);
289             }
290             }
291              
292 1         3 return $mxelement;
293             }
294              
295             sub translateLiteralCase
296             {
297 0     0 0 0 my ($string, $case) = @_;
298              
299 0 0       0 return $case =~ /^U/i ? uc $string : lc $string;
300             }
301              
302             ##
303             # generatematchcaseinsensitive : make a literal (somewhat) case-insensitive by creating a subrule with 2 match alternatives :
304             #
305             # - all uppercase
306             # - characterclass with the 1st character in upper/lower case, the rest all lowercase
307             #
308             # if the literal is only a single letter, we only retain the characterclass
309             # if the first character is not a letter, we don't generate a class.
310             #
311             ## --------------------------------------------------------------------------------------------------------- ##
312             ## CAVEAT : introduction of the ':i' and ':ic' literal and class modifiers has made this function obsolete. ##
313             ## we retain it as a demo of generated subrules. ##
314             ## --------------------------------------------------------------------------------------------------------- ##
315             sub generateMatchCaseInsensitive
316             {
317 0     0 0 0 my ($tokenvalue) = @_;
318              
319 0         0 my $lctoken = translateLiteralCase($tokenvalue, 'L');
320 0         0 my $uctoken = translateLiteralCase($tokenvalue, 'U');
321 0         0 my $prefix = substr($lctoken, 0, 1);
322 0         0 my $rest = substr($lctoken, 1);
323 0         0 my $lcp = lc $prefix;
324 0         0 my $ucp = uc $prefix;
325 0         0 my $mixedcase = "${lcp}${ucp}";
326              
327 0         0 my $literaltoken;
328              
329 0 0       0 if ($lcp =~ /[A-Z]/i)
330             {
331 0         0 $literaltoken = { type => 'tokengroup',
332             definition => [{token => {type => 'class', value => $mixedcase}}]
333             };
334 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $rest }} if length($rest) > 0;
  0         0  
335 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $uctoken}, alternative=> 'true'} if length($uctoken) > 1;
  0         0  
336             }
337             else
338             {
339 0         0 $literaltoken = { type => 'tokengroup',
340             definition => [{token => {type => 'literal', value => $lctoken}}]
341             };
342 0 0       0 push @{$literaltoken->{definition}}, {token => {type => 'literal', value => $uctoken}, alternative=> 'true'} if length($uctoken) > 1;
  0         0  
343             }
344              
345 0         0 return $literaltoken;
346             }
347              
348             sub isSingleChar
349             {
350 21     21 0 29 my ($string) = @_;
351              
352 21 100       68 return 1 if $string =~ /^.$/;
353 4 100       13 return 1 if $string =~ /^\\[^u]$/i;
354 3 50       6 return 1 if $string =~ /^\\u[0-9A-F]{4,4}$/i;
355 3 50       6 return 1 if $string =~ /^\\u\{[0-9A-F]{5,5}\}$/i;
356 3         6 return 0;
357             }
358              
359             sub isKeywordLetter
360             {
361 71     71 0 88 my ($self, $rulename, $rule ) = @_;
362              
363 71 100       161 return 0 if !exists $rule->{class4list};
364 9         12 my $s = $rule->{class4list};
365 9 0 33     34 return 1 if $rulename =~ /^[a-z]$/i && $s =~ /([a-z])([a-z])/i && ( uc "$1" eq "$2" || lc "$1" eq "$2");
      0        
      33        
366 9         18 return 0;
367             }
368              
369             ##
370             # isKeywordFragment : return 1 if '$string' is a characterclass with a pair of lower/upper case letters
371             ##
372             sub isKeywordFragment
373             {
374 60     60 0 76 my ($self, $rulename, $rule ) = @_;
375              
376 60 100       93 return undef if !exists $rule->{class4list};
377 30         32 my $string = $rule->{class4list};
378              
379 30 100 66     84 return lc substr($string, 0, 1) if $string =~ /([a-z])([a-z])/i && ( uc "$1" eq "$2" || lc "$1" eq "$2");
      100        
380 29 100       79 return substr($string, 0, 1) if $string !~ /[a-z]/i;
381 8 100       20 return substr($string, 0, 1) if $string =~ /[0-9]/i;
382              
383 3         5 return undef;
384             }
385              
386             sub convertLiteralToClass
387             {
388 21     21 0 24 my ($literal) = @_;
389              
390 21 100       49 return undef if !isSingleChar($literal);
391              
392 18         29 return "${literal}";
393             }
394              
395             sub convertRangeToClass
396             {
397 0     0 0 0 my ($token) = @_;
398              
399 0         0 my $begr = $token->{begr};
400 0         0 my $endr = $token->{endr};
401              
402 0 0 0     0 return { type => 'unsupported', msg => "can't convert non-literal beg/end range to class" } if $begr->{type} ne 'literal' || $endr->{type} ne 'literal';
403              
404 0         0 $begr = $begr->{value};
405 0         0 $endr = $endr->{value};
406              
407 0 0 0     0 return { type => 'unsupported', msg => "can't convert range (${begr} .. ${endr}) to class" } if !isSingleChar($begr) || !isSingleChar($endr);
408              
409 0         0 my $classtext = "${begr}-${endr}";
410              
411 0         0 return { type => 'class', value => $classtext, isLexeme => 1, class4list => $classtext };
412             }
413              
414             sub mergeClass4List
415             {
416 120     120 0 131 my ($tracker, $mxelement) = @_;
417              
418 120 50       162 mydie "tracker has no 'status'" if !exists $tracker->{status};
419              
420 120 100 100     202 if ( !exists $mxelement->{class4list} || $tracker->{status} == -1 )
421             {
422 90         92 $tracker->{status} = -1;
423             }
424             else
425             {
426 30         44 $tracker->{value} .= $mxelement->{class4list};
427             }
428             }
429              
430             sub mergeKeywordFragment
431             {
432 60     60 0 64 my ($tracker, $mxelement) = @_;
433              
434 60 50       93 mydie "tracker has no 'status'" if !exists $tracker->{status};
435              
436 60 100 100     114 if ( !exists $mxelement->{keywordfragment} || $tracker->{status} == -1 )
437             {
438 45         47 $tracker->{status} = -1;
439             }
440             else
441             {
442 15         20 $tracker->{value} .= $mxelement->{keywordfragment};
443             }
444             }
445              
446             ##
447             # createSubRule : create a (non-)anonymous subrule to 'rulename'.
448             # create a negated class if the class or token group was negated.
449             #
450             # CAVEAT: remember to reset the 'negation' flag in the caller
451             # to avoid negating twice.
452             ##
453             sub createSubRule
454             {
455 18     18 0 26 my ($self, $rulename, $mxelement, $negation, $cardinality, $context ) = @_;
456              
457 18 50       26 if ( defined $negation )
458             {
459 0         0 $mxelement = $self->negateElement($rulename, $mxelement, $context);
460             }
461              
462 18         35 my $subrulename = $self->generateSubruleName($rulename, $cardinality, $context);
463              
464             ##
465             # propagate grammar fragment states to subrules (i.e. children)
466             ##
467 18 100       36 $mxelement->{grammarstate} = $context->{subruleindex}{$rulename}{grammarstate} if exists $context->{subruleindex}{$rulename}{grammarstate};
468 18 100       30 $mxelement->{isFragmentOrChild} = $context->{subruleindex}{$rulename}{isFragmentOrChild} if exists $context->{subruleindex}{$rulename}{isFragmentOrChild};
469              
470             ##
471             # wrap cardinality "?" into an 'opt_' rule
472             ##
473 18 100 100     41 $mxelement->{cardinality} = $cardinality if defined $cardinality && $cardinality eq "?";
474              
475 18         28 $self->addMarpaRule($subrulename, $mxelement);
476              
477 18         39 my $mxalias = { rule => $subrulename, rhs => { token => $subrulename } };
478              
479             ##
480             # CAVEAT: don't propagate the lexeme tag upwards.
481             # this avoids confusion in G1 rules.
482             ##
483 18 50 66     34 $mxalias->{class4list} = $mxelement->{class4list} if exists $mxelement->{class4list} && !defined $cardinality;
484 18 100       37 $mxalias->{grammarstate} = $context->{subruleindex}{$rulename}{grammarstate} if exists $context->{subruleindex}{$rulename}{grammarstate};
485 18 100       24 $mxalias->{isFragmentOrChild} = $context->{subruleindex}{$rulename}{isFragmentOrChild} if exists $context->{subruleindex}{$rulename}{isFragmentOrChild};
486              
487 18         36 return $mxalias;
488             }
489              
490             sub deleteAllSubrules
491             {
492 1     1 0 3 my ($self, $rulename ) = @_;
493              
494 1         2 my $subrules = $self->{subrules};
495 1         2 my $rules = $self->{rules};
496              
497 1 50       4 return if !exists $subrules->{$rulename};
498              
499 0         0 for my $subrulename (@{$subrules->{$rulename}})
  0         0  
500             {
501 0         0 $rules->{$subrulename} = { deletedafterconversiontoclass => "true" };
502             }
503             }
504              
505             ## ------------------------------------------------------
506             # 'walktoken' : process all token flavours
507             ## ------------------------------------------------------
508             sub walktoken
509             {
510 40     40 0 50 my ($self, $rulename, $token, $context) = @_;
511              
512             # if ( $rulename eq "EOF")
513             # {
514             # printf "found!\n";
515             # }
516              
517 40         51 my $alternative = retrieveModifier( $token, 'alternative' );
518 40         48 my $cardinality = retrieveModifier( $token, 'cardinality' );
519              
520 40         46 my $mxToken = {};
521 40         46 my $comments = [];
522              
523 40 50 33     116 if (ref $token eq "HASH" && exists $token->{comment} && !$self->testoption('stripallcomments'))
      33        
524             {
525 0 0       0 mydie "comment not a plain string in rule $rulename" if ref $token->{comment} ne "";
526 0         0 my $cl = $self->renderComment($rulename, [$token->{comment}]);
527 0         0 push (@$comments, @$cl);
528             }
529              
530 40         45 my $negation = retrieveModifier($token, 'negation');
531              
532             SWITCH:
533             {
534 40 50       41 (ref $token eq "") && do {
  40         52  
535 0         0 $mxToken = $self->processSubRule($token, $context);
536 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
537             # reset 'negation' since it was processed by 'createSubRule'
538 0         0 $negation = undef;
539 0         0 last SWITCH;
540             };
541 40 100 66     125 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "tokengroup") && do {
      100        
542 9         27 $mxToken = $self->walkgroup($rulename, $token, $context);
543 9         18 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
544             # reset 'negation' since it was processed by 'createSubRule'
545 9         9 $negation = undef;
546 9         11 last SWITCH;
547             };
548 31 50 66     96 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "rulegroup") && do {
      66        
549 0         0 $mxToken = $self->walkgroup($rulename, $token, $context);
550             # strip lexeme status off rule groups
551 0         0 delete $mxToken->{isLexeme};
552 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
553             # reset 'negation' since it was processed by 'createSubRule'
554 0         0 $negation = undef;
555 0         0 last SWITCH;
556             };
557 31 100 66     98 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "literal") && do {
      100        
558 21         29 my $tokenvalue = $token->{value};
559             # if ( !$self->testoption('matchcaseinsensitive') || !$self->isAlphaLiteral($tokenvalue) )
560             # {
561 21         49 $mxToken = { type => 'literal', value => $tokenvalue, isLexeme => 1 };
562 21         52 my $class4list = convertLiteralToClass($tokenvalue);
563 21 100       37 $mxToken->{class4list} = $class4list if defined $class4list;
564             # }
565             # else
566             # {
567             # my $literaltoken = generatematchcaseinsensitive($tokenvalue);
568             # delete $self->{generationoptions}{matchcaseinsensitive};
569             # $mxToken = $self->walkgroup($rulename, $literaltoken, $context);
570             # $mxToken = $self->createSubRule( $rulename, $mxToken, $negation, $cardinality, $context );
571             # $self->{generationoptions}{matchcaseinsensitive} = 'true';
572             # }
573 21         27 last SWITCH;
574             };
575 10 100 66     39 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "class") && do {
      66        
576 9         29 $mxToken = { type => 'class', value => $token->{value}, isLexeme => 1, class4list => $token->{value} };
577 9         10 last SWITCH;
578             };
579 1 50 33     9 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "regex") && do {
      33        
580 0         0 $mxToken = { type => 'unsupported', msg => "can't convert arbitrary regex to Marpa in $rulename" };
581 0         0 last SWITCH;
582             };
583 1 50 33     8 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "range") && do {
      33        
584 0         0 $mxToken = convertRangeToClass($token);
585 0         0 last SWITCH;
586             };
587 1 50 33     9 (ref $token eq "HASH" && exists $token->{type} && $token->{type} eq "value") && do {
      33        
588 0         0 $mxToken = { type => 'unsupported', msg => "can't convert 'value' to Marpa in $rulename" };
589 0         0 last SWITCH;
590             };
591 1 50 33     6 (ref $token eq "HASH" && exists $token->{token}) && do {
592 1         4 $mxToken = $self->walktoken($rulename, $token->{token}, $context);
593 1         2 last SWITCH;
594             };
595 0 0 0     0 (ref $token eq "HASH" && exists $token->{action}) && do {
596 0         0 $mxToken = { type => 'ignore', msg => "embedded action in $rulename" };
597 0         0 last SWITCH;
598             };
599 0 0 0     0 (ref $token eq "HASH" && exists $token->{comment}) && do {
600 0         0 $mxToken = { type => 'ignore', msg => "comment in $rulename" };
601 0         0 last SWITCH;
602             };
603 0         0 do {
604 0         0 $self->abortWithError( "can't process token for rule $rulename", $token );
605 0         0 last SWITCH;
606             };
607             }
608              
609             ##
610             # apply 'negation' to every token type but groups and subrules
611             ##
612 40 100       53 if ( defined $negation )
613             {
614 1         5 $mxToken = $self->negateElement($rulename, $mxToken, $context);
615 1         2 $negation = undef;
616             }
617              
618             ###
619             # create explicit subrule for
620             # - nonterminals
621             # - subgroups
622             # annotated for cardinality
623             ##
624 40 50       54 if ( defined $cardinality )
625             {
626 0         0 $mxToken = $self->createSubRule( $rulename, $mxToken, undef, $cardinality, $context );
627              
628             # strip lexeme status off annotated subrules
629 0         0 delete $mxToken->{isLexeme};
630             }
631              
632 40 50       62 $mxToken->{alternative} = 'A' if defined $alternative;
633 40 50 33     69 $mxToken->{comments} = $comments if scalar @$comments > 0 && !$self->testoption('stripallcomments');
634              
635 40         51 return $mxToken;
636             }
637              
638             sub walknonterminal
639             {
640 0     0 0 0 my ( $self, $rulename, $nonterminal, $context ) = @_;
641              
642 0 0       0 mydie "nonterminal is not a hash for rule $rulename" if ref $nonterminal ne "HASH";
643              
644 0         0 my $mxToken = {};
645              
646             SWITCH:
647             {
648 0 0       0 (exists $nonterminal->{rhs}) && do {
  0         0  
649 0         0 my $rhs = $nonterminal->{rhs};
650 0         0 $mxToken = $self->walktoken($rulename, $rhs, $context);
651 0         0 last SWITCH;
652             };
653 0         0 do {
654 0         0 $self->abortWithError( "can't process nonterminal for rule $rulename", $nonterminal );
655 0         0 last SWITCH;
656             };
657             }
658              
659 0         0 return $mxToken;
660             }
661              
662             ## -------------------------------------------------------------------------------
663             # 'retagSimpleRule' : check if the cardinality can be added to 'mxtoken'.
664             # Rules :
665             # - '?' is consumed by 'opt_' subrules, so we have to reject it here
666             # - 'mxtoken' must be a subrule of 'rule'
667             # - the parent rule must consist of a single element
668             # (responsibility of the caller to verify)
669             ## -------------------------------------------------------------------------------
670             sub retagSimpleRule
671             {
672 6     6 0 10 my ( $self, $rulename, $mxtoken, $cardinality, $context, $options ) = @_;
673              
674 6 50       12 mydie "token must be a hash in $rulename" if ref $mxtoken ne "HASH";
675 6 50       11 return 0 if $cardinality eq "?";
676              
677 6 50 33     12 $cardinality =~ s/([+*])\?/$1/ if $cardinality =~ /[+*]\?/ && $self->testoption('shiftlazytogreedy');
678 6 50       10 mydie sprintf "lazy quantifier %s in rule %s not supported by Marpa", $cardinality, $rulename if $cardinality =~ /[+*]\?/;
679              
680 6   66     21 my $abortatfirstlevel = defined $options && ref $options eq "HASH" && exists $options->{abortatfirstlevel};
681              
682             SWITCH: {
683 6 100       8 (exists $mxtoken->{rhs}) && do {
  6         12  
684 5         6 my $rhs = $mxtoken->{rhs};
685              
686 5 50       18 mydie "'rhs' must be a hash in $rulename" if ref $rhs ne "HASH";
687 5 50 33     16 mydie "'token' must be a scalar in 'rhs' in rule $rulename" if !exists $rhs->{token} || ref $rhs->{token} ne "";
688              
689 5 50       11 return 0 if !defined $self->checkMarpaRuleExists($rhs->{token});
690              
691 5         5 my $rc = 0;
692              
693 5 50       9 if ($abortatfirstlevel)
694             {
695 5         7 my $subrulename = $rhs->{token};
696              
697             # don't push cardinality to a rule unless it is a subrule of the parent
698 5 50       8 return 0 if !$self->isSubrule($rulename, $subrulename);
699              
700 5         8 my $rule = $self->getMarpaRule($rulename, $subrulename);
701 5 50       9 if ($cardinality ne "?")
702             {
703 5         9 $rule->{cardinality} = $cardinality;
704             # cardinality-tagged tokens loose 'class4list' status
705 5         6 delete $rule->{class4list};
706 5         7 $rc = 1;
707             }
708             }
709             else
710             {
711 0         0 my $subrulename = $rhs->{token};
712              
713             # don't push cardinality to a rule unless it is a subrule of the parent
714 0 0       0 return 0 if !$self->isSubrule($rulename, $subrulename);
715              
716 0         0 my $rule = $self->getMarpaRule($rulename, $subrulename);
717 0         0 $rc = $self->retagSimpleRule($rulename, $rule, $cardinality, $context, $options);
718 0 0       0 if ($rc)
719             {
720 0 0       0 mydie "can't retag subrule $subrulename twice in rule $rulename" if exists $rule->{cardinality};
721 0         0 $rule->{cardinality} = $cardinality;
722             # cardinality-tagged tokens loose 'class4list' status
723 0         0 delete $rule->{class4list};
724             }
725             }
726 5         23 return $rc;
727 0         0 last SWITCH;
728             };
729 1 50 33     13 (exists $mxtoken->{type} && $mxtoken->{type} eq "group") && do {
730 0 0       0 mydie "group rhs must contain 'list' in $rulename" if !exists $mxtoken->{list};
731 0         0 my $ec = 0;
732 0         0 for my $al (@{$mxtoken->{list}})
  0         0  
733             {
734 0 0 0     0 mydie "alternative list is not an array in rule $rulename" if ref $al ne "HASH" || !exists $al->{list};
735 0         0 for my $le (@{$al->{list}})
  0         0  
736             {
737 0 0 0     0 mydie "alternative element is not simple in rule $rulename" if exists $le->{type} && $le->{type} !~ /literal|class/;
738 0         0 $ec += 1;
739             }
740             }
741 0         0 return $ec <= 1;
742 0         0 last SWITCH;
743             };
744 1 50 33     5 (exists $mxtoken->{type} && $mxtoken->{type} eq "literal") && do {
745 0         0 $mxtoken->{cardinality} = $cardinality;
746 0         0 return 1;
747 0         0 last SWITCH;
748             };
749 1 50 33     7 (exists $mxtoken->{type} && $mxtoken->{type} eq "class") && do {
750 1 50       4 mydie "'class' can't be multiplied twice in $rulename" if exists $mxtoken->{cardinality};
751 1         2 $mxtoken->{cardinality} = $cardinality;
752 1         4 return 1;
753 0         0 last SWITCH;
754             };
755 0 0 0     0 (exists $mxtoken->{type} && $mxtoken->{type} eq "negatedclass") && do {
756 0 0       0 mydie "'negatedclass' can't be multiplied twice in $rulename" if exists $mxtoken->{cardinality};
757 0         0 $mxtoken->{cardinality} = $cardinality;
758 0         0 return 1;
759 0         0 last SWITCH;
760             };
761 0         0 do {
762 0         0 $self->abortWithError( "don't know how to analyze element in $rulename", $mxtoken );
763 0         0 last SWITCH;
764             };
765             }
766              
767 0         0 return 0;
768             }
769              
770             ## ------------------------------------------------------
771             # removeCaseEquivalentBranches :
772             # go over the branches from the alternative lists.
773             # if 2 branches
774             # - consist of exactly 1 symbol
775             # - are case-equivalent
776             # then remove one of them.
777             ## ------------------------------------------------------
778             sub removeCaseEquivalentBranches
779             {
780 23     23 0 27 my ($self, $alternativelists) = @_;
781              
782 23 100       38 return $alternativelists if scalar @$alternativelists < 2;
783              
784 6         9 my $filteredlist = [];
785 6         6 my $filtered = 0;
786 6         7 my $literal;
787              
788 6         10 for my $branch (@$alternativelists)
789             {
790 17 50       25 if ( !exists $branch->{list})
791             {
792 0         0 push @$filteredlist, $branch;
793             }
794             else
795             {
796 17         17 my $branchlist = $branch->{list};
797 17 100       19 if (scalar @$branchlist > 1)
798             {
799 5         8 push @$filteredlist, $branch;
800             }
801             else
802             {
803 12         13 my $rhs = $branchlist->[0];
804 12         24 my $matchfound = 0;
805              
806 12 100 100     26 if ( exists $rhs->{type} && $rhs->{type} eq 'literal' )
807             {
808 4         6 my $newliteral = $rhs->{value};
809              
810 4 100       8 if (defined $literal)
811             {
812 2         4 my $tmp1 = uc $literal;
813 2         3 my $tmp2 = uc $newliteral;
814 2   33     7 $matchfound = $tmp1 ne "" && $tmp1 eq $tmp2;
815 2 50       6 $filtered = 1 if $matchfound;
816             }
817             else
818             {
819 2         2 $literal = $newliteral;
820             }
821             }
822              
823 12 50       22 push @$filteredlist, $branch if !$matchfound;
824             }
825             }
826             }
827              
828 6 50       13 return $alternativelists if !$filtered;
829              
830 0         0 return $filteredlist;
831             }
832              
833             ## ------------------------------------------------------
834             # 'processRightSides' converts a single list of tokens some of which
835             # are tagged as 'alternative' into (possibly) multiple lists
836             # each of which is an alternative for a group or rule
837             ## ------------------------------------------------------
838             sub processRightSides
839             {
840 23     23 0 35 my ( $self, $rulename, $rhslist, $context ) = @_;
841              
842             # symbol lists of alternative branches
843 23         23 my $alternativelists = [];
844             # current alternative branch
845 23         88 my $currentlist = [];
846             # number of symbols in current alternative branch
847 23         36 my $alternativelength = 0;
848              
849 23         64 my $metalist = { comments => [], actions => [] };
850              
851             # status tracking for conversion to keyword, lexeme or class
852 23         45 my $class4list = { status => 0, value => "" };
853 23         34 my $class4group = { status => 0, value => "" };
854 23         34 my $groupisLexeme = { status => 0 };
855 23         28 my $keywordfragment = { status => 0 };
856              
857             # if ( $rulename eq "outer_join_sign")
858             # {
859             # printf "found!\n";
860             # }
861              
862 23         35 for my $e (@$rhslist)
863             {
864 60 100 66     212 $e = $e->{rhs} if ref $e eq "HASH" && exists $e->{rhs} && !exists $e->{token};
      66        
865 60 50 66     150 $e = { token => $e } if ref $e eq "HASH" && exists $e->{type} && exists $e->{definition};
      66        
866              
867 60         79 my $negation = retrieveModifier($e, 'negation');
868 60         68 my $cardinality = retrieveModifier($e, 'cardinality');
869              
870 60 50 33     136 if (exists $e->{comment} || exists $e->{action})
871             {
872 0 0 0     0 if (exists $e->{comment} && !$self->testoption('stripallcomments'))
873             {
874 0         0 push @{$metalist->{comments}}, $self->renderComment($rulename, [$e->{comment}]);
  0         0  
875 0         0 $metalist->{state} = 1;
876             }
877              
878 0 0 0     0 if (exists $e->{action} && !$self->testoption('stripactions'))
879             {
880 0 0       0 push @{$metalist->{actions}}, $self->testoption('embedactions') ? $e->{action} : $self->renderComment($rulename, [$e->{action}]);
  0         0  
881 0         0 $metalist->{state} = 1;
882             }
883              
884             ##
885             # skip the rest of processing for comment/action tokens
886             ##
887 0         0 next;
888             }
889              
890 60 50 33     133 if (ref $e eq "HASH" && !exists $e->{token})
891             {
892 0 0 0     0 $self->abortWithError( "generic regex rules not supported in rule $rulename", $e ) if exists $e->{type} && $e->{type} eq "regex";
893 0         0 $self->abortWithError( "INTERNAL: 'e' must be a hash and contain 'token' in rule $rulename", $e );
894             }
895              
896 60         69 my $alternative = retrieveModifier($e, 'alternative');
897              
898 60 100       82 if (defined $alternative)
899             {
900 11 50       16 if (scalar @$currentlist > 0)
901             {
902             ##
903             # close the current alternative list
904             ##
905 11         12 my $newlist;
906              
907             ##
908             # replace a keyword built from lower/upper case character classes by a literal
909             ##
910 11         12 my $keyword = $keywordfragment->{value};
911 11 50 66     18 if ($self->testoption("buildkeywords") && $keywordfragment->{status} == 0 && $keyword =~ /^[a-z]/i)
      66        
912             {
913 0         0 $currentlist = [
914             { type => "literal", value => $keyword, isLexeme => 1 }
915             ];
916 0         0 $newlist = { list => $currentlist, isLexeme => 1 };
917             }
918             else
919             {
920 11         19 $newlist = { list => $currentlist };
921 11 100       20 $newlist->{class4list} = $class4list->{value} if $class4list->{status} != -1;
922 11 50       18 $newlist->{metalist} = $metalist if exists $metalist->{state};
923             }
924              
925 11         14 push @$alternativelists, $newlist;
926             ##
927             # re-initialize the list state
928             ##
929 11         13 $currentlist = [];
930 11         22 $class4list = { status => 0, value => "" };
931 11         24 $metalist = { comments => [], actions => [] };
932 11         14 $keywordfragment = { status => 0 };
933 11         14 $alternativelength = 0;
934             }
935             }
936             else
937             {
938             ##
939             # a sequence of literals can't be mapped to a class, so we reset class4list
940             ##
941 49 100       64 if ($alternativelength > 1)
942             {
943 12         13 $class4group->{status} = -1;
944 12         15 delete $class4group->{value};
945             }
946             }
947              
948 60         56 my $mxelement;
949              
950 60 100       96 if (ref $e->{token} eq "")
951             {
952             ##
953             # translate a G4 subrule into a Marpa subrule
954             ##
955 21         25 my $symbol = $e->{token};
956 21         42 $mxelement = $self->processSubRule($symbol, $context);
957             }
958             else
959             {
960 39 50 33     92 $self->abortWithError( "can't process group for rule $rulename", $rhslist ) if ref $e ne "HASH" || !exists $e->{token};
961 39         78 $mxelement = $self->walktoken($rulename, $e->{token}, $context);
962             # ignore embedded comments/actions
963 39 50 66     95 next if exists $mxelement->{type} && $mxelement->{type} eq "ignore";
964              
965 39         65 my $keywordfragment = $self->isKeywordFragment($rulename, $mxelement);
966 39 100       84 $mxelement->{keywordfragment} = $keywordfragment if defined $keywordfragment;
967             }
968              
969             ##
970             # collect embedded comments in the alternative branch state
971             ##
972 60 50       85 if (exists $mxelement->{comments})
973             {
974 0         0 push (@{$metalist->{comments}}, @{$mxelement->{comments}});
  0         0  
  0         0  
975 0         0 $metalist->{state} = 1;
976 0         0 delete $mxelement->{comments};
977 0 0       0 next if scalar keys %$mxelement == 0;
978             }
979              
980             ##
981             # CAVEAT: retrieve 'isLexeme' from $mxelement
982             ##
983 60         72 my $il = retrieveModifier($mxelement, 'isLexeme');
984 60 100       92 $groupisLexeme->{status} = -1 if !defined $il;
985              
986             ##
987             # negation should not come from the token
988             ##
989 60 50       83 mydie "processRightsides : unexpected negation in 'mxelement'" if exists $mxelement->{negation};
990              
991 60 50       73 if ( defined $negation )
992             {
993 0         0 $mxelement = $self->negateElement($rulename, $mxelement, $context);
994             }
995              
996             ##
997             # CAVEAT: use 'cardinality' from $e since $mxtoken
998             # was computed without metainformation.
999             ##
1000 60 100       77 if (defined $cardinality)
1001             {
1002             ##
1003             # if both the parent and the sub rules are simple, we can propagate the cardinality
1004             # to the subrule.
1005             # in all other cases we create a new subrule.
1006             ##
1007 10 100 66     21 if ( scalar @$rhslist <= 1 && $self->retagSimpleRule($rulename, $mxelement, $cardinality, $context) )
1008             {
1009             ##
1010             # if the tagged element is part of an alternative branch, we have to make it into a subrule
1011             ##
1012 1 50       3 if (defined $alternative)
1013             {
1014 0 0       0 $mxelement = $self->createSubRule( $rulename, $mxelement, undef, undef, $context ) if scalar @$rhslist > 1;
1015             }
1016             }
1017             else
1018             {
1019             ##
1020             # create a subrule for cardinality-tagged element lists
1021             ##
1022 9         18 $mxelement = $self->createSubRule( $rulename, $mxelement, undef, $cardinality, $context );
1023              
1024             ##
1025             # propagate the cardinality to the new subrule
1026             # ("?" is wrapped into an 'opt_' rule in 'createSubRule', so it was already consumed).
1027             #
1028             # TODO : check if the '!defined $alternative' clause below should be reinstated
1029             ##
1030             # if ( !defined $alternative && $cardinality ne "?" && !$self->retagSimpleRule($rulename, $mxelement, $cardinality, $context, {abortatfirstlevel => 'true'}) )
1031 9 50 66     27 if ( $cardinality ne "?" && !$self->retagSimpleRule($rulename, $mxelement, $cardinality, $context, {abortatfirstlevel => 'true'}) )
1032             {
1033 0 0       0 mydie "INTERNAL: no destination for cardinality found in rule '$rulename'" if scalar @$rhslist > 1;
1034 0 0       0 $mxelement->{cardinality} = $cardinality if $cardinality ne "?";
1035             }
1036             }
1037              
1038             # cardinality-annotated subrules loose lexeme status
1039 10         13 delete $mxelement->{class4list};
1040 10         11 delete $mxelement->{isLexeme};
1041 10         12 $groupisLexeme->{status} = -1;
1042             }
1043              
1044 60         107 mergeClass4List( \%$class4list, $mxelement );
1045 60         113 mergeClass4List( \%$class4group, $mxelement );
1046 60         94 mergeKeywordFragment( \%$keywordfragment, $mxelement );
1047              
1048 60         73 push @$currentlist, $mxelement;
1049 60         60 ++$alternativelength;
1050              
1051             ##
1052             # a list of lexemes is not itself a lexeme (unless declared in the 'lexer' grammar).
1053             ##
1054 60 100       111 $groupisLexeme->{status} = -1 if $alternativelength > 1;
1055             }
1056              
1057             ##
1058             # replace a keyword built from lower/upper case character classes by a literal.
1059             # keyword literals must start with a letter.
1060             ##
1061 23 50 66     33 if ($self->testoption("buildkeywords") && $keywordfragment->{status} == 0 && $keywordfragment->{value} =~ /^[a-z]/i)
      66        
1062             {
1063             $currentlist = [
1064 0         0 { type => "literal", value => $keywordfragment->{value}, isLexeme => 1 }
1065             ];
1066 0         0 $groupisLexeme->{status} = 0;
1067             }
1068              
1069             ##
1070             # append the current list to the alternative list if it has entries.
1071             ##
1072 23 50       42 if ( scalar @$currentlist > 0)
1073             {
1074 23         37 my $newlist = { list => $currentlist };
1075 23 100       49 $newlist->{class4list} = $class4list->{value} if $class4list->{status} != -1;
1076 23 100       32 $newlist->{isLexeme} = 1 if $groupisLexeme->{status} == 0;
1077 23 50       32 $newlist->{metalist} = $metalist if exists $metalist->{state};
1078 23         30 push @$alternativelists, $newlist;
1079             }
1080              
1081             ##
1082             # remove branches that are case-equivalent
1083             ##
1084 23 50       30 $alternativelists = $self->removeCaseEquivalentBranches($alternativelists) if $self->testoption('matchcaseinsensitive');
1085              
1086 23         41 my $grouplist = { type => 'group', list => $alternativelists };
1087             # single lists of literals can be tagged as lexemes
1088             # regular rules with alternative right sides will be tagged as G1
1089 23 100       38 $grouplist->{class4list} = $class4group->{value} if $class4group->{status} != -1;
1090 23 100       32 $grouplist->{isLexeme} = 1 if $groupisLexeme->{status} == 0;
1091              
1092 23         59 return $grouplist;
1093             }
1094              
1095             ## ------------------------------------------------------
1096             # 'walkgroup' processes the tokens from a parenthesized group
1097             # or from a complete rule
1098             ## ------------------------------------------------------
1099             sub walkgroup
1100             {
1101 9     9 0 14 my ($self, $rulename, $tokengroup, $context) = @_;
1102              
1103 9 50 33     27 $self->abortWithError( "INTERNAL: group is missing 'definition' in rule $rulename", $tokengroup ) if ref $tokengroup ne "HASH" || !exists $tokengroup->{definition};
1104 9         10 my $definition = $tokengroup->{definition};
1105 9         30 my $grouplist = $self->processRightSides( $rulename, $definition, $context );
1106              
1107 9         12 return $grouplist;
1108             }
1109              
1110             ## ------------------------------------------------------
1111             # 'walkrule' processes the 'rightsides' list of a parsed rule
1112             ## ------------------------------------------------------
1113             sub walkrule
1114             {
1115 23     23 0 37 my ($self, $rulename, $rule, $context) = @_;
1116              
1117             # if ( $rulename eq "outer_join_sign")
1118             # {
1119             # printf "found!\n";
1120             # }
1121              
1122 23         39 my $symboltable = $self->symboltable;
1123 23         39 my $rulestatus = $symboltable->rulestatus($rulename);
1124              
1125             # test if we traversed this node already
1126 23 100       34 if ( defined $rulestatus )
1127             {
1128 9 50 33     18 if ($rulestatus eq "synthetic" && !$self->checkMarpaRuleExists($rulename))
1129             {
1130 0         0 my $mxrightside = $self->processRightSides( $rulename, $rule->{rightsides}, $context );
1131 0         0 $self->addMarpaRule($rulename, $mxrightside);
1132 0         0 $symboltable->tagrule($rulename, 'done');
1133 0         0 $rulestatus = $symboltable->rulestatus($rulename);
1134             }
1135              
1136 9         8 my $mxrule;
1137              
1138             SWITCH: {
1139 9 100       10 ($rulestatus eq "inprogress") && do {$mxrule = $symboltable->rule( $rulename ); last SWITCH; };
  9         14  
  3         6  
  3         4  
1140 6 50       11 ($rulestatus eq "done") && do {$mxrule = $self->getMarpaRule( undef, $rulename ); last SWITCH; };
  6         10  
  6         7  
1141 0         0 do { mydie "unexpected rule status $rulestatus in rule $rulename" };
  0         0  
1142             }
1143              
1144 9         26 my $result = { mxrule => $rulename, rhs => { token => $rulename } };
1145 9 100       17 $result->{class4list} = $mxrule->{class4list} if exists $mxrule->{class4list};
1146 9         12 return $result;
1147             }
1148              
1149 14         30 $self->enterLevel($rulename, $context);
1150              
1151 14         15 my $mxrightside = {};
1152 14         18 my $rightside = $rule->{rightsides};
1153 14 50       23 if (defined $rightside)
1154             {
1155 14 50       24 $self->abortWithError( "rhs is not an array ref in $rulename", $rightside ) if ref $rightside ne "ARRAY";
1156              
1157 14 100 66     47 $context->{subruleindex}{$rulename}{grammarstate} = $rule->{grammarstate} if exists $rule->{grammarstate} && $rule->{grammarstate} eq "lexer";
1158 14 100 66     33 $context->{subruleindex}{$rulename}{isFragmentOrChild} = 'true' if exists $rule->{type} && $rule->{type} eq "fragment";
1159              
1160             ##
1161             # prevent infinite recursion by tagging any symbol
1162             # before processing begins with 'inprogress'.
1163             # the 'defined($rulestatus)' clause at the top ensures
1164             # that we don't pick up a symbol that is already
1165             # being processed.
1166             ##
1167 14         31 $symboltable->tagrule($rulename, 'inprogress');
1168 14         44 $mxrightside = $self->processRightSides( $rulename, $rightside, $context );
1169 14         30 $symboltable->tagrule($rulename, 'done');
1170             }
1171              
1172 14 100 66     41 $mxrightside->{grammarstate} = $rule->{grammarstate} if exists $rule->{grammarstate} && $rule->{grammarstate} eq "lexer";
1173 14 100 66     30 $mxrightside->{isFragmentOrChild} = 'true' if exists $rule->{type} && $rule->{type} eq "fragment";
1174 14 100       17 if (exists $rule->{redirect})
1175             {
1176 1         3 $mxrightside->{redirected} = 'true';
1177 1         2 push @{$self->{discarded}}, $rulename;
  1         2  
1178             }
1179              
1180 14         31 $self->addMarpaRule($rulename, $mxrightside);
1181 14         26 $self->exitLevel($rulename, $context);
1182              
1183 14         24 return $mxrightside;
1184             }
1185              
1186             ## ------------------------------------------------------
1187             # 'processSub' processes a reference to a G4 subrule
1188             ## ------------------------------------------------------
1189             sub processSubRule
1190             {
1191 21     21 0 49 my ($self, $rulename, $context) = @_;
1192              
1193 21         28 my $symboltable = $self->symboltable;
1194 21         39 my $rule = $symboltable->rule($rulename);
1195              
1196 21         41 my $mxrule = $self->walkrule( $rulename, $rule, $context );
1197              
1198 21         50 my $result = { rule => $rulename, rhs => { token => $rulename } };
1199              
1200 21         32 my $keywordfragment = $self->isKeywordFragment($rulename, $mxrule);
1201 21 100       35 $result->{keywordfragment} = $keywordfragment if defined $keywordfragment;
1202              
1203 21         32 return $result;
1204             }
1205              
1206             ## ------------------------------------------------------
1207             # 'reportUnusedRules' report all symbol table entries
1208             # that are not referred to by
1209             # parent rules.
1210             ## ------------------------------------------------------
1211             sub printHeaderUnusedRulesReport
1212             {
1213 0     0 0 0 printf "\n";
1214 0         0 printf "WARNING: the rules listed below are orphaned. They can't be reached from the start rule:\n";
1215 0         0 printf "=======:\n\n";
1216 0         0 printf <<'END_OF_SOURCE';
1217             +----------------------------- rule name
1218             +--!----------------------------- Lexical (L) or parser rule
1219             +--!--!----------------------------- Fragment (F) or regular rule
1220             ! ! !
1221             V V V
1222             END_OF_SOURCE
1223             }
1224              
1225             sub reportUnusedRules
1226             {
1227 1     1 0 2 my ($self) = @_;
1228              
1229 1         3 my $symboltable = $self->symboltable;
1230              
1231 1         3 my $status = 0;
1232 1         3 my @symbols = $symboltable->symbols;
1233              
1234 1         6 for my $rulename (sort @symbols)
1235             {
1236 14         18 my $rule = $symboltable->rule($rulename);
1237 14 50       21 if (!exists $rule->{generationstatus})
1238             {
1239 0 0       0 if (!$status)
1240             {
1241 0         0 printHeaderUnusedRulesReport();
1242 0         0 $status = 1;
1243             }
1244              
1245             printf "[%1s][%1s] %s\n",
1246             (exists $rule->{grammarstate} && $rule->{grammarstate} =~ /lexer/i) ? "L" : "",
1247 0 0 0     0 (exists $rule->{isFragmentOrChild}) ? "F" : "",
    0          
1248             $rulename;
1249             }
1250             }
1251              
1252 1 50       6 printf "\n\n" if $status;
1253             }
1254              
1255             ## ------------------------------------------------------
1256             # 'processRedirectRules' define Marpa rules for all
1257             # G4 rules that are tagged 'redirect'
1258             # and are not referred in parent rules
1259             ## ------------------------------------------------------
1260             sub processRedirectRules
1261             {
1262 1     1 0 3 my ($self) = @_;
1263              
1264 1         4 my $symboltable = $self->symboltable;
1265              
1266 1         7 my @symbols = $symboltable->symbols;
1267              
1268 1         10 for my $rulename (sort @symbols)
1269             {
1270 14         21 my $rule = $symboltable->rule($rulename);
1271 14 50 66     28 if (!exists $rule->{generationstatus} && exists $rule->{redirect})
1272             {
1273 1         5 $self->walkrule( $rulename, $rule, {level => 0} );
1274             }
1275             }
1276             }
1277              
1278             ## -------------------------------------------------------------------------------------
1279             # 'translateG4Grammar' create nested Marpa rules for the G4 rules from the symbol table
1280             # by doing a depth-first traversal starting with the grammar's
1281             # start symbol.
1282             # this creates the convex hull of the start symbol collecting all
1283             # symbols that can be reached from the start symbol.
1284             # 'reportUnusedRules' creates a report of all symbols from the
1285             # parse tree that are not part of the convex hull.
1286             ## -------------------------------------------------------------------------------------
1287             sub translateG4Grammar
1288             {
1289 1     1 0 3 my ($self) = @_;
1290              
1291 1         3 my $symboltable = $self->symboltable;
1292              
1293 1         4 my $startrule = $symboltable->startrule;
1294 1         3 my $startrulename = $startrule->{name};
1295 1         4 $startrule = $symboltable->rule($startrulename);
1296              
1297 1 50       4 mydie "INTERNAL: start rule $startrulename is missing 'rightsides'" if !exists $startrule->{rightsides};
1298              
1299 1         4 $self->walkrule( $startrulename, $startrule, {level => 0} );
1300              
1301 1         12 $self->processRedirectRules;
1302 1         4 $self->reportUnusedRules;
1303             }
1304              
1305             ## ================================================== #
1306             # write the generated Marpa rules to the output file #
1307             ## ================================================== #
1308              
1309             sub openOutputFile
1310             {
1311 1     1 0 2 my ($self) = @_;
1312              
1313 1         3 my $outputfile = $self->{outputfilename};
1314              
1315 1 50       4 if ( $outputfile ne "-" )
1316             {
1317 1         1 my $outf;
1318 1 50       144 mydie("cannot open output file $outputfile") unless open( $outf, ">$outputfile" );
1319 1         6 $self->{outf} = $outf;
1320 1         4 $self->{is_stdout} = 0;
1321             }
1322             else
1323             {
1324 0         0 $self->{outf} = *STDOUT;
1325 0         0 $self->{is_stdout} = 1;
1326             }
1327             }
1328              
1329             sub closeOutputFile
1330             {
1331 1     1 0 2 my ($self) = @_;
1332 1 50       865 close($self->{outf}) if $self->{is_stdout};
1333             }
1334              
1335             sub pad
1336             {
1337 50     50 0 65 my ($self, $s ) = @_;
1338              
1339 50 50       72 return undef if !defined $s;
1340              
1341 50         47 my $len = length($s);
1342 50         59 my $pad = $self->{indent} - $len;
1343              
1344 50 100       66 return $s if $pad < 0;
1345              
1346 49         161 return $s . (" " x $pad);
1347             }
1348              
1349             sub renderComment
1350             {
1351 0     0 0 0 my ($self, $rulename, $comments) = @_;
1352              
1353 0 0       0 mydie "INTERNAL : 'comments' is not an array in rule $rulename" if ref $comments ne "ARRAY";
1354              
1355 0         0 my $result = [];
1356 0         0 for my $comment (@$comments)
1357             {
1358 0         0 $comment =~ s/\/\*(.*)\*\//$1/;
1359 0         0 my @commentlist = split '\n', $comment;
1360              
1361 0         0 for my $cl (@commentlist)
1362             {
1363 0         0 $cl =~ s/^\/\/\s*//;
1364 0         0 $cl =~ s/^#\s*//;
1365 0         0 push @$result, "# ${cl}";
1366             }
1367             }
1368              
1369 0         0 return $result;
1370             }
1371              
1372             sub processRedirected
1373             {
1374 39     39 0 53 my ($self, $token) = @_;
1375              
1376 39 50       52 mydie "processRedirected : 'token' must be a scalar" if ref $token ne "";
1377              
1378 39         49 my $rule = $self->getMarpaRule(undef, $token);
1379              
1380 39 50 33     50 if ($self->testoption('matchcaseinsensitive') && $self->isKeywordLetter($token, $rule))
1381             {
1382 0         0 my $value = $rule->{class4list};
1383 0         0 $value = substr($value, 0, 1);
1384 0         0 return { value => "'${value}':i" };
1385             }
1386              
1387 39 50       54 return { discard => 'true' } if exists $rule->{redirected};
1388              
1389 39         71 return { value => $token };
1390             }
1391              
1392             sub fragmentEligible2Convert
1393             {
1394 28     28 0 36 my ($rule) = @_;
1395              
1396 28 100       62 return 0 if !exists $rule->{class4list};
1397 1         2 my $class4list = $rule->{class4list};
1398              
1399             # replace escaped characters and unicode codepoints with a single character
1400 1         3 $class4list =~ s/\\([^u])/$1/g;
1401 1         2 $class4list =~ s/\\u([0-9a-f]{4,4})/u/ig;
1402              
1403 1         4 return length($class4list) > 1;
1404             }
1405              
1406             sub writeFragmentAsClass
1407             {
1408 1     1 0 3 my ($self, $rulename, $rule, $options ) = @_;
1409              
1410 1         3 my $synthclass = { type => "class", value => $rule->{class4list} };
1411 1         5 my $outputline = $self->printRhs( $rulename, $synthclass, {status => 0, delimiter => '|', assignop => '~'} );
1412 1         5 $self->deleteAllSubrules($rulename);
1413              
1414 1         2 my $outf = $self->{outf};
1415 1         3 printf $outf "%s\n", $outputline;
1416             }
1417              
1418             ##
1419             # normalizeClassString : remove class elements that are redundant when the class is made case-insensitive
1420             # return the modified class text as well as an indicator if the class contains letters
1421             ##
1422             sub normalizeClassString
1423             {
1424 3     3 0 7 my ($self, $classstring ) = @_;
1425              
1426 3         5 my $classhash = {};
1427 3         4 my $characterhash = {};
1428              
1429             # strip [/] from classtring
1430 3         5 $classstring =~ s/^\[([^\]]*)\]$/$1/;
1431              
1432 3         66 my $result = "";
1433 3         4 my $isalphaclass = 0;
1434 3         7 while ( $classstring ne "" )
1435             {
1436             SWITCH: {
1437 12 50       10 ($classstring =~ /(\\u[0-9a-f]+)-(\\u[0-9a-f]+)(.*)$/i) && do {
  12         16  
1438 0         0 my $beg = $1;
1439 0         0 my $end = $2;
1440 0         0 $result .= "${1}-${2}";
1441 0         0 $classstring = $3;
1442 0         0 last SWITCH;
1443             };
1444 12 50       20 ($classstring =~ /(\\u[0-9a-f]{4,4})(.*)$/i) && do {
1445 0         0 my $c = $1;
1446 0         0 $result .= $c;
1447 0         0 $classstring = $2;
1448 0         0 last SWITCH;
1449             };
1450 12 100       21 ($classstring =~ /([^-])-([^-])(.*)$/) && do {
1451 3         4 my $beg = $1;
1452 3         5 my $end = $2;
1453 3         4 my $key = "${beg}-${end}";
1454 3 50 66     12 if (!exists $classhash->{lc $key} && !exists $classhash->{uc $key})
1455             {
1456 2         5 $classhash->{$key} = 1;
1457 2         3 $result .= $key;
1458 2 100 66     9 $isalphaclass = 1 if $beg =~ /[a-z]/i || $end =~ /[a-z]/i;
1459             }
1460 3         4 $classstring = $3;
1461 3         6 last SWITCH;
1462             };
1463 9 100       14 ($classstring =~ /(\\.)(.*)$/) && do {
1464 1         2 my $c = $1;
1465 1 50       4 if (!exists $characterhash->{$c})
1466             {
1467 1         3 $characterhash->{$c} = 1;
1468 1         2 $result .= $c;
1469             }
1470 1         4 $classstring = $2;
1471 1         3 last SWITCH;
1472             };
1473 8 50       20 ($classstring =~ /(.)(.*)$/) && do {
1474 8         11 my $c = $1;
1475 8 100 66     23 if (!exists $characterhash->{lc $c} && !exists $characterhash->{uc $c})
1476             {
1477 7         14 $characterhash->{$c} = 1;
1478 7         8 $result .= $c;
1479 7 100       14 $isalphaclass = 1 if $c =~ /^[a-z]$/i;
1480             }
1481 8         43 $classstring = $2;
1482 8         14 last SWITCH;
1483             };
1484             }
1485             }
1486              
1487 3         10 return { isalphaclass => $isalphaclass, classstring => $result };
1488             }
1489              
1490             sub isAlphaLiteral
1491             {
1492 29     29 0 37 my ($self, $literal ) = @_;
1493              
1494 29 100       79 return 0 if $literal !~ /[a-z]/i;
1495              
1496 8         9 my $isAlpha = 0;
1497              
1498 8         15 while ( $literal ne "" )
1499             {
1500             SWITCH: {
1501 43 100       33 ($literal =~ /(\\.)(.*)$/) && do {
  43         63  
1502 4         10 my $c = $1;
1503 4         4 $literal = $2;
1504 4         7 last SWITCH;
1505             };
1506 39 50       72 ($literal =~ /(.)(.*)$/) && do {
1507 39         56 my $c = $1;
1508 39         43 $literal = $2;
1509 39 100       54 $isAlpha = 1 if $c =~ /^[a-z]$/i;
1510 39         56 last SWITCH;
1511             };
1512             }
1513             }
1514              
1515 8         17 return $isAlpha;
1516             }
1517              
1518             ##
1519             # computeRhs :
1520             ##
1521             sub computeRhs
1522             {
1523 69     69 0 80 my ($self, $rulename, $rhs, $options ) = @_;
1524              
1525 69 100       90 my $isLexeme = 1 if exists $rhs->{isLexeme};
1526              
1527             # if ($rulename eq "HEXNUMBER_003")
1528             # {
1529             # printf "found!\n";
1530             # }
1531              
1532             SWITCH: {
1533 69 100 100     65 (exists $rhs->{type} && $rhs->{type} eq "negatedclass") && do {
  69         114  
1534 1 50       3 mydie "computeRhs : 'value' missing in negatedclass $rulename" if !exists $rhs->{value};
1535 1         4 $rhs = { value => "[" . $rhs->{value} . "]" };
1536 1         2 last SWITCH;
1537             };
1538 68 50       86 (exists $rhs->{deletedafterconversiontoclass}) && do {
1539 0         0 $rhs = { discard => 'true' };
1540 0         0 last SWITCH;
1541             };
1542 68 50 66     115 (exists $rhs->{type} && $rhs->{type} eq "unsupported") && do {
1543 0         0 mydie $rhs->{msg};
1544 0         0 last SWITCH;
1545             };
1546 68 100 100     118 (exists $rhs->{type} && $rhs->{type} eq "class") && do {
1547 8         9 my $value = $rhs->{value};
1548 8         10 my $modifier = "";
1549 8 100 66     11 if ($self->testoption('matchcaseinsensitive') && $self->isAlphaLiteral($value))
1550             {
1551 3         7 my $normclass = $self->normalizeClassString($value);
1552 3         6 $value = $normclass->{classstring};
1553 3 50       6 $modifier = ':ic' if $normclass->{isalphaclass};
1554             }
1555 8         19 $rhs = { value => "[${value}]${modifier}" };
1556 8         10 last SWITCH;
1557             };
1558 60 100 66     96 (exists $rhs->{type} && $rhs->{type} eq "literal") && do {
1559 21         24 my $literal = $rhs->{value};
1560 21 50       50 $literal = ($literal eq "\\'") ? "[']" : "'${literal}'";
1561 21 100 66     26 if ($self->testoption('matchcaseinsensitive') && $self->isAlphaLiteral($literal))
1562             {
1563 4         6 $literal = lc $literal;
1564 4         5 $literal .= ':i';
1565             }
1566 21         38 $rhs = { value => $literal };
1567 21         25 last SWITCH;
1568             };
1569 39 50 33     52 (exists $rhs->{type} && $rhs->{type} eq "action") && do {
1570 0         0 push @{$options->{comments}}, " # Action: " . $rhs->{token}{action};
  0         0  
1571 0         0 $rhs = { discard => 'true' };
1572 0         0 last SWITCH;
1573             };
1574 39 50       55 (exists $rhs->{rhs}) && do {
1575 39         40 $rhs = $rhs->{rhs};
1576 39 50 33     93 mydie "computeRhs: scalar 'token' missing in rhs" if !exists $rhs->{token} || ref $rhs->{token} ne "";
1577 39         52 $rhs = $self->processRedirected($rhs->{token});
1578 39         45 last SWITCH;
1579             };
1580 0 0       0 (exists $rhs->{token}) && do {
1581 0         0 $rhs = $self->processRedirected($rhs->{token});
1582 0         0 last SWITCH;
1583             };
1584 0 0       0 (exists $rhs->{comments}) && do {
1585 0         0 my $cl = $self->renderComment($rulename, $rhs->{comments});
1586 0         0 push (@{$options->{comments}}, @$cl );
  0         0  
1587 0         0 $rhs = { discard => 'true' };
1588 0         0 last SWITCH;
1589             };
1590 0         0 do {
1591 0         0 $self->abortWithError( "computeRhs : don't know how to process rhs for $rulename, modify this rule to make it convertible", $rhs );
1592             };
1593             }
1594              
1595 69         74 return $rhs;
1596             }
1597              
1598             ##
1599             # printRhs : print a single right hand clause (alternative) of a rule.
1600             # depending on $option->{status} format the clause :
1601             # 0 : prefix the rule name and use '::=' or '~' as operator
1602             # 1 : start an alternative branch
1603             # 2 : add a clause to a branch
1604             ##
1605             sub printRhs
1606             {
1607 69     69 0 82 my ( $self, $rulename, $rule, $options ) = @_;
1608              
1609 69 50       98 mydie "printRhs: 'status' not defined for rule $rulename" if !exists $options->{status};
1610              
1611 69         67 my $status = $options->{status};
1612 69         135 my $rhs = $self->computeRhs($rulename, $rule, $options);
1613              
1614 69 50       96 return "" if exists $rhs->{discard};
1615              
1616 69 50 33     150 mydie "'options' not well-defined" if ref $options ne "HASH" || !exists $options->{delimiter};
1617              
1618 69         73 my $delimiter = $options->{delimiter};
1619              
1620 69         67 my $cardinality = "";
1621 69 100       80 $cardinality = $options->{cardinality} if exists $options->{cardinality};
1622 69 50 33     96 $cardinality =~ s/([+*])\?/$1/ if $cardinality =~ /[+*]\?/ && $self->testoption('shiftlazytogreedy');
1623 69 50       81 mydie sprintf "lazy quantifier %s in rule %s not supported by Marpa", $cardinality, $rulename if $cardinality =~ /[+*]\?/;
1624              
1625             # if ($rulename eq "local_xmlindex_clause_001")
1626             # {
1627             # printf "found!\n";
1628             # }
1629              
1630 69         53 my $definitionop = "::=";
1631 69 100       98 $definitionop = $options->{assignop} if exists $options->{assignop};
1632              
1633 69         60 my $result;
1634              
1635             # process optional rules
1636 69 100 66     121 if ( defined $cardinality && $cardinality eq "?")
1637             {
1638 4         8 $result = sprintf "%s %-3s %s%s\n", $self->pad($rulename), $definitionop, "", "";
1639 4         6 $cardinality = "";
1640             }
1641              
1642             SWITCH: {
1643 69 100       62 ($status == 0) && do {
  69         89  
1644 32         43 $result .= sprintf "%s %-3s %s%s", $self->pad($rulename), $definitionop, $rhs->{value}, $cardinality;
1645 32         39 last SWITCH;
1646             };
1647 37 100       51 ($status == 1) && do {
1648 11         16 $result .= sprintf "%s %-3s %s%s", $self->pad(""), $delimiter, $rhs->{value}, $cardinality;
1649 11         15 last SWITCH;
1650             };
1651 26 50       28 ($status == 2) && do {
1652 26         48 $result .= sprintf " %s%s", $rhs->{value}, $cardinality;
1653 26         27 last SWITCH;
1654             };
1655 0         0 do {
1656 0         0 mydie "printRhs : illegal status";
1657             };
1658             }
1659              
1660 69         123 return $result;
1661             }
1662              
1663             ##
1664             # writeMarpaRuleList : print all alternative branches of a rule
1665             ##
1666             sub writeMarpaRuleList
1667             {
1668 22     22 0 29 my ( $self, $rulename, $rule, $options ) = @_;
1669              
1670             # if ($rulename eq "MINUSCHAR")
1671             # {
1672             # printf "found!\n";
1673             # }
1674              
1675 22         22 $options->{status} = 0;
1676              
1677 22         23 my $outf = $self->{outf};
1678              
1679 22 50       34 mydie "writeMarpaRuleList : 'list' missing from 'rule' in $rulename" if !exists $rule->{list};
1680              
1681 22         24 my $list = $rule->{list};
1682 22         20 my $actiontext;
1683              
1684 22         30 for my $rulelist (@$list)
1685             {
1686 33 50       51 mydie "writeMarpaRuleList : 'list' missing from 'rulelist' in $rulename" if !exists $rulelist->{list};
1687              
1688 33 50       39 if (exists $rulelist->{metalist})
1689             {
1690 0         0 my $metalist = $rulelist->{metalist};
1691              
1692 0         0 for my $cl (@{$metalist->{comments}})
  0         0  
1693             {
1694 0 0       0 $cl = $cl->[0] if ref $cl eq "ARRAY";
1695 0         0 printf $outf "%s\n", $cl;
1696             }
1697 0 0       0 if (exists $metalist->{actions})
1698             {
1699 0 0       0 if (!$self->testoption('embedactions'))
1700             {
1701 0         0 for my $cl (@{$metalist->{actions}})
  0         0  
1702             {
1703 0 0       0 $cl = $cl->[0] if ref $cl eq "ARRAY";
1704 0         0 printf $outf "%s\n", $cl;
1705             }
1706             }
1707             else
1708             {
1709 0         0 $actiontext = "";
1710 0         0 for my $cl (@{$metalist->{actions}})
  0         0  
1711             {
1712 0 0       0 $cl = $cl->[0] if ref $cl eq "ARRAY";
1713 0         0 $actiontext .= $cl;
1714             }
1715             }
1716             }
1717             }
1718              
1719 33         36 my $outputline = "";
1720 33         34 my $linelen = 0;
1721              
1722 33         31 for my $rhs (@{$rulelist->{list}})
  33         45  
1723             {
1724 59 100       94 my $cardinality = $rhs->{cardinality} if exists $rhs->{cardinality};
1725              
1726 59         61 $options->{delimiter} = "|";
1727 59 100       73 $options->{cardinality} = $cardinality if defined $cardinality;
1728              
1729 59 50       70 if (exists $rhs->{comments})
1730             {
1731 0         0 my $cl = $rhs->{comments};
1732 0         0 mydie "TODO : comments";
1733             }
1734              
1735 59         53 my $nextclause;
1736              
1737             SWITCH: {
1738 59 0 66     54 (exists $rhs->{type} && $rhs->{type} eq "group" && exists $rhs->{list}) && do {
  59   33     109  
1739 0 0       0 if ( exists $rhs->{class4list})
1740             {
1741 0         0 $nextclause = $self->printRhs( $rulename, $rhs, $options );
1742 0         0 $options->{status} = 2;
1743             }
1744             else
1745             {
1746 0         0 mydie "writeMarpaRuleList : unexpected embedded 'list'";
1747             }
1748 0         0 last SWITCH;
1749             };
1750 59         53 do {
1751 59         81 $nextclause = $self->printRhs( $rulename, $rhs, $options );
1752 59         61 $options->{status} = 2;
1753 59         64 last SWITCH;
1754             };
1755             }
1756              
1757 59         60 $options->{delimiter}= "|";
1758              
1759 59 50       87 if ($linelen + length($nextclause) > $self->{indent} + 3 + 125)
1760             {
1761 0         0 $outputline .= "\n" . (' ' x $self->{indent}) . (' ' x 3) . ' ' . $nextclause;
1762 0         0 $linelen = $self->{indent} + 3 + length($nextclause);
1763             }
1764             else
1765             {
1766 59         59 $outputline .= $nextclause;
1767 59         72 $linelen += length($nextclause);
1768             }
1769             }
1770              
1771 33 50       46 if (defined $actiontext)
1772             {
1773 0         0 my $padlen = $self->{indent} + 40 - length($outputline);
1774 0 0       0 $padlen = 1 if $padlen < 0;
1775 0         0 $outputline .= ' ' x $padlen . "action => ${actiontext}";
1776 0         0 $actiontext = undef;
1777             }
1778              
1779 33         32 $options->{status} = 1;
1780 33 50       82 printf $outf "%s\n", $outputline if length($outputline) > 0;
1781 33         46 $linelen = 0;
1782             }
1783              
1784 22 50       38 if (exists $options->{comments})
1785             {
1786 0         0 for my $cl (@{$options->{comments}})
  0         0  
1787             {
1788 0         0 printf $outf "%s\n", $cl;
1789             }
1790 0         0 delete $options->{comments};
1791             }
1792             }
1793              
1794             ##
1795             # writeMarpaRule : print a complute rule :
1796             # - rule name + 1st branch
1797             # - all subsequent branches
1798             ##
1799             sub writeMarpaRule
1800             {
1801 32     32 0 43 my ($self, $rulename ) = @_;
1802              
1803 32 50       43 $self->dumpMarpaRule($rulename) if $self->{verbosity};
1804              
1805 32         55 $self->tagMarpaRule($rulename);
1806              
1807             # if ( $rulename eq "IDENTIFIER")
1808             # {
1809             # printf "found!\n";
1810             # }
1811              
1812 32         44 my $rule = $self->getMarpaRule(undef, $rulename);
1813              
1814 32 100       47 my $cardinality = $rule->{cardinality} if exists $rule->{cardinality};
1815              
1816 32         55 my $options = { delimiter => "", status => 0 };
1817 32 100       60 $options->{cardinality} = $cardinality if defined $cardinality;
1818 32 100 100     99 $options->{assignop} = '~' if exists $rule->{isLexeme} || exists $rule->{isFragmentOrChild} || exists $rule->{grammarstate};
      100        
1819              
1820 32         34 my $outputline = "";
1821 32         34 my $outf = $self->{outf};
1822              
1823 32         28 my $printoutputcreated = 1;
1824              
1825             SWITCH:
1826             {
1827             ##
1828             # discard rules that implement a case-insensitive single-letter literal
1829             # if we are building case-insensitive keyword literals.
1830             ##
1831 32 50 33     30 ($self->testoption('buildkeywords') && $self->isKeywordLetter($rulename, $rule)) && do {
  32         62  
1832 0         0 $self->deleteMarpaRule($rulename);
1833 0         0 $printoutputcreated = 0;
1834 0         0 last SWITCH;
1835             };
1836             #(exists $rule->{isFragmentOrChild} && $self->testoption('fragment2class') && fragmentEligible2Convert($rule)) && do {
1837             ##
1838             # conditionally translate eligible fragments to classes if the fragment is not tagged with a '?' quantifier
1839             ##
1840 32 100 66     46 ($self->testoption('fragment2class') && $rulename !~ /^opt_/ && fragmentEligible2Convert($rule)) && do {
      100        
1841 1         4 $self->writeFragmentAsClass($rulename, $rule, $options );
1842 1         2 last SWITCH;
1843             };
1844             ##
1845             # print a rule with alternative branches
1846             ##
1847 31 100       44 (exists $rule->{list}) && do {
1848 22         39 $self->writeMarpaRuleList($rulename, $rule, $options );
1849 22         26 last SWITCH;
1850             };
1851             ##
1852             # print a simple rule
1853             ##
1854 9         10 do {
1855 9         17 $outputline = $self->printRhs( $rulename, $rule, $options );
1856 9 50       15 $printoutputcreated = 0 if !length($outputline);
1857 9         9 last SWITCH;
1858             };
1859             }
1860              
1861 32 100       52 printf $outf "%s\n", $outputline if length($outputline) > 0;
1862              
1863 32         62 return $printoutputcreated;
1864             }
1865              
1866             sub dumpMarpaRule
1867             {
1868 0     0 0 0 my ( $self, $rulename ) = @_;
1869              
1870 0         0 my $marparule = $self->getMarpaRule(undef, $rulename);
1871 0         0 $self->dumpStructure( sprintf("=== %s\n", $rulename), $marparule);
1872             }
1873              
1874             ##
1875             # 'computeIndentation' : compute the maximum rulename length
1876             ##
1877             sub computeIndentation
1878             {
1879 1     1 0 3 my ($self) = @_;
1880              
1881 1         2 my $symboltable = $self->{symboltable};
1882 1         3 my $ruletable = $symboltable->ruletable;
1883 1         2 my $subrules = $self->{subrules};
1884              
1885 1         3 my $indent = -1;
1886 1         3 for my $rule (@$ruletable)
1887             {
1888 16 50 66     50 next if !defined $rule || !exists $rule->{name} || !exists $rule->{generationstatus};
      33        
1889 14         26 my $rulename = $rule->{name};
1890 14 100       19 $indent = length($rulename) if length($rulename) > $indent;
1891 14 100       19 next if !exists $subrules->{$rulename};
1892 8         8 for my $subrulename (@{$subrules->{$rulename}})
  8         11  
1893             {
1894 18 100       26 $indent = length($subrulename) if length($subrulename) > $indent;
1895             }
1896             }
1897              
1898 1 50       3 mydie "INTERNAL: couldn't compute rhs indentation" if $indent == -1;
1899              
1900 1         3 $self->{indent} = $indent;
1901             }
1902              
1903             sub generateGenericOptions
1904             {
1905 1     1 0 3 my ($self) = @_;
1906 1         3 my $outf = $self->{outf};
1907 1         11 printf $outf "%s\n\n", "lexeme default = latm => 1";
1908             }
1909              
1910             sub generateStartClause
1911             {
1912 1     1 0 3 my ($self) = @_;
1913              
1914 1         3 my $startrule = $self->symboltable->startrule;
1915 1 50       3 return if !defined $startrule;
1916              
1917 1         4 my $outf = $self->{outf};
1918              
1919 1         5 printf $outf "%s %-3s %s\n\n", $self->pad(":start"), "::=", $startrule->{name};
1920             }
1921              
1922             sub convertRedirectToDiscard
1923             {
1924 1     1 0 3 my ($self) = @_;
1925              
1926 1 50 33     10 return if !exists $self->{discarded} || ref $self->{discarded} ne "ARRAY" || scalar @{$self->{discarded}} < 1;
  1   33     4  
1927              
1928 1         2 my $outf = $self->{outf};
1929              
1930 1         3 printf $outf "# ---\n";
1931 1         3 printf $outf "# Discard rule from redirect options :\n";
1932 1         4 printf $outf "%s %-3s %s\n", $self->pad(":discard"), "~", "";
1933              
1934 1         2 my $lhs = "";
1935 1         2 my $delimiter = "~";
1936 1         2 for my $rulename (@{$self->{discarded}})
  1         4  
1937             {
1938 1         3 printf $outf "%s %-3s %s\n", $self->pad($lhs), $delimiter, $rulename;
1939 1         3 $lhs = "";
1940 1         2 $delimiter = "|";
1941             }
1942              
1943 1         3 printf $outf "# ---\n";
1944             }
1945              
1946             ## -------------------------------------------------------------------------------------
1947             # 'writeMarpaGrammar' create an output file with the collected rules in Marpa syntax.
1948             # the sequence of rules is identical to that from the original
1949             # input files. Subrules are aligned with parent rules.
1950             ## -------------------------------------------------------------------------------------
1951             sub writeMarpaGrammar
1952             {
1953 1     1 0 3 my ($self) = @_;
1954              
1955 1         4 my $symboltable = $self->{symboltable};
1956 1         5 my $startrule = $symboltable->startrule;
1957 1         2 my $startrulename = $startrule->{name};
1958              
1959 1         5 my $ruletable = $symboltable->ruletable;
1960 1         1 my $subrules = $self->{subrules};
1961              
1962 1         5 $self->computeIndentation;
1963              
1964 1         4 $self->openOutputFile;
1965              
1966 1         4 $self->generateGenericOptions;
1967 1         5 $self->generateStartClause;
1968              
1969             # create a ':discard' rule from the rules
1970             # tagged 'redirect' at the top of the
1971             # output file.
1972 1         4 $self->convertRedirectToDiscard;
1973              
1974 1         2 my $outf = $self->{outf};
1975              
1976             ##
1977             # print rules in the order of the input file
1978             ##
1979 1         2 my $status = 0;
1980 1         2 my $subrulestatus = 0;
1981 1         2 my $rulegeneratedoutput = 1;
1982              
1983 1         3 for my $rule (@$ruletable)
1984             {
1985 16 50 33     35 if (exists $rule->{comment} && !$self->testoption('stripallcomments'))
1986             {
1987 0         0 my $comment = $rule->{comment};
1988 0 0       0 $comment = [$comment] if ref $comment ne "ARRAY";
1989 0         0 my $cl = $self->renderComment("grammar", $comment);
1990 0         0 for my $s (@$cl)
1991             {
1992 0         0 printf $outf "%s\n", $s;
1993             }
1994             }
1995              
1996 16 100 66     57 next if !defined $rule || !exists $rule->{name} || !exists $rule->{generationstatus};
      66        
1997              
1998 14         18 my $rulename = $rule->{name};
1999              
2000 14 100       21 if ( !$status )
2001             {
2002 1 50       3 printf "WARNING : first rule '%s' is not startrule '%s'\n", $rulename, $startrulename if $rulename ne $startrulename;
2003 1         2 $status = 1;
2004             }
2005              
2006             ##
2007             # create a block of text for a rule with its subrules
2008             ##
2009 14 100 100     37 printf $outf "\n" if exists $subrules->{$rulename} && !$subrulestatus && $rulegeneratedoutput;
      66        
2010 14         16 $subrulestatus = 0;
2011              
2012 14         21 $rulegeneratedoutput = $self->writeMarpaRule($rulename);
2013              
2014             ##
2015             # print the generated subrules immediately after the parent rule
2016             ##
2017 14 100       23 if (exists $subrules->{$rulename})
2018             {
2019 8         9 my $subrulegeneratedoutput = 0;
2020              
2021 8         8 for my $subrulename (@{$subrules->{$rulename}})
  8         13  
2022             {
2023 18         27 $subrulegeneratedoutput += $self->writeMarpaRule($subrulename);
2024             }
2025              
2026 8 50       17 printf $outf "\n" if $subrulegeneratedoutput;
2027 8         12 $subrulestatus = 1;
2028             }
2029             }
2030              
2031             ##
2032             # print the rest (if any)
2033             ##
2034 1         2 for my $rulename (sort keys %{$self->{rules}})
  1         18  
2035             {
2036 32         47 my $g4rule = $symboltable->rule($rulename);
2037 32 100       46 next if !exists $g4rule->{generationstatus};
2038              
2039 14         17 my $marparule = $self->getMarpaRule(undef, $rulename);
2040 14 50       24 next if exists $marparule->{status};
2041              
2042 0 0       0 printf "%s\n", $rulename if $self->{verbosity};
2043 0         0 $self->writeMarpaRule($rulename);
2044             }
2045              
2046 1         4 $self->closeOutputFile;
2047             }
2048              
2049             # --------------------------------------------------- #
2050             # driver #
2051             # --------------------------------------------------- #
2052              
2053             sub generate
2054             {
2055 1     1 0 3 my ($self, $symboltable) = @_;
2056              
2057 1         2 $self->{symboltable} = $symboltable;
2058              
2059 1         5 $self->translateG4Grammar;
2060 1 50       4 $self->dumpStructure("Rules", $self->{rules}) if $self->{verbosity} > 1;
2061 1         4 $self->writeMarpaGrammar;
2062             }
2063              
2064             1;
2065              
2066             # ABSTRACT: translate parsed antlr4 rules to Marpa2 syntax
2067              
2068             =head1 SYNOPSIS
2069             use MarpaX::G4::MarpaGen;
2070             my $generator = new MarpaX::MarpaGen;
2071             $generator->generate($symboltable);
2072             =head1 DESCRIPTION
2073             Translate the rules from the symbol table created from the imported ANTLR4 grammar
2074             into Marpa syntax and write them to output.
2075              
2076             use MarpaX::Symboltable;
2077             my $symboltable = new MarpaX::Symboltable;
2078              
2079             my $grammartext = readFile($infile);
2080             my $data = MarpaX::G4::parse_rules($grammartext);
2081             $symboltable->importParseTree($data);
2082             $symboltable->validateSymbolTable();
2083              
2084             my $generator = new MarpaX::MarpaGen;
2085              
2086             $generator->stripallcomments if exists $options->{c};
2087             $generator->embedactions if exists $options->{e};
2088             $generator->fragment2class if exists $options->{f};
2089             $generator->shiftlazytogreedy if exists $options->{g};
2090             $generator->buildkeywords if exists $options->{k};
2091             $generator->stripactions if exists $options->{p};
2092             $generator->setVerbosity(2) if exists $options->{t};
2093             $generator->matchcaseinsensitive if exists $options->{u} || exists $options->{k};
2094              
2095             my $outputfile = '-';
2096             $outputfile = $options->{o} if exists $options->{o};
2097             $generator->setoutputfile($outputfile);
2098              
2099             $generator->generate($symboltable);
2100             =cut