File Coverage

html/lib/Marpa/R2/HTML/Config/Compile.pm
Criterion Covered Total %
statement 397 444 89.4
branch 95 144 65.9
condition 23 36 63.8
subroutine 23 26 88.4
pod 0 13 0.0
total 538 663 81.1


line stmt bran cond sub pod time code
1             # Copyright 2022 Jeffrey Kegler
2             # This file is part of Marpa::R2. Marpa::R2 is free software: you can
3             # redistribute it and/or modify it under the terms of the GNU Lesser
4             # General Public License as published by the Free Software Foundation,
5             # either version 3 of the License, or (at your option) any later version.
6             #
7             # Marpa::R2 is distributed in the hope that it will be useful,
8             # but WITHOUT ANY WARRANTY; without even the implied warranty of
9             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10             # Lesser General Public License for more details.
11             #
12             # You should have received a copy of the GNU Lesser
13             # General Public License along with Marpa::R2. If not, see
14             # http://www.gnu.org/licenses/.
15              
16             package Marpa::R2::HTML::Config::Compile;
17              
18 1     1   21 use 5.010001;
  1         4  
19 1     1   5 use strict;
  1         2  
  1         22  
20 1     1   4 use warnings;
  1         2  
  1         29  
21              
22 1     1   6 use vars qw($VERSION $STRING_VERSION);
  1         2  
  1         76  
23             $VERSION = '12.000000';
24             $STRING_VERSION = $VERSION;
25             ## no critic(BuiltinFunctions::ProhibitStringyEval)
26             $VERSION = eval $VERSION;
27             ## use critic
28              
29 1     1   6 use Data::Dumper;
  1         3  
  1         53  
30 1     1   6 use English qw( -no_match_vars );
  1         2  
  1         7  
31              
32 1     1   856 use Marpa::R2::HTML::Config::Core;
  1         3  
  1         30  
33 1     1   9 use Marpa::R2::Thin::Trace;
  1         1  
  1         42  
34              
35             # Indexes into the symbol table
36 1     1   7 use constant CONTEXT_CLOSED => 0;
  1         2  
  1         81  
37 1     1   7 use constant CONTENTS_CLOSED => 1;
  1         2  
  1         44  
38 1     1   5 use constant CONTEXT => 2;
  1         2  
  1         40  
39 1     1   6 use constant CONTENTS => 3;
  1         2  
  1         5715  
40              
41             sub do_is_included_statement {
42 10     10 0 21 my ( $self, $external_element, undef, undef, undef, $external_group ) = @_;
43 10         13 my $tag = $external_element;
44 10         43 $tag =~ s/\A [<] \s* //xms;
45 10         34 $tag =~ s/\s* [>] \z //xms;
46 10         19 my $element = 'ELE_' . $tag;
47 10         26 ( my $group_name = $external_group ) =~ s/\A [%] //xms;
48 10         20 my $group = 'GRP_' . $group_name;
49              
50 10         16 my $symbol_table = $self->{symbol_table};
51 10   50     25 my $element_entry = $symbol_table->{$element} //= [];
52 10         16 my $group_entry = $symbol_table->{$group};
53              
54             # For now, new groups cannot be defined
55 10 50       17 Carp::croak(
56             qq{Group "$group" does not exist\n},
57             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
58             ) if not defined $group_entry;
59              
60 10         17 my $closed_reason = $element_entry->[CONTEXT_CLOSED];
61 10 50       21 if ($closed_reason) {
62 0         0 Carp::croak(
63             qq{Context of "$element" cannot be changed:\n},
64             qq{ Reason: $closed_reason\n},
65             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
66             );
67             } ## end if ($closed_reason)
68 10         14 $closed_reason = $group_entry->[CONTENTS_CLOSED];
69 10 50       17 if ($closed_reason) {
70 0         0 Carp::croak(
71             qq{Contents of "$group" cannot be changed:\n},
72             qq{ Reason: $closed_reason\n},
73             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
74             );
75             } ## end if ($closed_reason)
76              
77             # If this is the first, it sets the primary group
78 10         30 my $primary_group_by_tag = $self->{primary_group_by_tag};
79 10   33     60 $primary_group_by_tag->{$tag} //= $group;
80 10         11 push @{ $element_entry->[CONTEXT] }, $group;
  10         25  
81              
82 10         28 return;
83              
84             } ## end sub do_is_included
85              
86             sub do_is_a_included_statement {
87 77     77 0 155 my ( $self, $external_element, undef, undef, $external_flow, undef, undef, $external_group ) = @_;
88 77         98 my $tag = $external_element;
89 77         309 $tag =~ s/\A [<] \s* //xms;
90 77         237 $tag =~ s/\s* [>] \z //xms;
91 77         182 ( my $flow_name = $external_flow ) =~ s/\A [*] //xms;
92 77         178 ( my $group_name = $external_group ) =~ s/\A [%] //xms;
93 77         149 my $flow = 'FLO_' . $flow_name;
94 77         108 my $group = 'GRP_' . $group_name;
95 77         113 my $element = 'ELE_' . $tag;
96 77         116 my $symbol_table = $self->{symbol_table};
97 77   50     302 my $element_entry = $symbol_table->{$element} //= [];
98 77         143 my $group_entry = $symbol_table->{$group};
99 77         98 my $flow_entry = $symbol_table->{$flow};
100              
101             # For now, new flows and groups cannot be defined
102 77 50       143 Carp::croak(
103             qq{Group "$group" does not exist\n},
104             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
105             ) if not defined $group_entry;
106 77 50       115 Carp::croak(
107             qq{Flow "$flow" does not exist\n},
108             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
109             ) if not defined $flow_entry;
110              
111 77         107 my $closed_reason = $element_entry->[CONTEXT_CLOSED];
112 77 50       136 if ($closed_reason) {
113 0         0 Carp::croak(
114             qq{Context of "$element" cannot be changed:\n},
115             qq{ Reason: $closed_reason\n},
116             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
117             );
118             } ## end if ($closed_reason)
119 77         88 $closed_reason = $element_entry->[CONTENTS_CLOSED];
120 77 50       127 if ($closed_reason) {
121 0         0 Carp::croak(
122             qq{Contents of "$element" cannot be changed:\n},
123             qq{ Reason: $closed_reason\n},
124             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
125             );
126             } ## end if ($closed_reason)
127 77         91 $closed_reason = $flow_entry->[CONTEXT_CLOSED];
128 77 50       112 if ($closed_reason) {
129 0         0 Carp::croak(
130             qq{Context of "$flow" cannot be changed:\n},
131             qq{ Reason: $closed_reason\n},
132             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
133             );
134             } ## end if ($closed_reason)
135 77         94 $closed_reason = $group_entry->[CONTENTS_CLOSED];
136 77 50       120 if ($closed_reason) {
137 0         0 Carp::croak(
138             qq{Contents of "$group" cannot be changed:\n},
139             qq{ Reason: $closed_reason\n},
140             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
141             );
142             } ## end if ($closed_reason)
143              
144             Carp::croak(
145 77 50       123 qq{Contents of "$element" are already being defined:\n},
146             qq{ Problem was in this line: },
147             $Marpa::R2::HTML::Config::Compile::LINE
148             ) if defined $element_entry->[CONTENTS];
149 77 50       130 Carp::croak(
150             qq{Context of "$element" is already being defined:\n},
151             qq{ Problem was in this line: },
152             $Marpa::R2::HTML::Config::Compile::LINE
153             ) if defined $element_entry->[CONTEXT];
154              
155             # Always sets the primary group
156 77         105 my $primary_group_by_tag = $self->{primary_group_by_tag};
157 77         188 $primary_group_by_tag->{$tag} = $group;
158 77         108 $element_entry->[CONTENTS] = $flow;
159 77         92 $element_entry->[CONTEXT] = $group;
160 77         120 $element_entry->[CONTEXT_CLOSED] = $element_entry->[CONTENTS_CLOSED] =
161             'Element is already fully defined';
162              
163 77         177 return;
164             } ## end sub do_is_a_included
165              
166             sub do_is_statement {
167              
168 12     12 0 26 my ( $self, $external_element, undef, $external_flow ) = @_;
169 12         15 my $tag = $external_element;
170 12         48 $tag =~ s/\A [<] \s* //xms;
171 12         42 $tag =~ s/\s* [>] \z //xms;
172 12         34 ( my $flow_name = $external_flow ) =~ s/\A [*] //xms;
173 12         27 my $flow = 'FLO_' . $flow_name;
174 12         17 my $element = 'ELE_' . $tag;
175 12         24 my $symbol_table = $self->{symbol_table};
176 12   100     53 my $element_entry = $symbol_table->{$element} //= [];
177 12         26 my $flow_entry = $symbol_table->{$flow};
178              
179             # For now, new flows cannot be defined
180 12 50       24 Carp::croak(
181             qq{Flow "$flow" does not exist\n},
182             qq{ Problem was in this line: },
183             $Marpa::R2::HTML::Config::Compile::LINE
184             ) if not defined $flow_entry;
185              
186 12         19 my $closed_reason = $element_entry->[CONTENTS_CLOSED];
187 12 50       19 if ($closed_reason) {
188 0         0 Carp::croak(
189             qq{Contents of "$element" cannot be changed:\n},
190             qq{ Reason: $closed_reason\n},
191             qq{ Problem was in this line: },
192             $Marpa::R2::HTML::Config::Compile::LINE
193             );
194             } ## end if ($closed_reason)
195 12         23 $closed_reason = $flow_entry->[CONTEXT_CLOSED];
196 12 50       20 if ($closed_reason) {
197 0         0 Carp::croak(
198             qq{Context of "$flow" cannot be changed:\n},
199             qq{ Reason: $closed_reason\n},
200             qq{ Problem was in this line: },
201             $Marpa::R2::HTML::Config::Compile::LINE
202             );
203             } ## end if ($closed_reason)
204              
205             Carp::croak(
206 12 50       27 qq{Contents of "$element" are already being defined:\n},
207             qq{ Problem was in this line: },
208             $Marpa::R2::HTML::Config::Compile::LINE
209             ) if defined $element_entry->[CONTENTS];
210              
211 12         21 $element_entry->[CONTENTS] = $flow;
212 12         15 $element_entry->[CONTENTS_CLOSED] =
213             'Contents of Element are already defined';
214              
215 12         31 return;
216             } ## end sub do_is
217              
218             sub problem_in_rule {
219 0     0 0 0 my ($string) = @_;
220 0         0 Marpa::R2::Context::bail( [ 'rule', $string, Marpa::R2::Context::location() ] );
221             }
222              
223             sub do_contains_statement {
224 17     17 0 29 my ( $self, $external_element, undef, $external_contents ) = @_;
225              
226             # Production is Element with custom flow
227 17         30 my $tag = $external_element;
228 17         64 $tag =~ s/\A [<] \s* //xms;
229 17         58 $tag =~ s/\s* [>] \z //xms;
230 17         35 my $element_symbol = 'ELE_' . $tag;
231 17         32 my $symbol_table = $self->{symbol_table};
232 17   100     74 my $element_entry = $symbol_table->{$element_symbol} //= [];
233 17         28 my $closed_reason = $element_entry->[CONTENTS_CLOSED];
234 17 50       28 if ($closed_reason) {
235 0         0 Carp::croak(
236             qq{Contents of "$element_symbol" cannot be changed:\n},
237             qq{ Reason: $closed_reason\n},
238             qq{ Problem was in this line: },
239             $Marpa::R2::HTML::Config::Compile::LINE
240             );
241             } ## end if ($closed_reason)
242              
243 17         24 my @contents = ();
244              
245             CONTAINED_SYMBOL:
246 17         25 for my $external_content_symbol (@{$external_contents}) {
  17         30  
247 27         33 my $content_symbol;
248 27 100       89 if ( $external_content_symbol =~ /\A [<] (\w+) [>] \z/xms ) {
249 24         53 $content_symbol = 'ELE_' . $1;
250             }
251 27 100       57 if ( $external_content_symbol =~ /\A [%] (\w+) \z/xms ) {
252 3         9 $content_symbol = 'GRP_' . $1;
253             }
254 27   33     39 $content_symbol //= $external_content_symbol;
255 27         46 my $content_entry = $symbol_table->{$content_symbol};
256 27 100       49 if ( not defined $content_entry ) {
257 11 50       27 if ( not $content_symbol =~ /\A ELE_ /xms ) {
258 0         0 problem_in_rule(
259             qq{Symbol "$external_content_symbol" is undefined\n});
260             }
261 11         21 $content_entry = [];
262             } ## end if ( not defined $content_entry )
263 27         35 $closed_reason = $content_entry->[CONTEXT_CLOSED];
264 27 50       43 if ($closed_reason) {
265 0         0 Carp::croak(
266             qq{Context of "$external_content_symbol" cannot be changed:\n},
267             qq{ Reason: $closed_reason\n},
268             qq{ Problem was in this line: },
269             $Marpa::R2::HTML::Config::Compile::LINE
270             );
271             } ## end if ($closed_reason)
272 27         70 push @contents, $content_symbol;
273             } ## end CONTAINED_SYMBOL: for my $external_content_symbol (@external_contents)
274              
275 17         24 push @{ $element_entry->[CONTENTS] }, @contents;
  17         58  
276              
277 17         42 return;
278              
279             } ## end sub do_contains
280              
281             sub do_array_assignment {
282 3     3 0 8 my ( $self, $external_list, undef, $external_members ) = @_;
283 3         14 ( my $new_list = $external_list ) =~ s/\A [@] //xms;
284 3         7 my $lists = $self->{lists};
285             Carp::croak(
286             "Problem in line: ", $Marpa::R2::HTML::Config::Compile::LINE,
287             "\n", 'list @' . $new_list . ' is already defined'
288 3 50       9 ) if defined $lists->{$new_list};
289 3         6 my @members = ();
290 3         5 RAW_MEMBER: for my $raw_member (@{$external_members}) {
  3         7  
291 10 100       23 if ( $raw_member =~ / \A [@] (.*) \z/xms ) {
292 1         3 my $member_list = $1;
293             Carp::croak(
294             "Problem in line: ",
295             $Marpa::R2::HTML::Config::Compile::LINE,
296             "\n",
297             'member list @' . $member_list . ' is not yet defined'
298 1 50       5 ) if not defined $lists->{$member_list};
299 1         2 push @members, @{ $lists->{$member_list} };
  1         4  
300 1         2 next RAW_MEMBER;
301             } ## end if ( $raw_member =~ / \A [@] (.*) \z/xms )
302 9         16 push @members, $raw_member;
303             } ## end RAW_MEMBER: for my $raw_member (@{$external_members})
304 3         8 $lists->{$new_list} = \@members;
305 3         7 return;
306             } ## end sub do_array_assignment
307              
308             sub do_ruby_statement {
309 27     27 0 54 my ( $self, $external_reject_symbol, undef, $external_candidates ) = @_;
310 27         38 my $lists = $self->{lists};
311 27         42 my @symbols = ($external_reject_symbol);
312 27         30 RAW_CANDIDATE: for my $raw_candidate ( @{$external_candidates} ) {
  27         46  
313 67 100       176 if ( $raw_candidate =~ / \A [@] (.*) \z/xms ) {
314 22         48 my $list = $1;
315             Carp::croak(
316             "Problem in line: ",
317             $Marpa::R2::HTML::Config::Compile::LINE,
318             "\n", 'candidate list @' . $list . ' is not yet defined'
319 22 50       47 ) if not defined $lists->{$list};
320 22         27 push @symbols, @{ $lists->{$list} };
  22         58  
321 22         54 next RAW_CANDIDATE;
322             } ## end if ( $raw_candidate =~ / \A [@] (.*) \z/xms )
323 45         64 push @symbols, $raw_candidate;
324             } ## end RAW_CANDIDATE: for my $raw_candidate ( @{$external_candidates} )
325 27         44 my @internal_symbols = ();
326 27         35 SYMBOL: for my $symbol (@symbols) {
327 157 100 100     459 if ( $symbol eq 'CDATA' or $symbol eq 'PCDATA' ) {
328 2         4 push @internal_symbols, $symbol;
329 2         7 next SYMBOL;
330             }
331 155 100       253 if ( $symbol =~ /\A ( [<] [%] (inline|head|block) [>] ) \z/xms ) {
332 2         5 my $special_symbol = $1;
333 2         15 push @internal_symbols, $special_symbol;
334 2         9 next SYMBOL;
335             }
336 153 50       228 if ( $symbol =~ m{\A ( [<] [/] [%] (inline|head|block) [>] ) \z}xms )
337             {
338 0         0 my $special_symbol = $1;
339 0         0 push @internal_symbols, $special_symbol;
340 0         0 next SYMBOL;
341             } ## end if ( $symbol =~ ...)
342 153 100       230 if ( $symbol =~ m{\A ( [<] [*] [>] ) \z}xms ) {
343 1         8 my $special_symbol = $1;
344 1         4 push @internal_symbols, $special_symbol;
345 1         3 next SYMBOL;
346             }
347 152 100       232 if ( $symbol =~ m{\A ( [<] [/] [*] [>] ) \z}xms ) {
348 12         26 my $special_symbol = $1;
349 12         25 push @internal_symbols, $special_symbol;
350 12         16 next SYMBOL;
351             }
352 140 100       419 if ( $symbol =~ /\A [<] (\w+) [>] \z/xms ) {
353 136         266 my $start_tag = 'S_' . $1;
354 136         224 push @internal_symbols, $start_tag;
355 136         189 next SYMBOL;
356             }
357 4 50       20 if ( $symbol =~ m{\A [<] [/](\w+) [>] \z}xms ) {
358 4         10 my $end_tag = 'E_' . $1;
359 4         7 push @internal_symbols, $end_tag;
360 4         8 next SYMBOL;
361             }
362             Carp::croak(
363 0         0 "Problem in line: ",
364             $Marpa::R2::HTML::Config::Compile::LINE,
365             "\n", qq{Misformed symbol "$symbol"}
366             );
367             } ## end SYMBOL: for my $symbol (@symbols)
368 27         36 my $rejected_symbol = shift @internal_symbols;
369 27         95 $self->{ruby_config}->{$rejected_symbol} = \@internal_symbols;
370 27         79 return;
371             } ## end sub do_ruby_statement
372              
373             sub die_on_read_problem {
374 0     0 0 0 my ( $rec, $t, $token_value, $string, $position ) = @_;
375 0 0       0 say $rec->show_progress() or die "say failed: $ERRNO";
376 0         0 my $problem_position = $position - length $1;
377 0         0 my $before_start = $problem_position - 40;
378 0 0       0 $before_start = 0 if $before_start < 0;
379 0         0 my $before_length = $problem_position - $before_start;
380 0         0 die "Problem near position $problem_position\n",
381             q{Problem is here: "},
382             ( substr $string, $before_start, $before_length + 40 ),
383             qq{"\n},
384             ( q{ } x ( $before_length + 18 ) ), qq{^\n},
385             q{Token rejected, "}, $t->[0], qq{", "$token_value"},
386             ;
387             } ## end sub die_on_read_problem
388              
389 47     47 0 55 sub do_array { shift; return [@_]; }
  47         115  
390              
391             sub do_what_I_mean {
392              
393             # The first argument is the per-parse variable.
394             # At this stage, just throw it away
395 427     427 0 478 shift;
396              
397             # Throw away any undef's
398 427         617 my @children = grep { defined } @_;
  572         1197  
399              
400             # Return what's left
401 427 50       1057 return scalar @children > 1 ? \@children : shift @children;
402             }
403              
404             # Order matters !!
405             my @terminals = (
406             [ kw_CDATA => qr/CDATA\b/xms ],
407             [ kw_PCDATA => qr/PCDATA\b/xms ],
408             [ kw_is => qr/is\b/ixms ],
409             [ kw_a => qr/a\b/ixms ],
410             [ kw_contains => qr/contains\b/ixms ],
411             [ kw_included => qr/included\b/ixms ],
412             [ kw_in => qr/in\b/ixms ],
413             [ flow => qr/[*]\w+\b/xms ],
414             [ group => qr/[%]\w+\b/xms ],
415             [ list => qr/[@]\w+\b/xms ],
416             [ start_tag => qr/[<]\w+[>]/xms ],
417             [ end_tag => qr{[<][/]\w+[>]}xms ],
418             [ wildcard_start_tag => qr/[<][*][>]/xms ],
419             [ wildcard_end_tag => qr{[<][/][*][>]}xms ],
420             [ group_start_tag => qr/[<][%]\w+[>]/xms ],
421             [ group_end_tag => qr/[<][%]\w+[>]/xms ],
422             [ op_assign => qr/[=]/xms ],
423             [ op_ruby => qr/[-][>]/xms ],
424             [ semi_colon => qr/[;]/xms ],
425             );
426              
427             sub create_grammar {
428              
429 1     1 0 2 my $source = <<'END_OF_GRAMMAR';
430             translation_unit ::= statement*
431             statement ::= is_included_statement
432             | is_a_included_statement
433             | is_statement
434             | contains_statement
435             | list_assignment
436             | ruby_statement
437             is_included_statement ::= element kw_is kw_included kw_in
438             action => do_is_included_statement
439             element ::= start_tag
440             is_a_included_statement ::= element kw_is kw_a flow kw_included kw_in
441             action => do_is_a_included_statement
442             is_statement ::= element kw_is flow
443             action => do_is_statement
444             contains_statement ::= element kw_contains contents
445             action => do_contains_statement
446             contents ::= content_item*
447             action => do_array
448             list_assignment ::= list op_assign list_members
449             action => do_array_assignment
450             list_members ::= list_member*
451             action => do_array
452             list_member ::= ruby_symbol
453             list_member ::= list
454             content_item ::= element | | kw_PCDATA | kw_CDATA
455             ruby_statement ::= ruby_symbol op_ruby ruby_symbol_list
456             action => do_ruby_statement
457             ruby_symbol_list ::= ruby_symbol*
458             action => do_array
459             ruby_symbol ::= kw_PCDATA | kw_CDATA
460             | start_tag | group_start_tag | wildcard_start_tag
461             | end_tag | group_end_tag | wildcard_end_tag
462             | list
463             END_OF_GRAMMAR
464            
465 1         27 my $grammar = Marpa::R2::Grammar->new(
466             { start => 'translation_unit',
467             action_object => __PACKAGE__,
468             rules =>$source,
469             default_action => 'do_what_I_mean'
470             }
471             );
472 1         9 $grammar->precompute();
473 1         4 return $grammar;
474             }
475              
476             sub source_by_location_range {
477 0     0 0 0 my ( $self, $start, $end ) = @_;
478 0         0 my $positions = $self->{positions};
479 0 0       0 my $start_pos = $start > 0 ? $positions->[$start] : 0;
480 0         0 my $end_pos = $positions->[$end];
481 0         0 return substr ${ $self->{source_ref} }, $start_pos, $end_pos - $start_pos;
  0         0  
482             } ## end sub source_by_location_range
483              
484             sub compile {
485 1     1 0 6 my ($source_ref) = @_;
486              
487             # A quasi-object, not used outside this routine
488 1         3 my $self = bless {}, __PACKAGE__;
489              
490 1         18 my %species_handler = (
491             cruft => 'SPE_CRUFT',
492             comment => 'SPE_COMMENT',
493             pi => 'SPE_PI',
494             decl => 'SPE_DECL',
495             document => 'SPE_TOP',
496             whitespace => 'SPE_WHITESPACE',
497             pcdata => 'SPE_PCDATA',
498             cdata => 'SPE_CDATA',
499             prolog => 'SPE_PROLOG',
500             trailer => 'SPE_TRAILER',
501             );
502              
503 1         3 my @core_rules = ();
504 1         1 my %runtime_tag = ();
505 1         2 my %primary_group_by_tag = ();
506 1         9 $self->{primary_group_by_tag} = \%primary_group_by_tag;
507              
508             {
509 1         3 LINE:
510 1         18 for my $line ( split /\n/xms,
511             $Marpa::R2::HTML::Internal::Core::CORE_BNF )
512             {
513 66         148 my $definition = $line;
514 66         75 chomp $definition;
515 66         102 $definition =~ s/ [#] .* //xms; # Remove comments
516             next LINE
517 66 100       143 if not $definition =~ / \S /xms; # ignore all-whitespace line
518 40         73 my $sequence = ( $definition =~ s/ [*] \s* $//xms );
519 40 50       137 if ( $definition =~ s/ \s* [:][:][=] \s* / /xms ) {
520              
521             # Production is Ordinary BNF rule
522 40         98 my @symbols = ( split q{ }, $definition );
523 40         50 my $lhs = shift @symbols;
524 40         93 my %rule_descriptor = (
525             lhs => $lhs,
526             rhs => \@symbols,
527             );
528 40 100       69 if ($sequence) {
529 7         9 $rule_descriptor{min} = 0;
530             }
531 40 100       89 if ( my $handler = $species_handler{$lhs} ) {
    100          
532 10         17 $rule_descriptor{action} = $handler;
533             }
534             elsif ( $lhs =~ /^ELE_/xms ) {
535 1         3 $rule_descriptor{action} = "$lhs";
536             }
537 40         56 push @core_rules, \%rule_descriptor;
538 40         107 next LINE;
539             } ## end if ( $definition =~ s/ \s* [:][:][=] \s* / /xms )
540 0         0 die "Badly formed line in grammar description: $line";
541             } ## end LINE: for my $line ( split /\n/xms, ...)
542             }
543              
544 1         15 my @core_symbols = map { ( $_->{lhs}, @{ $_->{rhs} } ) } @core_rules;
  40         46  
  40         63  
545              
546             # Start out by closing the context and contents of everything
547             my %symbol_table = map {
548 1         3 $_ =>
  88         161  
549             [ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
550             } @core_symbols;
551 1         12 $self->{symbol_table} = \%symbol_table;
552              
553             # A few token symbols are allowed as contents -- most non-element
554             # tokens are included via the SGML group
555 1         3 for my $token_symbol (qw(cdata pcdata)) {
556 2         9 $symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
557             }
558              
559             # Many groups are defined to to be used
560 1         2 for my $group_symbol (
561             qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
562             )
563             {
564 7         10 $symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
565             } ## end for my $group_symbol ( ...)
566              
567             # Flow symbols are almost all allowed as contents
568             FLOW_SYMBOL:
569 1         2 for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {
  88         124  
570              
571             # The SGML flow is included automatically as needed
572             # and should not be explicity specified
573 13 100       22 next FLOW_SYMBOL if $flow_symbol eq 'FLO_SGML';
574 7         11 $symbol_table{$flow_symbol}->[CONTEXT_CLOSED] = 0;
575             } ## end for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } ...)
576              
577             # A few groups are also extensible
578 1         3 for my $group_symbol (qw( GRP_anywhere GRP_block GRP_head GRP_inline )) {
579 4         7 $symbol_table{$group_symbol}->[CONTENTS_CLOSED] = 0;
580             }
581              
582             # As very special cases the contents of the and
583             # elements can be changed
584 1         2 for my $element_symbol (qw( ELE_head ELE_body )) {
585 2         4 $symbol_table{$element_symbol}->[CONTENTS_CLOSED] = 0;
586             }
587              
588             {
589             # Make sure everything for which we have a handler was defined in
590             # the core grammar
591 1         2 my @species_not_defined = grep { not defined $symbol_table{$_} }
  1         5  
  10         16  
592             keys %species_handler;
593 1 50       9 if ( scalar @species_not_defined ) {
594 0         0 die
595             'Definitions for the following required text components are missing: ',
596             join q{ }, @species_not_defined;
597             }
598             }
599              
600 1         2 my %ruby_config = ();
601 1         6 my %lists = ();
602 1         2 $self->{ruby_config} = \%ruby_config;
603 1         4 $self->{lists} = \%lists;
604 1         2 $self->{source_ref} = $source_ref;
605 1         3 my @positions = (0);
606 1         2 $self->{positions} = \@positions;
607              
608 1         4 state $grammar = create_grammar();
609 1         8 my $recce = Marpa::R2::Recognizer->new({ grammar => $grammar});
610 1         3 my $string = ${$source_ref};
  1         5  
611 1         4 my $length = length $string;
612 1         5 pos $string = 0;
613 1         6 TOKEN: while ( pos $string < $length ) {
614              
615             # skip comment
616 1662 100       3706 next TOKEN if $string =~ m/\G \s* [#] [^\n]* \n/gcxms;
617              
618             # skip whitespace
619 1645 100       4227 next TOKEN if $string =~ m/\G\s+/gcxms;
620              
621             # read other tokens
622 823         1354 TOKEN_TYPE: for my $t (@terminals) {
623 6602 100       71966 next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
624             # say join " ", $t->[0], '->', $1;
625 823 50       2720 if ( not defined $recce->read( $t->[0], $1 ) ) {
626 0         0 die_on_read_problem( $recce, $t, $1, $string, pos $string );
627             }
628 823         1802 my $latest_earley_set = $recce->latest_earley_set();
629 823         1571 $positions[$latest_earley_set] = pos $string;
630 823         2090 next TOKEN;
631             } ## end TOKEN_TYPE: for my $t (@terminals)
632              
633 0         0 die q{No token at "}, ( substr $string, pos $string, 40 ),
634             q{", position }, pos $string;
635             } ## end TOKEN: while ( pos $string < $length )
636              
637             # Value not used
638 1         5 my $parse_value_ref;
639 1         6 my $eval_ok = eval {
640              
641             # Have the new() just return the current $self
642 1     1   19 local *new = sub { return $self };
  1         4  
643 1         12 $parse_value_ref = $recce->value();
644 1         6 1;
645             };
646 1 50       5 if ( not defined $eval_ok ) {
647 0         0 my $eval_ref_type = ref $EVAL_ERROR;
648 0 0       0 die $EVAL_ERROR if not $eval_ref_type;
649 0 0 0     0 if ( $eval_ref_type eq 'ARRAY' and $EVAL_ERROR->[0] eq 'rule' ) {
650 0         0 my ( undef, $message, $start, $end ) = @{$EVAL_ERROR};
  0         0  
651 0         0 chomp $message;
652 0         0 die $message, "\n",
653             "Rule with problem was: ",
654             $self->source_by_location_range( $start, $end ), "\n";
655             } ## end if ( $eval_ref_type eq 'ARRAY' and $EVAL_ERROR->[0] ...)
656 0         0 die "Unknown exception: ", Data::Dumper::Dumper($EVAL_ERROR);
657             } ## end if ( not defined $eval_ok )
658 1 50       6 if ( not defined $parse_value_ref ) {
659 0         0 die "Compile of HTML configuration failed: source did not parse";
660             }
661              
662 1         4 my %sgml_flow_included = ();
663 1         39 SYMBOL: for my $element_symbol ( keys %symbol_table ) {
664 143 100       245 next SYMBOL if not 'ELE_' eq substr $element_symbol, 0, 4;
665 106         130 my $tag = substr $element_symbol, 4;
666 106         139 my $entry = $symbol_table{$element_symbol};
667 106         146 my $context = $entry->[CONTEXT];
668 106         117 my $contents = $entry->[CONTENTS];
669 106 100 100     179 next SYMBOL if not defined $context and not defined $contents;
670 105 50 66     239 if ( defined $context and not defined $contents ) {
671 0         0 Carp::croak(
672             qq{Element <$tag> was defined but was never given any contents}
673             );
674             }
675              
676             # Contents without context are OK at this point
677             # We will check later for elements defined but not used
678 105   100     176 $context //= [];
679              
680             # The special case where both are defined and both
681             # are scalars is for elements to be created at runtime
682 105 50 66     225 if ( not ref $context and not ref $contents ) {
683 77         122 $runtime_tag{$tag} = $contents;
684 77         117 next SYMBOL;
685             }
686              
687 28 100       40 if ( ref $contents ) {
688 16         34 my $contents_symbol = 'Contents_ELE_' . $tag;
689 16         24 my $item_symbol = 'GRP_ELE_' . $tag;
690 16         117 push @core_rules,
691             {
692             lhs => $element_symbol,
693             rhs => [ "S_$tag", $contents_symbol, "E_$tag" ],
694             action => $element_symbol,
695             },
696             {
697             lhs => $contents_symbol,
698             rhs => [$item_symbol],
699             min => 0
700             };
701 16         22 for my $content_item ( @{$contents} ) {
  16         25  
702 27         70 push @core_rules,
703             {
704             lhs => $item_symbol,
705             rhs => [$content_item],
706             };
707             } ## end for my $content_item ( @{$contents} )
708 16 50       33 if ( !$sgml_flow_included{$item_symbol} ) {
709 16         34 $sgml_flow_included{$item_symbol} = 1;
710 16         40 push @core_rules,
711             {
712             lhs => $item_symbol,
713             rhs => ['GRP_SGML'],
714             };
715             } ## end if ( !$sgml_flow_included{$item_symbol} )
716             } ## end if ( ref $contents )
717             else {
718 12         49 push @core_rules,
719             {
720             lhs => $element_symbol,
721             rhs => [ "S_$tag", $contents, "E_$tag" ],
722             action => $element_symbol,
723             };
724             } ## end else [ if ( ref $contents ) ]
725              
726 28 50       56 $context = [$context] if not ref $context;
727 28         30 for my $context_item ( @{$context} ) {
  28         52  
728 10         35 push @core_rules,
729             {
730             lhs => $context_item,
731             rhs => [$element_symbol],
732             };
733             } ## end for my $context_item ( @{$context} )
734             } ## end SYMBOL: for my $element_symbol ( keys %symbol_table )
735              
736             # Finish out the Ruby Slippers configuration
737             # Make sure the last resort defaults are always defined
738 1         9 for my $required_rubies_desc (qw( <*> )) {
739 3   100     14 $ruby_config{$required_rubies_desc} //= [];
740             }
741              
742 1         6 DESC: for my $rubies_desc ( keys %ruby_config ) {
743 29         36 my $candidates = $ruby_config{$rubies_desc};
744 29 100       29 next DESC if grep { '' eq $_ } @{$candidates};
  130         190  
  29         38  
745 17         20 $ruby_config{$rubies_desc} = [ @{$candidates}, '' ];
  17         42  
746             }
747              
748 1         4 my %is_empty_element = ();
749             {
750 1         11 for my $tag ( keys %runtime_tag ) {
751 77         89 my $contents = $runtime_tag{$tag};
752 77 100       126 $is_empty_element{$tag} = 1 if $contents eq 'FLO_empty';
753             }
754 1         10 RULE: for my $rule (@core_rules) {
755 137         203 my $lhs = $rule->{lhs};
756 137 100       275 next RULE if not 'ELE_' eq substr $lhs, 0, 4;
757 29         41 my $contents = $rule->{rhs}->[1];
758 29 100       58 $is_empty_element{ substr $lhs, 4 } = 1
759             if $contents eq 'FLO_empty';
760             } ## end RULE: for my $rule (@core_rules)
761             }
762              
763             {
764             # Make sure no ruby candidates or rejected symbols are
765             # end tags of empty elements
766 1         4 SYMBOL: for my $rejected_symbol ( keys %ruby_config ) {
  1         6  
767 29 100       52 next SYMBOL if 'E_' ne substr $rejected_symbol, 0, 2;
768 3         5 my $tag = substr $rejected_symbol, 2;
769 3 50       7 next SYMBOL if not $is_empty_element{$tag};
770 0         0 Carp::croak(
771             qq{Ruby Slippers alternatives specified for \n},
772             qq{ "$tag" is an empty element and this is not allowed"}
773             );
774             } ## end SYMBOL: for my $rejected_symbol ( keys %ruby_config )
775             SYMBOL:
776 1         10 for my $candidate_symbol ( map { @{$_} } values %ruby_config ) {
  29         29  
  29         77  
777 147 100       239 next SYMBOL if 'E_' ne substr $candidate_symbol, 0, 2;
778 1         14 my $tag = substr $candidate_symbol, 2;
779 1 50       5 next SYMBOL if not $is_empty_element{$tag};
780 0         0 Carp::croak(
781             qq{Tag specified as a Ruby Slippers alternative\n},
782             qq{ "$tag" is an empty element and this is not allowed"}
783             );
784             } ## end for my $candidate_symbol ( map { @{$_} } values ...)
785             }
786              
787             # Special case the EOF Ruby Slippers treatment
788             {
789 1         2 @{ $ruby_config{EOF} } =
  1         4  
  1         6  
790             qw( S_html S_head S_body E_body E_html );
791             }
792              
793             {
794             # Find the tag descriptors which refer to required
795             # elements and add them
796              
797             # Required elements are those which we may have to
798             # supply even though they are not in the physical input
799              
800             # Anything which has a start tag among the ruby candidates
801             # is required, since we may be required to create a
802             # non-physical one
803 1         9 my @ruby_start_tags =
804 153         250 grep { ( substr $_, 0, 2 ) eq 'S_' }
805 1         12 map { @{$_} } values %ruby_config;
  30         34  
  30         55  
806              
807             my %defined_in_core_rules =
808 29         79 map { ( substr $_, 4 ) => 'core' }
809 1         11 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  137         198  
  137         172  
810              
811 1         10 my %required_tags = map { ( substr $_, 2 ) => 1 } @ruby_start_tags;
  120         192  
812 1         14 TAG: for my $tag ( keys %required_tags ) {
813 14 100       43 next TAG if $defined_in_core_rules{$tag};
814 1         5 my $flow = $runtime_tag{$tag};
815 1 50       5 die qq{Required element "ELE_$tag" was never defined}
816             if not defined $flow;
817 1         3 my $group = $primary_group_by_tag{$tag};
818 1         4 my $element = 'ELE_' . $tag;
819 1         13 push @core_rules,
820             {
821             lhs => $element,
822             rhs => [ "S_$tag", $flow, "E_$tag" ],
823             action => $element
824             },
825             {
826             lhs => $group,
827             rhs => [$element],
828             };
829 1         5 delete $runtime_tag{$tag};
830             } ## end TAG: for my $tag ( keys %required_tags )
831             }
832              
833             {
834 1         3 my @mentioned_in_core =
  1         4  
835 38         67 map { substr $_, 4 }
836 1         6 grep {m/\A ELE_ /xms} map { @{ $_->{rhs} } } @core_rules;
  205         298  
  139         139  
  139         228  
837             my %defined_in_core =
838 30         65 map { ( substr $_, 4 ) => 'core' }
839 1         12 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  139         200  
  139         170  
840             my @symbols_with_no_ruby_status =
841 1   33     12 grep { !$defined_in_core{$_} and !$runtime_tag{$_} }
  38         76  
842             @mentioned_in_core;
843 1 50       10 die 'symbols with no ruby status: ', join q{ },
844             @symbols_with_no_ruby_status
845             if scalar @symbols_with_no_ruby_status;
846             }
847              
848             # Calculate the numeric Ruby ranks
849 1         3 my %ruby_rank = ();
850 1         6 for my $rejected_symbol ( keys %ruby_config ) {
851 30         33 my $rank = 1;
852 30         31 for my $candidate ( reverse @{ $ruby_config{$rejected_symbol} } ) {
  30         46  
853 153         251 $ruby_rank{$rejected_symbol}{$candidate} = $rank++;
854             }
855             } ## end for my $rejected_symbol ( keys %ruby_config )
856              
857             {
858             my %element_used =
859 38         77 map { ( $_ => 1 ) }
860 1         3 grep {m/\A ELE_ /xms} map { @{ $_->{rhs} } } @core_rules;
  205         337  
  139         143  
  139         214  
861             my @elements_defined_but_not_used =
862 30         43 grep { !$element_used{$_} }
863 1         19 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  139         206  
  139         175  
864 1 50       13 die 'elements defined but never used: ', join q{ },
865             @elements_defined_but_not_used
866             if scalar @elements_defined_but_not_used;
867             }
868              
869             {
870 1         4 my %seen = ();
  1         4  
  1         4  
871 1         3 for my $rule (@core_rules) {
872 139         188 my $lhs = $rule->{lhs};
873 139         153 my $rhs = $rule->{rhs};
874 139         148 my $desc = join q{ }, $lhs, '::=', @{$rhs};
  139         215  
875 139 50       230 if ( $seen{$desc} ) {
876 0         0 Carp::croak("Duplicate rule: $desc");
877             }
878 139         382 $seen{$desc}++;
879             } ## end for my $rule (@core_rules)
880             }
881              
882             return {
883 1         422 rules => \@core_rules,
884             runtime_tag => \%runtime_tag,
885             ruby_slippers_rank_by_name => \%ruby_rank,
886             is_empty_element => \%is_empty_element,
887             primary_group_by_tag => \%primary_group_by_tag
888             };
889              
890             } ## end sub compile
891              
892             1;
893              
894             # vim: expandtab shiftwidth=4: