File Coverage

html/lib/Marpa/R2/HTML/Config/Compile.pm
Criterion Covered Total %
statement 397 444 89.4
branch 95 144 65.9
condition 23 36 63.8
subroutine 23 26 88.4
pod 0 13 0.0
total 538 663 81.1


line stmt bran cond sub pod time code
1             # Copyright 2022 Jeffrey Kegler
2             # This file is part of Marpa::R2. Marpa::R2 is free software: you can
3             # redistribute it and/or modify it under the terms of the GNU Lesser
4             # General Public License as published by the Free Software Foundation,
5             # either version 3 of the License, or (at your option) any later version.
6             #
7             # Marpa::R2 is distributed in the hope that it will be useful,
8             # but WITHOUT ANY WARRANTY; without even the implied warranty of
9             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10             # Lesser General Public License for more details.
11             #
12             # You should have received a copy of the GNU Lesser
13             # General Public License along with Marpa::R2. If not, see
14             # http://www.gnu.org/licenses/.
15              
16             package Marpa::R2::HTML::Config::Compile;
17              
18 1     1   18 use 5.010001;
  1         4  
19 1     1   5 use strict;
  1         1  
  1         21  
20 1     1   4 use warnings;
  1         2  
  1         28  
21              
22 1     1   6 use vars qw($VERSION $STRING_VERSION);
  1         2  
  1         71  
23             $VERSION = '13.001_000';
24             $STRING_VERSION = $VERSION;
25             ## no critic(BuiltinFunctions::ProhibitStringyEval)
26             $VERSION = eval $VERSION;
27             ## use critic
28              
29 1     1   6 use Data::Dumper;
  1         1  
  1         52  
30 1     1   6 use English qw( -no_match_vars );
  1         1  
  1         7  
31              
32 1     1   747 use Marpa::R2::HTML::Config::Core;
  1         2  
  1         30  
33 1     1   5 use Marpa::R2::Thin::Trace;
  1         2  
  1         22  
34              
35             # Indexes into the symbol table
36 1     1   5 use constant CONTEXT_CLOSED => 0;
  1         2  
  1         65  
37 1     1   5 use constant CONTENTS_CLOSED => 1;
  1         2  
  1         43  
38 1     1   5 use constant CONTEXT => 2;
  1         2  
  1         40  
39 1     1   6 use constant CONTENTS => 3;
  1         2  
  1         5154  
40              
41             sub do_is_included_statement {
42 10     10 0 23 my ( $self, $external_element, undef, undef, undef, $external_group ) = @_;
43 10         12 my $tag = $external_element;
44 10         39 $tag =~ s/\A [<] \s* //xms;
45 10         36 $tag =~ s/\s* [>] \z //xms;
46 10         22 my $element = 'ELE_' . $tag;
47 10         22 ( my $group_name = $external_group ) =~ s/\A [%] //xms;
48 10         21 my $group = 'GRP_' . $group_name;
49              
50 10         16 my $symbol_table = $self->{symbol_table};
51 10   50     24 my $element_entry = $symbol_table->{$element} //= [];
52 10         18 my $group_entry = $symbol_table->{$group};
53              
54             # For now, new groups cannot be defined
55 10 50       20 Carp::croak(
56             qq{Group "$group" does not exist\n},
57             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
58             ) if not defined $group_entry;
59              
60 10         15 my $closed_reason = $element_entry->[CONTEXT_CLOSED];
61 10 50       17 if ($closed_reason) {
62 0         0 Carp::croak(
63             qq{Context of "$element" cannot be changed:\n},
64             qq{ Reason: $closed_reason\n},
65             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
66             );
67             } ## end if ($closed_reason)
68 10         17 $closed_reason = $group_entry->[CONTENTS_CLOSED];
69 10 50       17 if ($closed_reason) {
70 0         0 Carp::croak(
71             qq{Contents of "$group" cannot be changed:\n},
72             qq{ Reason: $closed_reason\n},
73             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
74             );
75             } ## end if ($closed_reason)
76              
77             # If this is the first, it sets the primary group
78 10         18 my $primary_group_by_tag = $self->{primary_group_by_tag};
79 10   33     46 $primary_group_by_tag->{$tag} //= $group;
80 10         13 push @{ $element_entry->[CONTEXT] }, $group;
  10         32  
81              
82 10         29 return;
83              
84             } ## end sub do_is_included
85              
86             sub do_is_a_included_statement {
87 77     77 0 169 my ( $self, $external_element, undef, undef, $external_flow, undef, undef, $external_group ) = @_;
88 77         95 my $tag = $external_element;
89 77         302 $tag =~ s/\A [<] \s* //xms;
90 77         256 $tag =~ s/\s* [>] \z //xms;
91 77         188 ( my $flow_name = $external_flow ) =~ s/\A [*] //xms;
92 77         172 ( my $group_name = $external_group ) =~ s/\A [%] //xms;
93 77         147 my $flow = 'FLO_' . $flow_name;
94 77         117 my $group = 'GRP_' . $group_name;
95 77         96 my $element = 'ELE_' . $tag;
96 77         122 my $symbol_table = $self->{symbol_table};
97 77   50     404 my $element_entry = $symbol_table->{$element} //= [];
98 77         141 my $group_entry = $symbol_table->{$group};
99 77         109 my $flow_entry = $symbol_table->{$flow};
100              
101             # For now, new flows and groups cannot be defined
102 77 50       120 Carp::croak(
103             qq{Group "$group" does not exist\n},
104             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
105             ) if not defined $group_entry;
106 77 50       119 Carp::croak(
107             qq{Flow "$flow" does not exist\n},
108             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
109             ) if not defined $flow_entry;
110              
111 77         101 my $closed_reason = $element_entry->[CONTEXT_CLOSED];
112 77 50       114 if ($closed_reason) {
113 0         0 Carp::croak(
114             qq{Context of "$element" cannot be changed:\n},
115             qq{ Reason: $closed_reason\n},
116             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
117             );
118             } ## end if ($closed_reason)
119 77         100 $closed_reason = $element_entry->[CONTENTS_CLOSED];
120 77 50       122 if ($closed_reason) {
121 0         0 Carp::croak(
122             qq{Contents of "$element" cannot be changed:\n},
123             qq{ Reason: $closed_reason\n},
124             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
125             );
126             } ## end if ($closed_reason)
127 77         97 $closed_reason = $flow_entry->[CONTEXT_CLOSED];
128 77 50       131 if ($closed_reason) {
129 0         0 Carp::croak(
130             qq{Context of "$flow" cannot be changed:\n},
131             qq{ Reason: $closed_reason\n},
132             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
133             );
134             } ## end if ($closed_reason)
135 77         101 $closed_reason = $group_entry->[CONTENTS_CLOSED];
136 77 50       113 if ($closed_reason) {
137 0         0 Carp::croak(
138             qq{Contents of "$group" cannot be changed:\n},
139             qq{ Reason: $closed_reason\n},
140             qq{ Problem was in this line: }, $Marpa::R2::HTML::Config::Compile::LINE
141             );
142             } ## end if ($closed_reason)
143              
144             Carp::croak(
145 77 50       128 qq{Contents of "$element" are already being defined:\n},
146             qq{ Problem was in this line: },
147             $Marpa::R2::HTML::Config::Compile::LINE
148             ) if defined $element_entry->[CONTENTS];
149 77 50       114 Carp::croak(
150             qq{Context of "$element" is already being defined:\n},
151             qq{ Problem was in this line: },
152             $Marpa::R2::HTML::Config::Compile::LINE
153             ) if defined $element_entry->[CONTEXT];
154              
155             # Always sets the primary group
156 77         102 my $primary_group_by_tag = $self->{primary_group_by_tag};
157 77         244 $primary_group_by_tag->{$tag} = $group;
158 77         117 $element_entry->[CONTENTS] = $flow;
159 77         113 $element_entry->[CONTEXT] = $group;
160 77         137 $element_entry->[CONTEXT_CLOSED] = $element_entry->[CONTENTS_CLOSED] =
161             'Element is already fully defined';
162              
163 77         186 return;
164             } ## end sub do_is_a_included
165              
166             sub do_is_statement {
167              
168 12     12 0 27 my ( $self, $external_element, undef, $external_flow ) = @_;
169 12         21 my $tag = $external_element;
170 12         50 $tag =~ s/\A [<] \s* //xms;
171 12         43 $tag =~ s/\s* [>] \z //xms;
172 12         31 ( my $flow_name = $external_flow ) =~ s/\A [*] //xms;
173 12         24 my $flow = 'FLO_' . $flow_name;
174 12         22 my $element = 'ELE_' . $tag;
175 12         18 my $symbol_table = $self->{symbol_table};
176 12   100     62 my $element_entry = $symbol_table->{$element} //= [];
177 12         25 my $flow_entry = $symbol_table->{$flow};
178              
179             # For now, new flows cannot be defined
180 12 50       24 Carp::croak(
181             qq{Flow "$flow" does not exist\n},
182             qq{ Problem was in this line: },
183             $Marpa::R2::HTML::Config::Compile::LINE
184             ) if not defined $flow_entry;
185              
186 12         17 my $closed_reason = $element_entry->[CONTENTS_CLOSED];
187 12 50       24 if ($closed_reason) {
188 0         0 Carp::croak(
189             qq{Contents of "$element" cannot be changed:\n},
190             qq{ Reason: $closed_reason\n},
191             qq{ Problem was in this line: },
192             $Marpa::R2::HTML::Config::Compile::LINE
193             );
194             } ## end if ($closed_reason)
195 12         19 $closed_reason = $flow_entry->[CONTEXT_CLOSED];
196 12 50       19 if ($closed_reason) {
197 0         0 Carp::croak(
198             qq{Context of "$flow" cannot be changed:\n},
199             qq{ Reason: $closed_reason\n},
200             qq{ Problem was in this line: },
201             $Marpa::R2::HTML::Config::Compile::LINE
202             );
203             } ## end if ($closed_reason)
204              
205             Carp::croak(
206 12 50       24 qq{Contents of "$element" are already being defined:\n},
207             qq{ Problem was in this line: },
208             $Marpa::R2::HTML::Config::Compile::LINE
209             ) if defined $element_entry->[CONTENTS];
210              
211 12         21 $element_entry->[CONTENTS] = $flow;
212 12         17 $element_entry->[CONTENTS_CLOSED] =
213             'Contents of Element are already defined';
214              
215 12         27 return;
216             } ## end sub do_is
217              
218             sub problem_in_rule {
219 0     0 0 0 my ($string) = @_;
220 0         0 Marpa::R2::Context::bail( [ 'rule', $string, Marpa::R2::Context::location() ] );
221             }
222              
223             sub do_contains_statement {
224 17     17 0 40 my ( $self, $external_element, undef, $external_contents ) = @_;
225              
226             # Production is Element with custom flow
227 17         23 my $tag = $external_element;
228 17         73 $tag =~ s/\A [<] \s* //xms;
229 17         61 $tag =~ s/\s* [>] \z //xms;
230 17         35 my $element_symbol = 'ELE_' . $tag;
231 17         27 my $symbol_table = $self->{symbol_table};
232 17   100     116 my $element_entry = $symbol_table->{$element_symbol} //= [];
233 17         32 my $closed_reason = $element_entry->[CONTENTS_CLOSED];
234 17 50       29 if ($closed_reason) {
235 0         0 Carp::croak(
236             qq{Contents of "$element_symbol" cannot be changed:\n},
237             qq{ Reason: $closed_reason\n},
238             qq{ Problem was in this line: },
239             $Marpa::R2::HTML::Config::Compile::LINE
240             );
241             } ## end if ($closed_reason)
242              
243 17         28 my @contents = ();
244              
245             CONTAINED_SYMBOL:
246 17         27 for my $external_content_symbol (@{$external_contents}) {
  17         35  
247 27         30 my $content_symbol;
248 27 100       96 if ( $external_content_symbol =~ /\A [<] (\w+) [>] \z/xms ) {
249 24         59 $content_symbol = 'ELE_' . $1;
250             }
251 27 100       53 if ( $external_content_symbol =~ /\A [%] (\w+) \z/xms ) {
252 3         10 $content_symbol = 'GRP_' . $1;
253             }
254 27   33     50 $content_symbol //= $external_content_symbol;
255 27         41 my $content_entry = $symbol_table->{$content_symbol};
256 27 100       46 if ( not defined $content_entry ) {
257 11 50       28 if ( not $content_symbol =~ /\A ELE_ /xms ) {
258 0         0 problem_in_rule(
259             qq{Symbol "$external_content_symbol" is undefined\n});
260             }
261 11         16 $content_entry = [];
262             } ## end if ( not defined $content_entry )
263 27         44 $closed_reason = $content_entry->[CONTEXT_CLOSED];
264 27 50       38 if ($closed_reason) {
265 0         0 Carp::croak(
266             qq{Context of "$external_content_symbol" cannot be changed:\n},
267             qq{ Reason: $closed_reason\n},
268             qq{ Problem was in this line: },
269             $Marpa::R2::HTML::Config::Compile::LINE
270             );
271             } ## end if ($closed_reason)
272 27         61 push @contents, $content_symbol;
273             } ## end CONTAINED_SYMBOL: for my $external_content_symbol (@external_contents)
274              
275 17         23 push @{ $element_entry->[CONTENTS] }, @contents;
  17         44  
276              
277 17         43 return;
278              
279             } ## end sub do_contains
280              
281             sub do_array_assignment {
282 3     3 0 19 my ( $self, $external_list, undef, $external_members ) = @_;
283 3         15 ( my $new_list = $external_list ) =~ s/\A [@] //xms;
284 3         8 my $lists = $self->{lists};
285             Carp::croak(
286             "Problem in line: ", $Marpa::R2::HTML::Config::Compile::LINE,
287             "\n", 'list @' . $new_list . ' is already defined'
288 3 50       10 ) if defined $lists->{$new_list};
289 3         7 my @members = ();
290 3         5 RAW_MEMBER: for my $raw_member (@{$external_members}) {
  3         7  
291 10 100       24 if ( $raw_member =~ / \A [@] (.*) \z/xms ) {
292 1         4 my $member_list = $1;
293             Carp::croak(
294             "Problem in line: ",
295             $Marpa::R2::HTML::Config::Compile::LINE,
296             "\n",
297             'member list @' . $member_list . ' is not yet defined'
298 1 50       4 ) if not defined $lists->{$member_list};
299 1         2 push @members, @{ $lists->{$member_list} };
  1         2  
300 1         3 next RAW_MEMBER;
301             } ## end if ( $raw_member =~ / \A [@] (.*) \z/xms )
302 9         17 push @members, $raw_member;
303             } ## end RAW_MEMBER: for my $raw_member (@{$external_members})
304 3         9 $lists->{$new_list} = \@members;
305 3         7 return;
306             } ## end sub do_array_assignment
307              
308             sub do_ruby_statement {
309 27     27 0 56 my ( $self, $external_reject_symbol, undef, $external_candidates ) = @_;
310 27         37 my $lists = $self->{lists};
311 27         50 my @symbols = ($external_reject_symbol);
312 27         34 RAW_CANDIDATE: for my $raw_candidate ( @{$external_candidates} ) {
  27         46  
313 67 100       169 if ( $raw_candidate =~ / \A [@] (.*) \z/xms ) {
314 22         45 my $list = $1;
315             Carp::croak(
316             "Problem in line: ",
317             $Marpa::R2::HTML::Config::Compile::LINE,
318             "\n", 'candidate list @' . $list . ' is not yet defined'
319 22 50       46 ) if not defined $lists->{$list};
320 22         35 push @symbols, @{ $lists->{$list} };
  22         67  
321 22         44 next RAW_CANDIDATE;
322             } ## end if ( $raw_candidate =~ / \A [@] (.*) \z/xms )
323 45         84 push @symbols, $raw_candidate;
324             } ## end RAW_CANDIDATE: for my $raw_candidate ( @{$external_candidates} )
325 27         34 my @internal_symbols = ();
326 27         41 SYMBOL: for my $symbol (@symbols) {
327 157 100 100     446 if ( $symbol eq 'CDATA' or $symbol eq 'PCDATA' ) {
328 2         4 push @internal_symbols, $symbol;
329 2         5 next SYMBOL;
330             }
331 155 100       260 if ( $symbol =~ /\A ( [<] [%] (inline|head|block) [>] ) \z/xms ) {
332 2         7 my $special_symbol = $1;
333 2         15 push @internal_symbols, $special_symbol;
334 2         6 next SYMBOL;
335             }
336 153 50       247 if ( $symbol =~ m{\A ( [<] [/] [%] (inline|head|block) [>] ) \z}xms )
337             {
338 0         0 my $special_symbol = $1;
339 0         0 push @internal_symbols, $special_symbol;
340 0         0 next SYMBOL;
341             } ## end if ( $symbol =~ ...)
342 153 100       274 if ( $symbol =~ m{\A ( [<] [*] [>] ) \z}xms ) {
343 1         3 my $special_symbol = $1;
344 1         4 push @internal_symbols, $special_symbol;
345 1         2 next SYMBOL;
346             }
347 152 100       258 if ( $symbol =~ m{\A ( [<] [/] [*] [>] ) \z}xms ) {
348 12         23 my $special_symbol = $1;
349 12         22 push @internal_symbols, $special_symbol;
350 12         17 next SYMBOL;
351             }
352 140 100       388 if ( $symbol =~ /\A [<] (\w+) [>] \z/xms ) {
353 136         277 my $start_tag = 'S_' . $1;
354 136         246 push @internal_symbols, $start_tag;
355 136         194 next SYMBOL;
356             }
357 4 50       17 if ( $symbol =~ m{\A [<] [/](\w+) [>] \z}xms ) {
358 4         12 my $end_tag = 'E_' . $1;
359 4         10 push @internal_symbols, $end_tag;
360 4         7 next SYMBOL;
361             }
362             Carp::croak(
363 0         0 "Problem in line: ",
364             $Marpa::R2::HTML::Config::Compile::LINE,
365             "\n", qq{Misformed symbol "$symbol"}
366             );
367             } ## end SYMBOL: for my $symbol (@symbols)
368 27         37 my $rejected_symbol = shift @internal_symbols;
369 27         84 $self->{ruby_config}->{$rejected_symbol} = \@internal_symbols;
370 27         78 return;
371             } ## end sub do_ruby_statement
372              
373             sub die_on_read_problem {
374 0     0 0 0 my ( $rec, $t, $token_value, $string, $position ) = @_;
375 0 0       0 say $rec->show_progress() or die "say failed: $ERRNO";
376 0         0 my $problem_position = $position - length $1;
377 0         0 my $before_start = $problem_position - 40;
378 0 0       0 $before_start = 0 if $before_start < 0;
379 0         0 my $before_length = $problem_position - $before_start;
380 0         0 die "Problem near position $problem_position\n",
381             q{Problem is here: "},
382             ( substr $string, $before_start, $before_length + 40 ),
383             qq{"\n},
384             ( q{ } x ( $before_length + 18 ) ), qq{^\n},
385             q{Token rejected, "}, $t->[0], qq{", "$token_value"},
386             ;
387             } ## end sub die_on_read_problem
388              
389 47     47 0 59 sub do_array { shift; return [@_]; }
  47         117  
390              
391             sub do_what_I_mean {
392              
393             # The first argument is the per-parse variable.
394             # At this stage, just throw it away
395 427     427 0 525 shift;
396              
397             # Throw away any undef's
398 427         637 my @children = grep { defined } @_;
  572         1201  
399              
400             # Return what's left
401 427 50       1101 return scalar @children > 1 ? \@children : shift @children;
402             }
403              
404             # Order matters !!
405             my @terminals = (
406             [ kw_CDATA => qr/CDATA\b/xms ],
407             [ kw_PCDATA => qr/PCDATA\b/xms ],
408             [ kw_is => qr/is\b/ixms ],
409             [ kw_a => qr/a\b/ixms ],
410             [ kw_contains => qr/contains\b/ixms ],
411             [ kw_included => qr/included\b/ixms ],
412             [ kw_in => qr/in\b/ixms ],
413             [ flow => qr/[*]\w+\b/xms ],
414             [ group => qr/[%]\w+\b/xms ],
415             [ list => qr/[@]\w+\b/xms ],
416             [ start_tag => qr/[<]\w+[>]/xms ],
417             [ end_tag => qr{[<][/]\w+[>]}xms ],
418             [ wildcard_start_tag => qr/[<][*][>]/xms ],
419             [ wildcard_end_tag => qr{[<][/][*][>]}xms ],
420             [ group_start_tag => qr/[<][%]\w+[>]/xms ],
421             [ group_end_tag => qr/[<][%]\w+[>]/xms ],
422             [ op_assign => qr/[=]/xms ],
423             [ op_ruby => qr/[-][>]/xms ],
424             [ semi_colon => qr/[;]/xms ],
425             );
426              
427             sub create_grammar {
428              
429 1     1 0 2 my $source = <<'END_OF_GRAMMAR';
430             translation_unit ::= statement*
431             statement ::= is_included_statement
432             | is_a_included_statement
433             | is_statement
434             | contains_statement
435             | list_assignment
436             | ruby_statement
437             is_included_statement ::= element kw_is kw_included kw_in
438             action => do_is_included_statement
439             element ::= start_tag
440             is_a_included_statement ::= element kw_is kw_a flow kw_included kw_in
441             action => do_is_a_included_statement
442             is_statement ::= element kw_is flow
443             action => do_is_statement
444             contains_statement ::= element kw_contains contents
445             action => do_contains_statement
446             contents ::= content_item*
447             action => do_array
448             list_assignment ::= list op_assign list_members
449             action => do_array_assignment
450             list_members ::= list_member*
451             action => do_array
452             list_member ::= ruby_symbol
453             list_member ::= list
454             content_item ::= element | | kw_PCDATA | kw_CDATA
455             ruby_statement ::= ruby_symbol op_ruby ruby_symbol_list
456             action => do_ruby_statement
457             ruby_symbol_list ::= ruby_symbol*
458             action => do_array
459             ruby_symbol ::= kw_PCDATA | kw_CDATA
460             | start_tag | group_start_tag | wildcard_start_tag
461             | end_tag | group_end_tag | wildcard_end_tag
462             | list
463             END_OF_GRAMMAR
464            
465 1         11 my $grammar = Marpa::R2::Grammar->new(
466             { start => 'translation_unit',
467             action_object => __PACKAGE__,
468             rules =>$source,
469             default_action => 'do_what_I_mean'
470             }
471             );
472 1         7 $grammar->precompute();
473 1         4 return $grammar;
474             }
475              
476             sub source_by_location_range {
477 0     0 0 0 my ( $self, $start, $end ) = @_;
478 0         0 my $positions = $self->{positions};
479 0 0       0 my $start_pos = $start > 0 ? $positions->[$start] : 0;
480 0         0 my $end_pos = $positions->[$end];
481 0         0 return substr ${ $self->{source_ref} }, $start_pos, $end_pos - $start_pos;
  0         0  
482             } ## end sub source_by_location_range
483              
484             sub compile {
485 1     1 0 3 my ($source_ref) = @_;
486              
487             # A quasi-object, not used outside this routine
488 1         2 my $self = bless {}, __PACKAGE__;
489              
490 1         8 my %species_handler = (
491             cruft => 'SPE_CRUFT',
492             comment => 'SPE_COMMENT',
493             pi => 'SPE_PI',
494             decl => 'SPE_DECL',
495             document => 'SPE_TOP',
496             whitespace => 'SPE_WHITESPACE',
497             pcdata => 'SPE_PCDATA',
498             cdata => 'SPE_CDATA',
499             prolog => 'SPE_PROLOG',
500             trailer => 'SPE_TRAILER',
501             );
502              
503 1         3 my @core_rules = ();
504 1         2 my %runtime_tag = ();
505 1         1 my %primary_group_by_tag = ();
506 1         8 $self->{primary_group_by_tag} = \%primary_group_by_tag;
507              
508             {
509 1         2 LINE:
510 1         14 for my $line ( split /\n/xms,
511             $Marpa::R2::HTML::Internal::Core::CORE_BNF )
512             {
513 66         91 my $definition = $line;
514 66         89 chomp $definition;
515 66         106 $definition =~ s/ [#] .* //xms; # Remove comments
516             next LINE
517 66 100       151 if not $definition =~ / \S /xms; # ignore all-whitespace line
518 40         96 my $sequence = ( $definition =~ s/ [*] \s* $//xms );
519 40 50       163 if ( $definition =~ s/ \s* [:][:][=] \s* / /xms ) {
520              
521             # Production is Ordinary BNF rule
522 40         89 my @symbols = ( split q{ }, $definition );
523 40         57 my $lhs = shift @symbols;
524 40         94 my %rule_descriptor = (
525             lhs => $lhs,
526             rhs => \@symbols,
527             );
528 40 100       67 if ($sequence) {
529 7         11 $rule_descriptor{min} = 0;
530             }
531 40 100       95 if ( my $handler = $species_handler{$lhs} ) {
    100          
532 10         16 $rule_descriptor{action} = $handler;
533             }
534             elsif ( $lhs =~ /^ELE_/xms ) {
535 1         3 $rule_descriptor{action} = "$lhs";
536             }
537 40         61 push @core_rules, \%rule_descriptor;
538 40         93 next LINE;
539             } ## end if ( $definition =~ s/ \s* [:][:][=] \s* / /xms )
540 0         0 die "Badly formed line in grammar description: $line";
541             } ## end LINE: for my $line ( split /\n/xms, ...)
542             }
543              
544 1         7 my @core_symbols = map { ( $_->{lhs}, @{ $_->{rhs} } ) } @core_rules;
  40         46  
  40         62  
545              
546             # Start out by closing the context and contents of everything
547             my %symbol_table = map {
548 1         3 $_ =>
  88         150  
549             [ 'Reserved by the core grammar', 'Reserved by the core grammar' ]
550             } @core_symbols;
551 1         12 $self->{symbol_table} = \%symbol_table;
552              
553             # A few token symbols are allowed as contents -- most non-element
554             # tokens are included via the SGML group
555 1         3 for my $token_symbol (qw(cdata pcdata)) {
556 2         4 $symbol_table{$token_symbol}->[CONTEXT_CLOSED] = 0;
557             }
558              
559             # Many groups are defined to to be used
560 1         2 for my $group_symbol (
561             qw( GRP_anywhere GRP_pcdata GRP_cdata GRP_mixed GRP_block GRP_head GRP_inline)
562             )
563             {
564 7         10 $symbol_table{$group_symbol}->[CONTEXT_CLOSED] = 0;
565             } ## end for my $group_symbol ( ...)
566              
567             # Flow symbols are almost all allowed as contents
568             FLOW_SYMBOL:
569 1         2 for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } @core_symbols ) {
  88         132  
570              
571             # The SGML flow is included automatically as needed
572             # and should not be explicity specified
573 13 100       24 next FLOW_SYMBOL if $flow_symbol eq 'FLO_SGML';
574 7         11 $symbol_table{$flow_symbol}->[CONTEXT_CLOSED] = 0;
575             } ## end for my $flow_symbol ( grep { $_ =~ m/\A FLO_ /xms } ...)
576              
577             # A few groups are also extensible
578 1         2 for my $group_symbol (qw( GRP_anywhere GRP_block GRP_head GRP_inline )) {
579 4         21 $symbol_table{$group_symbol}->[CONTENTS_CLOSED] = 0;
580             }
581              
582             # As very special cases the contents of the and
583             # elements can be changed
584 1         2 for my $element_symbol (qw( ELE_head ELE_body )) {
585 2         4 $symbol_table{$element_symbol}->[CONTENTS_CLOSED] = 0;
586             }
587              
588             {
589             # Make sure everything for which we have a handler was defined in
590             # the core grammar
591 1         3 my @species_not_defined = grep { not defined $symbol_table{$_} }
  1         6  
  10         17  
592             keys %species_handler;
593 1 50       4 if ( scalar @species_not_defined ) {
594 0         0 die
595             'Definitions for the following required text components are missing: ',
596             join q{ }, @species_not_defined;
597             }
598             }
599              
600 1         21 my %ruby_config = ();
601 1         3 my %lists = ();
602 1         2 $self->{ruby_config} = \%ruby_config;
603 1         2 $self->{lists} = \%lists;
604 1         2 $self->{source_ref} = $source_ref;
605 1         2 my @positions = (0);
606 1         2 $self->{positions} = \@positions;
607              
608 1         2 state $grammar = create_grammar();
609 1         8 my $recce = Marpa::R2::Recognizer->new({ grammar => $grammar});
610 1         4 my $string = ${$source_ref};
  1         3  
611 1         2 my $length = length $string;
612 1         5 pos $string = 0;
613 1         5 TOKEN: while ( pos $string < $length ) {
614              
615             # skip comment
616 1662 100       4151 next TOKEN if $string =~ m/\G \s* [#] [^\n]* \n/gcxms;
617              
618             # skip whitespace
619 1645 100       4688 next TOKEN if $string =~ m/\G\s+/gcxms;
620              
621             # read other tokens
622 823         1454 TOKEN_TYPE: for my $t (@terminals) {
623 6602 100       69414 next TOKEN_TYPE if not $string =~ m/\G($t->[1])/gcxms;
624             # say join " ", $t->[0], '->', $1;
625 823 50       2920 if ( not defined $recce->read( $t->[0], $1 ) ) {
626 0         0 die_on_read_problem( $recce, $t, $1, $string, pos $string );
627             }
628 823         1731 my $latest_earley_set = $recce->latest_earley_set();
629 823         1459 $positions[$latest_earley_set] = pos $string;
630 823         2194 next TOKEN;
631             } ## end TOKEN_TYPE: for my $t (@terminals)
632              
633 0         0 die q{No token at "}, ( substr $string, pos $string, 40 ),
634             q{", position }, pos $string;
635             } ## end TOKEN: while ( pos $string < $length )
636              
637             # Value not used
638 1         10 my $parse_value_ref;
639 1         5 my $eval_ok = eval {
640              
641             # Have the new() just return the current $self
642 1     1   11 local *new = sub { return $self };
  1         3  
643 1         9 $parse_value_ref = $recce->value();
644 1         5 1;
645             };
646 1 50       8 if ( not defined $eval_ok ) {
647 0         0 my $eval_ref_type = ref $EVAL_ERROR;
648 0 0       0 die $EVAL_ERROR if not $eval_ref_type;
649 0 0 0     0 if ( $eval_ref_type eq 'ARRAY' and $EVAL_ERROR->[0] eq 'rule' ) {
650 0         0 my ( undef, $message, $start, $end ) = @{$EVAL_ERROR};
  0         0  
651 0         0 chomp $message;
652 0         0 die $message, "\n",
653             "Rule with problem was: ",
654             $self->source_by_location_range( $start, $end ), "\n";
655             } ## end if ( $eval_ref_type eq 'ARRAY' and $EVAL_ERROR->[0] ...)
656 0         0 die "Unknown exception: ", Data::Dumper::Dumper($EVAL_ERROR);
657             } ## end if ( not defined $eval_ok )
658 1 50       4 if ( not defined $parse_value_ref ) {
659 0         0 die "Compile of HTML configuration failed: source did not parse";
660             }
661              
662 1         3 my %sgml_flow_included = ();
663 1         34 SYMBOL: for my $element_symbol ( keys %symbol_table ) {
664 143 100       250 next SYMBOL if not 'ELE_' eq substr $element_symbol, 0, 4;
665 106         134 my $tag = substr $element_symbol, 4;
666 106         120 my $entry = $symbol_table{$element_symbol};
667 106         122 my $context = $entry->[CONTEXT];
668 106         114 my $contents = $entry->[CONTENTS];
669 106 100 100     181 next SYMBOL if not defined $context and not defined $contents;
670 105 50 66     227 if ( defined $context and not defined $contents ) {
671 0         0 Carp::croak(
672             qq{Element <$tag> was defined but was never given any contents}
673             );
674             }
675              
676             # Contents without context are OK at this point
677             # We will check later for elements defined but not used
678 105   100     181 $context //= [];
679              
680             # The special case where both are defined and both
681             # are scalars is for elements to be created at runtime
682 105 50 66     226 if ( not ref $context and not ref $contents ) {
683 77         137 $runtime_tag{$tag} = $contents;
684 77         103 next SYMBOL;
685             }
686              
687 28 100       51 if ( ref $contents ) {
688 16         31 my $contents_symbol = 'Contents_ELE_' . $tag;
689 16         26 my $item_symbol = 'GRP_ELE_' . $tag;
690 16         100 push @core_rules,
691             {
692             lhs => $element_symbol,
693             rhs => [ "S_$tag", $contents_symbol, "E_$tag" ],
694             action => $element_symbol,
695             },
696             {
697             lhs => $contents_symbol,
698             rhs => [$item_symbol],
699             min => 0
700             };
701 16         58 for my $content_item ( @{$contents} ) {
  16         40  
702 27         85 push @core_rules,
703             {
704             lhs => $item_symbol,
705             rhs => [$content_item],
706             };
707             } ## end for my $content_item ( @{$contents} )
708 16 50       36 if ( !$sgml_flow_included{$item_symbol} ) {
709 16         44 $sgml_flow_included{$item_symbol} = 1;
710 16         43 push @core_rules,
711             {
712             lhs => $item_symbol,
713             rhs => ['GRP_SGML'],
714             };
715             } ## end if ( !$sgml_flow_included{$item_symbol} )
716             } ## end if ( ref $contents )
717             else {
718 12         71 push @core_rules,
719             {
720             lhs => $element_symbol,
721             rhs => [ "S_$tag", $contents, "E_$tag" ],
722             action => $element_symbol,
723             };
724             } ## end else [ if ( ref $contents ) ]
725              
726 28 50       55 $context = [$context] if not ref $context;
727 28         32 for my $context_item ( @{$context} ) {
  28         49  
728 10         33 push @core_rules,
729             {
730             lhs => $context_item,
731             rhs => [$element_symbol],
732             };
733             } ## end for my $context_item ( @{$context} )
734             } ## end SYMBOL: for my $element_symbol ( keys %symbol_table )
735              
736             # Finish out the Ruby Slippers configuration
737             # Make sure the last resort defaults are always defined
738 1         15 for my $required_rubies_desc (qw( <*> )) {
739 3   100     25 $ruby_config{$required_rubies_desc} //= [];
740             }
741              
742 1         7 DESC: for my $rubies_desc ( keys %ruby_config ) {
743 29         36 my $candidates = $ruby_config{$rubies_desc};
744 29 100       30 next DESC if grep { '' eq $_ } @{$candidates};
  130         194  
  29         41  
745 17         18 $ruby_config{$rubies_desc} = [ @{$candidates}, '' ];
  17         44  
746             }
747              
748 1         4 my %is_empty_element = ();
749             {
750 1         13 for my $tag ( keys %runtime_tag ) {
751 77         86 my $contents = $runtime_tag{$tag};
752 77 100       130 $is_empty_element{$tag} = 1 if $contents eq 'FLO_empty';
753             }
754 1         5 RULE: for my $rule (@core_rules) {
755 137         220 my $lhs = $rule->{lhs};
756 137 100       238 next RULE if not 'ELE_' eq substr $lhs, 0, 4;
757 29         40 my $contents = $rule->{rhs}->[1];
758 29 100       57 $is_empty_element{ substr $lhs, 4 } = 1
759             if $contents eq 'FLO_empty';
760             } ## end RULE: for my $rule (@core_rules)
761             }
762              
763             {
764             # Make sure no ruby candidates or rejected symbols are
765             # end tags of empty elements
766 1         2 SYMBOL: for my $rejected_symbol ( keys %ruby_config ) {
  1         5  
767 29 100       52 next SYMBOL if 'E_' ne substr $rejected_symbol, 0, 2;
768 3         8 my $tag = substr $rejected_symbol, 2;
769 3 50       8 next SYMBOL if not $is_empty_element{$tag};
770 0         0 Carp::croak(
771             qq{Ruby Slippers alternatives specified for \n},
772             qq{ "$tag" is an empty element and this is not allowed"}
773             );
774             } ## end SYMBOL: for my $rejected_symbol ( keys %ruby_config )
775             SYMBOL:
776 1         5 for my $candidate_symbol ( map { @{$_} } values %ruby_config ) {
  29         32  
  29         73  
777 147 100       250 next SYMBOL if 'E_' ne substr $candidate_symbol, 0, 2;
778 1         11 my $tag = substr $candidate_symbol, 2;
779 1 50       5 next SYMBOL if not $is_empty_element{$tag};
780 0         0 Carp::croak(
781             qq{Tag specified as a Ruby Slippers alternative\n},
782             qq{ "$tag" is an empty element and this is not allowed"}
783             );
784             } ## end for my $candidate_symbol ( map { @{$_} } values ...)
785             }
786              
787             # Special case the EOF Ruby Slippers treatment
788             {
789 1         4 @{ $ruby_config{EOF} } =
  1         4  
  1         5  
790             qw( S_html S_head S_body E_body E_html );
791             }
792              
793             {
794             # Find the tag descriptors which refer to required
795             # elements and add them
796              
797             # Required elements are those which we may have to
798             # supply even though they are not in the physical input
799              
800             # Anything which has a start tag among the ruby candidates
801             # is required, since we may be required to create a
802             # non-physical one
803 1         9 my @ruby_start_tags =
804 153         231 grep { ( substr $_, 0, 2 ) eq 'S_' }
805 1         4 map { @{$_} } values %ruby_config;
  30         32  
  30         56  
806              
807             my %defined_in_core_rules =
808 29         83 map { ( substr $_, 4 ) => 'core' }
809 1         14 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  137         215  
  137         177  
810              
811 1         14 my %required_tags = map { ( substr $_, 2 ) => 1 } @ruby_start_tags;
  120         192  
812 1         19 TAG: for my $tag ( keys %required_tags ) {
813 14 100       26 next TAG if $defined_in_core_rules{$tag};
814 1         5 my $flow = $runtime_tag{$tag};
815 1 50       14 die qq{Required element "ELE_$tag" was never defined}
816             if not defined $flow;
817 1         5 my $group = $primary_group_by_tag{$tag};
818 1         5 my $element = 'ELE_' . $tag;
819 1         12 push @core_rules,
820             {
821             lhs => $element,
822             rhs => [ "S_$tag", $flow, "E_$tag" ],
823             action => $element
824             },
825             {
826             lhs => $group,
827             rhs => [$element],
828             };
829 1         12 delete $runtime_tag{$tag};
830             } ## end TAG: for my $tag ( keys %required_tags )
831             }
832              
833             {
834 1         4 my @mentioned_in_core =
  1         4  
835 38         62 map { substr $_, 4 }
836 1         4 grep {m/\A ELE_ /xms} map { @{ $_->{rhs} } } @core_rules;
  205         357  
  139         139  
  139         241  
837             my %defined_in_core =
838 30         60 map { ( substr $_, 4 ) => 'core' }
839 1         16 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  139         206  
  139         171  
840             my @symbols_with_no_ruby_status =
841 1   33     22 grep { !$defined_in_core{$_} and !$runtime_tag{$_} }
  38         72  
842             @mentioned_in_core;
843 1 50       10 die 'symbols with no ruby status: ', join q{ },
844             @symbols_with_no_ruby_status
845             if scalar @symbols_with_no_ruby_status;
846             }
847              
848             # Calculate the numeric Ruby ranks
849 1         5 my %ruby_rank = ();
850 1         7 for my $rejected_symbol ( keys %ruby_config ) {
851 30         34 my $rank = 1;
852 30         31 for my $candidate ( reverse @{ $ruby_config{$rejected_symbol} } ) {
  30         39  
853 153         260 $ruby_rank{$rejected_symbol}{$candidate} = $rank++;
854             }
855             } ## end for my $rejected_symbol ( keys %ruby_config )
856              
857             {
858             my %element_used =
859 38         89 map { ( $_ => 1 ) }
860 1         12 grep {m/\A ELE_ /xms} map { @{ $_->{rhs} } } @core_rules;
  205         313  
  139         139  
  139         207  
861             my @elements_defined_but_not_used =
862 30         43 grep { !$element_used{$_} }
863 1         17 grep {m/\A ELE_ /xms} map { $_->{lhs} } @core_rules;
  139         228  
  139         171  
864 1 50       13 die 'elements defined but never used: ', join q{ },
865             @elements_defined_but_not_used
866             if scalar @elements_defined_but_not_used;
867             }
868              
869             {
870 1         3 my %seen = ();
  1         4  
  1         3  
871 1         3 for my $rule (@core_rules) {
872 139         174 my $lhs = $rule->{lhs};
873 139         155 my $rhs = $rule->{rhs};
874 139         145 my $desc = join q{ }, $lhs, '::=', @{$rhs};
  139         225  
875 139 50       221 if ( $seen{$desc} ) {
876 0         0 Carp::croak("Duplicate rule: $desc");
877             }
878 139         381 $seen{$desc}++;
879             } ## end for my $rule (@core_rules)
880             }
881              
882             return {
883 1         326 rules => \@core_rules,
884             runtime_tag => \%runtime_tag,
885             ruby_slippers_rank_by_name => \%ruby_rank,
886             is_empty_element => \%is_empty_element,
887             primary_group_by_tag => \%primary_group_by_tag
888             };
889              
890             } ## end sub compile
891              
892             1;
893              
894             # vim: expandtab shiftwidth=4: