File Coverage

blib/lib/Marpa/R2/HTML.pm
Criterion Covered Total %
statement 525 624 84.1
branch 166 272 61.0
condition 44 94 46.8
subroutine 26 31 83.8
pod 0 13 0.0
total 761 1034 73.6


line stmt bran cond sub pod time code
1             # Copyright 2022 Jeffrey Kegler
2             # This file is part of Marpa::R2. Marpa::R2 is free software: you can
3             # redistribute it and/or modify it under the terms of the GNU Lesser
4             # General Public License as published by the Free Software Foundation,
5             # either version 3 of the License, or (at your option) any later version.
6             #
7             # Marpa::R2 is distributed in the hope that it will be useful,
8             # but WITHOUT ANY WARRANTY; without even the implied warranty of
9             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10             # Lesser General Public License for more details.
11             #
12             # You should have received a copy of the GNU Lesser
13             # General Public License along with Marpa::R2. If not, see
14             # http://www.gnu.org/licenses/.
15              
16             package Marpa::R2::HTML;
17              
18 8     8   8576 use 5.010001;
  8         24  
19 8     8   40 use strict;
  8         20  
  8         175  
20 8     8   36 use warnings;
  8         13  
  8         223  
21              
22 8     8   40 use vars qw( $VERSION $STRING_VERSION );
  8         14  
  8         653  
23             $VERSION = '12.000000';
24             $STRING_VERSION = $VERSION;
25             ## no critic (BuiltinFunctions::ProhibitStringyEval)
26             $VERSION = eval $VERSION;
27             ## use critic
28              
29             our @EXPORT_OK;
30 8     8   53 use base qw(Exporter);
  8         20  
  8         935  
31 8     8   290 BEGIN { @EXPORT_OK = qw(html); }
32              
33             package Marpa::R2::HTML::Internal;
34              
35             # Data::Dumper is used in tracing
36 8     8   2090 use Data::Dumper;
  8         22136  
  8         477  
37              
38 8     8   3487 use Marpa::R2::HTML::Internal;
  8         17  
  8         240  
39 8     8   3495 use Marpa::R2::HTML::Config;
  8         25  
  8         250  
40 8     8   46 use Carp ();
  8         17  
  8         153  
41 8     8   105 use HTML::Parser 3.69;
  8         149  
  8         188  
42 8     8   45 use HTML::Entities qw(decode_entities);
  8         12  
  8         478  
43              
44             # versions below must be coordinated with
45             # those required in Build.PL
46              
47 8     8   51 use English qw( -no_match_vars );
  8         24  
  8         42  
48 8     8   6286 use Marpa::R2;
  8         38  
  8         853  
# Load-time guard: refuse to run unless the Marpa::R2 that was just loaded
# carries the same (numeric) version as Marpa::R2::HTML.
{
    my $core_version = $Marpa::R2::VERSION;
    defined $core_version
        or die 'Marpa::R2::VERSION not defined';
    if ( $core_version != $Marpa::R2::HTML::VERSION ) {
        die
            "Marpa::R2::VERSION ($core_version) does not match Marpa::R2::HTML::VERSION ",
            $Marpa::R2::HTML::VERSION;
    }
}
57              
58 8     8   69 use Marpa::R2::Thin::Trace;
  8         19  
  8         233  
59              
60             # constants
61              
62 8     8   48 use constant PHYSICAL_TOKEN => 42;
  8         17  
  8         676  
63 8     8   58 use constant RUBY_SLIPPERS_TOKEN => 43;
  8         20  
  8         1295  
64              
# Look up, once at load time, the numeric codes of the two libmarpa errors
# this module refers to by name.  The code of an error is its position in
# the list returned by Marpa::R2::Thin::error_names().
our @LIBMARPA_ERROR_NAMES = Marpa::R2::Thin::error_names();
our $UNEXPECTED_TOKEN_ID;
our $NO_MARPA_ERROR;
{
    my $error_number = -1;
    for my $error_name (@LIBMARPA_ERROR_NAMES) {
        $error_number++;
        if ( $error_name eq 'MARPA_ERR_UNEXPECTED_TOKEN_ID' ) {
            $UNEXPECTED_TOKEN_ID = $error_number;
        }
        elsif ( $error_name eq 'MARPA_ERR_NONE' ) {
            $NO_MARPA_ERROR = $error_number;
        }
    } ## end for my $error_name (@LIBMARPA_ERROR_NAMES)
}
79              
80 8     8   4198 use Marpa::R2::HTML::Callback;
  8         25  
  8         55116  
# Load-time guard: refuse to run unless Marpa::R2::HTML::Callback carries
# the same (numeric) version as Marpa::R2::HTML.
{
    my $callback_version = $Marpa::R2::HTML::Callback::VERSION;
    defined $callback_version
        or die 'Marpa::R2::HTML::Callback::VERSION not defined';
    if ( $callback_version != $Marpa::R2::HTML::VERSION ) {
        die
            "Marpa::R2::HTML::Callback::VERSION ($callback_version) does not match Marpa::R2::HTML::VERSION ",
            $Marpa::R2::HTML::VERSION;
    }
}
90              
# Return the (line, column) pair recorded for the HTML token that follows
# the token mapped to $earleme.
#
#   $self    - parse object; reads {tokens} and {earleme_to_html_token_ix}
#   $earleme - index into {earleme_to_html_token_ix}
#
# Dies if no token index is recorded for $earleme.
sub earleme_to_linecol {
    my ( $self, $earleme ) = @_;
    my $html_parser_tokens = $self->{tokens};

    # Test the mapping BEFORE adding 1.  "undef + 1" evaluates to 1, so a
    # check made on the sum could never detect a missing entry.
    my $mapped_token_ix = $self->{earleme_to_html_token_ix}->[$earleme];
    die 'earleme_to_linecol(): no HTML token index recorded for earleme'
        if not defined $mapped_token_ix;

    my $html_token_ix = $mapped_token_ix + 1;

    return @{ $html_parser_tokens->[$html_token_ix] }[
        Marpa::R2::HTML::Internal::Token::LINE,
        Marpa::R2::HTML::Internal::Token::COLUMN,
    ];

} ## end sub earleme_to_linecol
104              
# Return the END_OFFSET field recorded for the HTML token that follows the
# token mapped to $earleme.
#
#   $self    - parse object; reads {tokens} and {earleme_to_html_token_ix}
#   $earleme - index into {earleme_to_html_token_ix}
#
# Dies if no token index is recorded for $earleme.
sub earleme_to_offset {
    my ( $self, $earleme ) = @_;
    my $html_parser_tokens = $self->{tokens};

    # Test the mapping BEFORE adding 1.  "undef + 1" evaluates to 1, so a
    # check made on the sum could never detect a missing entry.
    my $mapped_token_ix = $self->{earleme_to_html_token_ix}->[$earleme];
    die 'earleme_to_offset(): no HTML token index recorded for earleme'
        if not defined $mapped_token_ix;

    my $html_token_ix = $mapped_token_ix + 1;

    return $html_parser_tokens->[$html_token_ix]
        ->[Marpa::R2::HTML::Internal::Token::END_OFFSET];

} ## end sub earleme_to_offset
116              
# Register a single handler given in "long form".
#
#   $self                - parse object being configured
#   $handler_description - hash ref with the keys "element", "class",
#                          "pseudoclass" and "action"; "action" must be a
#                          CODE ref
#
# The recognized keys are deleted from the caller's hash; any key left
# over is reported through Marpa::R2::exception().
# A handler with a defined pseudoclass is stored in {handler_by_species};
# any other handler is stored in {handler_by_element_and_class} under
# "element;class", with the element lower-cased and q{*} standing in for
# a missing element or class.
# Returns 1.
sub add_handler {
    my ( $self, $handler_description ) = @_;

    my $ref_type = ref $handler_description || 'not a reference';
    if ( $ref_type ne 'HASH' ) {
        Marpa::R2::exception(
            "Long form handler description should be ref to hash, but it is $ref_type"
        );
    }

    my ( $element, $class, $pseudoclass, $action ) =
        delete @{$handler_description}{qw(element class pseudoclass action)};

    my @unknown_options = keys %{$handler_description};
    if (@unknown_options) {
        Marpa::R2::exception(
            'Unknown option(s) in Long form handler description: ',
            ( join q{ }, @unknown_options )
        );
    }

    if ( ref $action ne 'CODE' ) {
        Marpa::R2::exception('Handler action must be CODE ref');
    }

    if ( defined $pseudoclass ) {
        $self->{handler_by_species}->{$pseudoclass} = $action;
        return 1;
    }

    my $element_key = lc( $element ? $element : q{*} );
    my $class_key   = $class // q{*};
    $self->{handler_by_element_and_class}->{"$element_key;$class_key"} =
        $action;
    return 1;
} ## end sub add_handler
147              
# Register a list of "long form" handler descriptions.
#
#   $self          - parse object being configured
#   $handler_specs - ARRAY ref; each element is passed to add_handler()
#
# Raises Marpa::R2::exception() if $handler_specs is not an ARRAY ref.
# Returns 1.
sub add_handlers_from_hashes {
    my ( $self, $handler_specs ) = @_;
    my $ref_type = ref $handler_specs || 'not a reference';
    Marpa::R2::exception(
        "handlers arg must be ref to ARRAY, it is $ref_type")
        if $ref_type ne 'ARRAY';

    # $handler_specs has just been verified to be an ARRAY ref, so it must
    # be walked as an array.  Dereferencing it as a hash is a fatal error
    # under "strict refs".
    for my $handler_spec ( @{$handler_specs} ) {
        add_handler( $self, $handler_spec );
    }
    return 1;
} ## end sub add_handlers_from_hashes
159              
# Register handlers given in "short form".
#
#   $self          - parse object being configured
#   $handler_specs - hash ref mapping a specifier string to its action
#
# A specifier is tried, in this order, as
#   "element.class"        (split at the first period),
#   "element:pseudoclass"  (split at the first colon),
#   "element"              (anything else).
# A pseudoclass must be one of the known names and may not be combined
# with an element; violations are reported via Marpa::R2::exception().
# Each specifier is then handed to add_handler().
# Returns 1.
sub add_handlers {
    my ( $self, $handler_specs ) = @_;

    state $allowed_pseudoclasses = {
        map { ( $_, 1 ) }
            qw(TOP PI DECL COMMENT PROLOG TRAILER WHITESPACE CDATA PCDATA CRUFT)
    };

    HANDLER_SPEC: for my $specifier ( keys %{$handler_specs} ) {
        my $action = $handler_specs->{$specifier};
        my ( $element, $class, $pseudoclass );

        if ( my @by_class = ( $specifier =~ /\A ([^.]*) [.] (.*) \z/oxms ) )
        {
            ( $element, $class ) = @by_class;
        }
        elsif ( my @by_pseudoclass =
            ( $specifier =~ /\A ([^:]*) [:] (.*) \z/oxms ) )
        {
            ( $element, $pseudoclass ) = @by_pseudoclass;
        }
        else {
            $element = $specifier;
        }

        if ($pseudoclass) {
            if ( not exists $allowed_pseudoclasses->{$pseudoclass} ) {
                Marpa::R2::exception(
                    qq{pseudoclass "$pseudoclass" is not known:\n},
                    "Specifier was $specifier\n" );
            }
            if ($element) {
                Marpa::R2::exception(
                    qq{pseudoclass "$pseudoclass" may not have an element specified:\n},
                    "Specifier was $specifier\n"
                );
            }
        } ## end if ($pseudoclass)

        add_handler(
            $self,
            {   element     => $element,
                class       => $class,
                pseudoclass => $pseudoclass,
                action      => $action
            }
        );
    } ## end HANDLER_SPEC: for my $specifier ( keys %{$handler_specs} )

    return 1;
} ## end sub add_handlers
198              
# If we factor this package, this will be the constructor.
#
# Build and return the (unblessed) hash that holds the state of one parse.
# Each argument is either
#   - a hash ref of short-form handlers, passed to add_handlers(), or
#   - a reference to a hash ref of options.
# The "handlers" option is passed to add_handlers_from_hashes(); every
# other option must be one of the allowed names and is stored in the
# object under its own name.  {trace_fh} defaults to STDERR.
# {config} is built from the "compile" option (which must be a SCALAR ref)
# when that option is given, and from the defaults otherwise.
# Invalid arguments are reported via Marpa::R2::exception().
## no critic (Subroutines::RequireArgUnpacking)
sub create {

    ## use critic
    state $allowed_options = {
        map { ( $_, 1 ) }
            qw(trace_fh trace_values trace_handlers
            trace_conflicts
            trace_terminals trace_cruft
            dump_AHFA dump_config compile
            )
    };

    my $self = {};
    $self->{trace_fh} = \*STDERR;
    ARG: for my $arg (@_) {
        my $ref_type = ref $arg || 'not a reference';
        if ( $ref_type eq 'HASH' ) {
            Marpa::R2::HTML::Internal::add_handlers( $self, $arg );
            next ARG;
        }
        Marpa::R2::exception(
            "Argument must be hash or refs to hash: it is $ref_type")
            if $ref_type ne 'REF';
        my $option_hash = ${$arg};
        $ref_type = ref $option_hash || 'not a reference';
        Marpa::R2::exception(
            "Argument must be hash or refs to hash: it is ref to $ref_type")
            if $ref_type ne 'HASH';
        OPTION: for my $option ( keys %{$option_hash} ) {
            if ( $option eq 'handlers' ) {
                add_handlers_from_hashes( $self, $option_hash->{$option} );

                # "handlers" is consumed here and is deliberately not in
                # the allowed-option table.  Without this "next" it would
                # always be rejected below as an unknown option.
                next OPTION;
            } ## end if ( $option eq 'handlers' )
            if ( not exists $allowed_options->{$option} ) {
                Marpa::R2::exception("unknown option: $option");
            }
            $self->{$option} = $option_hash->{$option};
        } ## end OPTION: for my $option ( keys %{$option_hash} )
    } ## end ARG: for my $arg (@_)

    my $source_ref = $self->{compile};
    if ( defined $source_ref ) {
        ref $source_ref eq 'SCALAR'
            or Marpa::R2::exception(
            qq{value of "compile" option must be a SCALAR});
        $self->{config} =
            Marpa::R2::HTML::Config->new_from_compile($source_ref);
    } ## end if ( defined $source_ref )
    else {
        $self->{config} = Marpa::R2::HTML::Config->new();
    }

    return $self;
} ## end sub create
252              
# Find the handler to run for a rule.
#
#   $self    - parse object; reads {action_by_rule_id}, {handler_by_species},
#              {handler_by_element_and_class} and {trace_handlers}
#   $rule_id - index into {action_by_rule_id}
#   $class   - class of the element; defaults to q{*}
#
# A rule whose action starts with 'SPE_' is looked up by species only.
# Any other defined action is treated as 'ELE_<element>' and looked up
# under "element;class", "*;class", "element;*" and "*;*", in that order.
# Returns the handler found, or the string 'default_handler' when there is
# none.  When {trace_handlers} is set, the choice is reported on STDERR.
sub handler_find {
    my ( $self, $rule_id, $class ) = @_;
    my $tracing = $self->{trace_handlers};
    $class //= q{*};
    my $action = $self->{action_by_rule_id}->[$rule_id];
    my $handler;

    if ( not defined $action ) {

        # No action for this rule: only the default handler applies.
    }
    elsif ( index( $action, 'SPE_' ) == 0 ) {
        my $species = substr $action, 4;
        $handler = $self->{handler_by_species}->{$species};
        if ( $tracing and defined $handler ) {
            say {*STDERR}
                qq{Rule $rule_id: Found handler by species: "$species"}
                or Carp::croak("Cannot print: $ERRNO");
        }
    } ## end elsif ( index( $action, 'SPE_' ) == 0 )
    else {

        ## At this point action always is defined
        ## and starts with 'ELE_'
        my $element = substr $action, 4;

        # Candidate keys, most specific first.
        my @handler_keys = map { join q{;}, @{$_} } (
            [ $element, $class ],
            [ q{*},     $class ],
            [ $element, q{*} ],
            [ q{*},     q{*} ],
        );
        ($handler) =
            grep {defined}
            @{ $self->{handler_by_element_and_class} }{@handler_keys};

        if ( $tracing and defined $handler ) {
            my ($matched_key) =
                grep { defined $self->{handler_by_element_and_class}->{$_} }
                @handler_keys;
            say {*STDERR}
                qq{Rule $rule_id: Found handler by action and class: "},
                $matched_key, q{"}
                or Carp::croak("Cannot print: $ERRNO");
        } ## end if ( $tracing and defined $handler )
    } ## end else [ if ( not defined $action ) ]

    return $handler if defined $handler;

    if ($tracing) {
        say {*STDERR} qq{Rule $rule_id: Using default handler for action "},
            ( $action // q{*} ), qq{" and class: "$class"}
            or Carp::croak("Cannot print: $ERRNO");
    }

    return 'default_handler';
} ## end sub handler_find
303              
# Text of the original document spanned by a range of tokens, unchanged:
# from the START_OFFSET of token $first_token_ix up to the END_OFFSET of
# token $last_token_ix.
# Returned as a reference, because it may be very long.
# A reference to the empty string is returned when $first_token_ix is
# undefined.
sub token_range_to_original {
    my ( $self, $first_token_ix, $last_token_ix ) = @_;

    defined $first_token_ix or return \q{};

    my $tokens = $self->{tokens};
    my $from   = $tokens->[$first_token_ix]
        ->[Marpa::R2::HTML::Internal::Token::START_OFFSET];
    my $to = $tokens->[$last_token_ix]
        ->[Marpa::R2::HTML::Internal::Token::END_OFFSET];

    my $original = substr ${ $self->{document} }, $from, $to - $from;
    return \$original;
} ## end sub token_range_to_original
323              
# Text of the original document covered by one tdesc item, unchanged.
# For 'PHYSICAL_TOKEN' and 'VALUED_SPAN' items this is the text between
# the item's START_TOKEN and END_TOKEN; for an item of any other type, or
# with no type, it is the empty string.
# Always returned as a reference, because it may be very long.
sub tdesc_item_to_original {
    my ( $self, $tdesc_item ) = @_;

    my $tdesc_item_type = $tdesc_item->[0];

    # Return a reference on every path, so that callers can dereference
    # the result without first checking what kind of item they passed in.
    return \q{} if not defined $tdesc_item_type;

    if (   $tdesc_item_type eq 'PHYSICAL_TOKEN'
        or $tdesc_item_type eq 'VALUED_SPAN' )
    {
        return token_range_to_original(
            $self,
            $tdesc_item->[Marpa::R2::HTML::Internal::TDesc::START_TOKEN],
            $tdesc_item->[Marpa::R2::HTML::Internal::TDesc::END_TOKEN],
        );
    } ## end if ( $tdesc_item_type eq 'PHYSICAL_TOKEN' or ...)

    return \q{};
} ## end sub tdesc_item_to_original
353              
# Given a token range and a tdesc list,
# return a reference to the literal value.
#
#   $next_token_ix  - first token not yet accounted for
#   $final_token_ix - accepted but not consulted by this routine
#                     NOTE(review): presumably the tdesc list always
#                     reaches the end of the range -- confirm with callers
#   $tdesc_list     - array ref of tdesc items
#
# The literal is assembled left to right.  Original text is used for the
# tokens lying between items and for items that carry no value; the value
# itself is used for a 'VALUED_SPAN' item whose value is defined.
sub range_and_values_to_literal {
    my ( $self, $next_token_ix, $final_token_ix, $tdesc_list ) = @_;

    # Flatten: drop untyped, 'ZERO_SPAN' and 'RUBY_SLIPPERS_TOKEN' items,
    # and replace each 'VALUES' item by the items of its value list.
    my @flat_items = map {
        my $type = $_->[0];
          ( not defined $type )          ? ()
        : $type eq 'ZERO_SPAN'           ? ()
        : $type eq 'RUBY_SLIPPERS_TOKEN' ? ()
        : $type eq 'VALUES'
        ? @{ $_->[Marpa::R2::HTML::Internal::TDesc::VALUE] }
        : ($_);
    } @{$tdesc_list};

    my @pieces = ();
    ITEM: for my $item (@flat_items) {
        my ( $type, $start_ix, $end_ix ) = @{$item};
        my $is_valued_span = $type eq 'VALUED_SPAN';

        if ( not defined $start_ix ) {
            ## An element can contain no HTML tokens -- it may contain
            ## only Ruby Slippers tokens.
            ## Treat this as a special case.
            if ($is_valued_span) {
                push @pieces,
                    q{}
                    . ( $item->[Marpa::R2::HTML::Internal::TDesc::VALUE]
                        // q{} );
            }
            next ITEM;
        } ## end if ( not defined $start_ix )

        # Original text of any tokens skipped since the previous item.
        if ( $next_token_ix < $start_ix ) {
            push @pieces,
                ${ token_range_to_original( $self, $next_token_ix,
                    $start_ix - 1 ) };
        }

        my $value =
            $is_valued_span
            ? $item->[Marpa::R2::HTML::Internal::TDesc::VALUE]
            : undef;
        if ( defined $value ) {
            push @pieces, q{} . $value;
        }
        elsif ( $start_ix <= $end_ix ) {

            # No value: fall back to the item's own original text.
            push @pieces,
                ${ token_range_to_original( $self, $start_ix, $end_ix ) };
        }
        $next_token_ix = $end_ix + 1;
    } ## end ITEM: for my $item (@flat_items)

    return \( join q{}, @pieces );

} ## end sub range_and_values_to_literal
417              
# Names of the symbols of a rule: the LHS first, then each RHS symbol in
# order.  The grammar is obtained from $self->{tracer}.
# Returns nothing if the grammar reports no length for $rule_id.
sub symbol_names_by_rule_id {
    my ( $self, $rule_id ) = @_;
    my $tracer  = $self->{tracer};
    my $grammar = $tracer->grammar();

    my $rhs_count = $grammar->rule_length($rule_id);
    defined $rhs_count or return;

    my @symbol_ids = ( $grammar->rule_lhs($rule_id) );
    for my $rhs_ix ( 0 .. $rhs_count - 1 ) {
        push @symbol_ids, $grammar->rule_rhs( $rule_id, $rhs_ix );
    }
    return map { $tracer->symbol_name($_) } @symbol_ids;
} ## end sub symbol_names_by_rule_id
429              
430             sub parse {
431 94     94 0 203 my ( $self, $document_ref ) = @_;
432              
433 94         177 my %tags = ();
434              
435             Marpa::R2::exception(
436             "parse() already run on this object\n",
437             'For a new parse, create a new object'
438 94 50       243 ) if $self->{document};
439              
440 94         147 my $trace_cruft = $self->{trace_cruft};
441 94   50     320 my $trace_terminals = $self->{trace_terminals} // 0;
442 94         149 my $trace_conflicts = $self->{trace_conflicts};
443 94         137 my $trace_handlers = $self->{trace_handlers};
444 94         157 my $trace_values = $self->{trace_values};
445 94         151 my $trace_fh = $self->{trace_fh};
446 94         193 my $ref_type = ref $document_ref;
447             Marpa::R2::exception('Arg to parse() must be ref to string')
448             if not $ref_type
449             or $ref_type ne 'SCALAR'
450 94 50 33     371 or not defined ${$document_ref};
  94   33     271  
451              
452 94         186 my $document = $self->{document} = $document_ref;
453              
454             my ($core_rules, $runtime_tag,
455             $rank_by_name, $is_empty_element,
456             $primary_group_by_tag
457 94         307 ) = $self->{config}->contents();
458 94         186 $self->{is_empty_element} = $is_empty_element;
459 94 100       225 if ($self->{dump_config}) {
460 2         11 return $self->{config}->as_string();
461             }
462 92         165 my @action_by_rule_id = ();
463 92         177 $self->{action_by_rule_id} = \@action_by_rule_id;
464 92         1222 my $thin_grammar = Marpa::R2::Thin::G->new( { if => 1 } );
465 92         444 my $tracer = Marpa::R2::Thin::Trace->new($thin_grammar);
466 92         243 $self->{tracer} = $tracer;
467              
468 92         145 RULE: for my $rule ( @{$core_rules} ) {
  92         184  
469 12788         20547 my $lhs = $rule->{lhs};
470 12788         16905 my $rhs = $rule->{rhs};
471 12788         15912 my $min = $rule->{min};
472 12788         16802 my $action = $rule->{action};
473 12788         16270 my @symbol_ids = ();
474 12788         15492 for my $symbol_name ( $lhs, @{$rhs} ) {
  12788         19446  
475 31648   100     58889 push @symbol_ids,
476             $tracer->symbol_by_name($symbol_name)
477             // $tracer->symbol_new($symbol_name);
478             }
479 12788         21862 my ($lhs_id, @rhs_ids) = @symbol_ids;
480 12788         15673 my $rule_id;
481 12788 100       19776 if ( defined $min ) {
482 2116         7182 $rule_id =
483             $thin_grammar->sequence_new( $lhs_id, $rhs_ids[0],
484             { min => $min } );
485             }
486             else {
487 10672         26794 $rule_id = $thin_grammar->rule_new( $lhs_id, \@rhs_ids );
488             }
489 12788         29124 $action_by_rule_id[$rule_id] = $action;
490             } ## end RULE: for my $rule ( @{$core_rules} )
491              
492             # Some constants that we will use a lot
493 92         250 my $SYMID_CRUFT = $tracer->symbol_by_name('CRUFT');
494 92         196 my $SYMID_CDATA = $tracer->symbol_by_name('CDATA');
495 92         197 my $SYMID_PCDATA = $tracer->symbol_by_name('PCDATA');
496 92         197 my $SYMID_WHITESPACE = $tracer->symbol_by_name('WHITESPACE');
497 92         205 my $SYMID_PI = $tracer->symbol_by_name('PI');
498 92         179 my $SYMID_C = $tracer->symbol_by_name('C');
499 92         213 my $SYMID_D = $tracer->symbol_by_name('D');
500 92         208 my $SYMID_EOF = $tracer->symbol_by_name('EOF');
501              
502 92         211 my @raw_tokens = ();
503 92         1003 my $p = HTML::Parser->new(
504             api_version => 3,
505             start_h => [
506             \@raw_tokens, q{tagname,'S',line,column,offset,offset_end,is_cdata,attr}
507             ],
508             end_h =>
509             [ \@raw_tokens, q{tagname,'E',line,column,offset,offset_end,is_cdata} ],
510             text_h => [
511             \@raw_tokens,
512             qq{'$SYMID_WHITESPACE','T',line,column,offset,offset_end,is_cdata}
513             ],
514             comment_h =>
515             [ \@raw_tokens, qq{'$SYMID_C','C',line,column,offset,offset_end,is_cdata} ],
516             declaration_h =>
517             [ \@raw_tokens, qq{'$SYMID_D','D',line,column,offset,offset_end,is_cdata} ],
518             process_h =>
519             [ \@raw_tokens, qq{'$SYMID_PI','PI',line,column,offset,offset_end,is_cdata} ],
520             unbroken_text => 1
521             );
522              
523 92         9472 $p->parse( ${$document} );
  92         2714  
524 92         498 $p->eof;
525              
526 92         177 my @html_parser_tokens = ();
527             HTML_PARSER_TOKEN:
528 92         196 for my $raw_token (@raw_tokens) {
529             my ( undef, $token_type, $line, $column, $offset, $offset_end, $is_cdata, $attr ) =
530 1379         1537 @{$raw_token};
  1379         2602  
531              
532             PROCESS_TOKEN_TYPE: {
533 1379 50       1634 if ($is_cdata) {
  1379         2135  
534 0         0 $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] =
535             $SYMID_CDATA;
536 0         0 last PROCESS_TOKEN_TYPE;
537             }
538 1379 100       2196 if ( $token_type eq 'T' ) {
539              
540             # White space as defined in HTML 4.01
541             # space (x20); ASCII tab (x09); ASCII form feed (x0C;); Zero-width space (x200B)
542             # and the two characters which appear in line breaks:
543             # carriage return (x0D) and line feed (x0A)
544             # I avoid the Perl character codes because I do NOT want
545             # localization
546             $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] =
547             $SYMID_PCDATA if
548             substr(
549 635 100       692 ${$document}, $offset, ( $offset_end - $offset )
  635         2340  
550             ) =~ / [^\x09\x0A\x0C\x0D\x20\x{200B}] /oxms;
551              
552 635         969 last PROCESS_TOKEN_TYPE;
553             } ## end if ( $token_type eq 'T' )
554 744 100 100     1805 if ( $token_type eq 'E' or $token_type eq 'S' ) {
555              
556             # If it's a virtual token from HTML::Parser,
557             # pretend it never existed.
558             # HTML::Parser supplies missing
559             # end tags for title elements, but for no
560             # others.
561             # This is not helpful and we need to special-case
562             # these zero-length tags and throw them away.
563 739 100       1170 next HTML_PARSER_TOKEN if $offset_end <= $offset;
564              
565 737         960 my $tag_name = $raw_token
566             ->[Marpa::R2::HTML::Internal::Token::TAG_NAME];
567 737         1145 my $terminal = $token_type . q{_} . $tag_name;
568 737         1451 my $terminal_id = $tracer->symbol_by_name($terminal);
569 737 100       1374 if ( not defined $terminal_id ) {
570 25   50     84 my $group_symbol = $primary_group_by_tag->{$tag_name}
571             // 'GRP_anywhere';
572 25   50     86 my $contents = $runtime_tag->{$tag_name} // 'FLO_mixed';
573 25         112 my @symbol_names = (
574             $group_symbol,
575             'ELE_' . $tag_name,
576             'S_' . $tag_name,
577             $contents, 'E_' . $tag_name
578             );
579 25         45 my @symbol_ids = ();
580 25         49 SYMBOL: for my $symbol_name (@symbol_names) {
581 125         246 my $symbol_id = $tracer->symbol_by_name($symbol_name);
582 125 100       257 if ( not defined $symbol_id ) {
583 75         163 $symbol_id = $tracer->symbol_new($symbol_name);
584             }
585 125         237 push @symbol_ids, $symbol_id;
586             } ## end SYMBOL: for my $symbol_name (@symbol_names)
587 25         77 my ( $top_id, $lhs_id, @rhs_ids ) = @symbol_ids;
588 25         121 $thin_grammar->rule_new( $top_id, [$lhs_id] );
589 25         84 my $element_rule_id =
590             $thin_grammar->rule_new( $lhs_id, \@rhs_ids );
591 25         71 $action_by_rule_id[$element_rule_id] = 'ELE_' . $tag_name;
592 25         59 $terminal_id = $tracer->symbol_by_name($terminal);
593              
594             } ## end if ( not defined $terminal_id )
595 737         1044 $raw_token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] =
596             $terminal_id;
597 737         1017 last PROCESS_TOKEN_TYPE;
598             } ## end if ( $token_type eq 'E' or $token_type eq 'S' )
599             } ## end PROCESS_TOKEN_TYPE:
600 1377         2294 push @html_parser_tokens, $raw_token;
601             } ## end HTML_PARSER_TOKEN: for my $raw_token (@raw_tokens)
602              
603             # Points AFTER the last HTML
604             # Parser token.
605             # The other logic needs to be ready for this.
606             {
607 92         152 my $document_length = length ${$document};
  92         144  
  92         166  
608 92         169 my $last_token = $html_parser_tokens[-1];
609             push @html_parser_tokens,
610             [
611             $SYMID_EOF, 'EOF',
612 92         134 @{$last_token}[
  92         310  
613             Marpa::R2::HTML::Internal::Token::LINE,
614             Marpa::R2::HTML::Internal::Token::COLUMN
615             ],
616             $document_length,
617             $document_length
618             ];
619             }
620              
621             # conserve memory
622 92         452 $p = undef;
623 92         203 @raw_tokens = ();
624              
625 92         275 $thin_grammar->start_symbol_set( $tracer->symbol_by_name('document') );
626 92         112106 $thin_grammar->precompute();
627              
628 92 50       452 if ($self->{dump_AHFA}) {
629 0         0 return \$tracer->show_AHFA();
630             }
631              
632             # Memoize these -- we use highest symbol a lot
633 92         313 my $highest_symbol_id = $thin_grammar->highest_symbol_id();
634 92         246 my $highest_rule_id = $thin_grammar->highest_rule_id();
635              
636             # For the Ruby Slippers engine
637             # We need to know quickly if a symbol is a start tag;
638 92         170 my @is_start_tag = ();
639              
640             # Find Ruby slippers ranks, by symbol ID
641 92         168 my @ruby_rank_by_id = ();
642             {
643 92         158 my @non_final_end_tag_ids = ();
  92         156  
644             SYMBOL:
645 92         673 for my $symbol_id ( 0 .. $highest_symbol_id ) {
646 14519         25388 my $symbol_name = $tracer->symbol_name($symbol_id);
647 14519 100       28568 next SYMBOL if not 0 == index $symbol_name, 'E_';
648             next SYMBOL
649 2785 100 100     7400 if $symbol_name eq 'E_body'
650             or $symbol_name eq 'E_html';
651 2601         4003 push @non_final_end_tag_ids, $symbol_id;
652             } ## end SYMBOL: for my $symbol_id ( 0 .. $highest_symbol_id )
653              
654 92         174 my %ruby_vectors = ();
655 92         174 for my $rejected_symbol_name ( keys %{$rank_by_name} ) {
  92         598  
656 2760         19504 my @ruby_vector_by_id = ( (0) x ( $highest_symbol_id + 1 ) );
657             my $rank_by_candidate_name =
658 2760         4140 $rank_by_name->{$rejected_symbol_name};
659             CANDIDATE:
660 2760         3222 for my $candidate_name ( keys %{$rank_by_candidate_name} ) {
  2760         6157  
661 14076         18080 my $rank = $rank_by_candidate_name->{$candidate_name};
662 14076 100       23876 if ( $candidate_name eq '' ) {
663 2760         15809 $ruby_vector_by_id[$_] = $rank for @non_final_end_tag_ids;
664 2760         4167 next CANDIDATE;
665             }
666 11316         20156 my $candidate_id = $tracer->symbol_by_name($candidate_name);
667 11316 50       19328 die "Unknown ruby slippers candidate name: $candidate_name"
668             if not defined $candidate_id;
669             $ruby_vector_by_id[$candidate_id] = $rank
670 11316         66594 for @non_final_end_tag_ids;
671             } ## end CANDIDATE: for my $candidate_name ( keys %{...})
672 2760         6176 $ruby_vectors{$rejected_symbol_name} = \@ruby_vector_by_id;
673             } ## end for my $rejected_symbol_name ( keys %{$rank_by_name} )
674              
675 92         802 my @no_ruby_slippers_vector = ( (0) x ( $highest_symbol_id + 1 ) );
676 92         238 SYMBOL: for my $rejected_symbol_id ( 0 .. $highest_symbol_id ) {
677 14519 100       29400 if ( not $thin_grammar->symbol_is_terminal($rejected_symbol_id) )
678             {
679 8213         11079 $ruby_rank_by_id[$rejected_symbol_id] =
680             \@no_ruby_slippers_vector;
681 8213         12073 next SYMBOL;
682             } ## end if ( not $thin_grammar->symbol_is_terminal(...))
683 6306         11715 my $rejected_symbol_name =
684             $tracer->symbol_name($rejected_symbol_id);
685 6306         7986 my $placement;
686             FIND_PLACEMENT: {
687 6306         7103 my $prefix = substr $rejected_symbol_name, 0, 2;
  6306         8950  
688 6306 100       10558 if ( $prefix eq 'S_' ) {
689 2785         3371 $placement = '';
690 2785         3516 $is_start_tag[$rejected_symbol_id] = 1;
691 2785         3789 last FIND_PLACEMENT;
692             }
693 3521 100       5566 if ( $prefix eq 'E_' ) {
694 2785         3608 $placement = '/';
695             }
696             } ## end FIND_PLACEMENT:
697 6306         8766 my $ruby_vector = $ruby_vectors{$rejected_symbol_name};
698 6306 100       10405 if ( defined $ruby_vector ) {
699 2300         2875 $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector;
700 2300         3583 next SYMBOL;
701             }
702 4006 100       6171 if ( not defined $placement ) {
703 460 100       780 if ( $rejected_symbol_name eq 'CRUFT' ) {
704 92         157 $ruby_rank_by_id[$rejected_symbol_id] =
705             \@no_ruby_slippers_vector;
706 92         180 next SYMBOL;
707             }
708             $ruby_rank_by_id[$rejected_symbol_id] =
709 368   50     1070 $ruby_vectors{'!non_element'}
710             // \@no_ruby_slippers_vector;
711 368         1382 next SYMBOL;
712             } ## end if ( not defined $placement )
713 3546         4861 my $tag = substr $rejected_symbol_name, 2;
714 3546         5359 my $primary_group = $primary_group_by_tag->{$tag};
715 3546 100       5949 my $element_type = defined $primary_group ? (substr $primary_group, 4) : 'anywhere';
716             $ruby_vector =
717 3546         6325 $ruby_vectors{ q{<} . $placement . q{%} . $element_type . q{>} };
718 3546 100       5811 if ( defined $ruby_vector ) {
719 295         419 $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector;
720 295         517 next SYMBOL;
721             }
722 3251         4763 $ruby_vector = $ruby_vectors{ q{<} . $placement . q{*>} };
723 3251 50       5287 if ( defined $ruby_vector ) {
724 3251         4102 $ruby_rank_by_id[$rejected_symbol_id] = $ruby_vector;
725 3251         5571 next SYMBOL;
726             }
727 0         0 $ruby_rank_by_id[$rejected_symbol_id] = \@no_ruby_slippers_vector;
728             } ## end SYMBOL: for my $rejected_symbol_id ( 0 .. $highest_symbol_id )
729              
730             }
731              
732 92         192 my @empty_element_end_tag = ();
733             {
734 92         143 TAG: for my $tag (keys %{$is_empty_element}) {
  92         128  
  92         391  
735 1104         2520 my $start_tag_id = $tracer->symbol_by_name('S_' . $tag);
736 1104 100       2293 next TAG if not defined $start_tag_id;
737 284         716 my $end_tag_id = $tracer->symbol_by_name('E_' . $tag);
738 284         604 $empty_element_end_tag[$start_tag_id] = $end_tag_id;
739             }
740             }
741              
742 92         1094 my $recce = Marpa::R2::Thin::R->new($thin_grammar);
743 92         1369 $recce->start_input();
744              
745 92         236 $self->{recce} = $recce;
746 92         190 $self->{tokens} = \@html_parser_tokens;
747 92         221 $self->{earleme_to_html_token_ix} = [-1];
748              
749             # These variables track virtual start tokens as
750             # a protection against infinite loops.
751 92         164 my %start_virtuals_used = ();
752 92         134 my $earleme_of_last_start_virtual = -1;
753              
754             # first token is a dummy, so that ix is never 0
755             # this is done because 0 has a special meaning as a Libmarpa
756             # token value
757 92         114 my $latest_html_token = -1;
758 92         115 my $token_number = 0;
759 92         134 my $token_count = scalar @html_parser_tokens;
760              
761             # This array tracks the last token number (location) at which
762             # the symbol with this number was last read. It's used
763             # to prevent the same Ruby Slippers token being added
764             # at the same location more than once.
765             # If allowed, this could cause an infinite loop.
766             # Note that only start tags are tracked -- the rest of the
767             # array stays at -1.
768 92         566 my @terminal_last_seen = ( (-1) x ( $highest_symbol_id + 1 ) );
769              
770 92         312 $thin_grammar->throw_set(0);
771 92         132 my $empty_element_end_tag;
772 92         221 RECCE_RESPONSE: while ( $token_number < $token_count ) {
773              
774 2232 100       3642 if ( defined $empty_element_end_tag ) {
775 8         38 my $read_result =
776             $recce->alternative( $empty_element_end_tag, RUBY_SLIPPERS_TOKEN,
777             1 );
778 8 50       33 if ( $read_result != $NO_MARPA_ERROR ) {
779 0         0 die $thin_grammar->error();
780             }
781 8 50       14 if ($trace_terminals) {
782 0 0       0 say {$trace_fh} 'Virtual end tag accepted: ',
  0         0  
783             $tracer->symbol_name($empty_element_end_tag)
784             or Carp::croak("Cannot print: $ERRNO");
785             }
786 8 50       130 if ( $recce->earleme_complete() < 0 ) {
787 0         0 die $thin_grammar->error();
788             }
789 8         24 my $current_earleme = $recce->current_earleme();
790 8 50       16 die $thin_grammar->error() if not defined $current_earleme;
791 8         17 $self->{earleme_to_html_token_ix}->[$current_earleme] =
792             $latest_html_token;
793 8         9 $empty_element_end_tag = undef;
794 8         20 next RECCE_RESPONSE;
795             } ## end if ( defined $empty_element_end_tag )
796              
797 2224         2859 my $token = $html_parser_tokens[$token_number];
798              
799 2224         3291 my $attempted_symbol_id = $token
800             ->[Marpa::R2::HTML::Internal::Token::TOKEN_ID];
801 2224         4743 my $read_result =
802             $recce->alternative( $attempted_symbol_id, PHYSICAL_TOKEN, 1 );
803 2224 100       3958 if ( $read_result != $UNEXPECTED_TOKEN_ID ) {
804 1469 50       2274 if ( $read_result != $NO_MARPA_ERROR ) {
805 0         0 die $thin_grammar->error();
806             }
807 1469 50       2144 if ($trace_terminals) {
808 0 0       0 say {$trace_fh} 'Token accepted: ',
  0         0  
809             $tracer->symbol_name($attempted_symbol_id)
810             or Carp::croak("Cannot print: $ERRNO");
811             }
812 1469 50       24506 if ( $recce->earleme_complete() < 0 ) {
813 0         0 die $thin_grammar->error();
814             }
815              
816 1469         2830 my $last_html_token_of_marpa_token = $token_number;
817 1469         1738 $token_number++;
818 1469 50       2338 if ( defined $last_html_token_of_marpa_token ) {
819 1469         1787 $latest_html_token = $last_html_token_of_marpa_token;
820             }
821 1469         2829 my $current_earleme = $recce->current_earleme();
822 1469 50       2635 die $thin_grammar->error() if not defined $current_earleme;
823 1469         2525 $self->{earleme_to_html_token_ix}->[$current_earleme] =
824             $latest_html_token;
825              
826 1469         1782 $empty_element_end_tag = $empty_element_end_tag[$attempted_symbol_id];
827 1469         3178 next RECCE_RESPONSE;
828             } ## end if ( $read_result != $UNEXPECTED_TOKEN_ID )
829              
830 755 50       1207 if ($trace_terminals) {
831 0 0       0 say {$trace_fh} 'Literal Token not accepted: ',
  0         0  
832             $tracer->symbol_name($attempted_symbol_id)
833             or Carp::croak("Cannot print: $ERRNO");
834             }
835              
836 755         888 my $highest_candidate_rank = 0;
837 755         930 my $virtual_terminal_to_add;
838 755         1038 my $ruby_vector = $ruby_rank_by_id[$attempted_symbol_id];
839 755         3142 my @terminals_expected = $recce->terminals_expected();
840 755 50       1470 die $thin_grammar->error() if not defined $terminals_expected[0];
841 755         1145 CANDIDATE: for my $candidate_id (@terminals_expected) {
842 6736         8266 my $this_candidate_rank = $ruby_vector->[$candidate_id];
843 6736 50       10447 if ($trace_terminals) {
844 0 0       0 say {$trace_fh} 'Considering candidate: ',
  0         0  
845             $tracer->symbol_name($candidate_id),
846             "; rank is $this_candidate_rank; highest rank so far is $highest_candidate_rank"
847             or Carp::croak("Cannot print: $ERRNO");
848             } ## end if ($trace_terminals)
849 6736 100       11099 if ( $this_candidate_rank > $highest_candidate_rank ) {
850 792 50       1280 if ($trace_terminals) {
851 0 0       0 say {$trace_fh} 'Considering candidate: ',
  0         0  
852             $tracer->symbol_name($candidate_id),
853             '; last seen at ', $terminal_last_seen[$candidate_id],
854             "; current token number is $token_number"
855             or Carp::croak("Cannot print: $ERRNO");
856             } ## end if ($trace_terminals)
857             next CANDIDATE
858 792 50       1368 if $terminal_last_seen[$candidate_id] == $token_number;
859 792 50       1257 if ($trace_terminals) {
860 0 0       0 say {$trace_fh} 'Current best candidate: ',
  0         0  
861             $tracer->symbol_name($candidate_id),
862             or Carp::croak("Cannot print: $ERRNO");
863             }
864 792         946 $highest_candidate_rank = $this_candidate_rank;
865 792         1149 $virtual_terminal_to_add = $candidate_id;
866             } ## end if ( $this_candidate_rank > $highest_candidate_rank )
867             } ## end CANDIDATE: for my $candidate_id (@terminals_expected)
868              
869 755 100       1267 if ( defined $virtual_terminal_to_add ) {
870              
871 752 50       1231 if ($trace_terminals) {
872 0 0       0 say {$trace_fh} 'Adding Ruby Slippers token: ',
  0         0  
873             $tracer->symbol_name($virtual_terminal_to_add),
874             or Carp::croak("Cannot print: $ERRNO");
875             }
876              
877 752         1663 my $ruby_slippers_result =
878             $recce->alternative( $virtual_terminal_to_add,
879             RUBY_SLIPPERS_TOKEN, 1 );
880 752 50       1341 if ( $ruby_slippers_result != $NO_MARPA_ERROR ) {
881 0         0 die $thin_grammar->error();
882             }
883 752 50       8658 if ( $recce->earleme_complete() < 0 ) {
884 0         0 die $thin_grammar->error();
885             }
886              
887             # Only keep track of start tags. We need to be able to add end
888             # tags repeatedly.
889             # Adding end tags cannot cause an infinite loop, because each
890             # one ends an element and only a finite number of elements
891             # can have been started.
892 752 100       1690 $terminal_last_seen[$virtual_terminal_to_add] = $token_number
893             if $is_start_tag[$virtual_terminal_to_add];
894              
895 752         1451 my $current_earleme = $recce->current_earleme();
896 752 50       1430 die $thin_grammar->error() if not defined $current_earleme;
897 752         1443 $self->{earleme_to_html_token_ix}->[$current_earleme] =
898             $latest_html_token;
899              
900 752         1052 $empty_element_end_tag = $empty_element_end_tag[$virtual_terminal_to_add];
901              
902 752         1845 next RECCE_RESPONSE;
903             } ## end if ( defined $virtual_terminal_to_add )
904              
905             # If we didn't find a token to add, add the
906             # current physical token as CRUFT.
907              
908 3 50       20 if ($trace_terminals) {
909 0 0       0 say {$trace_fh} 'Adding rejected token as cruft: ',
  0         0  
910             $tracer->symbol_name($attempted_symbol_id)
911             or Carp::croak("Cannot print: $ERRNO");
912             }
913              
914 3 50       13 my $fatal_cruft_error = $token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID]
915             == $SYMID_CRUFT ? 1 : 0;
916              
917 3 50 33     31 if ( $trace_cruft or $fatal_cruft_error ) {
918 0         0 my $current_earleme = $recce->current_earleme();
919 0 0       0 die $thin_grammar->error() if not defined $current_earleme;
920 0         0 my ( $line, $col ) =
921             earleme_to_linecol( $self, $current_earleme );
922              
923             # HTML::Parser uses one-based line numbers,
924             # but zero-based column numbers
925             # The convention (in vi and cut) is that
926             # columns are also one-based.
927 0         0 $col++;
928              
929 0         0 say {$trace_fh} qq{Cruft at line $line, column $col: "},
930             ${
931 0 0       0 token_range_to_original(
  0         0  
932             $self, $token_number, $token_number
933             )
934             },
935             q{"}
936             or Carp::croak("Cannot print: $ERRNO");
937 0 0       0 die 'Internal error: cruft token was rejected'
938             if $fatal_cruft_error;
939             } ## end if ( $trace_cruft or $fatal_cruft_error )
940              
941             # Cruft tokens are not virtual.
942             # They are the real things, hacked up.
943 3         13 $token->[Marpa::R2::HTML::Internal::Token::TOKEN_ID] = $SYMID_CRUFT;
944              
945             } ## end RECCE_RESPONSE: while ( $token_number < $token_count )
946 92         276 $thin_grammar->throw_set(1);
947              
948 92 50       201 if ($trace_terminals) {
949 0 0       0 say {$trace_fh} 'at end of tokens'
  0         0  
950             or Carp::croak("Cannot print: $ERRNO");
951             }
952              
953 92         5038 $Marpa::R2::HTML::INSTANCE = $self;
954 92         227 local $Marpa::R2::HTML::Internal::PARSE_INSTANCE = $self;
955 92         318 my $latest_earley_set_ID = $recce->latest_earley_set();
956 92         5160 my $bocage = Marpa::R2::Thin::B->new( $recce, $latest_earley_set_ID );
957 92         402 my $order = Marpa::R2::Thin::O->new($bocage);
958 92         367 my $tree = Marpa::R2::Thin::T->new($order);
959 92         677 $tree->next();
960              
961 92         168 my @stack = ();
962 92         191 local $Marpa::R2::HTML::Internal::STACK = \@stack;
963 92         164 my %memoized_handlers = ();
964              
965 92         543 my $valuator = Marpa::R2::Thin::V->new($tree);
966 92         169 local $Marpa::R2::HTML::Internal::RECCE = $recce;
967 92         136 local $Marpa::R2::HTML::Internal::VALUATOR = $valuator;
968              
969 92         613 for my $rule_id ( grep { $thin_grammar->rule_length($_); }
  12838         19168  
970             0 .. $thin_grammar->highest_rule_id() )
971             {
972 12746         20204 $valuator->rule_is_valued_set( $rule_id, 1 );
973             }
974 92         390 STEP: while (1) {
975 7044         22223 my ( $type, @step_data ) = $valuator->step();
976 7044 100       13410 last STEP if not defined $type;
977 6952 100       12110 if ( $type eq 'MARPA_STEP_TOKEN' ) {
978 2229 50 0     3624 say {*STDERR} join q{ }, $type, @step_data,
  0         0  
979             $tracer->symbol_name( $step_data[0] )
980             or Carp::croak("Cannot print: $ERRNO")
981             if $trace_values;
982 2229         3503 my ( undef, $token_value, $arg_n ) = @step_data;
983 2229 100       3851 if ( $token_value eq RUBY_SLIPPERS_TOKEN ) {
984 760         1406 $stack[$arg_n] = ['RUBY_SLIPPERS_TOKEN'];
985 760 50 0     1405 say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack )
  0         0  
986             or Carp::croak("Cannot print: $ERRNO")
987             if $trace_values;
988 760         1195 next STEP;
989             } ## end if ( $token_value eq RUBY_SLIPPERS_TOKEN )
990 1469         3039 my ( $start_earley_set_id, $end_earley_set_id ) =
991             $valuator->location();
992 1469         2991 my $start_earleme = $recce->earleme($start_earley_set_id);
993             my $start_html_token_ix =
994 1469         2529 $self->{earleme_to_html_token_ix}->[$start_earleme];
995 1469         2453 my $end_earleme = $recce->earleme($end_earley_set_id);
996             my $end_html_token_ix =
997 1469         2084 $self->{earleme_to_html_token_ix}->[$end_earleme];
998 1469         3065 $stack[$arg_n] = [
999             'PHYSICAL_TOKEN' => $start_html_token_ix + 1,
1000             $end_html_token_ix,
1001             ];
1002 1469 50 0     2809 say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack )
  0         0  
1003             or Carp::croak("Cannot print: $ERRNO")
1004             if $trace_values;
1005 1469         2399 next STEP;
1006             } ## end if ( $type eq 'MARPA_STEP_TOKEN' )
1007 4723 100       7626 if ( $type eq 'MARPA_STEP_RULE' ) {
1008 4168 50 0     6345 say {*STDERR} join q{ }, ( $type, @step_data )
  0         0  
1009             or Carp::croak("Cannot print: $ERRNO")
1010             if $trace_values;
1011 4168         6441 my ( $rule_id, $arg_0, $arg_n ) = @step_data;
1012              
1013 4168         5121 my $attributes = undef;
1014 4168         4851 my $class = undef;
1015 4168         5722 my $action = $action_by_rule_id[$rule_id];
1016 4168         5092 local $Marpa::R2::HTML::Internal::START_TAG_IX = undef;
1017 4168         4898 local $Marpa::R2::HTML::Internal::END_TAG_IX_REF = undef;
1018 4168         4646 local $Marpa::R2::HTML::Internal::ELEMENT = undef;
1019 4168         5368 local $Marpa::R2::HTML::Internal::SPECIES = q{};
1020              
1021 4168 100 100     10357 if ( defined $action and ( index $action, 'ELE_' ) == 0 ) {
1022 747         1762 $Marpa::R2::HTML::Internal::SPECIES =
1023             $Marpa::R2::HTML::Internal::ELEMENT = substr $action, 4;
1024 747         1038 my $start_tag_marpa_token = $stack[$arg_0];
1025              
1026 747         1076 my $start_tag_type = $start_tag_marpa_token
1027             ->[Marpa::R2::HTML::Internal::TDesc::TYPE];
1028 747 100 66     2280 if ( defined $start_tag_type
1029             and $start_tag_type eq 'PHYSICAL_TOKEN' )
1030             {
1031 401         560 my $start_tag_ix = $start_tag_marpa_token->[1];
1032 401         866 my $start_tag_token = $html_parser_tokens[$start_tag_ix];
1033 401 50       1186 if ( $start_tag_token
1034             ->[Marpa::R2::HTML::Internal::Token::TYPE] eq 'S' )
1035             {
1036 401         549 $Marpa::R2::HTML::Internal::START_TAG_IX =
1037             $start_tag_ix;
1038 401         770 $attributes = $start_tag_token
1039             ->[Marpa::R2::HTML::Internal::Token::ATTR];
1040             } ## end if ( $start_tag_token->[...])
1041             } ## end if ( defined $start_tag_type and $start_tag_type eq ...)
1042             } ## end if ( defined $action and ( index $action, 'ELE_' ) ==...)
1043 4168 100 100     8877 if ( defined $action and ( index $action, 'SPE_' ) == 0 ) {
1044 741         1563 $Marpa::R2::HTML::Internal::SPECIES = q{:} . substr $action,
1045             4;
1046             }
1047 4168         5106 local $Marpa::R2::HTML::Internal::ATTRIBUTES = $attributes;
1048 4168   100     10512 $class = $attributes->{class} // q{*};
1049 4168         5453 local $Marpa::R2::HTML::Internal::CLASS = $class;
1050 4168         4835 local $Marpa::R2::HTML::Internal::ARG_0 = $arg_0;
1051 4168         4918 local $Marpa::R2::HTML::Internal::ARG_N = $arg_n;
1052              
1053 4168         8328 my ( $start_earley_set_id, $end_earley_set_id ) =
1054             $valuator->location();
1055              
1056 4168         8531 my $start_earleme = $recce->earleme($start_earley_set_id);
1057             my $start_html_token_ix =
1058 4168         6324 $self->{earleme_to_html_token_ix}->[$start_earleme] + 1;
1059 4168         6484 my $end_earleme = $recce->earleme($end_earley_set_id);
1060             my $end_html_token_ix =
1061 4168         5483 $self->{earleme_to_html_token_ix}->[$end_earleme];
1062              
1063 4168 100       6794 if ( $start_html_token_ix > $end_html_token_ix ) {
1064 117         168 $start_html_token_ix = $end_html_token_ix = undef;
1065             }
1066 4168         5181 local $Marpa::R2::HTML::Internal::START_HTML_TOKEN_IX =
1067             $start_html_token_ix;
1068 4168         5126 local $Marpa::R2::HTML::Internal::END_HTML_TOKEN_IX =
1069             $end_html_token_ix;
1070              
1071 4168         6967 my $handler_key =
1072             $rule_id . q{;} . $Marpa::R2::HTML::Internal::CLASS;
1073              
1074 4168         5982 my $handler = $memoized_handlers{$handler_key};
1075              
1076             $trace_handlers
1077             and $handler
1078 4168 50 0     7096 and say {*STDERR}
  0   33     0  
1079             qq{Found memoized handler for rule $rule_id, class "},
1080             ( $class // q{*} ), q{"};
1081              
1082 4168 100       6737 if ( not defined $handler ) {
1083 1341         2411 $handler = $memoized_handlers{$handler_key} =
1084             handler_find( $self, $rule_id, $class );
1085             }
1086              
1087             COMPUTE_VALUE: {
1088 4168 100       5087 if ( ref $handler ) {
  4168         6813  
1089 581         1492 $stack[$arg_0] = [
1090             VALUED_SPAN => $start_html_token_ix,
1091             $end_html_token_ix,
1092             ( scalar $handler->() ),
1093             $rule_id
1094             ];
1095 581         12322 last COMPUTE_VALUE;
1096             } ## end if ( ref $handler )
1097 3587         4703 my @flat_tdesc_list = ();
1098             STACK_IX:
1099 3587         6465 for my $stack_ix ( $Marpa::R2::HTML::Internal::ARG_0 ..
1100             $Marpa::R2::HTML::Internal::ARG_N )
1101             {
1102 5339         6785 my $tdesc_item =
1103             $Marpa::R2::HTML::Internal::STACK->[$stack_ix];
1104 5339         6355 my $tdesc_type = $tdesc_item->[0];
1105 5339 50       8216 next STACK_IX if not defined $tdesc_type;
1106 5339 100       8357 if ( $tdesc_type eq 'VALUES' ) {
1107             push @flat_tdesc_list,
1108 471         548 @{ $tdesc_item
  471         1152  
1109             ->[Marpa::R2::HTML::Internal::TDesc::VALUE] };
1110 471         806 next STACK_IX;
1111             } ## end if ( $tdesc_type eq 'VALUES' )
1112 4868 100       8349 next STACK_IX if $tdesc_type ne 'VALUED_SPAN';
1113 3233         5466 push @flat_tdesc_list, $tdesc_item;
1114             } ## end STACK_IX: for my $stack_ix ( $Marpa::R2::HTML::Internal::ARG_0...)
1115 3587 100       6285 if ( scalar @flat_tdesc_list <= 1 ) {
1116 2972         6611 $stack[$arg_0] = [
1117             VALUED_SPAN => $start_html_token_ix,
1118             $end_html_token_ix,
1119             $flat_tdesc_list[0]
1120             ->[Marpa::R2::HTML::Internal::TDesc::VALUE],
1121             $rule_id
1122             ];
1123 2972         5177 last COMPUTE_VALUE;
1124             } ## end if ( scalar @flat_tdesc_list <= 1 )
1125 615         1735 $stack[$arg_0] = [
1126             VALUES => $start_html_token_ix,
1127             $end_html_token_ix,
1128             \@flat_tdesc_list,
1129             $rule_id
1130             ];
1131             } ## end COMPUTE_VALUE:
1132              
1133 4168 50       7150 if ($trace_values) {
1134 0 0       0 say {*STDERR} "rule $rule_id: ", join q{ },
  0         0  
1135             symbol_names_by_rule_id( $self, $rule_id )
1136             or Carp::croak("Cannot print: $ERRNO");
1137 0 0       0 say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack )
  0         0  
1138             or Carp::croak("Cannot print: $ERRNO");
1139             } ## end if ($trace_values)
1140 4168         10601 next STEP;
1141             } ## end if ( $type eq 'MARPA_STEP_RULE' )
1142              
1143 555 50       1070 if ( $type eq 'MARPA_STEP_NULLING_SYMBOL' ) {
1144 555         909 my ( $symbol_id, $arg_n ) = @step_data;
1145 555         1258 $stack[$arg_n] = ['ZERO_SPAN'];
1146              
1147 555 50       1047 if ($trace_values) {
1148 0 0       0 say {*STDERR} join q{ }, $type, @step_data,
  0         0  
1149             $tracer->symbol_name($symbol_id)
1150             or Carp::croak("Cannot print: $ERRNO");
1151 0 0       0 say {*STDERR} "Stack:\n", Data::Dumper::Dumper( \@stack )
  0         0  
1152             or Carp::croak("Cannot print: $ERRNO");
1153             } ## end if ($trace_values)
1154 555         906 next STEP;
1155             } ## end if ( $type eq 'MARPA_STEP_NULLING_SYMBOL' )
1156 0         0 die "Unexpected step type: $type";
1157             } ## end STEP: while (1)
1158              
1159 92         180 my $result = $stack[0];
1160 92 50       199 Marpa::R2::exception('No parse: evaler returned undef')
1161             if not defined $result;
1162              
1163 92 100       248 if ( ref $self->{handler_by_species}->{TOP} ) {
1164             ## This is a user-defined handler. We assume it returns
1165             ## a VALUED_SPAN.
1166 4         8 $result = $result->[Marpa::R2::HTML::Internal::TDesc::VALUE];
1167             }
1168             else {
1169             ## The TOP handler was the default handler.
1170             ## We now want to "literalize" its result.
1171             FIND_LITERALIZEABLE: {
1172 88         117 my $type = $result->[Marpa::R2::HTML::Internal::TDesc::TYPE];
  88         166  
1173 88 100       192 if ( $type eq 'VALUES' ) {
1174 11         28 $result = $result->[Marpa::R2::HTML::Internal::TDesc::VALUE];
1175 11         71 last FIND_LITERALIZEABLE;
1176             }
1177 77 50       153 if ( $type eq 'VALUED_SPAN' ) {
1178 77         134 $result = [$result];
1179 77         147 last FIND_LITERALIZEABLE;
1180             }
1181 0         0 die 'Internal: TOP result is not literalize-able';
1182             } ## end FIND_LITERALIZEABLE:
1183 88         301 $result = range_and_values_to_literal( $self, 0, $#html_parser_tokens,
1184             $result );
1185             } ## end else [ if ( ref $self->{handler_by_species}->{TOP} ) ]
1186              
1187 92         8597 return $result;
1188              
1189             } ## end sub parse
1190              
1191             sub Marpa::R2::HTML::html {
                 # Functional entry point of Marpa::R2::HTML (the name listed in
                 # that package's @EXPORT_OK): builds a parser object and runs
                 # it over one document in a single call.
                 #
                 # Arguments:
                 #   $document_ref - handed unchanged to
                 #       Marpa::R2::HTML::Internal::parse() as its second
                 #       argument; presumably a reference to the HTML text
                 #       (TODO confirm against the head of parse()).
                 #   @args - every remaining argument, forwarded as-is to
                 #       Marpa::R2::HTML::Internal::create().
                 #
                 # Returns whatever Marpa::R2::HTML::Internal::parse() returns.
1192 94     94 0 25135 my ( $document_ref, @args ) = @_;
                 # Construct the parser object from the caller's arguments.
1193 94         250 my $html = Marpa::R2::HTML::Internal::create(@args);
                 # Parse the document with that object and pass the result through.
1194 94         286 return Marpa::R2::HTML::Internal::parse( $html, $document_ref );
1195             }
1196              
1197             1;
1198              
1199             # vim: set expandtab shiftwidth=4: