File Coverage

blib/lib/Marpa/PP/Recognizer.pm
Criterion Covered Total %
statement 727 796 91.3
branch 192 300 64.0
condition 53 79 67.0
subroutine 44 45 97.7
pod 8 26 30.7
total 1024 1246 82.1


line stmt bran cond sub pod time code
1             # Copyright 2012 Jeffrey Kegler
2             # This file is part of Marpa::PP. Marpa::PP is free software: you can
3             # redistribute it and/or modify it under the terms of the GNU Lesser
4             # General Public License as published by the Free Software Foundation,
5             # either version 3 of the License, or (at your option) any later version.
6             #
7             # Marpa::PP is distributed in the hope that it will be useful,
8             # but WITHOUT ANY WARRANTY; without even the implied warranty of
9             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10             # Lesser General Public License for more details.
11             #
12             # You should have received a copy of the GNU Lesser
13             # General Public License along with Marpa::PP. If not, see
14             # http://www.gnu.org/licenses/.
15              
16             package Marpa::PP::Recognizer;
17              
18 44     44   1555 use 5.010;
  44         162  
  44         2059  
19 44     44   1177 use warnings;
  44         269  
  44         6298  
20              
21             no warnings ## no critic (TestingAndDebugging::ProhibitNoWarnings)
22 44     44   459 'recursion';
  44         97  
  44         2707  
23              
24 44     44   1054 use strict;
  44         108  
  44         8757  
25 44     44   2981 use integer;
  44         103  
  44         390  
26              
27 44     44   1777 use English qw( -no_match_vars );
  44         2049  
  44         451  
28              
29 44     44   33060 use vars qw($VERSION $STRING_VERSION);
  44         140  
  44         6107  
30             $VERSION = '0.014000';
31             $STRING_VERSION = $VERSION;
32             {
33             ## no critic (BuiltinFunctions::ProhibitStringyEval)
34             ## no critic (ValuesAndExpressions::RequireConstantVersion)
35             $VERSION = eval $VERSION;
36             }
37              
38             BEGIN {
39 44     44   113 my $structure = <<'END_OF_STRUCTURE';
40              
41             :package=Marpa::PP::Internal::Earley_Set
42              
43             ORDINAL { The ordinal for this set }
44             ITEMS { The Earley items for this set. }
45             HASH { Hash by origin & state. To prevent dups. }
46             POSTDOT { Index by postdot symbol. }
47              
48             END_OF_STRUCTURE
49 44         236 Marpa::PP::offset($structure);
50             } ## end BEGIN
51              
52             # Elements of the EARLEY ITEM structure
53             # Note that these are Earley items as modified by Aycock & Horspool,
54             # with AHFA states instead of
55             # LR(0) items.
56              
57             # We don't prune the Earley items because we want ORIGIN and SET
58             # around for debugging.
59              
60             BEGIN {
61 44     44   117 my $structure = <<'END_OF_STRUCTURE';
62              
63             :package=Marpa::PP::Internal::Earley_Item
64              
65             ID { ID of Earley item. Unique within recognizer. }
66             STATE { The AHFA state. }
67             LINKS { A list of the links from the completer step. }
68              
69             LEO_LINKS { Leo Links sources -- not necessarily unique.
70             No more than one Leo link can come from a single
71             Earleme.
72             But the distance to the origin of this item can be
73             "factored" differently between predecessor and cause.
74             Each different "factoring" can contribute a Leo
75             link. }
76             IS_LEO_EXPANDED { Flag indicating if Leo links were expanded }
77              
78             ORIGIN { The number of the Earley set with the parent item(s) }
79             SET { The set this item is in. For debugging. }
80              
81             END_OF_STRUCTURE
82 44         306 Marpa::PP::offset($structure);
83             } ## end BEGIN
84              
85             our $LEO_CLASS;
86             $LEO_CLASS = 'Marpa::PP::Internal::Leo_Item';
87              
88             BEGIN {
89 44     44   117 my $structure = <<'END_OF_STRUCTURE';
90              
91             :package=Marpa::PP::Internal::Leo_Item
92              
93             LEO_POSTDOT_SYMBOL { A symbol name. }
94             ORIGIN { The number of the Earley set with the parent item(s) }
95             BASE { The Earley item on which this item is based. }
96             PREDECESSOR { The Leo item prior in the series to this one. }
97             SET { The set this item is in. }
98             TOP_TO_STATE { The AHFA to-state of the top-level transition. }
99              
100             END_OF_STRUCTURE
101 44         180 Marpa::PP::offset($structure);
102             } ## end BEGIN
103              
104             # Elements of the RECOGNIZER structure
105             BEGIN {
106 44     44   125 my $structure = <<'END_OF_STRUCTURE';
107              
108             :package=Marpa::PP::Internal::Recognizer
109              
110             C { A C structure }
111              
112             GRAMMAR { the grammar used }
113             EARLEY_SETS { the array of the Earley sets }
114             NEXT_EARLEY_ITEM_ID { ID of the next Earley item to be created. }
115             FURTHEST_EARLEME { last earley set with something in it }
116             LAST_COMPLETED_EARLEME { the current earleme }
117             FINISHED
118             EXHAUSTED { can parse continue? }
119             EXPECTED_TERMINALS { terminals which are expected at the
120             current earleme }
121             USE_LEO { Use Leo items? }
122             NEXT_ORDINAL { Ordinal of next Earley set }
123             EARLEY_SETS_BY_ORDINAL { Array of Earley sets by ordinal }
124              
125             TRACE_FILE_HANDLE
126              
127             END
128             CLOSURES
129             TRACE_ACTIONS
130             TRACE_VALUES
131             TRACE_TASKS
132             TRACING
133             MAX_PARSES
134             NULL_VALUES
135             RANKING_METHOD
136              
137             { The following fields must be reinitialized when
138             evaluation is reset }
139              
140             SINGLE_PARSE_MODE
141             PARSE_COUNT :{ number of parses in an ambiguous parse :}
142              
143             AND_NODES
144             AND_NODE_HASH
145             OR_NODES
146             OR_NODE_HASH
147              
148             ITERATION_STACK
149              
150             EVALUATOR_RULES
151              
152             { This is the end of the list of fields which
153             must be reinitialized when evaluation is reset }
154              
155             TOO_MANY_EARLEY_ITEMS
156             TRACE_EARLEY_SETS
157             TRACE_TERMINALS
158             WARNINGS
159              
160             MODE
161              
162             END_OF_STRUCTURE
163 44         198 Marpa::PP::offset($structure);
164             } ## end BEGIN
165              
166             package Marpa::PP::Internal::Recognizer;
167              
168 44     44   475 use English qw( -no_match_vars );
  44         4242  
  44         413  
169              
170 44     44   33645 use constant EARLEME_MASK => ~(0x7fffffff);
  44         99  
  44         4085  
171              
172 44     44   229 use constant DEFAULT_TOO_MANY_EARLEY_ITEMS => 100;
  44         95  
  44         71017  
173              
174             my $parse_number = 0;
175              
176             # Returns the new parse object or throws an exception
177             sub Marpa::PP::Recognizer::new {
178 188     188 1 104441 my ( $class, @arg_hashes ) = @_;
179 188         1106 my $recce = bless [], 'Marpa::PP::Recognizer';
180              
181 188         478 my $grammar;
182 188         466 ARG_HASH: for my $arg_hash (@arg_hashes) {
183 188 50       904 if ( defined( $grammar = $arg_hash->{grammar} ) ) {
184 188         505 delete $arg_hash->{grammar};
185 188         526 last ARG_HASH;
186             }
187             } ## end for my $arg_hash (@arg_hashes)
188 188 50       734 Marpa::PP::exception('No grammar specified') if not defined $grammar;
189              
190 188         932 $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR] = $grammar;
191              
192 188         2613 my $grammar_class = ref $grammar;
193 188 50       696 Marpa::PP::exception(
194             "${class}::new() grammar arg has wrong class: $grammar_class")
195             if not $grammar_class eq 'Marpa::PP::Grammar';
196              
197 188         571 my $problems = $grammar->[Marpa::PP::Internal::Grammar::PROBLEMS];
198 188 50       561 if ($problems) {
199 0         0 Marpa::PP::exception(
200             Marpa::PP::Grammar::show_problems($grammar),
201             "Attempt to parse grammar with fatal problems\n",
202             'Marpa::PP cannot proceed',
203             );
204             } ## end if ($problems)
205              
206 188         553 my $phase = $grammar->[Marpa::PP::Internal::Grammar::PHASE];
207 188 50       611 if ( $phase != Marpa::PP::Internal::Phase::PRECOMPUTED ) {
208 0         0 Marpa::PP::exception(
209             'Attempt to parse grammar in inappropriate phase ',
210             Marpa::PP::Internal::Phase::description($phase)
211             );
212             } ## end if ( $phase != Marpa::PP::Internal::Phase::PRECOMPUTED)
213              
214             # set the defaults
215 188         1113 local $Marpa::PP::Internal::TRACE_FH = my $trace_fh =
216             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE] =
217             $grammar->[Marpa::PP::Internal::Grammar::TRACE_FILE_HANDLE];
218 188         641 $recce->[Marpa::PP::Internal::Recognizer::WARNINGS] = 1;
219 188         435 $recce->[Marpa::PP::Internal::Recognizer::MODE] = 'default';
220 188         408 $recce->[Marpa::PP::Internal::Recognizer::RANKING_METHOD] = 'none';
221 188         407 $recce->[Marpa::PP::Internal::Recognizer::USE_LEO] = 1;
222 188         336 $recce->[Marpa::PP::Internal::Recognizer::MAX_PARSES] = 0;
223 188         355 $recce->[Marpa::PP::Internal::Recognizer::NEXT_EARLEY_ITEM_ID] = 0;
224 188         1600 $recce->reset_evaluation();
225              
226 188         874 $recce->set(@arg_hashes);
227              
228 188 50 66     898 if ( $grammar->[Marpa::PP::Internal::Grammar::HAS_CYCLE]
      33        
229             and $recce->[Marpa::PP::Internal::Recognizer::RANKING_METHOD] ne
230             'none'
231             and not $grammar->[Marpa::PP::Internal::Grammar::CYCLE_RANKING_ACTION]
232             )
233             {
234 0         0 Marpa::PP::exception(
235             "The grammar cycles (is infinitely ambiguous)\n",
236             " but it has no 'cycle_ranking_action'.\n",
237             " Either rewrite the grammar to eliminate cycles\n",
238             " or define a 'cycle ranking action'\n"
239             );
240             } ## end if ( $grammar->[Marpa::PP::Internal::Grammar::HAS_CYCLE...])
241              
242 188   100     1081 my $trace_terminals =
243             $recce->[Marpa::PP::Internal::Recognizer::TRACE_TERMINALS] // 0;
244 188   50     1109 my $trace_tasks = $recce->[Marpa::PP::Internal::Recognizer::TRACE_TASKS]
245             // 0;
246              
247 188 50       717 if (not defined
248             $recce->[Marpa::PP::Internal::Recognizer::TOO_MANY_EARLEY_ITEMS] )
249             {
250 188         504 my $AHFA_size =
251 188         1038 scalar @{ $grammar->[Marpa::PP::Internal::Grammar::AHFA] };
252 188         1317 $recce->[Marpa::PP::Internal::Recognizer::TOO_MANY_EARLEY_ITEMS] =
253             List::Util::max( ( 2 * $AHFA_size ),
254             Marpa::PP::Internal::Recognizer::DEFAULT_TOO_MANY_EARLEY_ITEMS );
255             } ## end if ( not defined $recce->[...])
256              
257             # Some of this processing -- to find terminals and Leo symbols
258             # by state -- should perhaps be done in the grammar.
259              
260 188         472 my $terminal_names =
261             $grammar->[Marpa::PP::Internal::Grammar::TERMINAL_NAMES];
262              
263 188         367 my $AHFA = $grammar->[Marpa::PP::Internal::Grammar::AHFA];
264 188         343 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
265              
266 188         426 my @earley_items = ();
267              
268 188         405 my $start_states = $grammar->[Marpa::PP::Internal::Grammar::START_STATES];
269 188         405 my %postdot = ();
270              
271 188         341 for my $state ( @{$start_states} ) {
  188         497  
272 375         714 my $state_id = $state->[Marpa::PP::Internal::AHFA::ID];
273 375         1523 my $name = sprintf
274             'S%d@%d-%d',
275             $state_id, 0, 0;
276              
277 375         666 my $item = [];
278 375         965 $item->[Marpa::PP::Internal::Earley_Item::ID] =
279             $recce->[Marpa::PP::Internal::Recognizer::NEXT_EARLEY_ITEM_ID]++;
280 375         671 $item->[Marpa::PP::Internal::Earley_Item::STATE] = $state;
281 375         892 $item->[Marpa::PP::Internal::Earley_Item::ORIGIN] = 0;
282 375         707 $item->[Marpa::PP::Internal::Earley_Item::LINKS] = [];
283 375         743 $item->[Marpa::PP::Internal::Earley_Item::SET] = 0;
284              
285 375         594 push @earley_items, $item;
286              
287 375         581 while ( my ( $transition_symbol, $to_states ) =
  2860         11097  
288             each %{ $state->[Marpa::PP::Internal::AHFA::TRANSITION] } )
289             {
290 2485         2653 my @to_states = grep {ref} @{$to_states};
  3918         8179  
  2485         4348  
291 2485         2871 push @{ $postdot{$transition_symbol} }, $item;
  2485         13383  
292             } ## end while ( my ( $transition_symbol, $to_states ) = each %{...})
293              
294             } ## end for my $state ( @{$start_states} )
295              
296 188         531 $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR] = $grammar;
297 188         618 my $earley_set = [];
298 188         492 $earley_set->[Marpa::PP::Internal::Earley_Set::POSTDOT] = \%postdot;
299 188         457 $earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS] = \@earley_items;
300 188         372 $earley_set->[Marpa::PP::Internal::Earley_Set::ORDINAL] = 0;
301 188         502 $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS] = [$earley_set];
302              
303 188         367 $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME] = 0;
304 188         367 $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME] = 0;
305 188         378 $recce->[Marpa::PP::Internal::Recognizer::NEXT_ORDINAL] = 1;
306 188         1461 $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS_BY_ORDINAL]->[0] =
307             $earley_set;
308              
309 188         1180 my @terminals_expected = grep { $terminal_names->{$_} } keys %postdot;
  2463         4043  
310 188         583 $recce->[Marpa::PP::Internal::Recognizer::EXPECTED_TERMINALS] =
311             \@terminals_expected;
312              
313 188         591 $recce->[Marpa::PP::Internal::Recognizer::EXHAUSTED] =
314             scalar @terminals_expected <= 0;
315              
316 188 100       605 if ( $trace_terminals > 1 ) {
317 1         4 for my $terminal ( sort @terminals_expected ) {
318 3 50       5 say {$Marpa::PP::Internal::TRACE_FH}
  3         16  
319             qq{Expecting "$terminal" at earleme 0}
320             or Marpa::PP::exception("Cannot print: $ERRNO");
321             }
322             } ## end if ( $trace_terminals > 1 )
323              
324 188         1080 return $recce;
325             } ## end sub Marpa::PP::Recognizer::new
326              
327 44         3353 use constant RECOGNIZER_OPTIONS => [
328             qw{
329             closures
330             end
331             leo
332             max_parses
333             mode
334             ranking_method
335             too_many_earley_items
336             trace_actions
337             trace_earley_sets
338             trace_fh
339             trace_file_handle
340             trace_tasks
341             trace_terminals
342             trace_values
343             warnings
344             }
345 44     44   2493 ];
  44         101  
346              
347 44     44   240 use constant RECOGNIZER_MODES => [qw(default stream)];
  44         183  
  44         253134  
348              
349             sub Marpa::PP::Recognizer::reset_evaluation {
350 249     249 0 17037 my ($recce) = @_;
351 249         527 $recce->[Marpa::PP::Internal::Recognizer::PARSE_COUNT] = 0;
352 249         527 $recce->[Marpa::PP::Internal::Recognizer::SINGLE_PARSE_MODE] = undef;
353 249         641 $recce->[Marpa::PP::Internal::Recognizer::AND_NODES] = [];
354 249         3496 $recce->[Marpa::PP::Internal::Recognizer::AND_NODE_HASH] = {};
355 249         573 $recce->[Marpa::PP::Internal::Recognizer::OR_NODES] = [];
356 249         639 $recce->[Marpa::PP::Internal::Recognizer::OR_NODE_HASH] = {};
357 249         2790 $recce->[Marpa::PP::Internal::Recognizer::ITERATION_STACK] = [];
358 249         939 $recce->[Marpa::PP::Internal::Recognizer::EVALUATOR_RULES] = [];
359 249         787 return;
360             } ## end sub Marpa::PP::Recognizer::reset_evaluation
361              
362             sub Marpa::PP::Recognizer::set {
363 251     251 1 3433 my ( $recce, @arg_hashes ) = @_;
364              
365             # This may get changed below
366 251         735 my $trace_fh =
367             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE];
368              
369 251         734 for my $args (@arg_hashes) {
370              
371 251         1032 my $ref_type = ref $args;
372 251 50 33     1984 if ( not $ref_type or $ref_type ne 'HASH' ) {
373 0   0     0 Carp::croak(
374             'Marpa::PP Recognizer expects args as ref to HASH, got ',
375             ( "ref to $ref_type" || 'non-reference' ),
376             ' instead'
377             );
378             } ## end if ( not $ref_type or $ref_type ne 'HASH' )
379 251 50       489 if (my @bad_options =
  191         1322  
380             grep {
381 251         1140 not $_ ~~ Marpa::PP::Internal::Recognizer::RECOGNIZER_OPTIONS
382             }
383             keys %{$args}
384             )
385             {
386 0         0 Carp::croak( 'Unknown option(s) for Marpa::PP Recognizer: ',
387             join q{ }, @bad_options );
388             } ## end if ( my @bad_options = grep { not $_ ~~ ...})
389              
390 251 100       1117 if ( defined( my $value = $args->{'leo'} ) ) {
391 4 100       15 $recce->[Marpa::PP::Internal::Recognizer::USE_LEO] =
392             $value ? 1 : 0;
393             }
394              
395 251 100       923 if ( defined( my $value = $args->{'max_parses'} ) ) {
396 66         157 $recce->[Marpa::PP::Internal::Recognizer::MAX_PARSES] = $value;
397             }
398              
399 251 100       1171 if ( defined( my $value = $args->{'mode'} ) ) {
400 18 50       87 if (not $value ~~
401             Marpa::PP::Internal::Recognizer::RECOGNIZER_MODES )
402             {
403 0         0 Carp::croak( 'Unknown mode for Marpa::PP Recognizer: ',
404             $value );
405             } ## end if ( not $value ~~ ...)
406 18         47 $recce->[Marpa::PP::Internal::Recognizer::MODE] = $value;
407             } ## end if ( defined( my $value = $args->{'mode'} ) )
408              
409 251 100       954 if ( defined( my $value = $args->{'ranking_method'} ) ) {
410 7 50       28 Marpa::PP::exception(q{ranking_method no longer implemented})
411             if not $Marpa::PP::AUTHOR_TEST_ONLY;
412 7 50       301 Marpa::PP::exception(
413             q{ranking_method must be 'constant' or 'none'})
414             if not $value ~~ [qw(constant none)];
415 7         21 $recce->[Marpa::PP::Internal::Recognizer::RANKING_METHOD] =
416             $value;
417             } ## end if ( defined( my $value = $args->{'ranking_method'} ...))
418              
419 251 100       841 if ( defined( my $value = $args->{'trace_fh'} ) ) {
420 1         4 $trace_fh =
421             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE] =
422             $value;
423             }
424              
425 251 100       861 if ( defined( my $value = $args->{'trace_file_handle'} ) ) {
426 4         12 $trace_fh =
427             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE] =
428             $value;
429             }
430              
431 251 50       870 if ( defined( my $value = $args->{'trace_actions'} ) ) {
432 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACE_ACTIONS] = $value;
433             ## Do not allow setting this option in recognizer for single parse mode
434 0         0 $recce->[Marpa::PP::Internal::Recognizer::SINGLE_PARSE_MODE] = 0;
435 0 0       0 if ($value) {
436 0 0       0 say {$trace_fh} 'Setting trace_actions option'
  0         0  
437             or Marpa::PP::exception("Cannot print: $ERRNO");
438 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACING] = 1;
439             }
440             } ## end if ( defined( my $value = $args->{'trace_actions'} ))
441              
442 251 50       846 if ( defined( my $value = $args->{'trace_tasks'} ) ) {
443 0 0       0 Marpa::PP::exception('trace_tasks must be set to a number >= 0')
444             if $value !~ /\A\d+\z/xms;
445 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACE_TASKS] =
446             $value + 0;
447 0 0       0 if ($value) {
448 0 0       0 say {$trace_fh} "Setting trace_tasks option to $value"
  0         0  
449             or Marpa::PP::exception("Cannot print: $ERRNO");
450 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACING] = 1;
451             }
452             } ## end if ( defined( my $value = $args->{'trace_tasks'} ) )
453              
454 251 100       861 if ( defined( my $value = $args->{'trace_terminals'} ) ) {
455 3         5 $recce->[Marpa::PP::Internal::Recognizer::TRACE_TERMINALS] =
456             $value;
457 3 50       9 if ($value) {
458 3 50       4 say {$trace_fh} 'Setting trace_terminals option'
  3         23  
459             or Marpa::PP::exception("Cannot print: $ERRNO");
460 3         5 $recce->[Marpa::PP::Internal::Recognizer::TRACING] = 1;
461             }
462             } ## end if ( defined( my $value = $args->{'trace_terminals'}...))
463              
464 251 50       936 if ( defined( my $value = $args->{'trace_earley_sets'} ) ) {
465 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACE_EARLEY_SETS] =
466             $value;
467 0 0       0 if ($value) {
468 0 0       0 say {$trace_fh} 'Setting trace_earley_sets option'
  0         0  
469             or Marpa::PP::exception("Cannot print: $ERRNO");
470 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACING] = 1;
471             }
472             } ## end if ( defined( my $value = $args->{'trace_earley_sets'...}))
473              
474 251 100       880 if ( defined( my $value = $args->{'trace_values'} ) ) {
475 1         3 $recce->[Marpa::PP::Internal::Recognizer::TRACE_VALUES] = $value;
476             ## Do not allow setting this option in recognizer for single parse mode
477 1         2 $recce->[Marpa::PP::Internal::Recognizer::SINGLE_PARSE_MODE] = 0;
478 1 50       4 if ($value) {
479 0 0       0 say {$trace_fh} 'Setting trace_values option'
  0         0  
480             or Marpa::PP::exception("Cannot print: $ERRNO");
481 0         0 $recce->[Marpa::PP::Internal::Recognizer::TRACING] = 1;
482             }
483             } ## end if ( defined( my $value = $args->{'trace_values'} ) )
484              
485 251 100       862 if ( defined( my $value = $args->{'end'} ) ) {
486              
487             # Not allowed once parsing is started
488 57 50       191 if ( $recce->[Marpa::PP::Internal::Recognizer::PARSE_COUNT] > 0 )
489             {
490 0         0 Marpa::PP::exception(
491             q{Cannot reset end once parsing has started});
492             }
493 57         117 $recce->[Marpa::PP::Internal::Recognizer::END] = $value;
494             ## Do not allow setting this option in recognizer for single parse mode
495 57         103 $recce->[Marpa::PP::Internal::Recognizer::SINGLE_PARSE_MODE] = 0;
496             } ## end if ( defined( my $value = $args->{'end'} ) )
497              
498 251 100       941 if ( defined( my $value = $args->{'closures'} ) ) {
499              
500             # Not allowed once parsing is started
501 30 50       121 if ( $recce->[Marpa::PP::Internal::Recognizer::PARSE_COUNT] > 0 )
502             {
503 0         0 Marpa::PP::exception(
504             q{Cannot reset end once parsing has started});
505             }
506 30         71 my $closures =
507             $recce->[Marpa::PP::Internal::Recognizer::CLOSURES] = $value;
508             ## Do not allow setting this option in recognizer for single parse mode
509 30         48 $recce->[Marpa::PP::Internal::Recognizer::SINGLE_PARSE_MODE] = 0;
510 30         56 while ( my ( $action, $closure ) = each %{$closures} ) {
  2576         6842  
511 2546 50       5627 Marpa::PP::exception(qq{Bad closure for action "$action"})
512             if ref $closure ne 'CODE';
513             }
514             } ## end if ( defined( my $value = $args->{'closures'} ) )
515              
516 251 50       868 if ( defined( my $value = $args->{'warnings'} ) ) {
517 0         0 $recce->[Marpa::PP::Internal::Recognizer::WARNINGS] = $value;
518             }
519              
520 251 50       1234 if ( defined( my $value = $args->{'too_many_earley_items'} ) ) {
521 0         0 $recce->[Marpa::PP::Internal::Recognizer::TOO_MANY_EARLEY_ITEMS] =
522             $value;
523             }
524              
525             } ## end for my $args (@arg_hashes)
526              
527 251         781 return 1;
528             } ## end sub Marpa::PP::Recognizer::set
529              
530             # Not intended to be documented.
531             # Returns the size of the last completed earley set.
532             # For testing, especially that the Leo items
533             # are doing their job.
534             sub Marpa::PP::Recognizer::earley_set_size {
535 165     165 0 741 my ( $recce, $ordinal ) = @_;
536 165         271 my $earley_set =
537             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS_BY_ORDINAL]
538             ->[$ordinal];
539 165 50       355 return if not defined $earley_set;
540 165         195 return scalar @{ $earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS] };
  165         463  
541             } ## end sub Marpa::PP::Recognizer::earley_set_size
542              
543             sub Marpa::PP::Recognizer::latest_earley_set {
544 165     165 0 853 my ($recce) = @_;
545 165         371 my $earleme =
546             $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
547 165         202 while (1) {
548              
549             # Earley set has a defined ORDINAL, so this loop must terminate
550 165         272 my $earley_set =
551             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS]
552             ->[$earleme];
553 165         225 my $ordinal = $earley_set->[Marpa::PP::Internal::Earley_Set::ORDINAL];
554 165 50       771 return $ordinal if defined $ordinal;
555 0         0 $earleme--;
556             } ## end while (1)
557 0         0 die 'Internal error: this line should not be reached';
558             } ## end sub Marpa::PP::Recognizer::latest_earley_set
559              
560             sub Marpa::PP::Recognizer::check_terminal {
561 1     1 1 8 my ( $recce, $name ) = @_;
562 1         2 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
563 1         6 return $grammar->check_terminal($name);
564             }
565              
566             sub Marpa::PP::Recognizer::exhausted {
567 10     10 0 73 return $_[0]->[Marpa::PP::Internal::Recognizer::EXHAUSTED];
568             }
569              
570             sub Marpa::PP::Recognizer::current_earleme {
571 1014     1014 0 4360 return $_[0]->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
572             }
573              
574             sub Marpa::PP::Recognizer::terminals_expected {
575 425     425 1 1804 return $_[0]->[Marpa::PP::Internal::Recognizer::EXPECTED_TERMINALS];
576             }
577              
578             # Deprecated -- obsolete
579             sub Marpa::PP::Recognizer::status {
580 70     70 0 102 my ($recce) = @_;
581 70 100       353 return ( $recce->current_earleme(), $recce->terminals_expected() )
582             if wantarray;
583 1         6 return $recce->current_earleme();
584              
585             } ## end sub Marpa::PP::Recognizer::status
586              
587             # Now useless and deprecated
588 0     0 0 0 sub Marpa::PP::Recognizer::strip { return 1; }
589              
590             # Viewing methods, for debugging
591              
592             sub Marpa::PP::show_link_choice {
593 272     272 0 333 my ($link) = @_;
594 272         284 my ( $predecessor, $cause, $token_name, $value_ref ) = @{$link};
  272         451  
595 272         452 my @pieces = ();
596 272 50       527 if ($predecessor) {
597 272         491 push @pieces,
598             'p=' . Marpa::PP::Internal::Earley_Item::name($predecessor);
599             }
600 272 100       859 if ( not defined $cause ) {
601 70         122 push @pieces, "s=$token_name";
602 70         355 my $token_dump = Data::Dumper->new( [$value_ref] )->Terse(1)->Dump;
603 70         4120 chomp $token_dump;
604 70         162 push @pieces, "t=$token_dump";
605             } ## end if ( not defined $cause )
606             else {
607 202         381 push @pieces,
608             'c=' . Marpa::PP::Internal::Earley_Item::name( $link->[1] );
609             }
610 272         1618 return '[' . ( join '; ', @pieces ) . ']';
611             } ## end sub Marpa::PP::show_link_choice
612              
613             sub Marpa::PP::show_leo_link_choice {
614 47     47 0 64 my ( $recce, $leo_link ) = @_;
615 47         54 my ( $leo_item, $cause ) = @{$leo_link};
  47         74  
616 47         113 my @link_texts = ();
617 47 50       101 if ($leo_item) {
618 47         103 push @link_texts,
619             ( 'l=' . Marpa::PP::leo_item_name( $recce, $leo_item ) );
620             }
621 47         110 push @link_texts, 'c=' . Marpa::PP::Internal::Earley_Item::name($cause);
622 47         295 return '[' . ( join '; ', @link_texts ) . ']';
623             } ## end sub Marpa::PP::show_leo_link_choice
624              
625             sub Marpa::PP::Internal::Earley_Item::name {
626 1075     1075   1248 my ($item) = @_;
627 1075         3914 return sprintf 'S%d@%d-%d',
628             $item->[Marpa::PP::Internal::Earley_Item::STATE]
629             ->[Marpa::PP::Internal::AHFA::ID],
630             $item->[Marpa::PP::Internal::Earley_Item::ORIGIN],
631             $item->[Marpa::PP::Internal::Earley_Item::SET];
632             } ## end sub Marpa::PP::Internal::Earley_Item::name
633              
634             sub Marpa::PP::show_earley_item {
635 351     351 0 470 my ( $recce, $item ) = @_;
636 351         448 my $links = $item->[Marpa::PP::Internal::Earley_Item::LINKS];
637 351         425 my $leo_links = $item->[Marpa::PP::Internal::Earley_Item::LEO_LINKS];
638 351         423 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
639 351         408 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
640              
641 351         550 my $text = Marpa::PP::Internal::Earley_Item::name($item);
642              
643 351 100 66     864 if ( defined $links and @{$links} ) {
  351         1189  
644 242         286 my @sort_data;
645 242         251 for my $link ( @{$links} ) {
  242         437  
646 272         285 my ( $predecessor, $cause, $token_name, $value_ref ) = @{$link};
  272         525  
647              
648             # The actual middle of a link with no predecessor
649             # is the origin of the Earley item which contains this link,
650             # but for sorting purposes any number less than than will do
651 272 50       622 my $middle =
652             defined $predecessor
653             ? $predecessor->[Marpa::PP::Internal::Earley_Item::SET]
654             : -1;
655 272 100       586 my $cause_state_id =
656             defined $cause
657             ? $cause->[Marpa::PP::Internal::Earley_Item::STATE]
658             ->[Marpa::PP::Internal::AHFA::ID]
659             : -1;
660 272 100       592 my $symbol_id =
661             defined $token_name ? $symbol_hash->{$token_name} : -1;
662 272         1539 push @sort_data,
663             [
664             $middle, $cause_state_id,
665             $symbol_id, Marpa::PP::show_link_choice($link)
666             ];
667             } ## end for my $link ( @{$links} )
668 272 50 66     737 my @sorted_links = map { $_->[-1] } sort {
  30         186  
669 242         506 $a->[0] <=> $b->[0]
670             || $a->[1] <=> $b->[1]
671             || $a->[2] <=> $b->[2]
672             } @sort_data;
673 242         828 $text .= q{ } . join q{ }, @sorted_links;
674             } ## end if ( defined $links and @{$links} )
675 351 100 100     845 if ( defined $leo_links and @{$leo_links} ) {
  324         1055  
676 38         50 my @sort_data;
677 38         45 for my $link ( @{$leo_links} ) {
  38         76  
678 47         55 my ( $predecessor, $cause ) = @{$link};
  47         84  
679 47         79 my $middle = $predecessor->[Marpa::PP::Internal::Leo_Item::SET];
680 47         74 my $cause_state_id =
681             $cause->[Marpa::PP::Internal::Earley_Item::STATE]
682             ->[Marpa::PP::Internal::AHFA::ID];
683 47         70 my $symbol_name = $predecessor
684             ->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
685 47         82 my $symbol_id = $symbol_hash->{$symbol_name};
686 47         104 push @sort_data,
687             [
688             $middle, $cause_state_id, $symbol_id,
689             Marpa::PP::show_leo_link_choice( $recce, $link )
690             ];
691             } ## end for my $link ( @{$leo_links} )
692 47 50 66     129 my @sorted_links = map { $_->[-1] } sort {
  9         63  
693 38         110 $a->[0] <=> $b->[0]
694             || $a->[1] <=> $b->[1]
695             || $a->[2] <=> $b->[2]
696             } @sort_data;
697 38         152 $text .= q{ } . join q{ }, @sorted_links;
698             } ## end if ( defined $leo_links and @{$leo_links} )
699 351         1763 return $text;
700             } ## end sub Marpa::PP::show_earley_item
701              
702             sub Marpa::PP::leo_item_name {
703 444     444 0 513 my ( $recce, $item ) = @_;
704 444         524 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
705 444         527 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
706 444         474 my $set = $item->[Marpa::PP::Internal::Leo_Item::SET];
707 444         598 my $symbol_name =
708             $item->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
709 444         635 my $symbol_id = $symbol_hash->{$symbol_name};
710 444         1484 return sprintf 'L%d@%d', $symbol_id, $set;
711             } ## end sub Marpa::PP::leo_item_name
712              
713             sub Marpa::PP::show_leo_item {
714 203     203 0 260 my ( $recce, $item ) = @_;
715 203         266 my $base = $item->[Marpa::PP::Internal::Leo_Item::BASE];
716 203         239 my $predecessor = $item->[Marpa::PP::Internal::Leo_Item::PREDECESSOR];
717 203         232 my $leo_symbol_name =
718             $item->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
719              
720 203         330 my $text = Marpa::PP::leo_item_name( $recce, $item );
721 203         470 my @link_texts = qq{"$leo_symbol_name"};
722 203 100       451 if ($predecessor) {
723 194         328 push @link_texts, Marpa::PP::leo_item_name( $recce, $predecessor );
724             }
725 203         375 push @link_texts, Marpa::PP::Internal::Earley_Item::name($base);
726 203         549 $text .= ' [' . ( join '; ', @link_texts ) . ']';
727 203         848 return $text;
728             } ## end sub Marpa::PP::show_leo_item
729              
730             sub Marpa::PP::show_earley_set {
731 79     79 0 101 my ( $recce, $earley_set ) = @_;
732 79         114 my $text = q{};
733 79         127 my $items = $earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS];
734 351 50       648 my @sorted_descriptions = map { $_->[-1] }
  451         1021  
735 351         925 sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] }
736             map {
737 79         173 [ $_->[Marpa::PP::Internal::Earley_Item::ORIGIN],
738             $_->[Marpa::PP::Internal::Earley_Item::STATE]
739             ->[Marpa::PP::Internal::AHFA::ID],
740             Marpa::PP::show_earley_item( $recce, $_ ) . "\n"
741             ]
742 79         115 } @{$items};
743 79         538 return join q{}, @sorted_descriptions;
744             } ## end sub Marpa::PP::show_earley_set
745              
746             sub Marpa::PP::show_postdot_set {
747 79     79 0 119 my ( $recce, $postdot_set ) = @_;
748 79         112 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
749 79         106 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
750 79         97 my $text = q{};
751 79         116 my @decorated_leo_items = ();
752 79         101 for my $leo_item (
  620         1234  
753 383         733 grep { ref eq $LEO_CLASS }
754 383         367 map { @{$_} } values %{$postdot_set}
  79         204  
755             )
756             {
757 203         295 my $symbol_name =
758             $leo_item->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
759 203         285 my $symbol_id = $symbol_hash->{$symbol_name};
760 203         525 push @decorated_leo_items, [ $leo_item, $symbol_id ];
761             } ## end for my $leo_item ( grep { ref eq $LEO_CLASS } map { @...})
762 203         339 my @sorted_leo_items =
763 79         214 map { $_->[0] } sort { $a->[1] <=> $b->[1] } @decorated_leo_items;
  402         489  
764 79         147 for my $postdot_item (@sorted_leo_items) {
765 203         376 $text .= Marpa::PP::show_leo_item( $recce, $postdot_item ) . "\n";
766             }
767 79         361 return $text;
768             } ## end sub Marpa::PP::show_postdot_set
769              
770             sub Marpa::PP::show_earley_set_list {
771 12     12 0 23 my ( $recce, $earley_set_list ) = @_;
772 12         30 my $text = q{};
773 12         27 my $earley_set_count = @{$earley_set_list};
  12         36  
774 12         96 LIST: for my $ix ( 0 .. $earley_set_count - 1 ) {
775 79         130 my $set = $earley_set_list->[$ix];
776 79 50       175 next LIST if not defined $set;
777 79         234 $text .= "Earley Set $ix\n"
778             . Marpa::PP::show_earley_set( $recce, $set );
779 79         159 my $postdot_set =
780             $earley_set_list->[$ix]
781             ->[Marpa::PP::Internal::Earley_Set::POSTDOT];
782 79 50       168 next LIST if not defined $postdot_set;
783 79         156 $text .= Marpa::PP::show_postdot_set( $recce, $postdot_set );
784             } ## end for my $ix ( 0 .. $earley_set_count - 1 )
785 12         213 return $text;
786             } ## end sub Marpa::PP::show_earley_set_list
787              
788             sub Marpa::PP::Recognizer::show_earley_sets {
789 12     12 1 2868 my ($recce) = @_;
790 12   50     59 my $last_completed_earleme = $recce->[LAST_COMPLETED_EARLEME]
791             // 'stripped';
792 12         30 my $furthest_earleme = $recce->[FURTHEST_EARLEME];
793 12         28 my $earley_set_list = $recce->[EARLEY_SETS];
794             return
795 12         99 "Last Completed: $last_completed_earleme; "
796             . "Furthest: $furthest_earleme\n"
797             . Marpa::PP::show_earley_set_list( $recce, $earley_set_list );
798              
799             } ## end sub Marpa::PP::Recognizer::show_earley_sets
800              
801             BEGIN {
802 44     44   272 my $structure = <<'END_OF_STRUCTURE';
803              
804             :package=Marpa::PP::Internal::Progress_Report
805              
806             RULE_ID
807             POSITION
808             ORIGIN
809              
810             END_OF_STRUCTURE
811 44         426 Marpa::PP::offset($structure);
812             } ## end BEGIN
813              
814             sub Marpa::PP::Recognizer::show_progress {
815 4     4 1 790 my ( $recce, $start_ordinal, $end_ordinal ) = @_;
816 4         8 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
817 4         12 my $rules = $grammar->[Marpa::PP::Internal::Grammar::RULES];
818              
819 4         9 my $earley_sets_by_ordinal =
820             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS_BY_ORDINAL];
821 4         13 my $last_ordinal = $#{$earley_sets_by_ordinal};
  4         10  
822              
823 4         22 my $start_ix;
824 4 100       20 if ( not defined $start_ordinal ) {
825 2         6 $start_ix =
826             $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
827             }
828             else {
829 2 50 33     17 if ( $start_ordinal < 0 or $start_ordinal > $last_ordinal ) {
830             return
831 0         0 "Marpa::PP::Recognizer::show_progress start index is $start_ordinal, "
832             . "must be in range 0-$last_ordinal";
833             }
834             $start_ix =
835 2         5 $earley_sets_by_ordinal->[$start_ordinal]
836             ->[Marpa::PP::Internal::Earley_Set::ORDINAL];
837             } ## end else [ if ( not defined $start_ordinal ) ]
838              
839 4         9 my $end_ix;
840 4 100       15 if ( not defined $end_ordinal ) {
841 3         7 $end_ix = $start_ix;
842             }
843             else {
844 1         3 my $end_ordinal_argument = $end_ordinal;
845 1 50       4 if ( $end_ordinal < 0 ) {
846 1         3 $end_ordinal += $last_ordinal + 1;
847             }
848 1 50       3 if ( $end_ordinal < 0 ) {
849             return
850 0         0 "Marpa::PP::Recognizer::show_progress end index is $end_ordinal_argument, "
851             . sprintf ' must be in range %d-%d', -( $last_ordinal + 1 ),
852             $last_ordinal;
853             } ## end if ( $end_ordinal < 0 )
854             $end_ix =
855 1         3 $earley_sets_by_ordinal->[$end_ordinal]
856             ->[Marpa::PP::Internal::Earley_Set::ORDINAL];
857             } ## end else [ if ( not defined $end_ordinal ) ]
858              
859 4         13 my $text = q{};
860 4         22 for my $current ( $start_ix .. $end_ix ) {
861 7         16 my %by_rule_by_position = ();
862 7         27 my $reports = report_progress( $recce, $current );
863              
864 7         14 for my $report ( @{$reports} ) {
  7         16  
865 65         83 my $rule_id =
866             $report->[Marpa::PP::Internal::Progress_Report::RULE_ID];
867 65         68 my $position =
868             $report->[Marpa::PP::Internal::Progress_Report::POSITION];
869 65         76 my $origin =
870             $report->[Marpa::PP::Internal::Progress_Report::ORIGIN];
871              
872 65         224 $by_rule_by_position{$rule_id}->{$position}->{$origin}++;
873             } ## end for my $report ( @{$reports} )
874 7         59 for my $rule_id ( sort { $a <=> $b } keys %by_rule_by_position ) {
  39         69  
875 29         64 my $by_position = $by_rule_by_position{$rule_id};
876 29         37 for my $position ( sort { $a <=> $b } keys %{$by_position} ) {
  10         31  
  29         97  
877 39         71 my $raw_origins = $by_position->{$position};
878 39         57 my @origins = sort { $a <=> $b } keys %{$raw_origins};
  73         95  
  39         114  
879 39         63 my $origins_count = scalar @origins;
880 39         53 my $origin_desc;
881 39 100       72 if ( $origins_count <= 3 ) {
882 37         74 $origin_desc = join q{,}, @origins;
883             }
884             else {
885 2         8 $origin_desc = $origins[0] . q{...} . $origins[-1];
886             }
887              
888 39         60 my $rule = $rules->[$rule_id];
889 39         68 my $rhs_length =
890 39         52 scalar @{ $rule->[Marpa::PP::Internal::Rule::RHS] };
891 39         47 my $item_text;
892              
893             # flag indicating whether we need to show the dot in the rule
894 39 100       98 if ( $position >= $rhs_length ) {
    100          
895 18         32 $item_text .= "F$rule_id";
896             }
897             elsif ($position) {
898 5         13 $item_text .= "R$rule_id:$position";
899             }
900             else {
901 16         33 $item_text .= "P$rule_id";
902             }
903 39 100       91 $item_text .= " x$origins_count" if $origins_count > 1;
904 39         68 $item_text .= q{ @} . $origin_desc . q{-} . $current . q{ };
905 39         119 $item_text .= Marpa::PP::show_dotted_rule( $rule, $position );
906 39         233 $text .= $item_text . "\n";
907             } ## end for my $position ( sort { $a <=> $b } keys %{...})
908             } ## end for my $rule_id ( sort { $a <=> $b } keys ...)
909             } ## end for my $current ( $start_ix .. $end_ix )
910 4         21 return $text;
911             } ## end sub Marpa::PP::Recognizer::show_progress
912              
913             sub report_progress {
914 7     7   14 my ( $recce, $current ) = @_;
915              
916 7         17 my $earley_set =
917             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS]->[$current];
918 7         14 my $earley_items = $earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS];
919              
920             # Duplicates are not dealt with here -- they are more easily dealt
921             # with when sorting, which is done in the display logic.
922 7         30 my @worklist = ();
923 7         13 for my $earley_item ( @{$earley_items} ) {
  7         18  
924 30         53 my $AHFA_state =
925             $earley_item->[Marpa::PP::Internal::Earley_Item::STATE];
926 30         48 my $origin = $earley_item->[Marpa::PP::Internal::Earley_Item::ORIGIN];
927 30         82 push @worklist, [ $origin, $AHFA_state ];
928 30   100     101 my $leo_links =
929             $earley_item->[Marpa::PP::Internal::Earley_Item::LEO_LINKS] // [];
930 30         36 for my $leo_link ( @{$leo_links} ) {
  30         92  
931              
932             # The predecessor is the Leo item, which
933             # needs to be expanded
934 2         6 my $leo_item = $leo_link->[0];
935 2         6 while ($leo_item) {
936 22         36 my $leo_symbol_name = $leo_item
937             ->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
938 22         31 my $leo_base_item =
939             $leo_item->[Marpa::PP::Internal::Leo_Item::BASE];
940 22         51 my ( undef, $base_to_state ) =
941 22         25 @{ $leo_base_item
942             ->[Marpa::PP::Internal::Earley_Item::STATE]
943             ->[Marpa::PP::Internal::AHFA::TRANSITION]
944             ->{$leo_symbol_name} };
945 22         66 push @worklist,
946             [
947             $leo_item->[Marpa::PP::Internal::Leo_Item::SET],
948             $base_to_state
949             ];
950 22         71 $leo_item =
951             $leo_item->[Marpa::PP::Internal::Leo_Item::PREDECESSOR];
952             } ## end while ($leo_item)
953             } ## end for my $leo_link ( @{$leo_links} )
954             } ## end for my $earley_item ( @{$earley_items} )
955              
956 7         18 my @progress_report = ();
957 7         15 for my $workitem (@worklist) {
958 52         94 my ( $origin, $AHFA_state ) = @{$workitem};
  52         85  
959 52         82 my $NFA_states = $AHFA_state->[Marpa::PP::Internal::AHFA::NFA_STATES];
960 52 50       112 if ( not $NFA_states ) {
961 0         0 Marpa::PP::exception(
962             'Cannot report progress of Marpa::PP::Recognizer: it is stripped'
963             );
964             }
965 52         62 NFA_STATE: for my $NFA_state ( @{$NFA_states} ) {
  52         88  
966 65         95 my $LR0_item = $NFA_state->[Marpa::PP::Internal::NFA::ITEM];
967 65         88 my $marpa_rule = $LR0_item->[Marpa::PP::Internal::LR0_item::RULE];
968 65         76 my $marpa_position =
969             $LR0_item->[Marpa::PP::Internal::LR0_item::POSITION];
970              
971 65         77 my $original_rule = $marpa_rule;
972 65 50       159 if ( $marpa_rule->[Marpa::PP::Internal::Rule::VIRTUAL_START] ) {
973 0         0 $original_rule =
974             $marpa_rule->[Marpa::PP::Internal::Rule::ORIGINAL_RULE];
975             }
976              
977 65         87 my $original_rhs =
978             $original_rule->[Marpa::PP::Internal::Rule::RHS];
979              
980             # position in original rule, to be calculated
981 65         57 my $original_position;
982 65 50       132 if ( my $chaf_start =
983             $marpa_rule->[Marpa::PP::Internal::Rule::VIRTUAL_START] )
984             {
985 0         0 my $chaf_rhs = $marpa_rule->[Marpa::PP::Internal::Rule::RHS];
986 0         0 $original_position =
987 0         0 $marpa_position >= scalar @{$chaf_rhs}
988 0 0       0 ? scalar @{$original_rhs}
989             : ( $chaf_start + $marpa_position );
990             } ## end if ( my $chaf_start = $marpa_rule->[...])
991 65   66     207 $original_position //= $marpa_position;
992 65         78 my $rule_id = $original_rule->[Marpa::PP::Internal::Rule::ID];
993 65         338 push @progress_report,
994             [ $rule_id, $original_position, $origin, $current ];
995             } ## end for my $NFA_state ( @{$NFA_states} )
996             } ## end for my $workitem (@worklist)
997 7         39 return \@progress_report;
998             } ## end sub report_progress
999              
1000             sub Marpa::PP::Recognizer::read {
1001              
1002             # For efficiency, not unpacked
1003             # my ( $recce, $symbol_name, $value ) = @_;
1004 1788     1788 1 10710 my $recce = shift;
1005             return
1006 1788 100       7115 defined $recce->alternative(@_) ? $recce->earleme_complete() : undef;
1007             } ## end sub Marpa::PP::Recognizer::read
1008              
1009             sub Marpa::PP::Recognizer::alternative {
1010              
1011 1961     1961 0 6745 my ( $recce, $symbol_name, $value, $length ) = @_;
1012              
1013 1961 50       6712 Marpa::PP::exception(
1014             'Missing recognizer argument for Marpa::PP::Recognizer::alternative()'
1015             ) if not defined $recce;
1016              
1017             {
1018 1961         2812 my $recce_class = ref $recce;
  1961         3930  
1019 1961   50     4955 $recce_class //= 'not defined';
1020 1961 50       7270 Marpa::PP::exception(
1021             "recognizer argument of alternative() has wrong class\n",
1022             'Class of argument is ',
1023             $recce_class,
1024             "\n",
1025             "Class of argument should be Marpa::PP::Recognizer\n"
1026             ) if $recce_class ne 'Marpa::PP::Recognizer';
1027             }
1028              
1029 1961         13780 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
1030 1961         6245 local $Marpa::PP::Internal::TRACE_FH = my $trace_fh =
1031             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE];
1032 1961         3591 my $trace_terminals =
1033             $recce->[Marpa::PP::Internal::Recognizer::TRACE_TERMINALS];
1034 1961         3095 my $warnings = $recce->[Marpa::PP::Internal::Recognizer::WARNINGS];
1035              
1036 1961 50       6580 Marpa::PP::exception('Attempt to read token after parsing is finished')
1037             if $recce->[Marpa::PP::Internal::Recognizer::FINISHED];
1038              
1039 1961 50       4927 Marpa::PP::exception('Attempt to read token when parsing is exhausted')
1040             if $recce->[Marpa::PP::Internal::Recognizer::EXHAUSTED];
1041              
1042 1961         3911 my $terminal_names =
1043             $grammar->[Marpa::PP::Internal::Grammar::TERMINAL_NAMES];
1044              
1045 1961         4049 my $current_earleme =
1046             $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
1047 1961         2739 my $earley_set_list =
1048             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS];
1049 1961         2965 my $AHFA = $grammar->[Marpa::PP::Internal::Grammar::AHFA];
1050 1961         3443 my $symbols = $grammar->[Marpa::PP::Internal::Grammar::SYMBOLS];
1051 1961         2615 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
1052              
1053 1961         3823 my $postdot_here =
1054             $earley_set_list->[$current_earleme]
1055             ->[Marpa::PP::Internal::Earley_Set::POSTDOT];
1056              
1057 1961 50 33     10427 if ( not defined $symbol_name or not $terminal_names->{$symbol_name} ) {
1058 0 0       0 my $problem =
1059             defined $symbol_name
1060             ? qq{Token name "$symbol_name" is not the name of a terminal symbol}
1061             : q{Undef given, instead of the name of a terminal symbol};
1062 0         0 Marpa::PP::exception($problem);
1063             } ## end if ( not defined $symbol_name or not $terminal_names...)
1064              
1065 1961   100     10209 $length //= 1;
1066              
1067             # Make sure it's an allowed terminal symbol.
1068 1961         3399 my $postdot_data = $postdot_here->{$symbol_name};
1069 1961 100       5499 if ( not $postdot_data ) {
1070 1 50       5 if ($trace_terminals) {
1071 1 50       3 say {$trace_fh} qq{Rejected "$symbol_name" at $current_earleme-}
  1         11  
1072             . ( $length + $current_earleme )
1073             or Marpa::PP::exception("Cannot print: $ERRNO");
1074             }
1075 1         6 return;
1076             } ## end if ( not $postdot_data )
1077              
1078 1960         2800 my $value_ref = \($value);
1079              
1080 1960 100       5660 if ( $length & Marpa::PP::Internal::Recognizer::EARLEME_MASK ) {
1081 1         463 Marpa::PP::exception(
1082             'Token ' . $symbol_name . " is too long\n",
1083             " Token starts at $current_earleme, and its length is $length\n"
1084             );
1085             } ## end if ( $length & Marpa::PP::Internal::Recognizer::EARLEME_MASK)
1086              
1087 1959 50       4831 if ( $length <= 0 ) {
1088 0         0 Marpa::PP::exception(
1089             'Token ' . $symbol_name . ' has non-positive length ' . $length );
1090             }
1091              
1092 1959         3066 my $end_earleme = $current_earleme + $length;
1093              
1094 1959 50       4295 Marpa::PP::exception(
1095             'Token ' . $symbol_name . " makes parse too long\n",
1096             " Token starts at $current_earleme, and its length is $length\n"
1097             ) if $end_earleme & Marpa::PP::Internal::Recognizer::EARLEME_MASK;
1098              
1099 1959         2477 my $accepted = 0; # for trace_terminals
1100 1959         3702 my $target_ix = $current_earleme + $length;
1101 1959   100     11034 my $target_earley_set = $earley_set_list->[$target_ix] //= [];
1102 1959   100     10471 my $target_earley_items =
1103             $target_earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS] //= [];
1104 1959   100     12456 my $target_hash =
1105             $target_earley_set->[Marpa::PP::Internal::Earley_Set::HASH] //= {};
1106              
1107 1959         2744 EARLEY_ITEM: for my $postdot_item ( @{$postdot_data} ) {
  1959         14533  
1108              
1109 2352         2867 my $origin;
1110             my @to_states;
1111 2352 100       7524 next EARLEY_ITEM if ref $postdot_item eq $LEO_CLASS;
1112             {
1113 2345         4079 my $state =
  2345         4785  
1114             $postdot_item->[Marpa::PP::Internal::Earley_Item::STATE];
1115 3210         7556 @to_states =
1116 2345         6801 grep {ref}
1117 2345         3196 @{ $state->[Marpa::PP::Internal::AHFA::TRANSITION]
1118             ->{$symbol_name} };
1119 2345 50       5877 next EARLEY_ITEM if not scalar @to_states;
1120 2345         4999 $origin =
1121             $postdot_item->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1122             }
1123              
1124 2345         2709 $accepted++;
1125              
1126 2345         4451 TO_STATE: for my $to_state (@to_states) {
1127 3150         5152 my $reset = $to_state->[Marpa::PP::Internal::AHFA::RESET_ORIGIN];
1128 3150 100       7483 my $new_origin = $reset ? $target_ix : $origin;
1129 3150         4882 my $to_state_id = $to_state->[Marpa::PP::Internal::AHFA::ID];
1130 3150         6838 my $hash_key = join q{:}, $to_state_id, $new_origin;
1131 3150         10089 my $target_item = $target_hash->{$hash_key};
1132 3150 100       6558 if ( defined $target_item ) {
1133 110 100       594 next TO_STATE if $reset;
1134 10 100       25 if ($postdot_item->[Marpa::PP::Internal::Earley_Item::ID] ~~ [
  10         55  
1135             map {
1136 10         22 $_->[0]->[Marpa::PP::Internal::Earley_Item::ID]
1137             } @{
1138             $target_item
1139             ->[Marpa::PP::Internal::Earley_Item::LINKS]
1140             }
1141             ]
1142             )
1143             {
1144 2         713 Marpa::PP::exception(
1145             qq{"$symbol_name" already scanned with length $length at location $current_earleme}
1146             );
1147             } ## end if ( $postdot_item->[...])
1148             } ## end if ( defined $target_item )
1149             else {
1150              
1151 3040         5020 $target_item = [];
1152 3040         7935 $target_item->[Marpa::PP::Internal::Earley_Item::ID] =
1153             $recce
1154             ->[ Marpa::PP::Internal::Recognizer::NEXT_EARLEY_ITEM_ID
1155             ]++;
1156 3040         4517 $target_item->[Marpa::PP::Internal::Earley_Item::STATE] =
1157             $to_state;
1158 3040         7167 $target_item->[Marpa::PP::Internal::Earley_Item::ORIGIN] =
1159             $new_origin;
1160 3040         5615 $target_item->[Marpa::PP::Internal::Earley_Item::LEO_LINKS] =
1161             [];
1162 3040         5408 $target_item->[Marpa::PP::Internal::Earley_Item::LINKS] = [];
1163 3040         5694 $target_item->[Marpa::PP::Internal::Earley_Item::SET] =
1164             $target_ix;
1165 3040         8472 $target_hash->{$hash_key} = $target_item;
1166 3040         3591 push @{$target_earley_items}, $target_item;
  3040         6371  
1167              
1168             } ## end else [ if ( defined $target_item ) ]
1169              
1170 3048 100       10843 next TO_STATE if $reset;
1171              
1172 2343         2947 push @{ $target_item->[Marpa::PP::Internal::Earley_Item::LINKS] },
  2343         12540  
1173             [ $postdot_item, undef, $symbol_name, $value_ref ];
1174             } # for my $to_state
1175              
1176             } ## end for my $postdot_item ( @{$postdot_data} )
1177              
1178 1957 100 66     12246 if ( $accepted
1179             and $target_ix
1180             > $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME] )
1181             {
1182 1897         3023 $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME] =
1183             $target_ix;
1184             } ## end if ( $accepted and $target_ix > $recce->[...])
1185              
1186 1957 100       4374 if ($trace_terminals) {
1187 10 50       22 my $verb = $accepted ? 'Accepted' : 'Rejected';
1188 10 50       10 say {$trace_fh} qq{$verb "$symbol_name" at $current_earleme-}
  10         64  
1189             . ( $length + $current_earleme )
1190             or Marpa::PP::exception("Cannot print: $ERRNO");
1191             } ## end if ($trace_terminals)
1192              
1193 1957         13291 return $current_earleme;
1194              
1195             } ## end sub Marpa::PP::Recognizer::alternative
1196              
1197             # Deprecated -- obsolete
1198             sub Marpa::PP::Recognizer::tokens {
1199              
1200 70     70 1 3918 my ( $recce, $tokens, $token_ix_ref ) = @_;
1201              
1202 70 50       152 Marpa::PP::exception(
1203             'Missing recognizer argument for Marpa::PP::Recognizer::tokens()')
1204             if not defined $recce;
1205              
1206             {
1207 70         80 my $recce_class = ref $recce;
  70         101  
1208 70   50     135 $recce_class //= 'not defined';
1209 70 50       254 Marpa::PP::exception(
1210             "recognizer argument of tokens() has wrong class\n",
1211             'Class of argument is ',
1212             $recce_class,
1213             "\n",
1214             "Class of argument should be Marpa::PP::Recognizer\n"
1215             ) if $recce_class ne 'Marpa::PP::Recognizer';
1216             }
1217              
1218 70 50       369 Marpa::PP::exception('No tokens arg for Marpa::PP::Recognizer::tokens()')
1219             if not defined $tokens;
1220              
1221 70         106 my $mode = $recce->[Marpa::PP::Internal::Recognizer::MODE];
1222 70         67 my $interactive;
1223              
1224 70 50       153 if ( defined $token_ix_ref ) {
1225 0         0 my $ref_type = ref $token_ix_ref;
1226 0 0       0 if ( ref $token_ix_ref ne 'SCALAR' ) {
1227 0 0       0 my $description = $ref_type ? "ref to $ref_type" : 'not a ref';
1228 0         0 Marpa::PP::exception(
1229             "Token index arg for Marpa::PP::Recognizer::tokens is $description, must be ref to SCALAR"
1230             );
1231             } ## end if ( ref $token_ix_ref ne 'SCALAR' )
1232             Marpa::PP::exception(
1233 0 0       0 q{'Tokens index ref for Marpa::PP::Recognizer::tokens allowed only in 'stream' mode}
1234             ) if $mode ne 'stream';
1235 0         0 $interactive = 1;
1236             } ## end if ( defined $token_ix_ref )
1237              
1238 70         99 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
1239 70         170 local $Marpa::PP::Internal::TRACE_FH = my $trace_fh =
1240             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE];
1241 70         86 my $trace_terminals =
1242             $recce->[Marpa::PP::Internal::Recognizer::TRACE_TERMINALS];
1243              
1244 0         0 Marpa::PP::exception('Attempt to scan tokens after parsing is finished')
1245             if $recce->[Marpa::PP::Internal::Recognizer::FINISHED]
1246 70 0 50     152 and scalar @{$tokens};
1247              
1248 0         0 Marpa::PP::exception('Attempt to scan tokens when parsing is exhausted')
1249             if $recce->[Marpa::PP::Internal::Recognizer::EXHAUSTED]
1250 70 0 50     131 and scalar @{$tokens};
1251              
1252 70         93 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
1253              
1254 70         90 my $next_token_earleme = my $last_completed_earleme =
1255             $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
1256              
1257 70   50     249 $token_ix_ref //= \( my $token_ix = 0 );
1258              
1259 70         95 my $token_args = $tokens->[ ${$token_ix_ref} ];
  70         107  
1260              
1261             # If the token list is empty, we will go straight to the
1262             # next token
1263 70 100       96 if ( not scalar @{$tokens} ) { $next_token_earleme++ }
  70         160  
  56         68  
1264              
1265 70         90 EARLEME: while ( ${$token_ix_ref} < scalar @{$tokens} ) {
  87         107  
  87         339  
1266              
1267 17         22 my $current_token_earleme = $last_completed_earleme;
1268              
1269             # At this point, typically, $current_token_earleme,
1270             # $next_token_earleme and $last_completed_earleme are
1271             # all equal.
1272              
1273             # It's not 100% clear whether it's best to leave
1274             # the token_ix_ref pointing at the start of the
1275             # earleme, or at the actual problem token.
1276             # Right now, we set it at the actual problem
1277             # token, which is probably what will turn out
1278             # to be easiest.
1279             # my $first_ix_of_this_earleme = ${$token_ix_ref};
1280              
1281             # For as long the $next_token_earleme does not advance ...
1282 17         36 TOKEN: while ( $current_token_earleme == $next_token_earleme ) {
1283              
1284             # ... or until we run out of tokens
1285 32 100       41 last TOKEN if not my $token_args = $tokens->[ ${$token_ix_ref} ];
  32         81  
1286 0         0 Marpa::PP::exception(
1287             'Tokens must be array refs: token #',
1288 19 50       64 ${$token_ix_ref}, " is $token_args\n",
1289             ) if ref $token_args ne 'ARRAY';
1290 19         24 ${$token_ix_ref}++;
  19         92  
1291 19         21 my ( $symbol_name, $value, $length, $offset ) = @{$token_args};
  19         41  
1292              
1293 19 50       44 Marpa::PP::exception(
1294             "Attempt to add token '$symbol_name' at location where processing is complete:\n",
1295             " Add attempted at $current_token_earleme\n",
1296             " Processing complete to $last_completed_earleme\n"
1297             ) if $current_token_earleme < $last_completed_earleme;
1298              
1299 19         38 my $symbol_id = $symbol_hash->{$symbol_name};
1300 19 50       45 if ( not defined $symbol_id ) {
1301 0 0       0 say {$trace_fh}
  0         0  
1302             qq{Attempted to add non-existent symbol named "$symbol_name" at $last_completed_earleme\n}
1303             or Marpa::PP::exception("Cannot print: $ERRNO");
1304             }
1305              
1306 19         62 my $result = $recce->alternative( $symbol_name, $value, $length );
1307              
1308 19 50       42 if ( not defined $result ) {
1309 0 0       0 if ( not $interactive ) {
1310 0         0 Marpa::PP::exception(
1311             qq{Terminal "$symbol_name" received when not expected}
1312             );
1313             }
1314              
1315             # Current token didn't actually work, so back out
1316             # the increment
1317 0         0 ${$token_ix_ref}--;
  0         0  
1318              
1319 0         0 return $recce->status();
1320             } ## end if ( not defined $result )
1321              
1322 19   100     43 $offset //= 1;
1323 19 50       39 Marpa::PP::exception(
1324             'Token ' . $symbol_name . " has negative offset\n",
1325             " Token starts at $last_completed_earleme, and its length is $length\n",
1326             " Tokens are required to be in sequence by location\n",
1327             ) if $offset < 0;
1328 19         59 $next_token_earleme += $offset;
1329              
1330             } ## end while ( $current_token_earleme == $next_token_earleme )
1331              
1332             # We've ended the loop for the tokens at $current_token_earleme.
1333             # It is possible that $next_token_earleme did not advance,
1334             # and the loop ended when we ran out of tokens in the
1335             # argument list.
1336             # We arrange it so that the last descriptor in
1337             # a tokens call always advances the current earleme by at least one --
1338             # as if it had incremented $next_token_earleme
1339 17         24 $current_token_earleme++;
1340 17 50       37 $current_token_earleme = $next_token_earleme
1341             if $next_token_earleme > $current_token_earleme;
1342              
1343 17         48 $recce->earleme_complete();
1344 17         27 $last_completed_earleme++;
1345              
1346             } ## end while ( ${$token_ix_ref} < scalar @{$tokens} )
1347              
1348 70 100       157 if ( $mode eq 'stream' ) {
1349 69         130 while ( $last_completed_earleme < $next_token_earleme ) {
1350 56         125 $recce->earleme_complete();
1351 56         137 $last_completed_earleme++;
1352             }
1353             } ## end if ( $mode eq 'stream' )
1354              
1355 70 100       144 if ( $mode eq 'default' ) {
1356 1         5 while ( $last_completed_earleme
1357             < $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME] )
1358             {
1359 0         0 $recce->earleme_complete();
1360 0         0 $last_completed_earleme++;
1361             } ## end while ( $last_completed_earleme < $recce->[...])
1362 1         3 $recce->[Marpa::PP::Internal::Recognizer::FINISHED] = 1;
1363             } ## end if ( $mode eq 'default' )
1364              
1365 70         161 return $recce->status();
1366              
1367             } ## end sub Marpa::PP::Recognizer::tokens
1368              
1369             # Perform the completion step on an earley set
1370              
1371             sub Marpa::PP::Recognizer::end_input {
1372 103     103 0 850 my ($recce) = @_;
1373 103         199 my $last_completed_earleme =
1374             $recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
1375 103         216 my $furthest_earleme =
1376             $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME];
1377 103         344 while ( $last_completed_earleme < $furthest_earleme ) {
1378 20030         50286 $recce->earleme_complete();
1379 20030         57628 $last_completed_earleme++;
1380             }
1381 103         178 $recce->[Marpa::PP::Internal::Recognizer::FINISHED] = 1;
1382 103         230 return 1;
1383             } ## end sub Marpa::PP::Recognizer::end_input
1384              
1385             sub Marpa::PP::Recognizer::earleme_complete {
1386 22048     22048 0 36557 my ($recce) = @_;
1387              
1388 22048         37326 my $recce_c = $recce->[Marpa::PP::Internal::Recognizer::C];
1389 22048         49705 local $Marpa::PP::Internal::TRACE_FH =
1390             $recce->[Marpa::PP::Internal::Recognizer::TRACE_FILE_HANDLE];
1391 22048         42980 my $grammar = $recce->[Marpa::PP::Internal::Recognizer::GRAMMAR];
1392 22048         32823 my $AHFA = $grammar->[Marpa::PP::Internal::Grammar::AHFA];
1393 22048         33690 my $symbol_hash = $grammar->[Marpa::PP::Internal::Grammar::SYMBOL_HASH];
1394 22048         31120 my $symbols = $grammar->[Marpa::PP::Internal::Grammar::SYMBOLS];
1395 22048         31230 my $earley_set_list =
1396             $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS];
1397              
1398 22048         30255 my $terminal_names =
1399             $grammar->[Marpa::PP::Internal::Grammar::TERMINAL_NAMES];
1400 22048         32252 my $too_many_earley_items =
1401             $recce->[Marpa::PP::Internal::Recognizer::TOO_MANY_EARLEY_ITEMS];
1402 22048         30126 my $trace_earley_sets =
1403             $recce->[Marpa::PP::Internal::Recognizer::TRACE_EARLEY_SETS];
1404 22048   100     90356 my $trace_terminals =
1405             $recce->[Marpa::PP::Internal::Recognizer::TRACE_TERMINALS] // 0;
1406              
1407 22048         36380 my $earleme_to_complete =
1408             ++$recce->[Marpa::PP::Internal::Recognizer::LAST_COMPLETED_EARLEME];
1409              
1410 22048   100     121266 my $earley_set = $earley_set_list->[$earleme_to_complete] //= [];
1411 22048   100     118491 my $earley_items =
1412             $earley_set->[Marpa::PP::Internal::Earley_Set::ITEMS] //= [];
1413 22048   100     109579 my $earley_hash = $earley_set->[Marpa::PP::Internal::Earley_Set::HASH] //=
1414             {};
1415 22048   50     93359 my $postdot_here =
1416             $earley_set->[Marpa::PP::Internal::Earley_Set::POSTDOT] //= {};
1417              
1418             # Important: more earley sets can be added in the loop
1419 22048         33399 my $earley_set_ix = -1;
1420 22048         48536 EARLEY_ITEM: while (1) {
1421              
1422 39315         111930 my $earley_item = $earley_items->[ ++$earley_set_ix ];
1423 39315 100       111252 last EARLEY_ITEM if not defined $earley_item;
1424              
1425 17267         23249 my ( $state, $parent ) = @{$earley_item}[
  17267         39979  
1426             Marpa::PP::Internal::Earley_Item::STATE,
1427             Marpa::PP::Internal::Earley_Item::ORIGIN
1428             ];
1429 17267         24977 my $state_id = $state->[Marpa::PP::Internal::AHFA::ID];
1430              
1431 17267 100       41230 next EARLEY_ITEM if $earleme_to_complete == $parent;
1432              
1433 16095         39649 LHS_SYMBOL:
1434 16095         18202 for my $lhs_symbol (
1435             @{ $state->[Marpa::PP::Internal::AHFA::COMPLETE_LHS] } )
1436             {
1437 14971         40349 my $postdot_data =
1438             $earley_set_list->[$parent]
1439             ->[Marpa::PP::Internal::Earley_Set::POSTDOT]->{$lhs_symbol};
1440 14971 100       32674 next LHS_SYMBOL if not defined $postdot_data;
1441 14062         31916 PARENT_ITEM:
1442 14062         15077 for my $postdot_item ( @{$postdot_data} ) {
1443 18060         25229 my $parent_origin;
1444             my @transition_states;
1445              
1446 18060         38015 my $postdot_item_is_leo = ref $postdot_item eq $LEO_CLASS;
1447 18060 100       32822 if ($postdot_item_is_leo) {
1448 1828         3709 $parent_origin = $postdot_item
1449             ->[Marpa::PP::Internal::Leo_Item::ORIGIN];
1450 1828         4587 @transition_states = $postdot_item
1451             ->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE];
1452             } ## end if ($postdot_item_is_leo)
1453             else {
1454 16232         29834 my $parent_state = $postdot_item
1455             ->[Marpa::PP::Internal::Earley_Item::STATE];
1456 25398         85431 @transition_states =
1457 16232         55437 grep {ref}
1458 16232         23428 @{ $parent_state
1459             ->[Marpa::PP::Internal::AHFA::TRANSITION]
1460             ->{$lhs_symbol} };
1461 16232         30966 $parent_origin = $postdot_item
1462             ->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1463             } ## end else [ if ($postdot_item_is_leo) ]
1464              
1465             TRANSITION_STATE:
1466 18060         46056 for my $transition_state (@transition_states) {
1467 20010         33077 my $reset = $transition_state
1468             ->[Marpa::PP::Internal::AHFA::RESET_ORIGIN];
1469 20010 100       47321 my $origin =
1470             $reset
1471             ? $earleme_to_complete
1472             : $parent_origin;
1473 20010         57039 my $transition_state_id =
1474             $transition_state->[Marpa::PP::Internal::AHFA::ID];
1475 20010         61648 my $name = sprintf
1476             'S%d@%d-%d',
1477             $transition_state_id, $origin, $earleme_to_complete;
1478 20010         38167 my $hash_key = join q{:}, $transition_state_id, $origin;
1479 20010         33123 my $target_item = $earley_hash->{$hash_key};
1480 20010 100       51711 if ( not defined $target_item ) {
1481 14231         40622 $target_item = [];
1482 14231         34399 $target_item->[Marpa::PP::Internal::Earley_Item::ID] =
1483             $recce->[
1484             Marpa::PP::Internal::Recognizer::NEXT_EARLEY_ITEM_ID
1485             ]++;
1486 14231         20784 $target_item
1487             ->[Marpa::PP::Internal::Earley_Item::STATE] =
1488             $transition_state;
1489 14231         35337 $target_item
1490             ->[Marpa::PP::Internal::Earley_Item::ORIGIN] =
1491             $origin;
1492 14231         40114 $target_item
1493             ->[Marpa::PP::Internal::Earley_Item::LEO_LINKS] =
1494             [];
1495 14231         34730 $target_item
1496             ->[Marpa::PP::Internal::Earley_Item::LINKS] = [];
1497 14231         26407 $target_item->[Marpa::PP::Internal::Earley_Item::SET]
1498             = $earleme_to_complete;
1499 14231         66039 $earley_hash->{$hash_key} = $target_item;
1500 14231         16800 push @{$earley_items}, $target_item;
  14231         40998  
1501             } # unless defined $target_item
1502 20010 100       68219 next TRANSITION_STATE if $reset;
1503 18060 100       42537 if ($postdot_item_is_leo) {
1504 1828         10434 push @{
1505 1828         2255 $target_item->[
1506             Marpa::PP::Internal::Earley_Item::LEO_LINKS
1507             ]
1508             },
1509             [ $postdot_item, $earley_item, $lhs_symbol ];
1510              
1511             # If we do the Leo item, do *ONLY* the Leo item
1512 1828         15463 last PARENT_ITEM;
1513             } ## end if ($postdot_item_is_leo)
1514             else {
1515 16232         19523 push @{ $target_item
  16232         145439  
1516             ->[Marpa::PP::Internal::Earley_Item::LINKS] },
1517             [ $postdot_item, $earley_item, $lhs_symbol ];
1518             }
1519             } # TRANSITION_STATE
1520              
1521             } # PARENT_ITEM
1522             } # LHS_SYMBOL
1523              
1524             } # EARLEY_ITEM
1525              
1526 22048 50 33     76289 if ( $too_many_earley_items >= 0
  22048         110119  
1527             and ( my $item_count = scalar @{$earley_items} )
1528             >= $too_many_earley_items )
1529             {
1530 0 0       0 if ( $recce->[Marpa::PP::Internal::Recognizer::WARNINGS] ) {
1531 0 0       0 say {$Marpa::PP::Internal::TRACE_FH}
  0         0  
1532             "Very large earley set: $item_count items at location $earleme_to_complete"
1533             or Marpa::PP::exception("Cannot print: $ERRNO");
1534             }
1535             } ## end if ( $too_many_earley_items >= 0 and ( my $item_count...))
1536              
1537             # Each possible cause
1538             # link is only visited once.
1539             # It may be paired with several different predecessors.
1540             # The cause may complete several different LHS symbols
1541             # and Marpa::PP will seek predecessors for each at
1542             # the parent location.
1543             # Different completed LHS symbols might be postdot
1544             # symbols for the same predecessor Earley item.
1545             # For this reason,
1546             # predecessor-cause pairs might not be unique
1547             # within an Earley item.
1548             #
1549             # This is not an issue for unambiguous parsing.
1550             # It *IS* an issue for iterating ambiguous parses.
1551              
1552 22048 50       51095 if ($trace_earley_sets) {
1553 0 0       0 print {$Marpa::PP::Internal::TRACE_FH}
  0         0  
1554             "=== Earley set $earleme_to_complete\n"
1555             or Marpa::PP::exception("Cannot print: $ERRNO");
1556 0 0       0 print {$Marpa::PP::Internal::TRACE_FH}
  0         0  
1557             Marpa::PP::show_earley_set($earley_set)
1558             or Marpa::PP::exception("Cannot print: $ERRNO");
1559             } ## end if ($trace_earley_sets)
1560              
1561 22048         33837 for my $earley_item ( @{$earley_items} ) {
  22048         51848  
1562 17267         40452 my $state = $earley_item->[Marpa::PP::Internal::Earley_Item::STATE];
1563 17267         34102 my $parent = $earley_item->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1564 17267         17551 for my $postdot_symbol_name (
  17267         68170  
1565             keys %{ $state->[Marpa::PP::Internal::AHFA::TRANSITION] } )
1566             {
1567 39810         67315 push @{ $postdot_here->{$postdot_symbol_name} }, $earley_item;
  39810         162505  
1568             }
1569             } ## end for my $earley_item ( @{$earley_items} )
1570              
1571             # Create the unpopulated Leo items, and put them into a worklist
1572 22048         42888 my @leo_worklist = ();
1573 22048 100       62727 if ( $recce->[Marpa::PP::Internal::Recognizer::USE_LEO] ) {
1574 21978         53949 SYMBOL: for my $postdot_symbol_name ( keys %{$postdot_here} ) {
  21978         101338  
1575 37009         58235 my $postdot_data = $postdot_here->{$postdot_symbol_name};
1576 37009 100       46727 next SYMBOL if scalar @{$postdot_data} != 1;
  37009         110712  
1577 35358         65637 my $earley_item = $postdot_data->[0];
1578 35358         152705 my ( $leo_lhs, $base_to_state ) =
1579 35358         48673 @{ $earley_item->[Marpa::PP::Internal::Earley_Item::STATE]
1580             ->[Marpa::PP::Internal::AHFA::TRANSITION]
1581             ->{$postdot_symbol_name} };
1582              
1583             # Only one transition in the Earley set on this symbol,
1584             # but it is not to a Leo completion.
1585 35358 100       128784 next SYMBOL if ref $leo_lhs;
1586              
1587 4934         19812 my $leo_item = bless [], $LEO_CLASS;
1588              
1589             # $leo_item->[Marpa::PP::Internal::Leo_Item::BASE_TO_STATE] =
1590             # $base_to_state;
1591 4934         10330 $leo_item->[Marpa::PP::Internal::Leo_Item::SET] =
1592             $earleme_to_complete;
1593 4934         8094 $leo_item->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL] =
1594             $postdot_symbol_name;
1595 4934         6486 $leo_item->[Marpa::PP::Internal::Leo_Item::BASE] = $earley_item;
1596              
1597 4934         14014 unshift @{ $postdot_here->{$postdot_symbol_name} }, $leo_item;
  4934         11626  
1598 4934         12723 push @leo_worklist, $postdot_symbol_name;
1599              
1600             } ## end for my $postdot_symbol_name ( keys %{$postdot_here} )
1601             } ## end if ( $recce->[Marpa::PP::Internal::Recognizer::USE_LEO...])
1602              
1603 22048         70818 POSTDOT_SYMBOL: for my $postdot_symbol_name (@leo_worklist) {
1604              
1605 4934         9682 my $leo_item = $postdot_here->{$postdot_symbol_name}->[0];
1606             next POSTDOT_SYMBOL
1607             if
1608 4934 100       12538 defined $leo_item->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE];
1609              
1610             # Find the predecessor LIM
1611 3887         4613 my $base_earley_item =
1612             $leo_item->[Marpa::PP::Internal::Leo_Item::BASE];
1613 3887         6389 my $base_origin =
1614             $base_earley_item->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1615 3887         10735 my ( $leo_transition_symbol, $top_to_state ) =
1616 3887         5797 @{ $base_earley_item->[Marpa::PP::Internal::Earley_Item::STATE]
1617             ->[Marpa::PP::Internal::AHFA::TRANSITION]
1618             ->{$postdot_symbol_name} };
1619 3887         8024 my $predecessor_postdot =
1620             $earley_set_list->[$base_origin]
1621             ->[Marpa::PP::Internal::Earley_Set::POSTDOT]
1622             ->{$leo_transition_symbol};
1623 3887         17640 my $first_postdot_item = $predecessor_postdot->[0];
1624 3887 100       10965 my $predecessor_leo_item =
1625             ref $first_postdot_item eq $LEO_CLASS
1626             ? $first_postdot_item
1627             : undef;
1628              
1629             # If there is a predecessor Leo item and it is populated, populate from the predecessor
1630             # Leo item
1631 3887 100       8065 my $predecessor_top_to_state =
1632             defined $predecessor_leo_item
1633             ? $predecessor_leo_item
1634             ->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE]
1635             : undef;
1636 3887 100       27323 if ( defined $predecessor_top_to_state ) {
1637 1514         2320 $leo_item->[Marpa::PP::Internal::Leo_Item::PREDECESSOR] =
1638             $predecessor_leo_item;
1639 1514         2661 $leo_item->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE] =
1640             $predecessor_top_to_state;
1641 1514         3399 $leo_item->[Marpa::PP::Internal::Leo_Item::ORIGIN] =
1642             $predecessor_leo_item
1643             ->[Marpa::PP::Internal::Leo_Item::ORIGIN];
1644 1514         12354 next POSTDOT_SYMBOL;
1645             } ## end if ( defined $predecessor_top_to_state )
1646              
1647             # If there is no predecessor Leo item, populate from the base Earley item
1648 2373 100       5543 if ( not defined $predecessor_leo_item ) {
1649 1619         3433 $leo_item->[Marpa::PP::Internal::Leo_Item::ORIGIN] = $base_origin;
1650 1619         3286 $leo_item->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE] =
1651             $top_to_state;
1652 1619         4060 next POSTDOT_SYMBOL;
1653             } ## end if ( not defined $predecessor_leo_item )
1654              
1655             # If there is a predecessor, but it is not populated, we need to build a
1656             # predecessor chain of Leo items
1657 754         13887 my @leo_chain = ($postdot_symbol_name);
1658 754         906 BUILD_LEO_CHAIN: while (1) {
1659 1047         1330 my $chain_leo_item = $predecessor_leo_item;
1660 1047         1683 my $chain_leo_transition_symbol = $chain_leo_item
1661             ->[Marpa::PP::Internal::Leo_Item::LEO_POSTDOT_SYMBOL];
1662              
1663             # If this leo item is already on the chain, break here.
1664             # The predecessor Leo item has not yet been updated
1665             # (it and the current Leo item are the same)
1666             # so the predecessor Leo item
1667             # is still correct for the Leo item at the top of the
1668             # Leo item chain.
1669             last BUILD_LEO_CHAIN
1670 1047 50       4310 if $chain_leo_transition_symbol ~~ @leo_chain;
1671              
1672             # Find the new predecessor Leo item
1673 1047         1498 my $chain_base_earley_item =
1674             $chain_leo_item->[Marpa::PP::Internal::Leo_Item::BASE];
1675 1047         14210 my $chain_base_origin = $chain_base_earley_item
1676             ->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1677 1047         3491 my ( $chain_predecessor_leo_transition_symbol,
1678             $chain_top_to_state )
1679 1047         1239 = @{ $chain_base_earley_item
1680             ->[Marpa::PP::Internal::Earley_Item::STATE]
1681             ->[Marpa::PP::Internal::AHFA::TRANSITION]
1682             ->{$chain_leo_transition_symbol} };
1683 1047         2248 my $chain_predecessor_postdot =
1684             $earley_set_list->[$chain_base_origin]
1685             ->[Marpa::PP::Internal::Earley_Set::POSTDOT]
1686             ->{$chain_predecessor_leo_transition_symbol};
1687 1047         1416 my $chain_first_postdot_item = $chain_predecessor_postdot->[0];
1688 1047 100       2762 $predecessor_leo_item =
1689             ref $chain_first_postdot_item eq $LEO_CLASS
1690             ? $chain_first_postdot_item
1691             : undef;
1692              
1693 1047         1681 push @leo_chain, $chain_leo_transition_symbol;
1694              
1695             # No predecessor, so I am forced to break the Leo chain here.
1696 1047 100       3535 last BUILD_LEO_CHAIN if not defined $predecessor_leo_item;
1697              
1698             # A populated predecessor, so I can fully populate the Leo chain.
1699             # Break the Leo chain here.
1700             last BUILD_LEO_CHAIN
1701 458 100       1722 if defined $predecessor_leo_item
1702             ->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE];
1703             } ## end while (1)
1704              
1705 754         2270 while ( my $chain_leo_transition_symbol = pop @leo_chain ) {
1706 1801         3200 my $chain_leo_item =
1707             $postdot_here->{$chain_leo_transition_symbol}->[0];
1708              
1709             # If there is a predecessor Leo item and it is populated, populate from the predecessor
1710             # Leo item
1711 1801 100       3559 my $chain_predecessor_top_to_state =
1712             $predecessor_leo_item
1713             ? $predecessor_leo_item
1714             ->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE]
1715             : undef;
1716 1801 100       3739 if ( defined $chain_predecessor_top_to_state ) {
1717 1212         1950 $chain_leo_item->[Marpa::PP::Internal::Leo_Item::PREDECESSOR]
1718             = $predecessor_leo_item;
1719 1212         2485 $chain_leo_item->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE]
1720             = $chain_predecessor_top_to_state;
1721 1212         2633 $chain_leo_item->[Marpa::PP::Internal::Leo_Item::ORIGIN] =
1722             $predecessor_leo_item
1723             ->[Marpa::PP::Internal::Leo_Item::ORIGIN];
1724             } ## end if ( defined $chain_predecessor_top_to_state )
1725             else {
1726 589         1252 my $chain_base_earley_item =
1727             $chain_leo_item->[Marpa::PP::Internal::Leo_Item::BASE];
1728 589         1090 my $chain_base_origin = $chain_base_earley_item
1729             ->[Marpa::PP::Internal::Earley_Item::ORIGIN];
1730 589         1477 my ( undef, $chain_top_to_state ) =
1731 589         9406 @{ $chain_base_earley_item
1732             ->[Marpa::PP::Internal::Earley_Item::STATE]
1733             ->[Marpa::PP::Internal::AHFA::TRANSITION]
1734             ->{$chain_leo_transition_symbol} };
1735 589         1653 $chain_leo_item->[Marpa::PP::Internal::Leo_Item::ORIGIN] =
1736             $chain_base_origin;
1737 589         1522 $chain_leo_item->[Marpa::PP::Internal::Leo_Item::TOP_TO_STATE]
1738             = $chain_top_to_state;
1739             } ## end else [ if ( defined $chain_predecessor_top_to_state ) ]
1740 1801         16292 $predecessor_leo_item = $chain_leo_item;
1741             } ## end while ( my $chain_leo_transition_symbol = pop @leo_chain)
1742             } ## end for my $postdot_symbol_name (@leo_worklist)
1743              
1744 37209         106936 my @terminals_expected =
1745 22048         43075 grep { $terminal_names->{$_} } keys %{$postdot_here};
  22048         59926  
1746 22048         51880 $recce->[Marpa::PP::Internal::Recognizer::EXPECTED_TERMINALS] =
1747             \@terminals_expected;
1748              
1749 22048   100     116221 $recce->[Marpa::PP::Internal::Recognizer::EXHAUSTED] =
1750             ( scalar @terminals_expected <= 0 )
1751             && $earleme_to_complete
1752             >= $recce->[Marpa::PP::Internal::Recognizer::FURTHEST_EARLEME];
1753              
1754 22048 100       57546 if ( $trace_terminals > 1 ) {
1755 3         7 for my $terminal ( sort @terminals_expected ) {
1756 4 50       5 say {$Marpa::PP::Internal::TRACE_FH}
  4         26  
1757             qq{Expecting "$terminal" at $earleme_to_complete}
1758             or Marpa::PP::exception("Cannot print: $ERRNO");
1759             }
1760             } ## end if ( $trace_terminals > 1 )
1761              
1762 22048 100       32069 if ( scalar @{$earley_items} > 0 ) {
  22048         83161  
1763 1906         3504 my $ordinal =
1764             $recce->[Marpa::PP::Internal::Recognizer::NEXT_ORDINAL]++;
1765 1906         3417 $earley_set->[Marpa::PP::Internal::Earley_Set::ORDINAL] = $ordinal;
1766 1906         4311 $recce->[Marpa::PP::Internal::Recognizer::EARLEY_SETS_BY_ORDINAL]
1767             ->[$ordinal] = $earley_set;
1768             } ## end if ( scalar @{$earley_items} > 0 )
1769              
1770 22048         87362 return scalar @terminals_expected;
1771              
1772             } ## end sub Marpa::PP::Recognizer::earleme_complete
1773              
1774             1;