File Coverage

blib/lib/Text/Balanced/Marpa.pm
Criterion Covered Total %
statement 225 277 81.2
branch 73 114 64.0
condition 14 26 53.8
subroutine 23 23 100.0
pod 2 3 66.6
total 337 443 76.0


line stmt bran cond sub pod time code
1             package Text::Balanced::Marpa;
2              
3 17     17   1161669 use strict;
  17         178  
  17         550  
4 17     17   8635 use utf8;
  17         208  
  17         101  
5 17     17   516 use warnings;
  17         34  
  17         553  
6 17     17   87 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  17         33  
  17         715  
7 17     17   6325 use open qw(:std :utf8); # Undeclared streams in UTF-8.
  17         16280  
  17         156  
8              
9 17         226 use Const::Exporter constants =>
10             [
11             nothing_is_fatal => 0, # The default.
12             print_errors => 1,
13             print_warnings => 2,
14             print_debugs => 4,
15             overlap_is_fatal => 8,
16             nesting_is_fatal => 16,
17             ambiguity_is_fatal => 32,
18             exhaustion_is_fatal => 64,
19 17     17   12015 ];
  17         481407  
20              
21 17     17   27150 use Marpa::R2;
  17         2607800  
  17         1026  
22              
23 17     17   10660 use Moo;
  17         127414  
  17         104  
24              
25 17     17   35215 use Tree;
  17         102775  
  17         740  
26              
27 17     17   11059 use Types::Standard qw/Any ArrayRef HashRef Int ScalarRef Str/;
  17         1280155  
  17         224  
28              
29 17     17   25699 use Try::Tiny;
  17         46  
  17         78316  
30              
31             has bnf =>
32             (
33             default => sub{return ''},
34             is => 'rw',
35             isa => Any,
36             required => 0,
37             );
38              
39             has close =>
40             (
41             default => sub{return []},
42             is => 'rw',
43             isa => ArrayRef,
44             required => 0,
45             );
46              
47             has delimiter_action =>
48             (
49             default => sub{return {} },
50             is => 'rw',
51             isa => HashRef,
52             required => 0,
53             );
54              
55             has delimiter_frequency =>
56             (
57             default => sub{return {} },
58             is => 'rw',
59             isa => HashRef,
60             required => 0,
61             );
62              
63             has delimiter_stack =>
64             (
65             default => sub{return []},
66             is => 'rw',
67             isa => ArrayRef,
68             required => 0,
69             );
70              
71             has error_message =>
72             (
73             default => sub{return ''},
74             is => 'rw',
75             isa => Str,
76             required => 0,
77             );
78              
79             has error_number =>
80             (
81             default => sub{return 0},
82             is => 'rw',
83             isa => Int,
84             required => 0,
85             );
86              
87             has escape_char =>
88             (
89             default => sub{return '\\'},
90             is => 'rw',
91             isa => Str,
92             required => 0,
93             );
94              
95             has grammar =>
96             (
97             default => sub {return ''},
98             is => 'rw',
99             isa => Any,
100             required => 0,
101             );
102              
103             has known_events =>
104             (
105             default => sub{return {} },
106             is => 'rw',
107             isa => HashRef,
108             required => 0,
109             );
110              
111             has length =>
112             (
113             default => sub{return 0},
114             is => 'rw',
115             isa => Int,
116             required => 0,
117             );
118              
119             has matching_delimiter =>
120             (
121             default => sub{return {} },
122             is => 'rw',
123             isa => HashRef,
124             required => 0,
125             );
126              
127             has next_few_limit =>
128             (
129             default => sub{return 20},
130             is => 'rw',
131             isa => Int,
132             required => 0,
133             );
134              
135             has node_stack =>
136             (
137             default => sub{return []},
138             is => 'rw',
139             isa => ArrayRef,
140             required => 0,
141             );
142              
143             has open =>
144             (
145             default => sub{return []},
146             is => 'rw',
147             isa => ArrayRef,
148             required => 0,
149             );
150              
151             has options =>
152             (
153             default => sub{return 0},
154             is => 'rw',
155             isa => Int,
156             required => 0,
157             );
158              
159             has pos =>
160             (
161             default => sub{return 0},
162             is => 'rw',
163             isa => Int,
164             required => 0,
165             );
166              
167             has recce =>
168             (
169             default => sub{return ''},
170             is => 'rw',
171             isa => Any,
172             required => 0,
173             );
174              
175             has tree =>
176             (
177             default => sub{return ''},
178             is => 'rw',
179             isa => Any,
180             required => 0,
181             );
182              
183             has text =>
184             (
185             default => sub{return \''}, # Use ' in comment for UltraEdit.
186             is => 'rw',
187             isa => ScalarRef[Str],
188             required => 0,
189             );
190              
191             has uid =>
192             (
193             default => sub{return 0},
194             is => 'rw',
195             isa => Int,
196             required => 0,
197             );
198              
199             our $VERSION = '1.08';
200              
201             # ------------------------------------------------
202              
203             sub BUILD
204             {
205 16     16 0 392 my($self) = @_;
206              
207             # Policy: Event names are always the same as the name of the corresponding lexeme.
208             #
209             # Note: Tokens of the form '_xxx_' are replaced just below, with values returned
210             # by the call to validate_open_close().
211              
212 16         51 my($bnf) = <<'END_OF_GRAMMAR';
213              
214             :default ::= action => [values]
215              
216             lexeme default = latm => 1
217              
218             :start ::= input_text
219              
220             input_text ::= input_string*
221              
222             input_string ::= quoted_text
223             | unquoted_text
224              
225             quoted_text ::= open_delim input_text close_delim
226              
227             unquoted_text ::= text
228              
229             # Lexemes in alphabetical order.
230              
231             delimiter_char ~ [_delimiter_]
232              
233             :lexeme ~ close_delim pause => before event => close_delim
234             _close_
235              
236             escaped_char ~ '_escape_char_' delimiter_char # Use ' in comment for UltraEdit.
237              
238             # Warning: Do not add '+' to this set, even though it speeds up things.
239             # The problem is that the set then gobbles up any '\', so the following
240             # character is no longer recognized as being escaped.
241             # Trapping the exception then generated would be possible.
242              
243             non_quote_char ~ [^_delimiter_] # Use " in comment for UltraEdit.
244              
245             :lexeme ~ open_delim pause => before event => open_delim
246             _open_
247              
248             :lexeme ~ text pause => before event => text
249             text ~ escaped_char
250             | non_quote_char
251             END_OF_GRAMMAR
252              
253 16         83 my($hashref) = $self -> _validate_open_close;
254 16         273 $bnf =~ s/_open_/$$hashref{_open_}/;
255 16         220 $bnf =~ s/_close_/$$hashref{_close_}/;
256 16         227 $bnf =~ s/_delimiter_/$$hashref{_delimiter_}/g;
257 16         343 my($escape_char) = $self -> escape_char;
258              
259 16 50       181 if ($escape_char eq "'")
260             {
261 0         0 my($message) = 'Single-quote is forbidden as an escape character';
262              
263 0         0 $self -> error_message($message);
264 0         0 $self -> error_number(7);
265              
266             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
267              
268 0         0 die "Error: $message\n";
269             }
270              
271 16         217 $bnf =~ s/_escape_char_/$escape_char/g;
272              
273 16         401 $self -> bnf($bnf);
274 16         740 $self -> grammar
275             (
276             Marpa::R2::Scanless::G -> new
277             ({
278             source => \$self -> bnf
279             })
280             );
281              
282             # This hash does not contain the key "'exhausted" because the exhaustion
283             # event is everywhere handled explicitly. Yes, it has a leading quote.
284              
285 16         3270749 my(%event);
286              
287 16         410 for my $line (split(/\n/, $self -> bnf) )
288             {
289 680 100       2010 $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
290             }
291              
292 16         413 $self -> known_events(\%event);
293              
294             } # End of BUILD.
295              
296             # ------------------------------------------------
297              
298             sub _add_daughter
299             {
300 438     438   945 my($self, $name, $attributes) = @_;
301 438         8093 $attributes = {%$attributes, uid => $self -> uid($self -> uid + 1)};
302 438         20301 my($stack) = $self -> node_stack;
303 438         3535 my($node) = Tree -> new($name);
304              
305 438         20857 $node -> meta($attributes);
306              
307 438         6577 $$stack[$#$stack] -> add_child({}, $node);
308              
309             } # End of _add_daughter.
310              
311             # ------------------------------------------------
312              
313             sub next_few_chars
314             {
315 1289     1289 1 2443 my($self, $stringref, $offset) = @_;
316 1289         20830 my($s) = substr($$stringref, $offset, $self -> next_few_limit);
317 1289         8933 $s =~ tr/\n/ /;
318 1289         3431 $s =~ s/^\s+//;
319 1289         3841 $s =~ s/\s+$//;
320              
321 1289         3151 return $s;
322              
323             } # End of next_few_chars.
324              
325             # ------------------------------------------------
326              
327             sub parse
328             {
329 87     87 1 36737 my($self, %opts) = @_;
330              
331             # Emulate parts of new(), which makes things a bit earier for the caller.
332              
333 87 50       386 $self -> options($opts{options}) if (defined $opts{options});
334 87 100       2799 $self -> text($opts{text}) if (defined $opts{text});
335 87 50       3542 $self -> pos($opts{pos}) if (defined $opts{pos});
336 87 50       302 $self -> length($opts{length}) if (defined $opts{length});
337              
338 87         1749 $self -> recce
339             (
340             Marpa::R2::Scanless::R -> new
341             ({
342             exhaustion => 'event',
343             grammar => $self -> grammar,
344             ranking_method => 'high_rule_only',
345             })
346             );
347              
348             # Since $self -> node_stack has not been initialized yet,
349             # we can't call _add_daughter() until after this statement.
350              
351 87         36540 $self -> uid(0);
352 87         3042 $self -> tree(Tree -> new('root') );
353 87         9933 $self -> tree -> meta({text => '', uid => $self -> uid});
354 87         4761 $self -> node_stack([$self -> tree -> root]);
355              
356             # Return 0 for success and 1 for failure.
357              
358 87         5428 my($result) = 0;
359              
360 87         181 my($message);
361              
362             try
363             {
364 87 50   87   4726 if (defined (my $value = $self -> _process) )
365             {
366             }
367             else
368             {
369 0         0 $result = 1;
370              
371 0 0       0 print "Error: Parse failed\n" if ($self -> options & print_errors);
372             }
373             }
374             catch
375             {
376 3     3   94 $result = 1;
377              
378 3 50       63 print "Error: Parse failed. ${_}" if ($self -> options & print_errors);
379 87         879 };
380              
381             # Return 0 for success and 1 for failure.
382              
383 87         208894 return $result;
384              
385             } # End of parse.
386              
387             # ------------------------------------------------
388              
389             sub _pop_node_stack
390             {
391 104     104   235 my($self) = @_;
392 104         1742 my($stack) = $self -> node_stack;
393              
394 104         640 pop @$stack;
395              
396 104         1603 $self -> node_stack($stack);
397              
398             } # End of _pop_node_stack.
399              
400             # ------------------------------------------------
401              
402             sub _process
403             {
404 87     87   460 my($self) = @_;
405 87   50     1941 my($stringref) = $self -> text || \''; # Allow for undef. Use ' in comment for UltraEdit.
406 87         2027 my($pos) = $self -> pos;
407 87         699 my($first_pos) = $pos;
408 87         206 my($total_length) = length($$stringref);
409 87   100     1480 my($length) = $self -> length || $total_length;
410 87         882 my($text) = '';
411 87         166 my($format) = "%-20s %5s %5s %5s %-20s %-20s\n";
412 87         176 my($last_event) = '';
413 87         1432 my($matching_delimiter) = $self -> matching_delimiter;
414              
415 87 50       1857 if ($self -> options & print_debugs)
416             {
417 0         0 print "Length of input: $length. Input |$$stringref|\n";
418 0         0 print sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme', 'Comment');
419             }
420              
421 87         1184 my($delimiter_frequency, $delimiter_stack);
422 87         0 my($event_name);
423 87         0 my($lexeme);
424 87         0 my($message);
425 87         0 my($original_lexeme);
426 87         0 my($span, $start);
427 87         0 my($tos);
428              
429             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
430             # Also, in read(), we use $pos and $length to avoid reading Ruby Slippers tokens (if any).
431             # For the latter, see scripts/match.parentheses.02.pl in MarpaX::Demo::SampleScripts.
432              
433 87   66     1431 for
434             (
435             $pos = $self -> recce -> read($stringref, $pos, $length);
436             ($pos < $total_length) && ( ($pos - $first_pos) <= $length);
437             $pos = $self -> recce -> resume($pos)
438             )
439             {
440 1289         159612 $delimiter_frequency = $self -> delimiter_frequency;
441 1289         25659 $delimiter_stack = $self -> delimiter_stack;
442 1289         24525 ($start, $span) = $self -> recce -> pause_span;
443 1289         16526 ($event_name, $span, $pos) = $self -> _validate_event($stringref, $start, $span, $pos, $delimiter_frequency);
444              
445             # If the input is exhausted, we exit immediately so we don't try to use
446             # the values of $start, $span or $pos. They are ignored upon exit.
447              
448 1289 50       3151 last if ($event_name eq "'exhausted"); # Yes, it has a leading quote.
449              
450 1289         19980 $lexeme = $self -> recce -> literal($start, $span);
451 1289         16163 $original_lexeme = $lexeme;
452 1289         19440 $pos = $self -> recce -> lexeme_read($event_name);
453              
454 1289 50       90224 die "lexeme_read($event_name) rejected lexeme |$lexeme|\n" if (! defined $pos);
455              
456 1289 50       24557 print sprintf($format, $event_name, $start, $span, $pos, $lexeme, '-') if ($self -> options & print_debugs);
457              
458 1289 100       9375 if ($event_name ne 'text')
459             {
460 215         690 $self -> _save_text($text);
461              
462 215         417 $text = '';
463             }
464              
465 1289 100       4158 if ($event_name eq 'close_delim')
    100          
    50          
466             {
467 105         265 $$delimiter_frequency{$lexeme}--;
468              
469 105         2406 $self -> delimiter_frequency($delimiter_frequency);
470              
471 105         2678 $tos = pop @$delimiter_stack;
472              
473 105         1831 $self -> delimiter_stack($delimiter_stack);
474              
475             # If the top of the delimiter stack is not the lexeme corresponding to the
476             # opening delimiter of the current closing delimiter, then there's an error.
477              
478 105 100       2546 if ($$matching_delimiter{$$tos{lexeme} } ne $lexeme)
479             {
480 1         7 $message = "Last open delimiter: $$tos{lexeme}. Unexpected closing delimiter: $lexeme";
481              
482 1         20 $self -> error_message($message);
483 1         46 $self -> error_number(1);
484              
485             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
486              
487 1 50       46 die "$message\n" if ($self -> options & overlap_is_fatal);
488              
489             # If we did not die, then it's a warning message.
490              
491 0         0 $self -> error_number(-1);
492              
493 0 0       0 print "Warning: $message\n" if ($self -> options & print_warnings);
494             }
495              
496 104         393 $self -> _pop_node_stack;
497 104         2459 $self -> _add_daughter('close', {text => $lexeme});
498             }
499             elsif ($event_name eq 'open_delim')
500             {
501 110         380 $$delimiter_frequency{$$matching_delimiter{$lexeme} }++;
502              
503             # If the top of the delimiter stack reaches 2, then there's an error.
504             # Unlike mismatched delimiters (just above), this is never gets a warning.
505              
506 110 100       374 if ($$delimiter_frequency{$$matching_delimiter{$lexeme} } > 1)
507             {
508 11         50 $message = "Opened delimiter $lexeme again before closing previous one";
509              
510 11         282 $self -> error_message($message);
511 11         582 $self -> error_number(2);
512              
513             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
514              
515 11 100       520 die "$message\n" if ($self -> options & nesting_is_fatal);
516              
517             # If we did not die, then it's a warning message.
518              
519 9         210 $self -> error_number(-2);
520              
521 9 50       330 print "Warning: $message\n" if ($self -> options & print_warnings);
522             }
523              
524             push @$delimiter_stack,
525             {
526 108         540 count => $$delimiter_frequency{$$matching_delimiter{$lexeme} },
527             lexeme => $lexeme,
528             };
529              
530 108         2234 $self -> delimiter_frequency($delimiter_frequency);
531 108         4567 $self -> delimiter_stack($delimiter_stack);
532 108         2718 $self -> _add_daughter('open', {text => $lexeme});
533 108         23442 $self -> _push_node_stack;
534             }
535             elsif ($event_name eq 'text')
536             {
537 1074         2023 $text .= $lexeme;
538             }
539              
540 1286         45933 $last_event = $event_name;
541             }
542              
543             # Mop up any left-over chars.
544              
545 84         3212 $self -> _save_text($text);
546              
547 84 50       1743 if ($self -> recce -> exhausted)
    50          
548             {
549 0         0 $message = 'Parse exhausted';
550              
551 0         0 $self -> error_message($message);
552 0         0 $self -> error_number(6);
553              
554 0 0       0 if ($self -> options & exhaustion_is_fatal)
555             {
556             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
557              
558 0         0 die "$message\n";
559             }
560             else
561             {
562 0         0 $self -> error_number(-6);
563              
564 0 0       0 print "Warning: $message\n" if ($self -> options & print_warnings);
565             }
566             }
567             elsif (my $status = $self -> recce -> ambiguous)
568             {
569 0         0 my($terminals) = $self -> recce -> terminals_expected;
570 0 0       0 $terminals = ['(None)'] if ($#$terminals < 0);
571 0         0 $message = "Ambiguous parse. Status: $status. Terminals expected: " . join(', ', @$terminals);
572              
573 0         0 $self -> error_message($message);
574 0         0 $self -> error_number(3);
575              
576 0 0       0 if ($self -> options & ambiguity_is_fatal)
    0          
577             {
578             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
579              
580 0         0 die "$message\n";
581             }
582             elsif ($self -> options & print_warnings)
583             {
584 0         0 $self -> error_number(-3);
585              
586 0         0 print "Warning: $message\n";
587             }
588             }
589              
590             # Return a defined value for success and undef for failure.
591              
592 84         13329 return $self -> recce -> value;
593              
594             } # End of _process.
595              
596             # ------------------------------------------------
597              
598             sub _push_node_stack
599             {
600 108     108   241 my($self) = @_;
601 108         2360 my($stack) = $self -> node_stack;
602 108         860 my(@daughters) = $$stack[$#$stack] -> children;
603              
604 108         1839 push @$stack, $daughters[$#daughters];
605              
606 108         1836 $self -> node_stack($stack);
607              
608             } # End of _push_node_stack.
609              
610             # ------------------------------------------------
611              
612             sub _save_text
613             {
614 299     299   674 my($self, $text) = @_;
615              
616 299 100       1439 $self -> _add_daughter('text', {text => $text}) if (length($text) );
617              
618 299         56960 return '';
619              
620             } # End of _save_text.
621              
622             # ------------------------------------------------
623              
624             sub _validate_event
625             {
626 1289     1289   2984 my($self, $stringref, $start, $span, $pos, $delimiter_frequency) = @_;
627 1289         1970 my(@event) = @{$self -> recce -> events};
  1289         19554  
628 1289         13154 my($event_count) = scalar @event;
629 1289         2577 my(@event_name) = sort map{$$_[0]} @event;
  1306         4527  
630 1289         2631 my($event_name) = $event_name[0]; # Default.
631              
632             # If the input is exhausted, we return immediately so we don't try to use
633             # the values of $start, $span or $pos. They are ignored upon return.
634              
635 1289 50       3054 if ($event_name eq "'exhausted") # Yes, it has a leading quote.
636             {
637 0         0 return ($event_name, $span, $pos);
638             }
639              
640 1289         2984 my($lexeme) = substr($$stringref, $start, $span);
641 1289         20635 my($line, $column) = $self -> recce -> line_column($start);
642 1289         18691 my($literal) = $self -> next_few_chars($stringref, $start + $span);
643 1289         4256 my($message) = "Location: ($line, $column). Lexeme: |$lexeme|. Next few chars: |$literal|";
644 1289         2881 $message = "$message. Events: $event_count. Names: ";
645              
646 1289 50       20670 print $message, join(', ', @event_name), "\n" if ($self -> options & print_debugs);
647              
648 1289         8150 my(%event_name);
649              
650 1289         4085 @event_name{@event_name} = (1) x @event_name;
651              
652 1289         2786 for (@event_name)
653             {
654 1306 50       2003 if (! ${$self -> known_events}{$_})
  1306         21413  
655             {
656 0         0 $message = "Unexpected event name '$_'";
657              
658 0         0 $self -> error_message($message);
659 0         0 $self -> error_number(10);
660              
661             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
662              
663 0         0 die "$message\n";
664             }
665             }
666              
667 1289 100       10052 if ($event_count > 1)
668             {
669             # We get here for single and double quotes because an open s. or d. quote is
670             # indistinguishable from a close s. or d. quote, and that leads to ambiguity.
671              
672 17 50 33     142 if ( ($lexeme =~ /["']/) && (join(', ', @event_name) eq 'close_delim, open_delim') ) # ".
673             {
674             # At the time _validate_event() is called, the quote count has not yet been bumped.
675             # If this is the 1st quote, then it's an open_delim.
676             # If this is the 2nd quote, them it's a close delim.
677              
678 17 100       71 if ($$delimiter_frequency{$lexeme} % 2 == 0)
679             {
680 3         11 $event_name = 'open_delim';
681              
682 3 50       53 print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
683             }
684             else
685             {
686 14         34 $event_name = 'close_delim';
687              
688 14 50       413 print "Disambiguated lexeme |$lexeme| as '$event_name'\n" if ($self -> options & print_debugs);
689             }
690             }
691             else
692             {
693 0         0 $message = join(', ', @event_name);
694 0         0 $message = "The code does not handle these events simultaneously: $message";
695              
696 0         0 $self -> error_message($message);
697 0         0 $self -> error_number(11);
698              
699             # This 'die' is inside try{}catch{}, which adds the prefix 'Error: '.
700              
701 0         0 die "$message\n";
702             }
703             }
704              
705 1289         4912 return ($event_name, $span, $pos);
706              
707             } # End of _validate_event.
708              
709             # ------------------------------------------------
710              
711             sub _validate_open_close
712             {
713 16     16   46 my($self) = @_;
714 16         310 my($open) = $self -> open;
715 16         450 my($close) = $self -> close;
716              
717 16         142 my($message);
718              
719 16 50 33     151 if ( ($#$open < 0) || ($#$close < 0) )
720             {
721 0         0 $message = 'There must be at least 1 pair of open/close delimiters';
722              
723 0         0 $self -> error_message($message);
724 0         0 $self -> error_number(8);
725              
726             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
727              
728 0         0 die "Error: $message\n";
729             }
730              
731 16 50       77 if ($#$open != $#$close)
732             {
733 0         0 $message = 'The # of open delimiters must match the # of close delimiters';
734              
735 0         0 $self -> error_message($message);
736 0         0 $self -> error_number(9);
737              
738             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
739              
740 0         0 die "Error: $message\n";
741             }
742              
743 16         100 my(%substitute) = (_close_ => '', _delimiter_ => '', _open_ => '');
744 16         46 my($matching_delimiter) = {};
745 16         62 my(%seen) = (close => {}, open => {});
746              
747 16         35 my($close_quote);
748 16         86 my(%delimiter_action, %delimiter_frequency);
749 16         0 my($open_quote);
750 16         0 my($prefix, %prefix);
751              
752 16         77 for my $i (0 .. $#$open)
753             {
754 37 50 33     230 if ( ($$open[$i] =~ /\\/) || ($$close[$i] =~ /\\/) )
755             {
756 0         0 $message = 'Backslash is forbidden as a delimiter character';
757              
758 0         0 $self -> error_message($message);
759 0         0 $self -> error_number(4);
760              
761             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
762              
763 0         0 die "Error: $message\n";
764             }
765              
766 37 50 66     322 if ( ( (length($$open[$i]) > 1) && ($$open[$i] =~ /'/) ) || ( (length($$close[$i]) > 1) && ($$close[$i] =~ /'/) ) )
      66        
      33        
767             {
768 0         0 $message = 'Single-quotes are forbidden in multi-character delimiters';
769              
770 0         0 $self -> error_message($message);
771 0         0 $self -> error_number(5);
772              
773             # This 'die' is not inside try{}catch{}, so we add the prefix 'Error: '.
774              
775 0         0 die "Error: $message\n";
776             }
777              
778 37 50       243 $seen{open}{$$open[$i]} = 0 if (! $seen{open}{$$open[$i]});
779 37 100       164 $seen{close}{$$close[$i]} = 0 if (! $seen{close}{$$close[$i]});
780              
781 37         83 $seen{open}{$$open[$i]}++;
782 37         75 $seen{close}{$$close[$i]}++;
783              
784 37         89 $delimiter_action{$$open[$i]} = 'open';
785 37         84 $delimiter_action{$$close[$i]} = 'close';
786 37         94 $$matching_delimiter{$$open[$i]} = $$close[$i];
787 37         73 $delimiter_frequency{$$open[$i]} = 0;
788 37         89 $delimiter_frequency{$$close[$i]} = 0;
789              
790 37 100       108 if (length($$open[$i]) == 1)
791             {
792 21 100       91 $open_quote = $$open[$i] eq '[' ? "[\\$$open[$i]]" : "[$$open[$i]]";
793             }
794             else
795             {
796             # This fails if length > 1 and open contains a single quote.
797              
798 16         40 $open_quote = "'$$open[$i]'";
799             }
800              
801 37 100       96 if (length($$close[$i]) == 1)
802             {
803 25 100       89 $close_quote = $$close[$i] eq ']' ? "[\\$$close[$i]]" : "[$$close[$i]]";
804             }
805             else
806             {
807             # This fails if length > 1 and close contains a single quote.
808              
809 12         26 $close_quote = "'$$close[$i]'";
810             }
811              
812 37 50       177 $substitute{_open_} .= "open_delim\t\t\t\~ $open_quote\n" if ($seen{open}{$$open[$i]} <= 1);
813 37 100       150 $substitute{_close_} .= "close_delim\t\t\t\~ $close_quote\n" if ($seen{close}{$$close[$i]} <= 1);
814 37         98 $prefix = substr($$open[$i], 0, 1);
815 37 100       132 $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
816 37 100       118 $prefix{$prefix} = 0 if (! $prefix{$prefix});
817              
818 37         73 $prefix{$prefix}++;
819              
820 37 100       109 $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
821 37         82 $prefix = substr($$close[$i], 0, 1);
822 37 100       110 $prefix = "\\$prefix" if ($prefix =~ /[\[\]]/);
823 37 100       108 $prefix{$prefix} = 0 if (! $prefix{$prefix});
824              
825 37         64 $prefix{$prefix}++;
826              
827 37 100       116 $substitute{_delimiter_} .= $prefix if ($prefix{$prefix} == 1);
828             }
829              
830 16         434 $self -> delimiter_action(\%delimiter_action);
831 16         798 $self -> delimiter_frequency(\%delimiter_frequency);
832 16         729 $self -> matching_delimiter($matching_delimiter);
833              
834 16         496 return \%substitute;
835              
836             } # End of _validate_open_close.
837              
838             # ------------------------------------------------
839              
840             1;
841              
842             =pod
843              
844             =head1 NAME
845              
846             C - Extract delimited text sequences from strings
847              
848             =head1 Synopsis
849              
850             #!/usr/bin/env perl
851              
852             use strict;
853             use warnings;
854              
855             use Text::Balanced::Marpa ':constants';
856              
857             # -----------
858              
859             my($count) = 0;
860             my($parser) = Text::Balanced::Marpa -> new
861             (
862             open => ['<:' ,'[%'],
863             close => [':>', '%]'],
864             options => nesting_is_fatal | print_warnings,
865             );
866             my(@text) =
867             (
868             q|<: a :>|,
869             q|a [% b <: c :> d %] e|,
870             q|a <: b <: c :> d :> e|, # nesting_is_fatal triggers an error here.
871             );
872              
873             my($result);
874              
875             for my $text (@text)
876             {
877             $count++;
878              
879             print "Parsing |$text|\n";
880              
881             $result = $parser -> parse(text => \$text);
882              
883             print join("\n", @{$parser -> tree -> tree2string}), "\n";
884             print "Parse result: $result (0 is success)\n";
885              
886             if ($count == 3)
887             {
888             print "Deliberate error: Failed to parse |$text|\n";
889             print 'Error number: ', $parser -> error_number, '. Error message: ',
890             $parser -> error_message, "\n";
891             }
892              
893             print '-' x 50, "\n";
894             }
895              
896             See scripts/synopsis.pl.
897              
898             This is the printout of synopsis.pl:
899              
900             Parsing |<: a :>|
901             Parsed text:
902             root. Attributes: {}
903             |--- open. Attributes: {text => "<:"}
904             | |--- string. Attributes: {text => " a "}
905             |--- close. Attributes: {text => ":>"}
906             Parse result: 0 (0 is success)
907             --------------------------------------------------
908             Parsing |a [% b <: c :> d %] e|
909             Parsed text:
910             root. Attributes: {}
911             |--- string. Attributes: {text => "a "}
912             |--- open. Attributes: {text => "[%"}
913             | |--- string. Attributes: {text => " b "}
914             | |--- open. Attributes: {text => "<:"}
915             | | |--- string. Attributes: {text => " c "}
916             | |--- close. Attributes: {text => ":>"}
917             | |--- string. Attributes: {text => " d "}
918             |--- close. Attributes: {text => "%]"}
919             |--- string. Attributes: {text => " e"}
920             Parse result: 0 (0 is success)
921             --------------------------------------------------
922             Parsing |a <: b <: c :> d :> e|
923             Error: Parse failed. Opened delimiter <: again before closing previous one
924             Text parsed so far:
925             root. Attributes: {}
926             |--- string. Attributes: {text => "a "}
927             |--- open. Attributes: {text => "<:"}
928             |--- string. Attributes: {text => " b "}
929             Parse result: 1 (0 is success)
930             Deliberate error: Failed to parse |a <: b <: c :> d :> e|
931             Error number: 2. Error message: Opened delimiter <: again before closing previous one
932             --------------------------------------------------
933              
934             See also scripts/tiny.pl and scripts/traverse.pl.
935              
936             =head1 Description
937              
938             L provides a L-based parser for extracting delimited text
939             sequences from strings. The text outside and inside the delimiters, and delimiters themselves, are
940             all stored as nodes in a tree managed by L.
941              
942             Nested strings, with the same or different delimiters, are stored as daughters of the nodes which
943             hold the delimiters.
944              
945             This module is a companion to L. The differences are discussed in the L
946             below.
947              
948             See the L for various topics, including:
949              
950             =over 4
951              
952             =item o UFT8 handling
953              
954             See t/utf8.t.
955              
956             =item o Escaping delimiters within the text
957              
958             See t/escapes.t.
959              
960             =item o Options to make nested and/or overlapped delimiters fatal errors
961              
962             See t/colons.t.
963              
964             =item o Using delimiters which are part of another delimiter
965              
966             See t/escapes.t and t/perl.delimiters.
967              
968             =item o Processing the tree-structured output
969              
970             See scripts/traverse.pl.
971              
972             =item o Emulating L's use of '<:' and ':>
973              
974             See t/colons.t and t/percents.t.
975              
976             =item o Implementing a really trivial HTML parser
977              
978             See t/html.t.
979              
980             In the same vein, see t/angle.brackets.t, for code where the delimiters are just '<' and '>'.
981              
982             =item o Handling multiple sets of delimiters
983              
984             See t/multiple.delimiters.t.
985              
986             =item o Skipping (leading) characters in the input string
987              
988             See t/skip.prefix.t.
989              
990             =item o Implementing hard-to-read text strings as delimiters
991              
992             See t/silly.delimiters.
993              
994             =back
995              
996             =head1 Distributions
997              
998             This module is available as a Unix-style distro (*.tgz).
999              
1000             See L
1001             for help on unpacking and installing distros.
1002              
1003             =head1 Installation
1004              
1005             Install L as you would any C module:
1006              
1007             Run:
1008              
1009             cpanm Text::Balanced::Marpa
1010              
1011             or run:
1012              
1013             sudo cpan Text::Balanced::Marpa
1014              
1015             or unpack the distro, and then either:
1016              
1017             perl Build.PL
1018             ./Build
1019             ./Build test
1020             sudo ./Build install
1021              
1022             or:
1023              
1024             perl Makefile.PL
1025             make (or dmake or nmake)
1026             make test
1027             make install
1028              
1029             =head1 Constructor and Initialization
1030              
1031             C is called as C<< my($parser) = Text::Balanced::Marpa -> new(k1 => v1, k2 => v2, ...) >>.
1032              
1033             It returns a new object of type C.
1034              
1035             Key-value pairs accepted in the parameter list (see corresponding methods for details
1036             [e.g. L]):
1037              
1038             =over 4
1039              
1040             =item o close => $arrayref
1041              
1042             An arrayref of strings, each one a closing delimiter.
1043              
1044             The # of elements must match the # of elements in the 'open' arrayref.
1045              
1046             See the L for details and warnings.
1047              
1048             A value for this option is mandatory.
1049              
1050             Default: None.
1051              
1052             =item o length => $integer
1053              
1054             The maximum length of the input string to process.
1055              
1056             This parameter works in conjunction with the C parameter.
1057              
1058             C can also be used as a key in the hash passed to L.
1059              
1060             See the L for details.
1061              
1062             Default: Calls Perl's length() function on the input string.
1063              
1064             =item o next_few_limit => $integer
1065              
1066             This controls how many characters are printed when displaying 'the next few chars'.
1067              
1068             It only affects debug output.
1069              
1070             Default: 20.
1071              
1072             =item o open => $arrayref
1073              
1074             An arrayref of strings, each one an opening delimiter.
1075              
1076             The # of elements must match the # of elements in the 'open' arrayref.
1077              
1078             See the L for details and warnings.
1079              
1080             A value for this option is mandatory.
1081              
1082             Default: None.
1083              
1084             =item o options => $bit_string
1085              
1086             This allows you to turn on various options.
1087              
1088             C can also be used as a key in the hash passed to L.
1089              
1090             Default: 0 (nothing is fatal).
1091              
1092             See the L for details.
1093              
1094             =item o pos => $integer
1095              
1096             The offset within the input string at which to start processing.
1097              
1098             This parameter works in conjunction with the C parameter.
1099              
1100             C can also be used as a key in the hash passed to L.
1101              
1102             See the L for details.
1103              
1104             Note: The first character in the input string is at pos == 0.
1105              
1106             Default: 0.
1107              
1108             =item o text => $stringref
1109              
1110             This is a reference to the string to be parsed. A stringref is used to avoid copying what could
1111             potentially be a very long string.
1112              
1113             C can also be used as a key in the hash passed to L.
1114              
1115             Default: \''.
1116              
1117             =back
1118              
1119             =head1 Methods
1120              
1121             =head2 bnf()
1122              
1123             Returns a string containing the grammar constructed based on user input.
1124              
1125             =head2 close()
1126              
1127             Get the arrayref of closing delimiters.
1128              
1129             See also L.
1130              
1131             See the L for details and warnings.
1132              
1133             'close' is a parameter to L. See L for details.
1134              
1135             =head2 delimiter_action()
1136              
1137             Returns a hashref, where the keys are delimiters and the values are either 'open' or 'close'.
1138              
1139             =head2 delimiter_frequency()
1140              
1141             Returns a hashref where the keys are opening and closing delimiters, and the values are the # of
1142             times each delimiter appears in the input stream.
1143              
1144             The value is incremented for each opening delimiter and decremented for each closing delimiter.
1145              
1146             =head2 error_message()
1147              
1148             Returns the last error or warning message set.
1149              
1150             Error messages always start with 'Error: '. Messages never end with "\n".
1151              
1152             Parsing error strings is not a good idea, ever though this module's format for them is fixed.
1153              
1154             See L.
1155              
1156             =head2 error_number()
1157              
1158             Returns the last error or warning number set.
1159              
1160             Warnings have values < 0, and errors have values > 0.
1161              
1162             If the value is > 0, the message has the prefix 'Error: ', and if the value is < 0, it has the
1163             prefix 'Warning: '. If this is not the case, it's a reportable bug.
1164              
1165             Possible values for error_number() and error_message():
1166              
1167             =over 4
1168              
1169             =item o 0 => ""
1170              
1171             This is the default value.
1172              
1173             =item o 1/-1 => "Last open delimiter: $lexeme_1. Unexpected closing delimiter: $lexeme_2"
1174              
1175             If L returns 1 it's an error, and if it returns -1 it's a warning.
1176              
1177             You can set the option C to make it fatal.
1178              
1179             =item o 2/-2 => "Opened delimiter $lexeme again before closing previous one"
1180              
1181             If L returns 2 it's an error, and if it returns -2 it's a warning.
1182              
1183             You can set the option C to make it fatal.
1184              
1185             =item o 3/-3 => "Ambiguous parse. Status: $status. Terminals expected: a, b, ..."
1186              
1187             This message is only produced when the parse is ambiguous.
1188              
1189             If L returns 3 it's an error, and if it returns -3 it's a warning.
1190              
1191             You can set the option C to make it fatal.
1192              
1193             =item o 4 => "Backslash is forbidden as a delimiter character"
1194              
1195             This preempts some types of sabotage.
1196              
1197             This message always indicates an error, never a warning.
1198              
1199             =item o 5 => "Single-quotes are forbidden in multi-character delimiters"
1200              
1201             This limitation is due to the syntax of
1202             L.
1203              
1204             This message always indicates an error, never a warning.
1205              
1206             =item o 6/-6 => "Parse exhausted"
1207              
1208             If L returns 6 it's an error, and if it returns -6 it's a warning.
1209              
1210             You can set the option C to make it fatal.
1211              
1212             =item o 7 => 'Single-quote is forbidden as an escape character'
1213              
1214             This limitation is due to the syntax of
1215             L.
1216              
1217             This message always indicates an error, never a warning.
1218              
1219             =item o 8 => "There must be at least 1 pair of open/close delimiters"
1220              
1221             This message always indicates an error, never a warning.
1222              
1223             =item o 9 => "The # of open delimiters must match the # of close delimiters"
1224              
1225             This message always indicates an error, never a warning.
1226              
1227             =item o 10 => "Unexpected event name 'xyz'"
1228              
1229             Marpa has triggered an event and it's name is not in the hash of event names derived from the BNF.
1230              
1231             This message always indicates an error, never a warning.
1232              
1233             =item o 11 => "The code does not handle these events simultaneously: a, b, ..."
1234              
1235             The code is written to handle single events at a time, or in rare cases, 2 events at the same time.
1236             But here, multiple events have been triggered and the code cannot handle the given combination.
1237              
1238             This message always indicates an error, never a warning.
1239              
1240             =back
1241              
1242             See L.
1243              
1244             =head2 escape_char()
1245              
1246             Get the escape char.
1247              
1248             =head2 known_events()
1249              
1250             Returns a hashref where the keys are event names and the values are 1.
1251              
1252             =head2 length([$integer])
1253              
1254             Here, the [] indicate an optional parameter.
1255              
1256             Get or set the length of the input string to process.
1257              
1258             See also the L and L.
1259              
1260             'length' is a parameter to L. See L for details.
1261              
1262             =head2 matching_delimiter()
1263              
1264             Returns a hashref where the keys are opening delimiters and the values are the corresponding closing
1265             delimiters.
1266              
1267             =head2 new()
1268              
1269             See L for details on the parameters accepted by L.
1270              
1271             =head2 next_few_chars($stringref, $offset)
1272              
1273             Returns a substring of $s, starting at $offset, for use in debug messages.
1274              
1275             See L.
1276              
1277             =head2 next_few_limit([$integer])
1278              
1279             Here, the [] indicate an optional parameter.
1280              
1281             Get or set the number of characters called 'the next few chars', which are printed during debugging.
1282              
1283             'next_few_limit' is a parameter to L. See L for details.
1284              
1285             =head2 open()
1286              
1287             Get the arrayref of opening delimiters.
1288              
1289             See also L.
1290              
1291             See the L for details and warnings.
1292              
1293             'open' is a parameter to L. See L for details.
1294              
1295             =head2 options([$bit_string])
1296              
1297             Here, the [] indicate an optional parameter.
1298              
1299             Get or set the option flags.
1300              
1301             For typical usage, see scripts/synopsis.pl.
1302              
1303             See the L for details.
1304              
1305             'options' is a parameter to L. See L for details.
1306              
1307             =head2 parse([%hash])
1308              
1309             Here, the [] indicate an optional parameter.
1310              
1311             This is the only method the user needs to call. All data can be supplied when calling L.
1312              
1313             You can of course call other methods (e.g. L ) after calling L but
1314             before calling C.
1315              
1316             The optional hash takes these ($key => $value) pairs (exactly the same as for L):
1317              
1318             =over 4
1319              
1320             =item o length => $integer
1321              
1322             =item o options => $bit_string
1323              
1324             =item o pos => $integer
1325              
1326             =item o text => $stringref
1327              
1328             =back
1329              
1330             Note: If a value is passed to C, it takes precedence over any value with the same
1331             key passed to L, and over any value previously passed to the method whose name is $key.
1332             Further, the value passed to C is always passed to the corresponding method (i.e. whose
1333             name is $key), meaning any subsequent call to that method returns the value passed to C.
1334              
1335             See scripts/samples.pl.
1336              
1337             Returns 0 for success and 1 for failure.
1338              
1339             If the value is 1, you should call L to find out what happened.
1340              
1341             =head2 pos([$integer])
1342              
1343             Here, the [] indicate an optional parameter.
1344              
1345             Get or set the offset within the input string at which to start processing.
1346              
1347             See also the L and L.
1348              
1349             'pos' is a parameter to L. See L for details.
1350              
1351             =head2 text([$stringref])
1352              
1353             Here, the [] indicate an optional parameter.
1354              
1355             Get or set a reference to the string to be parsed.
1356              
1357             'text' is a parameter to L. See L for details.
1358              
1359             =head2 tree()
1360              
1361             Returns an object of type L, which holds the parsed data.
1362              
1363             Obviously, it only makes sense to call C after calling C.
1364              
1365             See scripts/traverse.pl for sample code which processes this tree's nodes.
1366              
1367             =head1 FAQ
1368              
1369             =head2 What are the differences between Text::Balanced::Marpa and Text::Delimited::Marpa?
1370              
1371             I think this is shown most clearly by getting the 2 modules to process the same string. So,
1372             using this as input:
1373              
1374             'a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k'
1375              
1376             Output from Text::Balanced::Marpa's scripts/tiny.pl:
1377              
1378             (# 2) | 1 2 3 4 5 6 7 8 9
1379             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1380             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1381             Parse result: 0 (0 is success)
1382             root. Attributes: {text => "", uid => "0"}
1383             |--- text. Attributes: {text => "a ", uid => "1"}
1384             |--- open. Attributes: {text => "<:", uid => "2"}
1385             | |--- text. Attributes: {text => "b ", uid => "3"}
1386             | |--- open. Attributes: {text => "<:", uid => "4"}
1387             | | |--- text. Attributes: {text => "c", uid => "5"}
1388             | |--- close. Attributes: {text => ":>", uid => "6"}
1389             | |--- text. Attributes: {text => " d", uid => "7"}
1390             |--- close. Attributes: {text => ":>", uid => "8"}
1391             |--- text. Attributes: {text => " e ", uid => "9"}
1392             |--- open. Attributes: {text => "<:", uid => "10"}
1393             | |--- text. Attributes: {text => "f ", uid => "11"}
1394             | |--- open. Attributes: {text => "<:", uid => "12"}
1395             | | |--- text. Attributes: {text => " g ", uid => "13"}
1396             | | |--- open. Attributes: {text => "<:", uid => "14"}
1397             | | | |--- text. Attributes: {text => "h", uid => "15"}
1398             | | |--- close. Attributes: {text => ":>", uid => "16"}
1399             | | |--- text. Attributes: {text => " i", uid => "17"}
1400             | |--- close. Attributes: {text => ":>", uid => "18"}
1401             | |--- text. Attributes: {text => " j", uid => "19"}
1402             |--- close. Attributes: {text => ":>", uid => "20"}
1403             |--- text. Attributes: {text => " k", uid => "21"}
1404              
1405             Output from Text::Delimited::Marpa's scripts/tiny.pl:
1406              
1407             (# 2) | 1 2 3 4 5 6 7 8 9
1408             |0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
1409             Parsing |Skip me ->a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|. pos: 10. length: 42
1410             Parse result: 0 (0 is success)
1411             root. Attributes: {end => "0", length => "0", start => "0", text => "", uid => "0"}
1412             |--- span. Attributes: {end => "22", length => "9", start => "14", text => "b <:c:> d", uid => "1"}
1413             | |--- span. Attributes: {end => "18", length => "1", start => "18", text => "c", uid => "2"}
1414             |--- span. Attributes: {end => "47", length => "18", start => "30", text => "f <: g <:h:> i:> j", uid => "3"}
1415             |--- span. Attributes: {end => "43", length => "10", start => "34", text => " g <:h:> i", uid => "4"}
1416             |--- span. Attributes: {end => "39", length => "1", start => "39", text => "h", uid => "5"}
1417              
1418             Another example, using the same input string, but manually processing the tree nodes.
1419             Parent-daughter relationships are here represented by indentation.
1420              
1421             Output from Text::Balanced::Marpa's scripts/traverse.pl:
1422              
1423             | 1 2 3 4 5
1424             |012345678901234567890123456789012345678901234567890
1425             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1426             Span Text
1427             1 |a |
1428             2 |<:|
1429             3 |b |
1430             4 |<:|
1431             5 |c|
1432             6 |:>|
1433             7 | d|
1434             8 |:>|
1435             9 | e |
1436             10 |<:|
1437             11 |f |
1438             12 |<:|
1439             13 | g |
1440             14 |<:|
1441             15 |h|
1442             16 |:>|
1443             17 | i|
1444             18 |:>|
1445             19 | j|
1446             20 |:>|
1447             21 | k|
1448              
1449             Output from Text::Delimited::Marpa's scripts/traverse.pl:
1450              
1451             | 1 2 3 4 5
1452             |012345678901234567890123456789012345678901234567890
1453             Parsing |a <:b <:c:> d:> e <:f <: g <:h:> i:> j:> k|.
1454             Span Start End Length Text
1455             1 4 12 9 |b <:c:> d|
1456             2 8 8 1 |c|
1457             3 20 37 18 |f <: g <:h:> i:> j|
1458             4 24 33 10 | g <:h:> i|
1459             5 29 29 1 |h|
1460              
1461             =head2 Where are the error messages and numbers described?
1462              
1463             See L and L.
1464              
1465             =head2 How do I escape delimiters?
1466              
1467             By backslash-escaping the first character of all open and close delimiters which appear in the
1468             text.
1469              
1470             As an example, if the delimiters are '<:' and ':>', this means you have to escape I the '<'
1471             chars and I the colons in the text.
1472              
1473             The backslash is preserved in the output.
1474              
1475             If you don't want to use backslash for escaping, or can't, you can pass a different escape character
1476             to L.
1477              
1478             See t/escapes.t.
1479              
1480             =head2 How do the length and pos parameters to new() work?
1481              
1482             The recognizer - an object of type Marpa::R2::Scanless::R - is called in a loop, like this:
1483              
1484             for
1485             (
1486             $pos = $self -> recce -> read($stringref, $pos, $length);
1487             $pos < $length;
1488             $pos = $self -> recce -> resume($pos)
1489             )
1490              
1491             L and L can be used to initialize $pos and $length.
1492              
1493             Note: The first character in the input string is at pos == 0.
1494              
1495             See L for details.
1496              
1497             =head2 Does this package support Unicode/UTF8?
1498              
1499             Yes. See t/escapes.t, t/multiple.quotes.t and t/utf8.t.
1500              
1501             =head2 Does this package handler Perl delimiters (e.g. q|..|, qq|..|, qr/../, qw/../)?
1502              
1503             See t/perl.delimiters.t.
1504              
1505             =head2 Warning: Calling mutators after calling new()
1506              
1507             The only mutator which works after calling new() is L.
1508              
1509             In particular, you can't call L, L or L after calling L.
1510             This is because parameters passed to C are interpolated into the grammar before parsing
1511             begins. And that's why the docs for those methods all say 'Get the...' and not 'Get and set the...'.
1512              
1513             To make the code work, you would have to manually call _validate_open_close(). But even then
1514             a lot of things would have to be re-initialized to give the code any hope of working.
1515              
1516             =head2 What is the format of the 'open' and 'close' parameters to new()?
1517              
1518             Each of these parameters takes an arrayref as a value.
1519              
1520             The # of elements in the 2 arrayrefs must be the same.
1521              
1522             The 1st element in the 'open' arrayref is the 1st user-chosen opening delimiter, and the 1st
1523             element in the 'close' arrayref must be the corresponding closing delimiter.
1524              
1525             It is possible to use a delimiter which is part of another delimiter.
1526              
1527             See scripts/samples.pl. It uses both '<' and '<:' as opening delimiters and their corresponding
1528             closing delimiters are '>' and ':>'. Neat, huh?
1529              
1530             =head2 What are the possible values for the 'options' parameter to new()?
1531              
1532             Firstly, to make these constants available, you must say:
1533              
1534             use Text::Balanced::Marpa ':constants';
1535              
1536             Secondly, more detail on errors and warnings can be found at L.
1537              
1538             Thirdly, for usage of these option flags, see t/angle.brackets.t, t/colons.t, t/escapes.t,
1539             t/multiple.quotes.t, t/percents.t and scripts/samples.pl.
1540              
1541             Now the flags themselves:
1542              
1543             =over 4
1544              
1545             =item o nothing_is_fatal
1546              
1547             This is the default.
1548              
1549             C has the value of 0.
1550              
1551             =item o print_errors
1552              
1553             Print errors if this flag is set.
1554              
1555             C has the value of 1.
1556              
1557             =item o print_warnings
1558              
1559             Print various warnings if this flag is set:
1560              
1561             =over 4
1562              
1563             =item o The ambiguity status and terminals expected, if the parse is ambiguous
1564              
1565             =item o See L for other warnings which might be printed
1566              
1567             Ambiguity is not, in and of itself, an error. But see the C option, below.
1568              
1569             =back
1570              
1571             It's tempting to call this option C, but Perl already has C, so I didn't.
1572              
1573             C has the value of 2.
1574              
1575             =item o print_debugs
1576              
1577             Print extra stuff if this flag is set.
1578              
1579             C has the value of 4.
1580              
1581             =item o overlap_is_fatal
1582              
1583             This means overlapping delimiters cause a fatal error.
1584              
1585             So, setting C means '{Bold [Italic}]' would be a fatal error.
1586              
1587             I use this example since it gives me the opportunity to warn you, this will I do what you want
1588             if you try to use the delimiters of '<' and '>' for HTML. That is, 'Bold Italic' is
1589             not an error because what overlap are '' and '' BUT THEY ARE NOT TAGS. The tags are '<' and
1590             '>', ok? See also t/html.t.
1591              
1592             C has the value of 8.
1593              
1594             =item o nesting_is_fatal
1595              
1596             This means nesting of identical opening delimiters is fatal.
1597              
1598             So, using C means 'a <: b <: c :> d :> e' would be a fatal error.
1599              
1600             C has the value of 16.
1601              
1602             =item o ambiguity_is_fatal
1603              
1604             This makes L return 3 rather than -3.
1605              
1606             C has the value of 32.
1607              
1608             =item o exhaustion_is_fatal
1609              
1610             This makes L return 6 rather than -6.
1611              
1612             C has the value of 64.
1613              
1614             =back
1615              
1616             =head2 How do I print the tree built by the parser?
1617              
1618             See L.
1619              
1620             =head2 How do I make use of the tree built by the parser?
1621              
1622             See scripts/traverse.pl. It is a copy of t/html.t with tree-walking code instead of test code.
1623              
1624             =head2 How is the parsed data held in RAM?
1625              
1626             The parsed output is held in a tree managed by L.
1627              
1628             The tree always has a root node, which has nothing to do with the input data. So, even an empty
1629             input string will produce a tree with 1 node. This root has an empty hashref associated with it.
1630              
1631             Nodes have a name and a hashref of attributes.
1632              
1633             The name indicates the type of node. Names are one of these literals:
1634              
1635             =over 4
1636              
1637             =item o close
1638              
1639             =item o open
1640              
1641             =item o root
1642              
1643             =item o text
1644              
1645             =back
1646              
1647             For 'open' and 'close', the delimiter is given by the value of the 'text' key in the hashref.
1648              
1649             The (key => value) pairs in the hashref are:
1650              
1651             =over 4
1652              
1653             =item o text => $string
1654              
1655             If the node name is 'open' or 'close', $string is the delimiter.
1656              
1657             If the node name is 'text', $string is the verbatim text from the document.
1658              
1659             Verbatim means, for example, that backslashes in the input are preserved.
1660              
1661             =back
1662              
1663             Try:
1664              
1665             perl -Ilib scripts/samples.pl info
1666              
1667             =head2 How is HTML/XML handled?
1668              
1669             The tree does not preserve the nested nature of HTML/XML.
1670              
1671             Post-processing (valid) HTML could easily generate another view of the data.
1672              
1673             But anyway, to get perfect HTML you'd be grabbing the output of L, right?
1674              
1675             See scripts/traverse.pl and t/html.t for a trivial HTML parser.
1676              
1677             =head2 What is the homepage of Marpa?
1678              
1679             L.
1680              
1681             That page has a long list of links.
1682              
1683             =head2 How do I run author tests?
1684              
1685             This runs both standard and author tests:
1686              
1687             shell> perl Build.PL; ./Build; ./Build authortest
1688              
1689             =head1 TODO
1690              
1691             =over 4
1692              
1693             =item o Advanced error reporting
1694              
1695             See L.
1696              
1697             Perhaps this could be a sub-class?
1698              
1699             =item o I8N support for error messages
1700              
1701             =item o An explicit test program for parse exhaustion
1702              
1703             =back
1704              
1705             =head1 See Also
1706              
1707             L.
1708              
1709             L and L.
1710              
1711             L.
1712              
1713             L - for various usages of L, but not of this module.
1714              
1715             =head1 Machine-Readable Change Log
1716              
1717             The file Changes was converted into Changelog.ini by L.
1718              
1719             =head1 Version Numbers
1720              
1721             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1722              
1723             =head1 Thanks
1724              
1725             Thanks to Jeffrey Kegler, who wrote Marpa and L.
1726              
1727             And thanks to rns (Ruslan Shvedov) for writing the grammar for double-quoted strings used in
1728             L's scripts/quoted.strings.02.pl. I adapted it to HTML (see
1729             scripts/quoted.strings.05.pl in that module), and then incorporated the grammar into
1730             L, and - after more extensions - into this module.
1731              
1732             Lastly, thanks to Robert Rothenberg for L, a module which works the same way
1733             Perl does.
1734              
1735             =head1 Repository
1736              
1737             L
1738              
1739             =head1 Support
1740              
1741             Email the author, or log a bug on RT:
1742              
1743             L.
1744              
1745             =head1 Author
1746              
1747             L was written by Ron Savage Iron@savage.net.auE> in 2014.
1748              
1749             Marpa's homepage: L.
1750              
1751             My homepage: L.
1752              
1753             =head1 Copyright
1754              
1755             Australian copyright (c) 2014, Ron Savage.
1756              
1757             All Programs of mine are 'OSI Certified Open Source Software';
1758             you can redistribute them and/or modify them under the terms of
1759             The Artistic License 2.0, a copy of which is available at:
1760             http://opensource.org/licenses/alphabetical.
1761              
1762             =cut