File Coverage

blib/lib/MarpaX/Demo/StringParser.pm
Criterion Covered Total %
statement 201 253 79.4
branch 50 74 67.5
condition 7 15 46.6
subroutine 28 31 90.3
pod 4 5 80.0
total 290 378 76.7


line stmt bran cond sub pod time code
1             package MarpaX::Demo::StringParser;
2              
3 1     1   55471 use strict;
  1         2  
  1         31  
4 1     1   4 use utf8;
  1         2  
  1         8  
5 1     1   16 use warnings;
  1         6  
  1         32  
6 1     1   3 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  1         2  
  1         33  
7              
8 1     1   553 use File::Slurp; # For read_file().
  1         9979  
  1         95  
9              
10 1     1   809 use Log::Handler;
  1         28850  
  1         9  
11              
12 1     1   599 use Marpa::R2;
  1         109343  
  1         23  
13              
14 1     1   996 use Moo;
  1         17232  
  1         10  
15              
16 1     1   3219 use Set::Array;
  1         16801  
  1         29  
17              
18 1     1   872 use Text::CSV;
  1         11007  
  1         7  
19              
20 1     1   976 use Tree::DAG_Node;
  1         17425  
  1         134  
21              
22 1     1   1085 use Types::Standard qw/Any ArrayRef HashRef Int Str/;
  1         74556  
  1         23  
23              
24 1     1   1548 use Try::Tiny;
  1         14  
  1         4214  
25              
26             has bnf =>
27             (
28             default => sub{return ''},
29             is => 'rw',
30             isa => Any,
31             required => 0,
32             );
33              
34             has description =>
35             (
36             default => sub{return ''},
37             is => 'rw',
38             isa => Str,
39             required => 0,
40             );
41              
42             has grammar =>
43             (
44             default => sub{return ''},
45             is => 'rw',
46             isa => Any, # 'Marpa::R2::Scanless::G'.
47             required => 0,
48             );
49              
50             has graph_text =>
51             (
52             default => sub{return ''},
53             is => 'rw',
54             isa => Str,
55             required => 0,
56             );
57              
58             has input_file =>
59             (
60             default => sub{return ''},
61             is => 'rw',
62             isa => Str,
63             required => 0,
64             );
65              
66             has logger =>
67             (
68             default => sub{return undef},
69             is => 'rw',
70             isa => Any,
71             required => 0,
72             );
73              
74             has maxlevel =>
75             (
76             default => sub{return 'notice'},
77             is => 'rw',
78             isa => Str,
79             required => 0,
80             );
81              
82             has minlevel =>
83             (
84             default => sub{return 'error'},
85             is => 'rw',
86             isa => Str,
87             required => 0,
88             );
89              
90             has known_events =>
91             (
92             default => sub{return {} },
93             is => 'rw',
94             isa => HashRef,
95             required => 0,
96             );
97              
98             has recce =>
99             (
100             default => sub{return ''},
101             is => 'rw',
102             isa => Any, # 'Marpa::R2::Scanless::R'.
103             required => 0,
104             );
105              
106             has stack =>
107             (
108             default => sub{return []},
109             is => 'rw',
110             isa => ArrayRef,
111             required => 0,
112             );
113              
114             has tree =>
115             (
116             default => sub{return ''},
117             is => 'rw',
118             isa => Any,
119             required => 0,
120             );
121              
122             has uid =>
123             (
124             default => sub{return 0},
125             is => 'rw',
126             isa => Int,
127             required => 0,
128             );
129              
130             our $VERSION = '2.02';
131              
132             # --------------------------------------------------
133             # For accepted and rejected by Marpa, see
134             # Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
135              
136             sub BUILD
137             {
138 6     6 0 248 my($self) = @_;
139              
140 6 50       157 if (! defined $self -> logger)
141             {
142 6         1254 $self -> logger(Log::Handler -> new);
143 6         1302 $self -> logger -> add
144             (
145             screen =>
146             {
147             alias => 'logger',
148             maxlevel => $self -> maxlevel,
149             message_layout => '%m',
150             minlevel => $self -> minlevel,
151             }
152             );
153             }
154              
155             # Policy: Event names are always the same as the name of the corresponding lexeme.
156              
157             $self -> bnf
158             (
159             <<'END_OF_GRAMMAR'
160              
161             :default ::= action => [values]
162              
163             lexeme default = latm => 1 # Longest Acceptable Token Match.
164              
165             :start ::= graph_grammar
166              
167             graph_grammar ::= graph_definition
168              
169             # Graph stuff.
170              
171             graph_definition ::= node_definition
172             | edge_definition
173             # Node stuff
174              
175             node_definition ::= node_statement
176             | node_statement graph_definition
177              
178             node_statement ::= node_name_token
179             | node_name_token attribute_definition
180             | node_statement (',') node_statement
181              
182             node_name_token ::= start_node end_node # Allow for the anonymous node.
183             | start_node node_name end_node
184              
185             # Edge stuff
186              
187             edge_definition ::= edge_statement
188             | edge_statement graph_definition
189              
190             edge_statement ::= edge_name
191             | edge_name attribute_definition
192             | edge_statement (',') edge_statement
193              
194             edge_name ::= directed_edge
195             | undirected_edge
196              
197             # Attribute stuff.
198              
199             attribute_definition ::= attribute_statement+
200              
201             attribute_statement ::= start_attributes string_token_set end_attributes
202              
203             string_token_set ::= string_token_pair+
204              
205             string_token_pair ::= literal_label
206             | attribute_name (':') attribute_value
207              
208             # Lexemes in alphabetical order.
209              
210             :lexeme ~ attribute_name pause => before event => attribute_name
211              
212             attribute_name ~ string_char_set+
213              
214             :lexeme ~ attribute_value pause => before event => attribute_value
215              
216             attribute_value ~ string_char_set+
217              
218             :lexeme ~ directed_edge pause => before event => directed_edge priority => 2
219             directed_edge ~ '->'
220              
221             :lexeme ~ end_attributes pause => before event => end_attributes priority => 1
222             end_attributes ~ '}'
223              
224             :lexeme ~ end_node pause => before event => end_node priority => 1
225             end_node ~ ']'
226              
227             escaped_char ~ '\' [[:print:]]
228              
229             # Use ' here just for the UltraEdit syntax hiliter.
230              
231             :lexeme ~ literal_label pause => before event => literal_label priority => 1
232             literal_label ~ 'label'
233              
234             :lexeme ~ node_name pause => before event => node_name
235              
236             node_name ~ string_char_set+
237              
238             :lexeme ~ start_attributes pause => before event => start_attributes
239             start_attributes ~ '{'
240              
241             :lexeme ~ start_node pause => before event => start_node
242             start_node ~ '['
243              
244             string_char_set ~ escaped_char
245             | [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
246              
247             :lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
248             undirected_edge ~ '--'
249              
250             # Lexemes in alphabetical order.
251              
252             # Boilerplate.
253              
254             :discard ~ whitespace
255             whitespace ~ [\s]+
256              
257             END_OF_GRAMMAR
258 6         23063 );
259              
260 6         2544 $self -> grammar
261             (
262             Marpa::R2::Scanless::G -> new
263             ({
264             source => \$self -> bnf
265             })
266              
267             );
268 6         1114757 $self -> recce
269             (
270             Marpa::R2::Scanless::R -> new
271             ({
272             grammar => $self -> grammar,
273             })
274             );
275              
276 6         5544 my(%event);
277              
278 6         208 for my $line (split(/\n/, $self -> bnf) )
279             {
280 576 100       2097 $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
281             }
282              
283 6         272 $self -> known_events(\%event);
284              
285             # Since $self -> tree has not been initialized yet,
286             # we can't call our _add_daughter() until after this statement.
287              
288 6         1242 $self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
289 6         3648 $self -> stack([$self -> tree -> root]);
290              
291             # This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
292             # So, all tokens in the input are descended from the 'graph' node.
293              
294 6         1483 for my $name (qw/prolog graph/)
295             {
296 12         615 $self -> _add_daughter($name, {});
297             }
298              
299             # The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
300             # It is used in GraphViz2::Marpa;
301             # The 'graph' daughter gets pushed onto the stack because in this module's grammar,
302             # all items belong to the graph.
303              
304 6         584 my(@daughters) = $self -> tree -> daughters;
305 6         110 my($index) = 1; # 0 => prolog, 1 => graph.
306 6         241 my($stack) = $self -> stack;
307              
308 6         96 push @$stack, $daughters[1];
309              
310 6         192 $self -> stack($stack);
311              
312             } # End of BUILD.
313              
314             # ------------------------------------------------
315              
316             sub _add_daughter
317             {
318 62     62   155 my($self, $name, $attributes) = @_;
319 62         2446 $$attributes{uid} = $self -> uid($self -> uid + 1);
320 62         4223 my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
321 62         6510 my($stack) = $self -> stack;
322              
323 62         914 $$stack[$#$stack] -> add_daughter($node);
324              
325             } # End of _add_daughter.
326              
327             # --------------------------------------------------
328              
329             sub clean_after
330             {
331 42     42 1 91 my($self, $s) = @_;
332              
333 42         171 $s =~ s/^\s+//;
334 42         143 $s =~ s/\s+$//;
335 42         103 $s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
336              
337 42         149 return $s;
338              
339             } # End of clean_after.
340              
341             # --------------------------------------------------
342              
343             sub clean_before
344             {
345 6     6 1 69 my($self, $s) = @_;
346              
347 6         112 $s =~ s/\s*;\s*$//;
348 6         22 $s =~ s/^\s+//;
349 6         70 $s =~ s/\s+$//;
350 6         23 $s =~ s/^(<)\s+/$1/;
351 6         69 $s =~ s/\s+(>)$/$1/;
352              
353 6         28 return $s;
354              
355             } # End of clean_before.
356              
357             # --------------------------------------------------
358              
359             sub _get_graph_from_command_line
360             {
361 0     0   0 my($self) = @_;
362              
363 0         0 $self -> graph_text($self -> description);
364              
365             } # End of _get_graph_from_command_line.
366              
367             # --------------------------------------------------
368              
369             sub log
370             {
371 92     92 1 14257 my($self, $level, $s) = @_;
372              
373 92 50       5278 $self -> logger -> log($level => $s) if ($self -> logger);
374              
375             } # End of log.
376              
377             # --------------------------------------------------
378              
379             sub _process
380             {
381 6     6   17 my($self) = @_;
382 6         272 my($string) = $self -> clean_before($self -> graph_text);
383 6         28 my($length) = length $string;
384 6         20 my($last_event) = '';
385 6         20 my($format) = '%-20s %5s %5s %-s';
386              
387             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
388              
389 6         13 my($event_name, $edge);
390 0         0 my(@fields);
391 0         0 my($lexeme, $literal);
392 0         0 my($span, $start);
393              
394 6         88 $self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Lexeme') );
395              
396 6         712 for
397             (
398             my $pos = $self -> recce -> read(\$string);
399             $pos < $length;
400             $pos = $self -> recce -> resume($pos)
401             )
402             {
403 74         45436 $event_name = $self -> _validate_event;
404 74         2489 ($start, $span) = $self -> recce -> pause_span;
405 74         4075 $pos = $self -> recce -> lexeme_read($event_name);
406 74         10029 $literal = substr($string, $start, $pos - $start);
407 74         2626 $lexeme = $self -> recce -> literal($start, $span);
408              
409 74         2321 $self -> log(debug => sprintf($format, $event_name, $start, $span, $lexeme) );
410              
411 74 100       37303 if ($event_name eq 'attribute_name')
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    0          
412             {
413 11         215 $fields[0] = $self -> clean_after($literal);
414             }
415             elsif ($event_name eq 'attribute_value')
416             {
417 11         47 $literal = $self -> clean_after($literal);
418              
419 11         157 $self -> _add_daughter($fields[0], {value => $literal});
420              
421 11         1409 @fields = ();
422              
423             # Skip the separator.
424              
425 11   100     183 while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
  18         125  
426             }
427             elsif ($event_name eq 'directed_edge')
428             {
429 5         24 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
430             }
431             elsif ($event_name eq 'end_attributes')
432             {
433 9         37 $self -> _process_brace($literal);
434             }
435             elsif ($event_name eq 'end_node')
436             {
437             # Is this the anonymous node?
438              
439 7 100       35 if ($last_event eq 'start_node')
440             {
441 1         11 $self -> _add_daughter('node_id', {value => ''});
442             }
443             }
444             elsif ($event_name eq 'literal_label')
445             {
446 9         34 push @fields, $literal;
447              
448 9         262 $pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
449 9         33 @fields = ();
450             }
451             elsif ($event_name eq 'node_name')
452             {
453 6         38 $literal = $self -> clean_after($literal);
454              
455 6         50 $self -> _add_daughter('node_id', {value => $literal});
456             }
457             elsif ($event_name eq 'start_attributes')
458             {
459 9         35 $self -> _process_brace($literal);
460             }
461             elsif ($event_name eq 'start_node')
462             {
463             # Do nothing.
464             }
465             elsif ($event_name eq 'undirected_edge')
466             {
467 0         0 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
468             }
469              
470 74         5604 $last_event = $event_name;
471             }
472              
473 6 50       580 if ($self -> recce -> ambiguity_metric > 1)
474             {
475 0         0 $self -> log(notice => 'Ambiguous parse');
476             }
477              
478 6 50       5917 if (my $ambiguous_status = $self -> recce -> ambiguous)
479             {
480 0         0 $self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
481             }
482              
483             # Return a defined value for success and undef for failure.
484              
485 6         434 return $self -> recce -> value;
486              
487             } # End of _process.
488              
489             # --------------------------------------------------
490              
491             sub _process_brace
492             {
493 18     18   47 my($self, $name) = @_;
494              
495             # When a '{' is encountered, the last thing pushed becomes it's parent.
496             # Likewise, if '}' is encountered, we pop the stack.
497              
498 18         529 my($stack) = $self -> stack;
499              
500 18 100       337 if ($name eq '{')
501             {
502 9         62 my(@daughters) = $$stack[$#$stack] -> daughters;
503              
504 9         151 push @$stack, $daughters[$#daughters];
505              
506 9         39 $self -> _process_token('literal', $name);
507             }
508             else
509             {
510 9         49 $self -> _process_token('literal', $name);
511              
512 9         2735 pop @$stack;
513              
514 9         297 $self -> stack($stack);
515             }
516              
517             } # End of _process_brace.
518              
519             # ------------------------------------------------
520              
521             sub _process_html
522             {
523 1     1   5 my($self, $recce, $fields, $string, $length, $pos) = @_;
524              
525 1         4 my($bracket_count) = 0;
526 1         3 my($open_bracket) = '<';
527 1         3 my($close_bracket) = '>';
528 1         4 my($previous_char) = '';
529 1         3 my($label) = '';
530              
531 1         4 my($char);
532              
533 1         7 while ($pos < $length)
534             {
535 68         379 $char = substr($string, $pos, 1);
536 68         66 $label .= $char;
537              
538 68 50       205 if ($previous_char eq '\\')
    100          
    100          
539             {
540             }
541             elsif ($char eq $open_bracket)
542             {
543 7         7 $bracket_count++;
544             }
545             elsif ($char eq $close_bracket)
546             {
547 7         7 $bracket_count--;
548              
549 7 100       19 if ($bracket_count == 0)
550             {
551 1         4 $pos++;
552              
553 1         3 last;
554             }
555             }
556              
557 67         634 $previous_char = $char;
558              
559 67         99 $pos++;
560             }
561              
562 1         8 $label = $self -> clean_after($label);
563              
564 1 50 33     27 if ( ($label =~ /^$/) )
565             {
566 0         0 my($line, $column) = $recce -> line_column;
567              
568 0         0 die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
569             }
570              
571 1         4 push @$fields, $label;
572              
573 1         7 return $self -> _skip_separator($string, $length, $pos, ';');
574              
575             } # End of _process_html.
576              
577             # ------------------------------------------------
578              
579             sub _process_label
580             {
581 9     9   101 my($self, $recce, $fields, $string, $length, $pos) = @_;
582              
583 9         46 $pos = $self -> _skip_separator($string, $length, $pos, ':');
584              
585 9 50       34 return $pos if ($pos >= $length);
586              
587 9         34 my($char) = substr($string, $pos, 1);
588              
589 9 50       205 if ($char eq "'")
    50          
    100          
590             {
591 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
592             }
593             elsif ($char eq '"')
594             {
595 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
596             }
597             elsif ($char eq '<')
598             {
599 1         9 $pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
600             }
601             else
602             {
603 8         43 $pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
604             }
605              
606 9         52 for (my $i = 0; $i < $#$fields; $i += 2)
607             {
608 9         87 $self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
609             }
610              
611 9         1058 return $pos;
612              
613             } # End of _process_label.
614              
615             # ------------------------------------------------
616              
617             sub _process_quotes
618             {
619 0     0   0 my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
620              
621 0         0 my($previous_char) = '';
622 0         0 my($label) = '';
623 0         0 my($quote_count) = 0;
624              
625 0         0 my($char);
626              
627 0         0 while ($pos < $length)
628             {
629 0         0 $char = substr($string, $pos, 1);
630              
631 0 0 0     0 if ( ($previous_char ne '\\') && ($char eq $terminator) )
632             {
633 0         0 $quote_count++;
634              
635 0 0       0 if ($quote_count == 2)
636             {
637 0         0 $label .= $char;
638              
639 0         0 $pos++;
640              
641 0         0 last;
642             }
643             }
644              
645 0         0 $label .= $char;
646 0         0 $previous_char = $char;
647              
648 0         0 $pos++;
649             }
650              
651             # Don't call clean_after, since it removes the ' and " we are about to check.
652              
653 0         0 $label =~ s/^\s+//;
654 0         0 $label =~ s/\s+$//;
655              
656 0 0 0     0 if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
657             {
658             # Use ' and " here just for the UltraEdit syntax hiliter.
659              
660 0         0 my($line, $column) = $recce -> line_column;
661              
662 0         0 die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
663             }
664              
665 0         0 $label = $self -> clean_after($label);
666              
667 0         0 push @$fields, $label;
668              
669 0         0 $self -> log(debug => "_process_quotes(). Label !$label!");
670              
671 0         0 return $self -> _skip_separator($string, $length, $pos, ';');
672              
673             } # End of _process_quotes.
674              
675             # --------------------------------------------------
676              
677             sub _process_token
678             {
679 18     18   44 my($self, $name, $value) = @_;
680              
681 18         107 $self -> _add_daughter($name, {value => $value});
682              
683             } # End of _process_token.
684              
685             # ------------------------------------------------
686              
687             sub _process_unquoted
688             {
689 8     8   21 my($self, $recce, $fields, $string, $length, $pos) = @_;
690 8         45 my($re) = qr/[;}]/;
691              
692 8 50       72 if (substr($string, $pos, 1) =~ $re)
693             {
694 0         0 push @$fields, '';
695              
696 0         0 return $pos;
697             }
698              
699 8         24 my($previous_char) = '';
700 8         22 my($label) = '';
701 8         17 my($quote_count) = 0;
702              
703 8         17 my($char);
704              
705 8         29 while ($pos < $length)
706             {
707 99         142 $char = substr($string, $pos, 1);
708              
709 99 100 100     2825 last if ( ($previous_char ne '\\') && ($char =~ $re) );
710              
711 91         116 $label .= $char;
712 91         112 $previous_char = $char;
713              
714 91         156 $pos++;
715             }
716              
717 8         37 $label = $self -> clean_after($label);
718              
719 8         24 push @$fields, $label;
720              
721 8         31 return $self -> _skip_separator($string, $length, $pos, ';');
722              
723             } # End of _process_unquoted.
724              
725             # --------------------------------------------------
726              
727             sub run
728             {
729 6     6 1 16827 my($self) = @_;
730              
731 6 50       225 if ($self -> description)
    50          
732             {
733 0         0 $self -> _get_graph_from_command_line;
734             }
735             elsif ($self -> input_file)
736             {
737 6         2322 $self -> graph_text(join(' ', grep{! m!^(?:#|//)!} read_file($self -> input_file, binmode => ':encoding(utf-8)') ) );
  19         38592  
738             }
739             else
740             {
741 0         0 die "Error: You must provide a graph using one of -input_file or -description\n";
742             }
743              
744             # Return 0 for success and 1 for failure.
745              
746 6         1591 my($result) = 0;
747              
748             try
749             {
750 6 50   6   444 if (defined (my $value = $self -> _process) )
751             {
752 6         69128 $self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
  6         381  
753             }
754             else
755             {
756 0         0 $result = 1;
757              
758 0         0 $self -> log(error => 'Parse failed');
759             }
760             }
761             catch
762             {
763 0     0   0 $result = 1;
764              
765 0         0 $self -> log(error => "Parse failed. Error: $_");
766 6         196 };
767              
768             # Return 0 for success and 1 for failure.
769              
770 6         3810 $self -> log(info => "Parse result: $result (0 is success)");
771              
772 6         1302 return $result;
773              
774             } # End of run.
775              
776             # ------------------------------------------------
777              
778             sub _skip_separator
779             {
780 18     18   45 my($self, $string, $length, $pos, $separator) = @_;
781 18         856 my($re) = qr/[\s$separator]/;
782              
783 18         36 my($char);
784              
785 18         74 while ($pos < $length - 1)
786             {
787 42         113 $char = substr($string, $pos, 1);
788              
789 42 100       291 last if ($char !~ $re);
790              
791 26         57 $pos++;
792             }
793              
794 18         93 return $pos;
795              
796             } # End of _skip_separator.
797              
798             # ------------------------------------------------
799              
800             sub _validate_event
801             {
802 74     74   138 my($self) = @_;
803 74         94 my(@event) = @{$self -> recce -> events};
  74         2793  
804 74         1128 my($event_count) = scalar @event;
805              
806 74 50       328 if ($event_count > 1)
807             {
808 0         0 $self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
  0         0  
  0         0  
809              
810 0         0 die "The code only handles 1 event at a time\n";
811             }
812              
813 74         1209 my($event_name) = ${$event[0]}[0];
  74         186  
814              
815 74 50       85 if (! ${$self -> known_events}{$event_name})
  74         2179  
816             {
817 0         0 my($msg) = "Unexpected event name '$event_name'";
818              
819 0         0 $self -> log(error => $msg);
820              
821 0         0 die "$msg\n";
822             }
823              
824 74         1328 return $event_name;
825              
826             } # End of _validate_event.
827              
828             # --------------------------------------------------
829              
830             1;
831              
832             =pod
833              
834             =head1 NAME
835              
836             L - A Marpa-based parser for the DASH language
837              
838             =head1 Synopsis
839              
840             Typical usage:
841              
842             perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
843             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
844              
845             You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
846              
847             scripts/parse.sh node.04 -max info
848              
849             See L for sample
850             input and output.
851              
852             Also, see L
853             based on this module.
854              
855             =head1 Description
856              
857             This module implements a parser for L (below), a wrapper language around Graphviz's
858             L. That is, the module is a pre-processor for the
859             DOT language.
860              
861             Specifically, this module demonstrates how to use L's capabilities to have Marpa
862             repeatedly pass control back to code in your own module, during the parse, to handle those cases
863             where you don't want Marpa's default processing to occur.
864              
865             This allows the code to deal with the classic case of where you wish to preserve whitespace in some
866             contexts, but also want Marpa to discard whitespace in all other contexts.
867              
868             DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
869             to learn DOT.
870              
871             The DASH language is actually a cut-down version of the language used by L. For a full
872             explanation of the Graph::Easy language, see L.
873              
874             The wrapper is parsed into a tree of tokens managed by L.
875              
876             If requested by the user, the tree is passed to the default renderer
877             L. Various options allow the user to control the output, as
878             an SVG (PNG, ...) image, and to save the DOT version of the graph.
879              
880             In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
881             been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
882             current module.
883              
884             Note that this module's usage of Marpa's adverbs I and I should be regarded as an
885             intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
886             is the recommended technique.
887              
888             The article mentioned above discusses important issues regarding the timing sequence of I
889             and I.
890              
891             All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
892             is implemented. I'm currently (2014-10-10) using L V 2.096000.
893              
894             Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
895             L, which parses DOT files.
896              
897             =head1 Installation
898              
899             Install L as you would for any C module:
900              
901             Run:
902              
903             cpanm MarpaX::Demo::StringParser
904              
905             or run:
906              
907             sudo cpan MarpaX::Demo::StringParser
908              
909             or unpack the distro, and then either:
910              
911             perl Build.PL
912             ./Build
913             ./Build test
914             sudo ./Build install
915              
916             or:
917              
918             perl Makefile.PL
919             make (or dmake or nmake)
920             make test
921             make install
922              
923             =head1 Scripts Shipped with this Module
924              
925             All scripts are shipped in the scripts/ directory.
926              
927             =over 4
928              
929             =item o copy.config.pl
930              
931             This is for use by the author. It just copies the config file out of the distro, so the script
932             generate.index.pl (which uses HTML template stuff) can find it.
933              
934             =item o find.config.pl
935              
936             This cross-checks the output of copy.config.pl.
937              
938             =item o dash2svg.pl
939              
940             Converts all data/*.dash files into the corresponding html/*.svg files.
941              
942             Used by generate.demo.sh.
943              
944             =item o generate.demo.sh
945              
946             This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
947              
948             And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
949              
950             =item o generate.index.pl
951              
952             This constructs a web page containing all the html/*.svg files.
953              
954             =item o parse.pl
955              
956             This runs a parse on a single input file. Run 'parse.pl -h' for details.
957              
958             =item o parse.sh
959              
960             This simplifies running parse.pl.
961              
962             =item o pod2html.sh
963              
964             This converts all lib/*.pm files into their corresponding *.html versions, for proof-reading and
965             uploading to my real web site.
966              
967             =item o render.pl
968              
969             This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
970             for details.
971              
972             =item o render.sh
973              
974             This simplifies running render.pl.
975              
976             =back
977              
978             =head1 Constructor and Initialization
979              
980             C is called as C<< my($parser) = MarpaX::Demo::StringParser -> new(k1 => v1, k2 => v2, ...) >>.
981              
982             It returns a new object of type C.
983              
984             Key-value pairs accepted in the parameter list (see corresponding methods for details
985             [e.g. description($graph)]):
986              
987             =over 4
988              
989             =item o description => '[node.1]->[node.2]'
990              
991             Specify a string for the graph definition.
992              
993             You are strongly encouraged to surround this string with '...' to protect it from your shell if using
994             this module directly from the command line.
995              
996             See also the I key which reads the graph from a file.
997              
998             The I key takes precedence over the I key.
999              
1000             Default: ''.
1001              
1002             =item o input_file => $graph_file_name
1003              
1004             Read the graph definition from this file.
1005              
1006             See also the I key to read the graph from the command line.
1007              
1008             The whole file is slurped in as a single graph.
1009              
1010             The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1011              
1012             The I key takes precedence over the I key.
1013              
1014             Default: ''.
1015              
1016             =item o logger => $logger_object
1017              
1018             Specify a logger object.
1019              
1020             To disable logging, just set logger to the empty string.
1021              
1022             Default: An object of type L.
1023              
1024             =item o maxlevel => $level
1025              
1026             This option is only used if this module creates an object of type L.
1027              
1028             See L.
1029              
1030             Default: 'notice'. A typical choice is 'info' or 'debug'.
1031              
1032             =item o minlevel => $level
1033              
1034             This option is only used if this module creates an object of type L.
1035              
1036             See L.
1037              
1038             Default: 'error'.
1039              
1040             No lower levels are used.
1041              
1042             =back
1043              
1044             =head1 Methods
1045              
1046             =head2 clean_before($s)
1047              
1048             Cleans the input string before the next step in the parse process.
1049              
1050             Typically only ever called once.
1051              
1052             Returns the cleaned string.
1053              
1054             =head2 clean_after($s)
1055              
1056             Cleans the input string after each step in the parse process.
1057              
1058             Typically called many times, once on each output token.
1059              
1060             Returns the cleaned string.
1061              
1062             =head2 description([$graph])
1063              
1064             Here, the [] indicate an optional parameter.
1065              
1066             Gets or sets the graph string to be parsed.
1067              
1068             See also the L method.
1069              
1070             The value supplied to the description() method takes precedence over the value read from the input file.
1071              
1072             Also, I is an option to new().
1073              
1074             =head2 graph_text([$graph])
1075              
1076             Here, the [] indicate an optional parameter.
1077              
1078             Returns the value of the graph definition string, from either the command line or a file.
1079              
1080             =head2 input_file([$graph_file_name])
1081              
1082             Here, the [] indicate an optional parameter.
1083              
1084             Gets or sets the name of the file to read the graph definition from.
1085              
1086             See also the L method.
1087              
1088             The whole file is slurped in as a single graph.
1089              
1090             The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1091              
1092             The value supplied to the description() method takes precedence over the value read from the input file.
1093              
1094             Also, I is an option to new().
1095              
1096             =head2 log($level, $s)
1097              
1098             Calls $self -> logger -> log($level => $s) if ($self -> logger).
1099              
1100             =head2 run()
1101              
1102             This is the only method the caller needs to call. All parameters are supplied to new().
1103              
1104             Returns 0 for success and 1 for failure.
1105              
1106             =head2 recce()
1107              
1108             Returns an object of type L.
1109              
1110             =head2 tree()
1111              
1112             Returns an object of type L.
1113              
1114             =head1 DASH Syntax
1115              
1116             See L for sample
1117             input and output.
1118              
1119             The examples in the following sections are almost all taken from data/*.dash, in the distro.
1120              
1121             =head2 Graphs in DASH
1122              
1123             1: A graph definition may continue over multiple lines.
1124             2: Lines beginning with either '#' or '//' are discarded as comments.
1125             3: A node name or an edge name must never be split over multiple lines.
1126             4: Attributes may be split over lines, but do not split either the name or value of the
1127             attribute over multiple lines.
1128             Note: Attribute values can contain various escaped characters, e.g. \n.
1129             5: A graph may start or end with an edge, and even have contiguous edges.
1130             See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1131             possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1132             they will satisfy the requirements of Graphviz.
1133              
1134             Examples:
1135              
1136             1: A graph split over 10 lines:
1137             [node.1] {label: "n 1"}
1138             -> {label: 'e 1'}
1139             -> {label: e 2}
1140             [] {label: n 2}
1141             -> {label : e 3}
1142             [node.3] {label: "n 3"}
1143             -> {label: 'e 4'},
1144             -> {label: e 5}
1145             [] {label: n 2}
1146             -> {label : e 6}
1147             2: A graph split over 14 lines:
1148             ->
1149             ->
1150              
1151             [node]
1152             [node] ->
1153             -> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1154             [node.1] [node.2] [node.3]
1155              
1156             []
1157             [node.1]
1158             [node 1]
1159             ['node.2']
1160             ["node.3"]
1161             [ From here ] -> [ To there ]
1162              
1163             =head2 Nodes in DASH
1164              
1165             Node names:
1166              
1167             1: Are delimited by '[' and ']'.
1168             2: May be quoted with " or '.
1169             3: Allow escaped characters, using '\'.
1170             4: Allow internal spaces, even if not quoted.
1171             5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1172             This is called 'Daisy-chaining'.
1173              
1174             See L for the origin of this term.
1175              
1176             Examples:
1177              
1178             1: The anonymous node: []
1179             2: The anonymous node, with attributes (explained below): []{color:red}
1180             3: A named node: [Marpa]
1181             4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1182             5: A named node with an internal space: [Perl 6]
1183             6: A named node with attributes: [node.1]{label: A and B}
1184             7: A named node with spaces: [ node.1 ]
1185             These spaces are discarded.
1186             8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1187             The spaces around 'node.1' are discarded.
1188             The spaces around ' A Z ' are discarded.
1189             The spaces inside ' A Z ' are preserved (because of the quotes).
1190             Double-quotes act in the same way.
1191             9: A named node with attributes, with spaces:
1192             [ node.1 ] { label : Flight Path from Melbourne to London }
1193             Space preservation is as above.
1194             10: A named node with escaped characters: [\[node\]]
1195             The '[' and ']' chars are preserved.
1196             11: A named node with [] in name: [[ \]]
1197             However, since '[' and ']' delimit node names, you are I advised to escape such
1198             characters.
1199             12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1200             13: A complete graph:
1201             [node.1]
1202             -> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1203             -> {color: blue; label: B 2; penwidth: 3}
1204             -> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1205             [] {label: 'Some node'}
1206             -> [node.2]
1207              
1208             =head2 Edges in DASH
1209              
1210             Edge names:
1211              
1212             1: Are '->'
1213             This is part of a directed graph.
1214             2: Or '--'
1215             This is part of an undirected graph.
1216             3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1217             This is called 'Daisy-chaining'.
1218              
1219             See L for the origin of this term.
1220              
1221             It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1222             reject such input. In other words, directed and undirected graphs are mutually exclusive.
1223              
1224             So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1225             same for '->'.
1226              
1227             Examples:
1228              
1229             1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1230             2: Juxtaposed edges without any spacing and without attributes: ------
1231             3: Juxtaposed edges (without comma) with attributes:
1232             -- {color: cornflowerblue; label: Top row\nBottom row}
1233             -- {color:red; label: Edges use cornflowerblue and red}
1234             4: An edge with attributes, with some escaped characters:
1235             -> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1236              
1237             =head2 Attributes in DASH
1238              
1239             Attributes:
1240              
1241             1: Are delimited by '{' and '}'.
1242             2: Consist of a C and a C, separated by ':'.
1243             3: Are separated by ';'.
1244             4: The DOT language defines a set of escape characters acceptable in such a C.
1245             5: Allow quotes and whitespace as per node names.
1246             This must be true because the same non-Marpa parsers are used for both.
1247             6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1248             not HTML. See data/table.*.ge for examples.
1249              
1250             See L for details.
1251              
1252             Examples:
1253              
1254             1: -- {color: cornflowerblue; label: Top row\nBottom row}
1255             Note the use of '\n' in the value of the label.
1256              
1257             =head1 FAQ
1258              
1259             =head2 What is the grammar parsed by this module?
1260              
1261             See L just above.
1262              
1263             =head2 How is the parsed graph stored in RAM?
1264              
1265             Items are stored in a tree managed by L.
1266              
1267             The sample code in the L will display a tree:
1268              
1269             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1270              
1271             Output:
1272              
1273             root. Attributes: {uid => "0"}
1274             |---prolog. Attributes: {uid => "1"}
1275             |---graph. Attributes: {uid => "2"}
1276             |---node_id. Attributes: {uid => "3", value => "node.1"}
1277             | |---literal. Attributes: {uid => "4", value => "{"}
1278             | |---label. Attributes: {uid => "5", value => "A and B"}
1279             | |---literal. Attributes: {uid => "6", value => "}"}
1280             |---node_id. Attributes: {uid => "7", value => "node.2"}
1281             |---literal. Attributes: {uid => "8", value => "{"}
1282             |---label. Attributes: {uid => "9", value => "A or B"}
1283             |---literal. Attributes: {uid => "10", value => "}"}
1284             Parse result: 0 (0 is success)
1285              
1286             See also the next question.
1287              
1288             =head2 What is the structure of the tree of parsed tokens?
1289              
1290             From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1291             currently used. It is used by L.
1292              
1293             The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1294             L to convert the tree (i.e. the input file) into a DOT file
1295             and into an image.
1296              
1297             =head2 Does this module handle utf8?
1298              
1299             Yes. See the last sample on L.
1300              
1301             =head2 Why doesn't the parser handle my HTML-style labels?
1302              
1303             Traps for young players:
1304              
1305             =over 4
1306              
1307             =item o The
component must include the '/'
1308              
1309             =back
1310              
1311             =head2 Why do I get error messages like the following?
1312              
1313             Error: :1: syntax error near line 1
1314             context: digraph >>> Graph <<< {
1315              
1316             Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1317             So, don't do this:
1318              
1319             strict graph graph{...}
1320             strict graph Graph{...}
1321             strict graph strict{...}
1322             etc...
1323              
1324             Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1325              
1326             strict graph "graph"{...}
1327              
1328             Even better, use a more meaningful name for your graph...
1329              
1330             The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1331              
1332             See L in the discussion of the syntax of DOT
1333             for details.
1334              
1335             =head2 What is the homepage of Marpa?
1336              
1337             L.
1338              
1339             =head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1340              
1341             I've included in a recent article a section called
1342             L
1343             which is aimed at helping us think about this issue.
1344              
1345             =head2 How did you generate the html/*.svg files?
1346              
1347             With a private script which uses L V 2.00. This script is not shipped
1348             in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1349             Makefile.PL would complain about the missing dependency.
1350              
1351             See L for details.
1352              
1353             =head1 Machine-Readable Change Log
1354              
1355             The file Changes was converted into Changelog.ini by L.
1356              
1357             =head1 Version Numbers
1358              
1359             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1360              
1361             =head1 Repository
1362              
1363             L
1364              
1365             =head1 Support
1366              
1367             Email the author, or log a bug on RT:
1368              
1369             L.
1370              
1371             =head1 Author
1372              
1373             L was written by Ron Savage Iron@savage.net.auE> in 2013.
1374              
1375             Home page: L.
1376              
1377             =head1 Copyright
1378              
1379             Australian copyright (c) 2013, Ron Savage.
1380              
1381             All Programs of mine are 'OSI Certified Open Source Software';
1382             you can redistribute them and/or modify them under the terms of
1383             The Artistic License, a copy of which is available at:
1384             http://www.opensource.org/licenses/index.html
1385              
1386             =cut