File Coverage

blib/lib/MarpaX/Languages/Dash.pm
Criterion Covered Total %
statement 198 247 80.1
branch 50 74 67.5
condition 7 15 46.6
subroutine 27 29 93.1
pod 4 5 80.0
total 286 370 77.3


line stmt bran cond sub pod time code
1             package MarpaX::Languages::Dash;
2              
3 1     1   45463 use strict;
  1         1  
  1         22  
4 1     1   3 use warnings;
  1         1  
  1         21  
5 1     1   3 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  1         3  
  1         31  
6              
7 1     1   398 use File::Slurper 'read_lines';
  1         9563  
  1         44  
8              
9 1     1   675 use Log::Handler;
  1         28302  
  1         4  
10              
11 1     1   482 use Marpa::R2;
  1         78085  
  1         13  
12              
13 1     1   522 use Moo;
  1         7581  
  1         3  
14              
15 1     1   1524 use Set::Array;
  1         7498  
  1         9  
16              
17 1     1   537 use Text::CSV;
  1         7319  
  1         4  
18              
19 1     1   716 use Tree::DAG_Node;
  1         10585  
  1         11  
20              
21 1     1   537 use Types::Standard qw/Any ArrayRef HashRef Int Str/;
  1         44968  
  1         52  
22              
23 1     1   767 use Try::Tiny;
  1         1  
  1         2236  
24              
25             has bnf =>
26             (
27             default => sub{return ''},
28             is => 'rw',
29             isa => Any,
30             required => 0,
31             );
32              
33             has description =>
34             (
35             default => sub{return ''},
36             is => 'rw',
37             isa => Str,
38             required => 0,
39             );
40              
41             has grammar =>
42             (
43             default => sub{return ''},
44             is => 'rw',
45             isa => Any, # 'Marpa::R2::Scanless::G'.
46             required => 0,
47             );
48              
49             has graph_text =>
50             (
51             default => sub{return ''},
52             is => 'rw',
53             isa => Str,
54             required => 0,
55             );
56              
57             has input_file =>
58             (
59             default => sub{return ''},
60             is => 'rw',
61             isa => Str,
62             required => 0,
63             );
64              
65             has logger =>
66             (
67             default => sub{return undef},
68             is => 'rw',
69             isa => Any,
70             required => 0,
71             );
72              
73             has maxlevel =>
74             (
75             default => sub{return 'notice'},
76             is => 'rw',
77             isa => Str,
78             required => 0,
79             );
80              
81             has minlevel =>
82             (
83             default => sub{return 'error'},
84             is => 'rw',
85             isa => Str,
86             required => 0,
87             );
88              
89             has known_events =>
90             (
91             default => sub{return {} },
92             is => 'rw',
93             isa => HashRef,
94             required => 0,
95             );
96              
97             has recce =>
98             (
99             default => sub{return ''},
100             is => 'rw',
101             isa => Any, # 'Marpa::R2::Scanless::R'.
102             required => 0,
103             );
104              
105             has stack =>
106             (
107             default => sub{return []},
108             is => 'rw',
109             isa => ArrayRef,
110             required => 0,
111             );
112              
113             has tree =>
114             (
115             default => sub{return ''},
116             is => 'rw',
117             isa => Any,
118             required => 0,
119             );
120              
121             has uid =>
122             (
123             default => sub{return 0},
124             is => 'rw',
125             isa => Int,
126             required => 0,
127             );
128              
129             our $VERSION = '1.00';
130              
131             # --------------------------------------------------
132             # For accepted and rejected by Marpa, see
133             # Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
134              
135             sub BUILD
136             {
137 6     6 0 111 my($self) = @_;
138              
139 6 50       83 if (! defined $self -> logger)
140             {
141 6         66 $self -> logger(Log::Handler -> new);
142 6         577 $self -> logger -> add
143             (
144             screen =>
145             {
146             alias => 'logger',
147             maxlevel => $self -> maxlevel,
148             message_layout => '%m',
149             minlevel => $self -> minlevel,
150             }
151             );
152             }
153              
154             # Policy: Event names are always the same as the name of the corresponding lexeme.
155              
156             $self -> bnf
157             (
158             <<'END_OF_GRAMMAR'
159              
160             :default ::= action => [values]
161              
162             lexeme default = latm => 1 # Longest Acceptable Token Match.
163              
164             :start ::= graph_grammar
165              
166             graph_grammar ::= graph_definition
167              
168             # Graph stuff.
169              
170             graph_definition ::= node_definition
171             | edge_definition
172             # Node stuff
173              
174             node_definition ::= node_statement
175             | node_statement graph_definition
176              
177             node_statement ::= node_name_token
178             | node_name_token attribute_definition
179             | node_statement (',') node_statement
180              
181             node_name_token ::= start_node end_node # Allow for the anonymous node.
182             | start_node node_name end_node
183              
184             # Edge stuff
185              
186             edge_definition ::= edge_statement
187             | edge_statement graph_definition
188              
189             edge_statement ::= edge_name
190             | edge_name attribute_definition
191             | edge_statement (',') edge_statement
192              
193             edge_name ::= directed_edge
194             | undirected_edge
195              
196             # Attribute stuff.
197              
198             attribute_definition ::= attribute_statement+
199              
200             attribute_statement ::= start_attributes string_token_set end_attributes
201              
202             string_token_set ::= string_token_pair+
203              
204             string_token_pair ::= literal_label
205             | attribute_name (':') attribute_value
206              
207             # Lexemes in alphabetical order.
208              
209             :lexeme ~ attribute_name pause => before event => attribute_name
210              
211             attribute_name ~ string_char_set+
212              
213             :lexeme ~ attribute_value pause => before event => attribute_value
214              
215             attribute_value ~ string_char_set+
216              
217             :lexeme ~ directed_edge pause => before event => directed_edge priority => 2
218             directed_edge ~ '->'
219              
220             :lexeme ~ end_attributes pause => before event => end_attributes priority => 1
221             end_attributes ~ '}'
222              
223             :lexeme ~ end_node pause => before event => end_node priority => 1
224             end_node ~ ']'
225              
226             escaped_char ~ '\' [[:print:]]
227              
228             # Use ' here just for the UltraEdit syntax hiliter.
229              
230             :lexeme ~ literal_label pause => before event => literal_label priority => 1
231             literal_label ~ 'label'
232              
233             :lexeme ~ node_name pause => before event => node_name
234              
235             node_name ~ string_char_set+
236              
237             :lexeme ~ start_attributes pause => before event => start_attributes
238             start_attributes ~ '{'
239              
240             :lexeme ~ start_node pause => before event => start_node
241             start_node ~ '['
242              
243             string_char_set ~ escaped_char
244             | [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
245              
246             :lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
247             undirected_edge ~ '--'
248              
249             # Boilerplate.
250              
251             :discard ~ whitespace
252             whitespace ~ [\s]+
253              
254             END_OF_GRAMMAR
255 6         4053 );
256              
257 6         184 $self -> grammar
258             (
259             Marpa::R2::Scanless::G -> new
260             ({
261             source => \$self -> bnf
262             })
263             );
264              
265 6         318188 $self -> recce
266             (
267             Marpa::R2::Scanless::R -> new
268             ({
269             grammar => $self -> grammar,
270             })
271             );
272              
273 6         1389 my(%event);
274              
275 6         118 for my $line (split(/\n/, $self -> bnf) )
276             {
277 564 100       880 $event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
278             }
279              
280 6         127 $self -> known_events(\%event);
281              
282             # Since $self -> tree has not been initialized yet,
283             # we can't call our _add_daughter() until after this statement.
284              
285 6         162 $self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
286 6         566 $self -> stack([$self -> tree -> root]);
287              
288             # This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
289             # So, all tokens in the input are descended from the 'graph' node.
290              
291 6         256 for my $name (qw/prolog graph/)
292             {
293 12         355 $self -> _add_daughter($name, {});
294             }
295              
296             # The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
297             # It is used in GraphViz2::Marpa;
298             # The 'graph' daughter gets pushed onto the stack because in this module's grammar,
299             # all items belong to the graph.
300              
301 6         288 my(@daughters) = $self -> tree -> daughters;
302 6         60 my($index) = 1; # 0 => prolog, 1 => graph.
303 6         78 my($stack) = $self -> stack;
304              
305 6         25 push @$stack, $daughters[$index];
306              
307 6         76 $self -> stack($stack);
308              
309             } # End of BUILD.
310              
311             # ------------------------------------------------
312              
313             sub _add_daughter
314             {
315 62     62   122 my($self, $name, $attributes) = @_;
316 62         855 $$attributes{uid} = $self -> uid($self -> uid + 1);
317 62         1222 my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
318 62         2368 my($stack) = $self -> stack;
319              
320 62         335 $$stack[$#$stack] -> add_daughter($node);
321              
322             } # End of _add_daughter.
323              
324             # --------------------------------------------------
325              
326             sub clean_after
327             {
328 42     42 1 43 my($self, $s) = @_;
329              
330 42         70 $s =~ s/^\s+//;
331 42         69 $s =~ s/\s+$//;
332 42         50 $s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
333              
334 42         65 return $s;
335              
336             } # End of clean_after.
337              
338             # --------------------------------------------------
339              
340             sub clean_before
341             {
342 6     6 1 34 my($self, $s) = @_;
343              
344 6         61 $s =~ s/\s*;\s*$//;
345 6         11 $s =~ s/^\s+//;
346 6         28 $s =~ s/\s+$//;
347 6         15 $s =~ s/^(<)\s+/$1/;
348 6         15 $s =~ s/\s+(>)$/$1/;
349              
350 6         10 return $s;
351              
352             } # End of clean_before.
353              
354             # --------------------------------------------------
355              
356             sub log
357             {
358 92     92 1 3960 my($self, $level, $s) = @_;
359              
360 92 50       1246 $self -> logger -> log($level => $s) if ($self -> logger);
361              
362             } # End of log.
363              
364             # --------------------------------------------------
365              
366             sub _process
367             {
368 6     6   6 my($self) = @_;
369 6         95 my($string) = $self -> clean_before($self -> graph_text);
370 6         13 my($length) = length $string;
371 6         11 my($last_event) = '';
372 6         8 my($format) = '%-20s %5s %5s %5s %-s';
373              
374 6         47 $self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme') );
375              
376             # We use read()/lexeme_read()/resume() because we pause at each lexeme.
377              
378 6         235 my($event_name);
379             my(@fields);
380 0         0 my($lexeme, $literal);
381 0         0 my($span, $start);
382              
383 6         82 for
384             (
385             my $pos = $self -> recce -> read(\$string);
386             $pos < $length;
387             $pos = $self -> recce -> resume($pos)
388             )
389             {
390 74         10340 $event_name = $self -> _validate_event;
391 74         865 ($start, $span) = $self -> recce -> pause_span;
392 74         1228 $pos = $self -> recce -> lexeme_read($event_name);
393 74         2244 $literal = substr($string, $start, $pos - $start);
394 74         1013 $lexeme = $self -> recce -> literal($start, $span);
395              
396 74         728 $self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme) );
397              
398 74 100       2094 if ($event_name eq 'attribute_name')
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    0          
399             {
400 11         21 $fields[0] = $self -> clean_after($literal);
401             }
402             elsif ($event_name eq 'attribute_value')
403             {
404 11         19 $literal = $self -> clean_after($literal);
405              
406 11         31 $self -> _add_daughter($fields[0], {value => $literal});
407              
408 11         512 @fields = ();
409              
410             # Skip the separator.
411              
412 11   100     75 while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
  18         73  
413             }
414             elsif ($event_name eq 'directed_edge')
415             {
416 5         10 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
417             }
418             elsif ($event_name eq 'end_attributes')
419             {
420 9         17 $self -> _process_brace($literal);
421             }
422             elsif ($event_name eq 'end_node')
423             {
424             # Is this the anonymous node?
425              
426 7 100       16 if ($last_event eq 'start_node')
427             {
428 1         4 $self -> _add_daughter('node_id', {value => ''});
429             }
430             }
431             elsif ($event_name eq 'literal_label')
432             {
433 9         13 push @fields, $literal;
434              
435 9         113 $pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
436 9         17 @fields = ();
437             }
438             elsif ($event_name eq 'node_name')
439             {
440 6         12 $literal = $self -> clean_after($literal);
441              
442 6         48 $self -> _add_daughter('node_id', {value => $literal});
443             }
444             elsif ($event_name eq 'start_attributes')
445             {
446 9         18 $self -> _process_brace($literal);
447             }
448             elsif ($event_name eq 'start_node')
449             {
450             # Do nothing.
451             }
452             elsif ($event_name eq 'undirected_edge')
453             {
454 0         0 $self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
455             }
456              
457 74         2060 $last_event = $event_name;
458             }
459              
460 6 50       163 if ($self -> recce -> ambiguity_metric > 1)
461             {
462 0         0 $self -> log(notice => 'Ambiguous parse');
463             }
464              
465 6 50       420 if (my $ambiguous_status = $self -> recce -> ambiguous)
466             {
467 0         0 $self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
468             }
469              
470             # Return a defined value for success and undef for failure.
471              
472 6         169 return $self -> recce -> value;
473              
474             } # End of _process.
475              
476             # --------------------------------------------------
477              
478             sub _process_brace
479             {
480 18     18   18 my($self, $name) = @_;
481              
482             # When a '{' is encountered, the last thing pushed becomes it's parent.
483             # Likewise, if '}' is encountered, we pop the stack.
484              
485 18         234 my($stack) = $self -> stack;
486              
487 18 100       71 if ($name eq '{')
488             {
489 9         26 my(@daughters) = $$stack[$#$stack] -> daughters;
490              
491 9         49 push @$stack, $daughters[$#daughters];
492              
493 9         21 $self -> _process_token('literal', $name);
494             }
495             else
496             {
497 9         20 $self -> _process_token('literal', $name);
498              
499 9         414 pop @$stack;
500              
501 9         149 $self -> stack($stack);
502             }
503              
504             } # End of _process_brace.
505              
506             # ------------------------------------------------
507              
508             sub _process_html
509             {
510 1     1   3 my($self, $recce, $fields, $string, $length, $pos) = @_;
511              
512 1         2 my($bracket_count) = 0;
513 1         2 my($open_bracket) = '<';
514 1         2 my($close_bracket) = '>';
515 1         1 my($previous_char) = '';
516 1         2 my($label) = '';
517              
518 1         1 my($char);
519              
520 1         3 while ($pos < $length)
521             {
522 68         40 $char = substr($string, $pos, 1);
523 68         43 $label .= $char;
524              
525 68 50       112 if ($previous_char eq '\\')
    100          
    100          
526             {
527             }
528             elsif ($char eq $open_bracket)
529             {
530 7         7 $bracket_count++;
531             }
532             elsif ($char eq $close_bracket)
533             {
534 7         4 $bracket_count--;
535              
536 7 100       10 if ($bracket_count == 0)
537             {
538 1         1 $pos++;
539              
540 1         1 last;
541             }
542             }
543              
544 67         47 $previous_char = $char;
545              
546 67         72 $pos++;
547             }
548              
549 1         3 $label = $self -> clean_after($label);
550              
551 1 50 33     12 if ( ($label =~ /^$/) )
552             {
553 0         0 my($line, $column) = $recce -> line_column;
554              
555 0         0 die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
556             }
557              
558 1         3 push @$fields, $label;
559              
560 1         3 return $self -> _skip_separator($string, $length, $pos, ';');
561              
562             } # End of _process_html.
563              
564             # ------------------------------------------------
565              
566             sub _process_label
567             {
568 9     9   44 my($self, $recce, $fields, $string, $length, $pos) = @_;
569              
570 9         21 $pos = $self -> _skip_separator($string, $length, $pos, ':');
571              
572 9 50       17 return $pos if ($pos >= $length);
573              
574 9         19 my($char) = substr($string, $pos, 1);
575              
576 9 50       29 if ($char eq "'")
    50          
    100          
577             {
578 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
579             }
580             elsif ($char eq '"')
581             {
582 0         0 $pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
583             }
584             elsif ($char eq '<')
585             {
586 1         3 $pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
587             }
588             else
589             {
590 8         18 $pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
591             }
592              
593 9         24 for (my $i = 0; $i < $#$fields; $i += 2)
594             {
595 9         27 $self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
596             }
597              
598 9         429 return $pos;
599              
600             } # End of _process_label.
601              
602             # ------------------------------------------------
603              
604             sub _process_quotes
605             {
606 0     0   0 my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
607              
608 0         0 my($previous_char) = '';
609 0         0 my($label) = '';
610 0         0 my($quote_count) = 0;
611              
612 0         0 my($char);
613              
614 0         0 while ($pos < $length)
615             {
616 0         0 $char = substr($string, $pos, 1);
617              
618 0 0 0     0 if ( ($previous_char ne '\\') && ($char eq $terminator) )
619             {
620 0         0 $quote_count++;
621              
622 0 0       0 if ($quote_count == 2)
623             {
624 0         0 $label .= $char;
625              
626 0         0 $pos++;
627              
628 0         0 last;
629             }
630             }
631              
632 0         0 $label .= $char;
633 0         0 $previous_char = $char;
634              
635 0         0 $pos++;
636             }
637              
638             # Don't call clean_after, since it removes the ' and " we are about to check.
639              
640 0         0 $label =~ s/^\s+//;
641 0         0 $label =~ s/\s+$//;
642              
643 0 0 0     0 if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
644             {
645             # Use ' and " here just for the UltraEdit syntax hiliter.
646              
647 0         0 my($line, $column) = $recce -> line_column;
648              
649 0         0 die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
650             }
651              
652 0         0 $label = $self -> clean_after($label);
653              
654 0         0 push @$fields, $label;
655              
656 0         0 $self -> log(debug => "_process_quotes(). Label !$label!");
657              
658 0         0 return $self -> _skip_separator($string, $length, $pos, ';');
659              
660             } # End of _process_quotes.
661              
662             # --------------------------------------------------
663              
664             sub _process_token
665             {
666 18     18   18 my($self, $name, $value) = @_;
667              
668 18         43 $self -> _add_daughter($name, {value => $value});
669              
670             } # End of _process_token.
671              
672             # ------------------------------------------------
673              
674             sub _process_unquoted
675             {
676 8     8   11 my($self, $recce, $fields, $string, $length, $pos) = @_;
677 8         17 my($re) = qr/[;}]/;
678              
679 8 50       31 if (substr($string, $pos, 1) =~ $re)
680             {
681 0         0 push @$fields, '';
682              
683 0         0 return $pos;
684             }
685              
686 8         9 my($previous_char) = '';
687 8         11 my($label) = '';
688 8         4 my($quote_count) = 0;
689              
690 8         6 my($char);
691              
692 8         13 while ($pos < $length)
693             {
694 99         78 $char = substr($string, $pos, 1);
695              
696 99 100 100     312 last if ( ($previous_char ne '\\') && ($char =~ $re) );
697              
698 91         55 $label .= $char;
699 91         65 $previous_char = $char;
700              
701 91         101 $pos++;
702             }
703              
704 8         13 $label = $self -> clean_after($label);
705              
706 8         13 push @$fields, $label;
707              
708 8         12 return $self -> _skip_separator($string, $length, $pos, ';');
709              
710             } # End of _process_unquoted.
711              
712             # --------------------------------------------------
713              
714             sub run
715             {
716 6     6 1 6860 my($self) = @_;
717              
718 6 50       152 if ($self -> description)
    50          
719             {
720             # Assume graph is a single line without comments.
721              
722 0         0 $self -> graph_text($self -> description);
723             }
724             elsif ($self -> input_file)
725             {
726             # Quick removal of whole-line C++ and hash comments.
727              
728 6         229 $self -> graph_text(join(' ', grep{! m!^(?:#|//)!} read_lines($self -> input_file, 'UTF-8') ) );
  19         824  
729             }
730             else
731             {
732 0         0 die "Error: You must provide a graph using one of -input_file or -description\n";
733             }
734              
735             # Return 0 for success and 1 for failure.
736              
737 6         146 my($result) = 0;
738              
739             try
740             {
741 6 50   6   264 if (defined (my $value = $self -> _process) )
742             {
743 6         17093 $self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
  6         123  
744             }
745             else
746             {
747 0         0 $result = 1;
748              
749 0         0 $self -> log(error => 'Parse failed');
750             }
751             }
752             catch
753             {
754 0     0   0 $result = 1;
755              
756 0         0 $self -> log(error => "Parse failed. Error: $_");
757 6         75 };
758              
759 6         1064 $self -> log(info => "Parse result: $result (0 is success)");
760              
761             # Return 0 for success and 1 for failure.
762              
763 6         503 return $result;
764              
765             } # End of run.
766              
767             # ------------------------------------------------
768              
769             sub _skip_separator
770             {
771 18     18   24 my($self, $string, $length, $pos, $separator) = @_;
772 18         205 my($re) = qr/[\s$separator]/;
773              
774 18         15 my($char);
775              
776 18         39 while ($pos < $length - 1)
777             {
778 42         59 $char = substr($string, $pos, 1);
779              
780 42 100       134 last if ($char !~ $re);
781              
782 26         41 $pos++;
783             }
784              
785 18         46 return $pos;
786              
787             } # End of _skip_separator.
788              
789             # ------------------------------------------------
790              
791             sub _validate_event
792             {
793 74     74   63 my($self) = @_;
794 74         55 my(@event) = @{$self -> recce -> events};
  74         1127  
795 74         432 my($event_count) = scalar @event;
796              
797 74 50       142 if ($event_count > 1)
798             {
799 0         0 $self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
  0         0  
  0         0  
800              
801 0         0 die "The code only handles 1 event at a time\n";
802             }
803              
804 74         60 my($event_name) = ${$event[0]}[0];
  74         102  
805              
806 74 50       57 if (! ${$self -> known_events}{$event_name})
  74         922  
807             {
808 0         0 my($msg) = "Unexpected event name '$event_name'";
809              
810 0         0 $self -> log(error => $msg);
811              
812 0         0 die "$msg\n";
813             }
814              
815 74         346 return $event_name;
816              
817             } # End of _validate_event.
818              
819             # --------------------------------------------------
820              
821             1;
822              
823             =pod
824              
825             =head1 NAME
826              
827             L - A Marpa-based parser for the DASH language
828              
829             =head1 Synopsis
830              
831             Typical usage:
832              
833             perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
834             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
835              
836             You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
837              
838             scripts/parse.sh node.04 -max info
839              
840             See L for sample
841             input and output.
842              
843             Also, see L
844             based on this module.
845              
846             =head1 Description
847              
848             This module implements a parser for L (below), a wrapper language around Graphviz's
849             L. That is, the module is a pre-processor for the
850             DOT language.
851              
852             Specifically, this module demonstrates how to use L's capabilities to have Marpa
853             repeatedly pass control back to code in your own module, during the parse, to handle those cases
854             where you don't want Marpa's default processing to occur.
855              
856             This allows the code to deal with the classic case of where you wish to preserve whitespace in some
857             contexts, but also want Marpa to discard whitespace in all other contexts.
858              
859             DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
860             to learn DOT.
861              
862             The DASH language is actually a cut-down version of the language used by L. For a full
863             explanation of the Graph::Easy language, see L.
864              
865             The wrapper is parsed into a tree of tokens managed by L.
866              
867             If requested by the user, the tree is passed to the default renderer
868             L. Various options allow the user to control the output, as
869             an SVG (PNG, ...) image, and to save the DOT version of the graph.
870              
871             In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
872             been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
873             current module.
874              
875             Note that this module's usage of Marpa's adverbs I and I should be regarded as an
876             intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
877             is the recommended technique.
878              
879             The article mentioned above discusses important issues regarding the timing sequence of I
880             and I.
881              
882             All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
883             is implemented. I'm currently (2014-10-10) using L V 2.096000.
884              
885             Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
886             L, which parses DOT files.
887              
888             =head1 Installation
889              
890             Install L as you would for any C module:
891              
892             Run:
893              
894             cpanm MarpaX::Languages::Dash
895              
896             or run:
897              
898             sudo cpan MarpaX::Languages::Dash
899              
900             or unpack the distro, and then either:
901              
902             perl Build.PL
903             ./Build
904             ./Build test
905             sudo ./Build install
906              
907             or:
908              
909             perl Makefile.PL
910             make (or dmake or nmake)
911             make test
912             make install
913              
914             =head1 Scripts Shipped with this Module
915              
916             All scripts are shipped in the scripts/ directory.
917              
918             =over 4
919              
920             =item o copy.config.pl
921              
922             This is for use by the author. It just copies the config file out of the distro, so the script
923             generate.index.pl (which uses HTML template stuff) can find it.
924              
925             =item o find.config.pl
926              
927             This cross-checks the output of copy.config.pl.
928              
929             =item o dash2svg.pl
930              
931             Converts all data/*.dash files into the corresponding html/*.svg files.
932              
933             Used by generate.demo.sh.
934              
935             =item o generate.demo.sh
936              
937             This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
938              
939             And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
940              
941             =item o generate.index.pl
942              
943             This constructs a web page containing all the html/*.svg files.
944              
945             =item o parse.pl
946              
947             This runs a parse on a single input file. Run 'parse.pl -h' for details.
948              
949             =item o parse.sh
950              
951             This simplifies running parse.pl.
952              
953             =item o pod2html.sh
954              
955             This converts all lib/*.pm files into their corresponding *.html versions, for proof-reading and
956             uploading to my real web site.
957              
958             =item o render.pl
959              
960             This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
961             for details.
962              
963             =item o render.sh
964              
965             This simplifies running render.pl.
966              
967             =back
968              
969             =head1 Constructor and Initialization
970              
971             C is called as C<< my($parser) = MarpaX::Languages::Dash -> new(k1 => v1, k2 => v2, ...) >>.
972              
973             It returns a new object of type C.
974              
975             Key-value pairs accepted in the parameter list (see corresponding methods for details
976             [e.g. description($graph)]):
977              
978             =over 4
979              
980             =item o description => '[node.1]->[node.2]'
981              
982             Specify a string for the graph definition.
983              
984             You are strongly encouraged to surround this string with '...' to protect it from your shell if using
985             this module directly from the command line.
986              
987             See also the I key which reads the graph from a file.
988              
989             The I key takes precedence over the I key.
990              
991             Default: ''.
992              
993             =item o input_file => $graph_file_name
994              
995             Read the graph definition from this file.
996              
997             See also the I key to read the graph from the command line.
998              
999             The whole file is slurped in as a single graph.
1000              
1001             The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1002              
1003             The I key takes precedence over the I key.
1004              
1005             Default: ''.
1006              
1007             =item o logger => $logger_object
1008              
1009             Specify a logger object.
1010              
1011             To disable logging, just set logger to the empty string.
1012              
1013             Default: An object of type L.
1014              
1015             =item o maxlevel => $level
1016              
1017             This option is only used if this module creates an object of type L.
1018              
1019             See L.
1020              
1021             Default: 'notice'. A typical choice is 'info' or 'debug'.
1022              
1023             =item o minlevel => $level
1024              
1025             This option is only used if this module creates an object of type L.
1026              
1027             See L.
1028              
1029             Default: 'error'.
1030              
1031             No lower levels are used.
1032              
1033             =back
1034              
1035             =head1 Methods
1036              
1037             =head2 clean_before($s)
1038              
1039             Cleans the input string before the next step in the parse process.
1040              
1041             Typically only ever called once.
1042              
1043             Returns the cleaned string.
1044              
1045             =head2 clean_after($s)
1046              
1047             Cleans the input string after each step in the parse process.
1048              
1049             Typically called many times, once on each output token.
1050              
1051             Returns the cleaned string.
1052              
1053             =head2 description([$graph])
1054              
1055             Here, the [] indicate an optional parameter.
1056              
1057             Gets or sets the graph string to be parsed.
1058              
1059             See also the L method.
1060              
1061             The value supplied to the description() method takes precedence over the value read from the input file.
1062              
1063             Also, I is an option to new().
1064              
1065             =head2 graph_text([$graph])
1066              
1067             Here, the [] indicate an optional parameter.
1068              
1069             Returns the value of the graph definition string, from either the command line or a file.
1070              
1071             =head2 input_file([$graph_file_name])
1072              
1073             Here, the [] indicate an optional parameter.
1074              
1075             Gets or sets the name of the file to read the graph definition from.
1076              
1077             See also the L method.
1078              
1079             The whole file is slurped in as a single graph.
1080              
1081             The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1082              
1083             The value supplied to the description() method takes precedence over the value read from the input file.
1084              
1085             Also, I is an option to new().
1086              
1087             =head2 log($level, $s)
1088              
1089             Calls $self -> logger -> log($level => $s) if ($self -> logger).
1090              
1091             =head2 run()
1092              
1093             This is the only method the caller needs to call. All parameters are supplied to new().
1094              
1095             Returns 0 for success and 1 for failure.
1096              
1097             =head2 recce()
1098              
1099             Returns an object of type L.
1100              
1101             =head2 tree()
1102              
1103             Returns an object of type L.
1104              
1105             =head1 DASH Syntax
1106              
1107             See L for sample
1108             input and output.
1109              
1110             The examples in the following sections are almost all taken from data/*.dash, in the distro.
1111              
1112             =head2 Graphs in DASH
1113              
1114             1: A graph definition may continue over multiple lines.
1115             2: Lines beginning with either '#' or '//' are discarded as comments.
1116             3: A node name or an edge name must never be split over multiple lines.
1117             4: Attributes may be split over lines, but do not split either the name or value of the
1118             attribute over multiple lines.
1119             Note: Attribute values can contain various escaped characters, e.g. \n.
1120             5: A graph may start or end with an edge, and even have contiguous edges.
1121             See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1122             possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1123             they will satisfy the requirements of Graphviz.
1124              
1125             Examples:
1126              
1127             1: A graph split over 10 lines:
1128             [node.1] {label: "n 1"}
1129             -> {label: 'e 1'}
1130             -> {label: e 2}
1131             [] {label: n 2}
1132             -> {label : e 3}
1133             [node.3] {label: "n 3"}
1134             -> {label: 'e 4'},
1135             -> {label: e 5}
1136             [] {label: n 2}
1137             -> {label : e 6}
1138             2: A graph split over 14 lines:
1139             ->
1140             ->
1141              
1142             [node]
1143             [node] ->
1144             -> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1145             [node.1] [node.2] [node.3]
1146              
1147             []
1148             [node.1]
1149             [node 1]
1150             ['node.2']
1151             ["node.3"]
1152             [ From here ] -> [ To there ]
1153              
1154             =head2 Nodes in DASH
1155              
1156             Node names:
1157              
1158             1: Are delimited by '[' and ']'.
1159             2: May be quoted with " or '.
1160             3: Allow escaped characters, using '\'.
1161             4: Allow internal spaces, even if not quoted.
1162             5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1163             This is called 'Daisy-chaining'.
1164              
1165             See L for the origin of this term.
1166              
1167             Examples:
1168              
1169             1: The anonymous node: []
1170             2: The anonymous node, with attributes (explained below): []{color:red}
1171             3: A named node: [Marpa]
1172             4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1173             5: A named node with an internal space: [Perl 6]
1174             6: A named node with attributes: [node.1]{label: A and B}
1175             7: A named node with spaces: [ node.1 ]
1176             These spaces are discarded.
1177             8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1178             The spaces around 'node.1' are discarded.
1179             The spaces around ' A Z ' are discarded.
1180             The spaces inside ' A Z ' are preserved (because of the quotes).
1181             Double-quotes act in the same way.
1182             9: A named node with attributes, with spaces:
1183             [ node.1 ] { label : Flight Path from Melbourne to London }
1184             Space preservation is as above.
1185             10: A named node with escaped characters: [\[node\]]
1186             The '[' and ']' chars are preserved.
1187             11: A named node with [] in name: [[ \]]
1188             However, since '[' and ']' delimit node names, you are I advised to escape such
1189             characters.
1190             12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1191             13: A complete graph:
1192             [node.1]
1193             -> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1194             -> {color: blue; label: B 2; penwidth: 3}
1195             -> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1196             [] {label: 'Some node'}
1197             -> [node.2]
1198              
1199             =head2 Edges in DASH
1200              
1201             Edge names:
1202              
1203             1: Are '->'
1204             This is part of a directed graph.
1205             2: Or '--'
1206             This is part of an undirected graph.
1207             3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1208             This is called 'Daisy-chaining'.
1209              
1210             See L for the origin of this term.
1211              
1212             It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1213             reject such input. In other words, directed and undirected graphs are mutually exclusive.
1214              
1215             So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1216             same for '->'.
1217              
1218             Examples:
1219              
1220             1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1221             2: Juxtaposed edges without any spacing and without attributes: ------
1222             3: Juxtaposed edges (without comma) with attributes:
1223             -- {color: cornflowerblue; label: Top row\nBottom row}
1224             -- {color:red; label: Edges use cornflowerblue and red}
1225             4: An edge with attributes, with some escaped characters:
1226             -> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1227              
1228             =head2 Attributes in DASH
1229              
1230             Attributes:
1231              
1232             1: Are delimited by '{' and '}'.
1233             2: Consist of a C and a C, separated by ':'.
1234             3: Are separated by ';'.
1235             4: The DOT language defines a set of escape characters acceptable in such a C.
1236             5: Allow quotes and whitespace as per node names.
1237             This must be true because the same non-Marpa parsers are used for both.
1238             6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1239             not HTML. See data/table.*.ge for examples.
1240              
1241             See L for details.
1242              
1243             Examples:
1244              
1245             1: -- {color: cornflowerblue; label: Top row\nBottom row}
1246             Note the use of '\n' in the value of the label.
1247              
1248             =head1 FAQ
1249              
1250             =head2 What is the grammar parsed by this module?
1251              
1252             See L just above.
1253              
1254             =head2 How is the parsed graph stored in RAM?
1255              
1256             Items are stored in a tree managed by L.
1257              
1258             The sample code in the L will display a tree:
1259              
1260             perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1261              
1262             Output:
1263              
1264             root. Attributes: {uid => "0"}
1265             |---prolog. Attributes: {uid => "1"}
1266             |---graph. Attributes: {uid => "2"}
1267             |---node_id. Attributes: {uid => "3", value => "node.1"}
1268             | |---literal. Attributes: {uid => "4", value => "{"}
1269             | |---label. Attributes: {uid => "5", value => "A and B"}
1270             | |---literal. Attributes: {uid => "6", value => "}"}
1271             |---node_id. Attributes: {uid => "7", value => "node.2"}
1272             |---literal. Attributes: {uid => "8", value => "{"}
1273             |---label. Attributes: {uid => "9", value => "A or B"}
1274             |---literal. Attributes: {uid => "10", value => "}"}
1275             Parse result: 0 (0 is success)
1276              
1277             See also the next question.
1278              
1279             =head2 What is the structure of the tree of parsed tokens?
1280              
1281             From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1282             currently used. It is used by L.
1283              
1284             The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1285             L to convert the tree (i.e. the input file) into a DOT file
1286             and into an image.
1287              
1288             =head2 Does this module handle utf8?
1289              
1290             Yes. See the last sample on L.
1291              
1292             =head2 Why doesn't the parser handle my HTML-style labels?
1293              
1294             Traps for young players:
1295              
1296             =over 4
1297              
1298             =item o The
component must include the '/'
1299              
1300             =back
1301              
1302             =head2 Why do I get error messages like the following?
1303              
1304             Error: :1: syntax error near line 1
1305             context: digraph >>> Graph <<< {
1306              
1307             Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1308             So, don't do this:
1309              
1310             strict graph graph{...}
1311             strict graph Graph{...}
1312             strict graph strict{...}
1313             etc...
1314              
1315             Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1316              
1317             strict graph "graph"{...}
1318              
1319             Even better, use a more meaningful name for your graph...
1320              
1321             The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1322              
1323             See L in the discussion of the syntax of DOT
1324             for details.
1325              
1326             =head2 What is the homepage of Marpa?
1327              
1328             L.
1329              
1330             =head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1331              
1332             I've included in a recent article a section called
1333             L
1334             which is aimed at helping us think about this issue.
1335              
1336             =head2 How did you generate the html/*.svg files?
1337              
1338             With a private script which uses L V 2.00. This script is not shipped
1339             in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1340             Makefile.PL would complain about the missing dependency.
1341              
1342             See L for details.
1343              
1344             =head1 Machine-Readable Change Log
1345              
1346             The file Changes was converted into Changelog.ini by L.
1347              
1348             =head1 Version Numbers
1349              
1350             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1351              
1352             =head1 Repository
1353              
1354             L
1355              
1356             =head1 Support
1357              
1358             Email the author, or log a bug on RT:
1359              
1360             L.
1361              
1362             =head1 Author
1363              
1364             L was written by Ron Savage Iron@savage.net.auE> in 2013.
1365              
1366             Marpa's homepage: .
1367              
1368             My homepage: L.
1369              
1370             =head1 Copyright
1371              
1372             Australian copyright (c) 2013, Ron Savage.
1373              
1374             All Programs of mine are 'OSI Certified Open Source Software';
1375             you can redistribute them and/or modify them under the terms of
1376             The Artistic License, a copy of which is available at:
1377             http://www.opensource.org/licenses/index.html
1378              
1379             =cut