File Coverage

blib/lib/MarpaX/Languages/Dash.pm

Criterion	Covered	Total	%
statement	198	247	80.1
branch	50	74	67.5
condition	7	15	46.6
subroutine	27	29	93.1
pod	4	5	80.0
total	286	370	77.3

line	stmt	bran	cond	sub	pod	time	code
1							package MarpaX::Languages::Dash;
2
3	1			1		45463	use strict;
	1					1
	1					22
4	1			1		3	use warnings;
	1					1
	1					21
5	1			1		3	use warnings qw(FATAL utf8); # Fatalize encoding glitches.
	1					3
	1					31
6
7	1			1		398	use File::Slurper 'read_lines';
	1					9563
	1					44
8
9	1			1		675	use Log::Handler;
	1					28302
	1					4
10
11	1			1		482	use Marpa::R2;
	1					78085
	1					13
12
13	1			1		522	use Moo;
	1					7581
	1					3
14
15	1			1		1524	use Set::Array;
	1					7498
	1					9
16
17	1			1		537	use Text::CSV;
	1					7319
	1					4
18
19	1			1		716	use Tree::DAG_Node;
	1					10585
	1					11
20
21	1			1		537	use Types::Standard qw/Any ArrayRef HashRef Int Str/;
	1					44968
	1					52
22
23	1			1		767	use Try::Tiny;
	1					1
	1					2236
24
25							has bnf =>
26							(
27							default => sub{return ''},
28							is => 'rw',
29							isa => Any,
30							required => 0,
31							);
32
33							has description =>
34							(
35							default => sub{return ''},
36							is => 'rw',
37							isa => Str,
38							required => 0,
39							);
40
41							has grammar =>
42							(
43							default => sub{return ''},
44							is => 'rw',
45							isa => Any, # 'Marpa::R2::Scanless::G'.
46							required => 0,
47							);
48
49							has graph_text =>
50							(
51							default => sub{return ''},
52							is => 'rw',
53							isa => Str,
54							required => 0,
55							);
56
57							has input_file =>
58							(
59							default => sub{return ''},
60							is => 'rw',
61							isa => Str,
62							required => 0,
63							);
64
65							has logger =>
66							(
67							default => sub{return undef},
68							is => 'rw',
69							isa => Any,
70							required => 0,
71							);
72
73							has maxlevel =>
74							(
75							default => sub{return 'notice'},
76							is => 'rw',
77							isa => Str,
78							required => 0,
79							);
80
81							has minlevel =>
82							(
83							default => sub{return 'error'},
84							is => 'rw',
85							isa => Str,
86							required => 0,
87							);
88
89							has known_events =>
90							(
91							default => sub{return {} },
92							is => 'rw',
93							isa => HashRef,
94							required => 0,
95							);
96
97							has recce =>
98							(
99							default => sub{return ''},
100							is => 'rw',
101							isa => Any, # 'Marpa::R2::Scanless::R'.
102							required => 0,
103							);
104
105							has stack =>
106							(
107							default => sub{return []},
108							is => 'rw',
109							isa => ArrayRef,
110							required => 0,
111							);
112
113							has tree =>
114							(
115							default => sub{return ''},
116							is => 'rw',
117							isa => Any,
118							required => 0,
119							);
120
121							has uid =>
122							(
123							default => sub{return 0},
124							is => 'rw',
125							isa => Int,
126							required => 0,
127							);
128
129							our $VERSION = '1.00';
130
131							# --------------------------------------------------
132							# For accepted and rejected by Marpa, see
133							# Marpa-R2-2.094000/lib/Marpa/R2/meta/metag.bnf.
134
135							sub BUILD
136							{
137	6			6	0	111	my($self) = @_;
138
139	6	50				83	if (! defined $self -> logger)
140							{
141	6					66	$self -> logger(Log::Handler -> new);
142	6					577	$self -> logger -> add
143							(
144							screen =>
145							{
146							alias => 'logger',
147							maxlevel => $self -> maxlevel,
148							message_layout => '%m',
149							minlevel => $self -> minlevel,
150							}
151							);
152							}
153
154							# Policy: Event names are always the same as the name of the corresponding lexeme.
155
156							$self -> bnf
157							(
158							<<'END_OF_GRAMMAR'
159
160							:default ::= action => [values]
161
162							lexeme default = latm => 1 # Longest Acceptable Token Match.
163
164							:start ::= graph_grammar
165
166							graph_grammar ::= graph_definition
167
168							# Graph stuff.
169
170							graph_definition ::= node_definition
171							\| edge_definition
172							# Node stuff
173
174							node_definition ::= node_statement
175							\| node_statement graph_definition
176
177							node_statement ::= node_name_token
178							\| node_name_token attribute_definition
179							\| node_statement (',') node_statement
180
181							node_name_token ::= start_node end_node # Allow for the anonymous node.
182							\| start_node node_name end_node
183
184							# Edge stuff
185
186							edge_definition ::= edge_statement
187							\| edge_statement graph_definition
188
189							edge_statement ::= edge_name
190							\| edge_name attribute_definition
191							\| edge_statement (',') edge_statement
192
193							edge_name ::= directed_edge
194							\| undirected_edge
195
196							# Attribute stuff.
197
198							attribute_definition ::= attribute_statement+
199
200							attribute_statement ::= start_attributes string_token_set end_attributes
201
202							string_token_set ::= string_token_pair+
203
204							string_token_pair ::= literal_label
205							\| attribute_name (':') attribute_value
206
207							# Lexemes in alphabetical order.
208
209							:lexeme ~ attribute_name pause => before event => attribute_name
210
211							attribute_name ~ string_char_set+
212
213							:lexeme ~ attribute_value pause => before event => attribute_value
214
215							attribute_value ~ string_char_set+
216
217							:lexeme ~ directed_edge pause => before event => directed_edge priority => 2
218							directed_edge ~ '->'
219
220							:lexeme ~ end_attributes pause => before event => end_attributes priority => 1
221							end_attributes ~ '}'
222
223							:lexeme ~ end_node pause => before event => end_node priority => 1
224							end_node ~ ']'
225
226							escaped_char ~ '\' [[:print:]]
227
228							# Use ' here just for the UltraEdit syntax hiliter.
229
230							:lexeme ~ literal_label pause => before event => literal_label priority => 1
231							literal_label ~ 'label'
232
233							:lexeme ~ node_name pause => before event => node_name
234
235							node_name ~ string_char_set+
236
237							:lexeme ~ start_attributes pause => before event => start_attributes
238							start_attributes ~ '{'
239
240							:lexeme ~ start_node pause => before event => start_node
241							start_node ~ '['
242
243							string_char_set ~ escaped_char
244							\| [^;:}\]] # Neither a separator [;:] nor a terminator [}\]].
245
246							:lexeme ~ undirected_edge pause => before event => undirected_edge priority => 2
247							undirected_edge ~ '--'
248
249							# Boilerplate.
250
251							:discard ~ whitespace
252							whitespace ~ [\s]+
253
254							END_OF_GRAMMAR
255	6					4053	);
256
257	6					184	$self -> grammar
258							(
259							Marpa::R2::Scanless::G -> new
260							({
261							source => \$self -> bnf
262							})
263							);
264
265	6					318188	$self -> recce
266							(
267							Marpa::R2::Scanless::R -> new
268							({
269							grammar => $self -> grammar,
270							})
271							);
272
273	6					1389	my(%event);
274
275	6					118	for my $line (split(/\n/, $self -> bnf) )
276							{
277	564	100				880	$event{$1} = 1 if ($line =~ /event\s+=>\s+(\w+)/);
278							}
279
280	6					127	$self -> known_events(\%event);
281
282							# Since $self -> tree has not been initialized yet,
283							# we can't call our _add_daughter() until after this statement.
284
285	6					162	$self -> tree(Tree::DAG_Node -> new({name => 'root', attributes => {uid => 0} }));
286	6					566	$self -> stack([$self -> tree -> root]);
287
288							# This cut-down version of Graph::Easy::Marpa has no prolog (unlike Graph::Marpa).
289							# So, all tokens in the input are descended from the 'graph' node.
290
291	6					256	for my $name (qw/prolog graph/)
292							{
293	12					355	$self -> _add_daughter($name, {});
294							}
295
296							# The 'prolog' daughter is the parent of all items in the prolog, but is not used here.
297							# It is used in GraphViz2::Marpa;
298							# The 'graph' daughter gets pushed onto the stack because in this module's grammar,
299							# all items belong to the graph.
300
301	6					288	my(@daughters) = $self -> tree -> daughters;
302	6					60	my($index) = 1; # 0 => prolog, 1 => graph.
303	6					78	my($stack) = $self -> stack;
304
305	6					25	push @$stack, $daughters[$index];
306
307	6					76	$self -> stack($stack);
308
309							} # End of BUILD.
310
311							# ------------------------------------------------
312
313							sub _add_daughter
314							{
315	62			62		122	my($self, $name, $attributes) = @_;
316	62					855	$$attributes{uid} = $self -> uid($self -> uid + 1);
317	62					1222	my($node) = Tree::DAG_Node -> new({name => $name, attributes => $attributes});
318	62					2368	my($stack) = $self -> stack;
319
320	62					335	$$stack[$#$stack] -> add_daughter($node);
321
322							} # End of _add_daughter.
323
324							# --------------------------------------------------
325
326							sub clean_after
327							{
328	42			42	1	43	my($self, $s) = @_;
329
330	42					70	$s =~ s/^\s+//;
331	42					69	$s =~ s/\s+$//;
332	42					50	$s =~ s/^([\"\'])(.*)\1$/$2/; # The backslashes are just for the UltraEdit syntax hiliter.
333
334	42					65	return $s;
335
336							} # End of clean_after.
337
338							# --------------------------------------------------
339
340							sub clean_before
341							{
342	6			6	1	34	my($self, $s) = @_;
343
344	6					61	$s =~ s/\s;\s$//;
345	6					11	$s =~ s/^\s+//;
346	6					28	$s =~ s/\s+$//;
347	6					15	$s =~ s/^(<)\s+/$1/;
348	6					15	$s =~ s/\s+(>)$/$1/;
349
350	6					10	return $s;
351
352							} # End of clean_before.
353
354							# --------------------------------------------------
355
356							sub log
357							{
358	92			92	1	3960	my($self, $level, $s) = @_;
359
360	92	50				1246	$self -> logger -> log($level => $s) if ($self -> logger);
361
362							} # End of log.
363
364							# --------------------------------------------------
365
366							sub _process
367							{
368	6			6		6	my($self) = @_;
369	6					95	my($string) = $self -> clean_before($self -> graph_text);
370	6					13	my($length) = length $string;
371	6					11	my($last_event) = '';
372	6					8	my($format) = '%-20s %5s %5s %5s %-s';
373
374	6					47	$self -> log(debug => sprintf($format, 'Event', 'Start', 'Span', 'Pos', 'Lexeme') );
375
376							# We use read()/lexeme_read()/resume() because we pause at each lexeme.
377
378	6					235	my($event_name);
379							my(@fields);
380	0					0	my($lexeme, $literal);
381	0					0	my($span, $start);
382
383	6					82	for
384							(
385							my $pos = $self -> recce -> read(\$string);
386							$pos < $length;
387							$pos = $self -> recce -> resume($pos)
388							)
389							{
390	74					10340	$event_name = $self -> _validate_event;
391	74					865	($start, $span) = $self -> recce -> pause_span;
392	74					1228	$pos = $self -> recce -> lexeme_read($event_name);
393	74					2244	$literal = substr($string, $start, $pos - $start);
394	74					1013	$lexeme = $self -> recce -> literal($start, $span);
395
396	74					728	$self -> log(debug => sprintf($format, $event_name, $start, $span, $pos, $lexeme) );
397
398	74	100				2094	if ($event_name eq 'attribute_name')
		100
		100
		100
		100
		100
		100
		100
		50
		0
399							{
400	11					21	$fields[0] = $self -> clean_after($literal);
401							}
402							elsif ($event_name eq 'attribute_value')
403							{
404	11					19	$literal = $self -> clean_after($literal);
405
406	11					31	$self -> _add_daughter($fields[0], {value => $literal});
407
408	11					512	@fields = ();
409
410							# Skip the separator.
411
412	11		100			75	while ( ($pos < (length($string) - 1) ) && (substr($string, $pos, 1) =~ /[\s;]/) ) { $pos++ };
	18					73
413							}
414							elsif ($event_name eq 'directed_edge')
415							{
416	5					10	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
417							}
418							elsif ($event_name eq 'end_attributes')
419							{
420	9					17	$self -> _process_brace($literal);
421							}
422							elsif ($event_name eq 'end_node')
423							{
424							# Is this the anonymous node?
425
426	7	100				16	if ($last_event eq 'start_node')
427							{
428	1					4	$self -> _add_daughter('node_id', {value => ''});
429							}
430							}
431							elsif ($event_name eq 'literal_label')
432							{
433	9					13	push @fields, $literal;
434
435	9					113	$pos = $self -> _process_label($self -> recce, \@fields, $string, $length, $pos);
436	9					17	@fields = ();
437							}
438							elsif ($event_name eq 'node_name')
439							{
440	6					12	$literal = $self -> clean_after($literal);
441
442	6					48	$self -> _add_daughter('node_id', {value => $literal});
443							}
444							elsif ($event_name eq 'start_attributes')
445							{
446	9					18	$self -> _process_brace($literal);
447							}
448							elsif ($event_name eq 'start_node')
449							{
450							# Do nothing.
451							}
452							elsif ($event_name eq 'undirected_edge')
453							{
454	0					0	$self -> _add_daughter('edge_id', {value => $self -> clean_after($literal)});
455							}
456
457	74					2060	$last_event = $event_name;
458							}
459
460	6	50				163	if ($self -> recce -> ambiguity_metric > 1)
461							{
462	0					0	$self -> log(notice => 'Ambiguous parse');
463							}
464
465	6	50				420	if (my $ambiguous_status = $self -> recce -> ambiguous)
466							{
467	0					0	$self -> log(notice => "Parse is ambiguous: $ambiguous_status.");
468							}
469
470							# Return a defined value for success and undef for failure.
471
472	6					169	return $self -> recce -> value;
473
474							} # End of _process.
475
476							# --------------------------------------------------
477
478							sub _process_brace
479							{
480	18			18		18	my($self, $name) = @_;
481
482							# When a '{' is encountered, the last thing pushed becomes it's parent.
483							# Likewise, if '}' is encountered, we pop the stack.
484
485	18					234	my($stack) = $self -> stack;
486
487	18	100				71	if ($name eq '{')
488							{
489	9					26	my(@daughters) = $$stack[$#$stack] -> daughters;
490
491	9					49	push @$stack, $daughters[$#daughters];
492
493	9					21	$self -> _process_token('literal', $name);
494							}
495							else
496							{
497	9					20	$self -> _process_token('literal', $name);
498
499	9					414	pop @$stack;
500
501	9					149	$self -> stack($stack);
502							}
503
504							} # End of _process_brace.
505
506							# ------------------------------------------------
507
508							sub _process_html
509							{
510	1			1		3	my($self, $recce, $fields, $string, $length, $pos) = @_;
511
512	1					2	my($bracket_count) = 0;
513	1					2	my($open_bracket) = '<';
514	1					2	my($close_bracket) = '>';
515	1					1	my($previous_char) = '';
516	1					2	my($label) = '';
517
518	1					1	my($char);
519
520	1					3	while ($pos < $length)
521							{
522	68					40	$char = substr($string, $pos, 1);
523	68					43	$label .= $char;
524
525	68	50				112	if ($previous_char eq '\\')
		100
		100
526							{
527							}
528							elsif ($char eq $open_bracket)
529							{
530	7					7	$bracket_count++;
531							}
532							elsif ($char eq $close_bracket)
533							{
534	7					4	$bracket_count--;
535
536	7	100				10	if ($bracket_count == 0)
537							{
538	1					1	$pos++;
539
540	1					1	last;
541							}
542							}
543
544	67					47	$previous_char = $char;
545
546	67					72	$pos++;
547							}
548
549	1					3	$label = $self -> clean_after($label);
550
551	1	50	33			12	if ( ($label =~ /^$/) )
552							{
553	0					0	my($line, $column) = $recce -> line_column;
554
555	0					0	die "Mismatched <> in HTML !$label! at (line, column) = ($line, $column)\n";
556							}
557
558	1					3	push @$fields, $label;
559
560	1					3	return $self -> _skip_separator($string, $length, $pos, ';');
561
562							} # End of _process_html.
563
564							# ------------------------------------------------
565
566							sub _process_label
567							{
568	9			9		44	my($self, $recce, $fields, $string, $length, $pos) = @_;
569
570	9					21	$pos = $self -> _skip_separator($string, $length, $pos, ':');
571
572	9	50				17	return $pos if ($pos >= $length);
573
574	9					19	my($char) = substr($string, $pos, 1);
575
576	9	50				29	if ($char eq "'")
		50
		100
577							{
578	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, "'");
579							}
580							elsif ($char eq '"')
581							{
582	0					0	$pos = $self -> _process_quotes($recce, $fields, $string, $length, $pos, '"');
583							}
584							elsif ($char eq '<')
585							{
586	1					3	$pos = $self -> _process_html($recce, $fields, $string, $length, $pos);
587							}
588							else
589							{
590	8					18	$pos = $self -> _process_unquoted($recce, $fields, $string, $length, $pos);
591							}
592
593	9					24	for (my $i = 0; $i < $#$fields; $i += 2)
594							{
595	9					27	$self -> _add_daughter($$fields[$i], {value => $$fields[$i + 1]});
596							}
597
598	9					429	return $pos;
599
600							} # End of _process_label.
601
602							# ------------------------------------------------
603
604							sub _process_quotes
605							{
606	0			0		0	my($self, $recce, $fields, $string, $length, $pos, $terminator) = @_;
607
608	0					0	my($previous_char) = '';
609	0					0	my($label) = '';
610	0					0	my($quote_count) = 0;
611
612	0					0	my($char);
613
614	0					0	while ($pos < $length)
615							{
616	0					0	$char = substr($string, $pos, 1);
617
618	0	0	0			0	if ( ($previous_char ne '\\') && ($char eq $terminator) )
619							{
620	0					0	$quote_count++;
621
622	0	0				0	if ($quote_count == 2)
623							{
624	0					0	$label .= $char;
625
626	0					0	$pos++;
627
628	0					0	last;
629							}
630							}
631
632	0					0	$label .= $char;
633	0					0	$previous_char = $char;
634
635	0					0	$pos++;
636							}
637
638							# Don't call clean_after, since it removes the ' and " we are about to check.
639
640	0					0	$label =~ s/^\s+//;
641	0					0	$label =~ s/\s+$//;
642
643	0	0	0			0	if ( ($label =~ /^['"]/) && ($label !~ /^(['"]).*\1$/) )
644							{
645							# Use ' and " here just for the UltraEdit syntax hiliter.
646
647	0					0	my($line, $column) = $recce -> line_column;
648
649	0					0	die "Mismatched quotes in label !$label! at (line, column) = ($line, $column)\n";
650							}
651
652	0					0	$label = $self -> clean_after($label);
653
654	0					0	push @$fields, $label;
655
656	0					0	$self -> log(debug => "_process_quotes(). Label !$label!");
657
658	0					0	return $self -> _skip_separator($string, $length, $pos, ';');
659
660							} # End of _process_quotes.
661
662							# --------------------------------------------------
663
664							sub _process_token
665							{
666	18			18		18	my($self, $name, $value) = @_;
667
668	18					43	$self -> _add_daughter($name, {value => $value});
669
670							} # End of _process_token.
671
672							# ------------------------------------------------
673
674							sub _process_unquoted
675							{
676	8			8		11	my($self, $recce, $fields, $string, $length, $pos) = @_;
677	8					17	my($re) = qr/[;}]/;
678
679	8	50				31	if (substr($string, $pos, 1) =~ $re)
680							{
681	0					0	push @$fields, '';
682
683	0					0	return $pos;
684							}
685
686	8					9	my($previous_char) = '';
687	8					11	my($label) = '';
688	8					4	my($quote_count) = 0;
689
690	8					6	my($char);
691
692	8					13	while ($pos < $length)
693							{
694	99					78	$char = substr($string, $pos, 1);
695
696	99	100	100			312	last if ( ($previous_char ne '\\') && ($char =~ $re) );
697
698	91					55	$label .= $char;
699	91					65	$previous_char = $char;
700
701	91					101	$pos++;
702							}
703
704	8					13	$label = $self -> clean_after($label);
705
706	8					13	push @$fields, $label;
707
708	8					12	return $self -> _skip_separator($string, $length, $pos, ';');
709
710							} # End of _process_unquoted.
711
712							# --------------------------------------------------
713
714							sub run
715							{
716	6			6	1	6860	my($self) = @_;
717
718	6	50				152	if ($self -> description)
		50
719							{
720							# Assume graph is a single line without comments.
721
722	0					0	$self -> graph_text($self -> description);
723							}
724							elsif ($self -> input_file)
725							{
726							# Quick removal of whole-line C++ and hash comments.
727
728	6					229	$self -> graph_text(join(' ', grep{! m!^(?:#\|//)!} read_lines($self -> input_file, 'UTF-8') ) );
	19					824
729							}
730							else
731							{
732	0					0	die "Error: You must provide a graph using one of -input_file or -description\n";
733							}
734
735							# Return 0 for success and 1 for failure.
736
737	6					146	my($result) = 0;
738
739							try
740							{
741	6	50		6		264	if (defined (my $value = $self -> _process) )
742							{
743	6					17093	$self -> log(info => join("\n", @{$self -> tree -> tree2string}) );
	6					123
744							}
745							else
746							{
747	0					0	$result = 1;
748
749	0					0	$self -> log(error => 'Parse failed');
750							}
751							}
752							catch
753							{
754	0			0		0	$result = 1;
755
756	0					0	$self -> log(error => "Parse failed. Error: $_");
757	6					75	};
758
759	6					1064	$self -> log(info => "Parse result: $result (0 is success)");
760
761							# Return 0 for success and 1 for failure.
762
763	6					503	return $result;
764
765							} # End of run.
766
767							# ------------------------------------------------
768
769							sub _skip_separator
770							{
771	18			18		24	my($self, $string, $length, $pos, $separator) = @_;
772	18					205	my($re) = qr/[\s$separator]/;
773
774	18					15	my($char);
775
776	18					39	while ($pos < $length - 1)
777							{
778	42					59	$char = substr($string, $pos, 1);
779
780	42	100				134	last if ($char !~ $re);
781
782	26					41	$pos++;
783							}
784
785	18					46	return $pos;
786
787							} # End of _skip_separator.
788
789							# ------------------------------------------------
790
791							sub _validate_event
792							{
793	74			74		63	my($self) = @_;
794	74					55	my(@event) = @{$self -> recce -> events};
	74					1127
795	74					432	my($event_count) = scalar @event;
796
797	74	50				142	if ($event_count > 1)
798							{
799	0					0	$self -> log(error => "Events triggered: $event_count (should be 1). Names: " . join(', ', map{${$_}[0]} @event) . '.');
	0					0
	0					0
800
801	0					0	die "The code only handles 1 event at a time\n";
802							}
803
804	74					60	my($event_name) = ${$event[0]}[0];
	74					102
805
806	74	50				57	if (! ${$self -> known_events}{$event_name})
	74					922
807							{
808	0					0	my($msg) = "Unexpected event name '$event_name'";
809
810	0					0	$self -> log(error => $msg);
811
812	0					0	die "$msg\n";
813							}
814
815	74					346	return $event_name;
816
817							} # End of _validate_event.
818
819							# --------------------------------------------------
820
821							1;
822
823							=pod
824
825							=head1 NAME
826
827							L - A Marpa-based parser for the DASH language
828
829							=head1 Synopsis
830
831							Typical usage:
832
833							perl -Ilib scripts/parse.pl -de '[node]{color:blue; label: "Node name"}' -max info
834							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
835
836							You can use scripts/parse.sh to simplify this process, but it assumes you're input file is in data/:
837
838							scripts/parse.sh node.04 -max info
839
840							See L for sample
841							input and output.
842
843							Also, see L
844							based on this module.
845
846							=head1 Description
847
848							This module implements a parser for L (below), a wrapper language around Graphviz's
849							L. That is, the module is a pre-processor for the
850							DOT language.
851
852							Specifically, this module demonstrates how to use L's capabilities to have Marpa
853							repeatedly pass control back to code in your own module, during the parse, to handle those cases
854							where you don't want Marpa's default processing to occur.
855
856							This allows the code to deal with the classic case of where you wish to preserve whitespace in some
857							contexts, but also want Marpa to discard whitespace in all other contexts.
858
859							DASH is easier to use than DOT, which means the user can specify graphs very simply, without having
860							to learn DOT.
861
862							The DASH language is actually a cut-down version of the language used by L. For a full
863							explanation of the Graph::Easy language, see L.
864
865							The wrapper is parsed into a tree of tokens managed by L.
866
867							If requested by the user, the tree is passed to the default renderer
868							L. Various options allow the user to control the output, as
869							an SVG (PNG, ...) image, and to save the DOT version of the graph.
870
871							In the past, the code in this module was part of Graph::Easy::Marpa, but that latter module has
872							been deleted from CPAN, and all it's new code and features, together with bug fixes, is in the
873							current module.
874
875							Note that this module's usage of Marpa's adverbs I and I should be regarded as an
876							intermediate/advanced technique. For people just beginning to use Marpa, use of the I adverb
877							is the recommended technique.
878
879							The article mentioned above discusses important issues regarding the timing sequence of I
880							and I.
881
882							All this assumes a relatively recent version of Marpa, one in which its Scanless interface (SLIF)
883							is implemented. I'm currently (2014-10-10) using L V 2.096000.
884
885							Lastly, the parser and renderer will be incorporated into the next major release (V 2.00) of
886							L, which parses DOT files.
887
888							=head1 Installation
889
890							Install L as you would for any C module:
891
892							Run:
893
894							cpanm MarpaX::Languages::Dash
895
896							or run:
897
898							sudo cpan MarpaX::Languages::Dash
899
900							or unpack the distro, and then either:
901
902							perl Build.PL
903							./Build
904							./Build test
905							sudo ./Build install
906
907							or:
908
909							perl Makefile.PL
910							make (or dmake or nmake)
911							make test
912							make install
913
914							=head1 Scripts Shipped with this Module
915
916							All scripts are shipped in the scripts/ directory.
917
918							=over 4
919
920							=item o copy.config.pl
921
922							This is for use by the author. It just copies the config file out of the distro, so the script
923							generate.index.pl (which uses HTML template stuff) can find it.
924
925							=item o find.config.pl
926
927							This cross-checks the output of copy.config.pl.
928
929							=item o dash2svg.pl
930
931							Converts all data/.dash files into the corresponding html/.svg files.
932
933							Used by generate.demo.sh.
934
935							=item o generate.demo.sh
936
937							This generates all the SVG files for the data/*.dash files, and then generates html/index.html.
938
939							And then it copies the demo output to my dev web server's doc root, where I can cross-check it.
940
941							=item o generate.index.pl
942
943							This constructs a web page containing all the html/*.svg files.
944
945							=item o parse.pl
946
947							This runs a parse on a single input file. Run 'parse.pl -h' for details.
948
949							=item o parse.sh
950
951							This simplifies running parse.pl.
952
953							=item o pod2html.sh
954
955							This converts all lib/.pm files into their corresponding .html versions, for proof-reading and
956							uploading to my real web site.
957
958							=item o render.pl
959
960							This runs a parse on a single input file, and coverts the output into an SVG file. Run 'render.pl -h'
961							for details.
962
963							=item o render.sh
964
965							This simplifies running render.pl.
966
967							=back
968
969							=head1 Constructor and Initialization
970
971							C is called as C<< my($parser) = MarpaX::Languages::Dash -> new(k1 => v1, k2 => v2, ...) >>.
972
973							It returns a new object of type C.
974
975							Key-value pairs accepted in the parameter list (see corresponding methods for details
976							[e.g. description($graph)]):
977
978							=over 4
979
980							=item o description => '[node.1]->[node.2]'
981
982							Specify a string for the graph definition.
983
984							You are strongly encouraged to surround this string with '...' to protect it from your shell if using
985							this module directly from the command line.
986
987							See also the I key which reads the graph from a file.
988
989							The I key takes precedence over the I key.
990
991							Default: ''.
992
993							=item o input_file => $graph_file_name
994
995							Read the graph definition from this file.
996
997							See also the I key to read the graph from the command line.
998
999							The whole file is slurped in as a single graph.
1000
1001							The first lines of the file can start with /^\s*#/, and will be discarded as comments.
1002
1003							The I key takes precedence over the I key.
1004
1005							Default: ''.
1006
1007							=item o logger => $logger_object
1008
1009							Specify a logger object.
1010
1011							To disable logging, just set logger to the empty string.
1012
1013							Default: An object of type L.
1014
1015							=item o maxlevel => $level
1016
1017							This option is only used if this module creates an object of type L.
1018
1019							See L.
1020
1021							Default: 'notice'. A typical choice is 'info' or 'debug'.
1022
1023							=item o minlevel => $level
1024
1025							This option is only used if this module creates an object of type L.
1026
1027							See L.
1028
1029							Default: 'error'.
1030
1031							No lower levels are used.
1032
1033							=back
1034
1035							=head1 Methods
1036
1037							=head2 clean_before($s)
1038
1039							Cleans the input string before the next step in the parse process.
1040
1041							Typically only ever called once.
1042
1043							Returns the cleaned string.
1044
1045							=head2 clean_after($s)
1046
1047							Cleans the input string after each step in the parse process.
1048
1049							Typically called many times, once on each output token.
1050
1051							Returns the cleaned string.
1052
1053							=head2 description([$graph])
1054
1055							Here, the [] indicate an optional parameter.
1056
1057							Gets or sets the graph string to be parsed.
1058
1059							See also the L method.
1060
1061							The value supplied to the description() method takes precedence over the value read from the input file.
1062
1063							Also, I is an option to new().
1064
1065							=head2 graph_text([$graph])
1066
1067							Here, the [] indicate an optional parameter.
1068
1069							Returns the value of the graph definition string, from either the command line or a file.
1070
1071							=head2 input_file([$graph_file_name])
1072
1073							Here, the [] indicate an optional parameter.
1074
1075							Gets or sets the name of the file to read the graph definition from.
1076
1077							See also the L method.
1078
1079							The whole file is slurped in as a single graph.
1080
1081							The first few lines of the file can start with /^\s*#/, and will be discarded as comments.
1082
1083							The value supplied to the description() method takes precedence over the value read from the input file.
1084
1085							Also, I is an option to new().
1086
1087							=head2 log($level, $s)
1088
1089							Calls $self -> logger -> log($level => $s) if ($self -> logger).
1090
1091							=head2 run()
1092
1093							This is the only method the caller needs to call. All parameters are supplied to new().
1094
1095							Returns 0 for success and 1 for failure.
1096
1097							=head2 recce()
1098
1099							Returns an object of type L.
1100
1101							=head2 tree()
1102
1103							Returns an object of type L.
1104
1105							=head1 DASH Syntax
1106
1107							See L for sample
1108							input and output.
1109
1110							The examples in the following sections are almost all taken from data/*.dash, in the distro.
1111
1112							=head2 Graphs in DASH
1113
1114							1: A graph definition may continue over multiple lines.
1115							2: Lines beginning with either '#' or '//' are discarded as comments.
1116							3: A node name or an edge name must never be split over multiple lines.
1117							4: Attributes may be split over lines, but do not split either the name or value of the
1118							attribute over multiple lines.
1119							Note: Attribute values can contain various escaped characters, e.g. \n.
1120							5: A graph may start or end with an edge, and even have contiguous edges.
1121							See data/edge.06.dash (or the demo page). Graphviz does not allow any of these
1122							possibilities, so the default renderer fabricates anonymous nodes and inserts them where
1123							they will satisfy the requirements of Graphviz.
1124
1125							Examples:
1126
1127							1: A graph split over 10 lines:
1128							[node.1] {label: "n 1"}
1129							-> {label: 'e 1'}
1130							-> {label: e 2}
1131							[] {label: n 2}
1132							-> {label : e 3}
1133							[node.3] {label: "n 3"}
1134							-> {label: 'e 4'},
1135							-> {label: e 5}
1136							[] {label: n 2}
1137							-> {label : e 6}
1138							2: A graph split over 14 lines:
1139							->
1140							->
1141
1142							[node]
1143							[node] ->
1144							-> {label: Start} -> {color: red} [node.1] {color: green} -> [node.2]
1145							[node.1] [node.2] [node.3]
1146
1147							[]
1148							[node.1]
1149							[node 1]
1150							['node.2']
1151							["node.3"]
1152							[ From here ] -> [ To there ]
1153
1154							=head2 Nodes in DASH
1155
1156							Node names:
1157
1158							1: Are delimited by '[' and ']'.
1159							2: May be quoted with " or '.
1160							3: Allow escaped characters, using '\'.
1161							4: Allow internal spaces, even if not quoted.
1162							5: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1163							This is called 'Daisy-chaining'.
1164
1165							See L for the origin of this term.
1166
1167							Examples:
1168
1169							1: The anonymous node: []
1170							2: The anonymous node, with attributes (explained below): []{color:red}
1171							3: A named node: [Marpa]
1172							4: Juxtaposed nodes: [Perl][Marpa] or [Perl] [Marpa] or [Perl], [Marpa]
1173							5: A named node with an internal space: [Perl 6]
1174							6: A named node with attributes: [node.1]{label: A and B}
1175							7: A named node with spaces: [ node.1 ]
1176							These spaces are discarded.
1177							8: A named node with attributes, with spaces: [ node.1 ] { label : ' A Z ' }
1178							The spaces around 'node.1' are discarded.
1179							The spaces around ' A Z ' are discarded.
1180							The spaces inside ' A Z ' are preserved (because of the quotes).
1181							Double-quotes act in the same way.
1182							9: A named node with attributes, with spaces:
1183							[ node.1 ] { label : Flight Path from Melbourne to London }
1184							Space preservation is as above.
1185							10: A named node with escaped characters: [\[node\]]
1186							The '[' and ']' chars are preserved.
1187							11: A named node with [] in name: [[ \]]
1188							However, since '[' and ']' delimit node names, you are I advised to escape such
1189							characters.
1190							12: A named node with quotes, spaces, and escaped chars: [" a \' b \" c"]
1191							13: A complete graph:
1192							[node.1]
1193							-> {arrowhead: odot; arrowtail: ediamond; color: green; dir: both; label: A 1; penwidth: 1}
1194							-> {color: blue; label: B 2; penwidth: 3}
1195							-> {arrowhead: box; arrowtail: invdot; color: maroon; dir: both; label: C 3; penwidth: 5}
1196							[] {label: 'Some node'}
1197							-> [node.2]
1198
1199							=head2 Edges in DASH
1200
1201							Edge names:
1202
1203							1: Are '->'
1204							This is part of a directed graph.
1205							2: Or '--'
1206							This is part of an undirected graph.
1207							3: May be separated with nothing (juxtaposed), with whitespace, or with ','.
1208							This is called 'Daisy-chaining'.
1209
1210							See L for the origin of this term.
1211
1212							It makes no sense to combine '->' and '--' in a single graph, because Graphviz will automatically
1213							reject such input. In other words, directed and undirected graphs are mutually exclusive.
1214
1215							So, if any edge in your graph is undirected (you use '--'), then every edge must use '--' and the
1216							same for '->'.
1217
1218							Examples:
1219
1220							1: An edge with attributes: -> {color:cornflowerblue; label: This edge's color is blueish ;}
1221							2: Juxtaposed edges without any spacing and without attributes: ------
1222							3: Juxtaposed edges (without comma) with attributes:
1223							-- {color: cornflowerblue; label: Top row\nBottom row}
1224							-- {color:red; label: Edges use cornflowerblue and red}
1225							4: An edge with attributes, with some escaped characters:
1226							-> {color:cornflowerblue; label: Use various escaped chars (\' \" \< \>) in label}
1227
1228							=head2 Attributes in DASH
1229
1230							Attributes:
1231
1232							1: Are delimited by '{' and '}'.
1233							2: Consist of a C and a C, separated by ':'.
1234							3: Are separated by ';'.
1235							4: The DOT language defines a set of escape characters acceptable in such a C.
1236							5: Allow quotes and whitespace as per node names.
1237							This must be true because the same non-Marpa parsers are used for both.
1238							6: Attribute values can be HTML-like. See the Graphviz docs for why we say 'HTML-like' and
1239							not HTML. See data/table.*.ge for examples.
1240
1241							See L for details.
1242
1243							Examples:
1244
1245							1: -- {color: cornflowerblue; label: Top row\nBottom row}
1246							Note the use of '\n' in the value of the label.
1247
1248							=head1 FAQ
1249
1250							=head2 What is the grammar parsed by this module?
1251
1252							See L just above.
1253
1254							=head2 How is the parsed graph stored in RAM?
1255
1256							Items are stored in a tree managed by L.
1257
1258							The sample code in the L will display a tree:
1259
1260							perl -Ilib scripts/parse.pl -i data/node.04.dash -max info
1261
1262							Output:
1263
1264							root. Attributes: {uid => "0"}
1265							\|---prolog. Attributes: {uid => "1"}
1266							\|---graph. Attributes: {uid => "2"}
1267							\|---node_id. Attributes: {uid => "3", value => "node.1"}
1268							\| \|---literal. Attributes: {uid => "4", value => "{"}
1269							\| \|---label. Attributes: {uid => "5", value => "A and B"}
1270							\| \|---literal. Attributes: {uid => "6", value => "}"}
1271							\|---node_id. Attributes: {uid => "7", value => "node.2"}
1272							\|---literal. Attributes: {uid => "8", value => "{"}
1273							\|---label. Attributes: {uid => "9", value => "A or B"}
1274							\|---literal. Attributes: {uid => "10", value => "}"}
1275							Parse result: 0 (0 is success)
1276
1277							See also the next question.
1278
1279							=head2 What is the structure of the tree of parsed tokens?
1280
1281							From the previous answer, you can see the root has 2 daughters, with the 'prolog' daughter not
1282							currently used. It is used by L.
1283
1284							The 'graph' daughter (sub-tree) is what's processed by the default rendering engine
1285							L to convert the tree (i.e. the input file) into a DOT file
1286							and into an image.
1287
1288							=head2 Does this module handle utf8?
1289
1290							Yes. See the last sample on L.
1291
1292							=head2 Why doesn't the parser handle my HTML-style labels?
1293
1294							Traps for young players:
1295
1296							=over 4
1297
1298							=item o The component must include the '/'
1299
1300							=back
1301
1302							=head2 Why do I get error messages like the following?
1303
1304							Error: :1: syntax error near line 1
1305							context: digraph >>> Graph <<< {
1306
1307							Graphviz reserves some words as keywords, meaning they can't be used as an ID, e.g. for the name of the graph.
1308							So, don't do this:
1309
1310							strict graph graph{...}
1311							strict graph Graph{...}
1312							strict graph strict{...}
1313							etc...
1314
1315							Likewise for non-strict graphs, and digraphs. You can however add double-quotes around such reserved words:
1316
1317							strict graph "graph"{...}
1318
1319							Even better, use a more meaningful name for your graph...
1320
1321							The keywords are: node, edge, graph, digraph, subgraph and strict. Compass points are not keywords.
1322
1323							See L in the discussion of the syntax of DOT
1324							for details.
1325
1326							=head2 What is the homepage of Marpa?
1327
1328							L.
1329
1330							=head2 How do I reconcile Marpa's approach with classic lexing and parsing?
1331
1332							I've included in a recent article a section called
1333							L
1334							which is aimed at helping us think about this issue.
1335
1336							=head2 How did you generate the html/*.svg files?
1337
1338							With a private script which uses L V 2.00. This script is not shipped
1339							in order to avoid a dependency on that module. Also, another private script which validates Build.PL and
1340							Makefile.PL would complain about the missing dependency.
1341
1342							See L for details.
1343
1344							=head1 Machine-Readable Change Log
1345
1346							The file Changes was converted into Changelog.ini by L.
1347
1348							=head1 Version Numbers
1349
1350							Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1351
1352							=head1 Repository
1353
1354							L
1355
1356							=head1 Support
1357
1358							Email the author, or log a bug on RT:
1359
1360							L.
1361
1362							=head1 Author
1363
1364							L was written by Ron Savage Iron@savage.net.auE> in 2013.
1365
1366							Marpa's homepage: .
1367
1368							My homepage: L.
1369
1370							=head1 Copyright
1371
1372							Australian copyright (c) 2013, Ron Savage.
1373
1374							All Programs of mine are 'OSI Certified Open Source Software';
1375							you can redistribute them and/or modify them under the terms of
1376							The Artistic License, a copy of which is available at:
1377							http://www.opensource.org/licenses/index.html
1378
1379							=cut