File Coverage

lib/Graph/Easy/Parser.pm
Criterion Covered Total %
statement 581 601 96.6
branch 233 310 75.1
condition 93 141 65.9
subroutine 52 53 98.1
pod 5 5 100.0
total 964 1110 86.8


line stmt bran cond sub pod time code
1             #############################################################################
2             # Parse text definition into a Graph::Easy object
3             #
4             #############################################################################
5              
6             package Graph::Easy::Parser;
7              
8 20     20   492984 use Graph::Easy;
  20         60  
  20         951  
9              
10             $VERSION = '0.75';
11 20     20   125 use Graph::Easy::Base;
  20         48  
  20         963  
12             @ISA = qw/Graph::Easy::Base/;
13 20     20   127 use Scalar::Util qw/weaken/;
  20         57  
  20         2003  
14              
15 20     20   287 use strict;
  20         1552  
  20         744  
16 20     20   106 use warnings;
  20         41  
  20         801  
17 20     20   111 use constant NO_MULTIPLES => 1;
  20         38  
  20         1544  
18              
19 20     20   112 use Graph::Easy::Util qw(ord_values);
  20         36  
  20         399538  
20              
21             sub _init
22             {
23 81     81   206 my ($self,$args) = @_;
24              
25 81         3020 $self->{error} = '';
26 81         254 $self->{debug} = 0;
27 81         294 $self->{fatal_errors} = 1;
28            
29 81         444 foreach my $k (sort keys %$args)
30             {
31 117 50       721 if ($k !~ /^(debug|fatal_errors)\z/)
32             {
33 0         0 require Carp;
34 0         0 my $class = ref($self);
35 0         0 Carp::confess ("Invalid argument '$k' passed to $class" . '->new()');
36             }
37 117         733 $self->{$k} = $args->{$k};
38             }
39              
40             # what to replace the matched text with
41 81         284 $self->{replace} = '';
42 81         231 $self->{attr_sep} = ':';
43             # An optional regexp to remove parts of an autosplit label, used by Graphviz
44             # to remove " ":
45 81         283 $self->{_qr_part_clean} = undef;
46              
47             # setup default class names for generated objects
48 81         853 $self->{use_class} = {
49             edge => 'Graph::Easy::Edge',
50             group => 'Graph::Easy::Group',
51             graph => 'Graph::Easy',
52             node => 'Graph::Easy::Node',
53             };
54              
55 81         296 $self;
56             }
57              
58             sub reset
59             {
60             # reset the status of the parser, clear errors etc.
61 802     802 1 410700 my $self = shift;
62              
63 802         2560 $self->{error} = '';
64 802         1819 $self->{anon_id} = 0;
65 802         2844 $self->{cluster_id} = ''; # each cluster gets a unique ID
66 802         1832 $self->{line_nr} = -1;
67 802         3196 $self->{match_stack} = []; # patterns and their handlers
68              
69 802         48352 $self->{clusters} = {}; # cluster names we already created
70              
71 802         6041 Graph::Easy::Base::_reset_id(); # start with the same set of IDs
72            
73             # After "[ 1 ] -> [ 2 ]" we push "2" on the stack and when we encounter
74             # " -> [ 3 ]" treat the stack as a node-list left of "3".
75             # In addition, for " [ 1 ], [ 2 ] => [ 3 ]", the stack will contain
76             # "1" and "2" when we encounter "3".
77 802         2884 $self->{stack} = [];
78              
79 802         2149 $self->{group_stack} = []; # all the (nested) groups we are currently in
80 802         2507 $self->{left_stack} = []; # stack for the left side for "[]->[],[],..."
81 802         1998 $self->{left_edge} = undef; # for -> [A], [B] continuations
82              
83 802         5513 Graph::Easy->_drop_special_attributes();
84              
85 802         9523 $self->{_graph} = $self->{use_class}->{graph}->new( {
86             debug => $self->{debug},
87             strict => 0,
88             fatal_errors => $self->{fatal_errors},
89             } );
90              
91 802         5354 $self;
92             }
93              
94             sub from_file
95             {
96             # read in entire file and call from_text() on the contents
97 2     2 1 20 my ($self,$file) = @_;
98              
99 2 50       51 $self = $self->new() unless ref $self;
100              
101 2         5 my $doc;
102 2         10 local $/ = undef; # slurp mode
103             # if given a reference, assume it is a glob, or something like that
104 2 50       11 if (ref($file))
105             {
106 0 0       0 binmode $file, ':utf8' or die ("binmode '$file', ':utf8' failed: $!");
107 0         0 $doc = <$file>;
108             }
109             else
110             {
111 2 50       129783 open my $PARSER_FILE, $file or die (ref($self).": Cannot read $file: $!");
112 2 50       25 binmode $PARSER_FILE, ':utf8' or die ("binmode '$file', ':utf8' failed: $!");
113 2         212304 $doc = <$PARSER_FILE>; # read entire file
114 2         86 close $PARSER_FILE;
115             }
116              
117 2         23 $self->from_text($doc);
118             }
119              
120             sub use_class
121             {
122             # use the provided class for generating objects of the type $object
123 4     4 1 22 my ($self, $object, $class) = @_;
124              
125 4 50       24 $self->_croak("Expected one of node, edge, group or graph, but got $object")
126             unless $object =~ /^(node|group|graph|edge)\z/;
127              
128 4         10 $self->{use_class}->{$object} = $class;
129              
130 4         10 $self;
131             }
132              
133             sub _register_handler
134             {
135             # register a pattern and a handler for it
136 4341     4341   7345 my $self = shift;
137              
138 4341         5292 push @{$self->{match_stack}}, [ @_ ];
  4341         20550  
139              
140 4341         12778 $self;
141             }
142              
143             sub _register_attribute_handler
144             {
145             # register a handler for attributes like "{ color: red; }"
146 541     541   1160 my ($self, $qr_attr, $target) = @_;
147              
148             # $object is either undef (for Graph::Easy, meaning "node", or "parent" for Graphviz)
149              
150             # { attributes }
151             $self->_register_handler( qr/^$qr_attr/,
152             sub
153             {
154 41     41   362 my $self = shift;
155             # This happens in the case of "[ Test ]\n { ... }", the node is consumed
156             # first, and the attributes are left over:
157              
158 41 100       93 my $stack = $self->{stack}; $stack = $self->{group_stack} if @{$self->{stack}} == 0;
  41         53  
  41         234  
159              
160 41         75 my $object = $target;
161 41 100 66     287 if ($target && $target eq 'parent')
162             {
163             # for Graphviz, stray attributes always apply to the parent
164 33         56 $stack = $self->{group_stack};
165              
166 33 50       1225 $object = $stack->[-1] if ref $stack;
167 33 100       100 if (!defined $object)
168             {
169             # try the scope stack next:
170 22         35 $stack = $self->{scope_stack};
171 22         41 $object = $self->{_graph};
172 22 50 33     139 if (!$stack || @$stack <= 1)
173             {
174 22         43 $object = $self->{_graph};
175 22         63 $stack = [ $self->{_graph} ];
176             }
177             }
178             }
179 41   50     279 my ($a, $max_idx) = $self->_parse_attributes($1||'', $object);
180 41 50       155 return undef if $self->{error}; # wrong attributes or empty stack?
181              
182 41 50       221 if (ref($stack->[-1]) eq 'HASH')
183             {
184             # stack is a scope stack
185             # XXX TODO: Find out wether the attribute goes to graph, node or edge
186 0         0 for my $k (sort keys %$a)
187             {
188 0         0 $stack->[-1]->{graph}->{$k} = $a->{$k};
189             }
190 0         0 return 1;
191             }
192              
193 41 50 33     493 print STDERR "max_idx = $max_idx, stack contains ", join (" , ", @$stack),"\n"
194             if $self->{debug} && $self->{debug} > 1;
195 41 100       120 if ($max_idx != 1)
196             {
197 1         4 my $i = 0;
198 1         3 for my $n (@$stack)
199             {
200 3         12 $n->set_attributes($a, $i++);
201             }
202             }
203             else
204             {
205             # set attributes on all nodes/groups on stack
206 40         83 for my $n (@$stack) { $n->set_attributes($a); }
  42         204  
207             }
208             # This happens in the case of "[ a | b ]\n { ... }", the node is consumed
209             # first, and the attributes are left over. And if we encounter a basename
210             # attribute here, the node-parts will already have been created with the
211             # wrong basename, so correct this:
212 41 100       138 if (defined $a->{basename})
213             {
214 2         7 for my $s (@$stack)
215             {
216             # for every node on the stack that is the primary one
217 6 100       69 $self->_set_new_basename($s, $a->{basename}) if exists $s->{autosplit_parts};
218             }
219             }
220 41         139 1;
221 541         46509 } );
222             }
223              
224             sub _register_node_attribute_handler
225             {
226             # register a handler for attributes like "[ A ] { ... }"
227 324     324   780 my ($self, $qr_node, $qr_oatr) = @_;
228              
229             $self->_register_handler( qr/^$qr_node$qr_oatr/,
230             sub
231             {
232 729     729   1784 my $self = shift;
233 729         2567 my $n1 = $1;
234 729   100     6293 my $a1 = $self->_parse_attributes($2||'');
235 729 50       2696 return undef if $self->{error};
236            
237 729         3914 $self->{stack} = [ $self->_new_node ($self->{_graph}, $n1, $self->{group_stack}, $a1) ];
238              
239             # forget left stack
240 729         2540 $self->{left_edge} = undef;
241 729         1813 $self->{left_stack} = [];
242 729         2201 1;
243 324         4681 } );
244             }
245              
246             sub _new_group
247             {
248             # create a new (possible anonymous) group
249 57     57   159 my ($self, $name) = @_;
250              
251 57 50       191 $name = '' unless defined $name;
252              
253 57         170 my $gr = $self->{use_class}->{group};
254              
255 57         95 my $group;
256              
257 57 100       201 if ($name eq '')
258             {
259 15 50       51 print STDERR "# Creating new anon group.\n" if $self->{debug};
260 15         29 $gr .= '::Anon';
261 15         97 $group = $gr->new();
262             }
263             else
264             {
265 42         176 $name = $self->_unquote($name);
266 42 50       180 print STDERR "# Creating new group '$name'.\n" if $self->{debug};
267 42         406 $group = $gr->new( name => $name );
268             }
269              
270 57         445 $self->{_graph}->add_group($group);
271              
272 57         284 my $group_stack = $self->{group_stack};
273 57 100       240 if (@$group_stack > 0)
274             {
275 2         11 $group->set_attribute('group', $group_stack->[-1]->{name});
276             }
277              
278 57         155 $group;
279             }
280              
281             sub _add_group_match
282             {
283             # register two handlers for group start/end
284 324     324   751 my $self = shift;
285              
286 324         1467 my $qr_group_start = $self->_match_group_start();
287 324         1236 my $qr_group_end = $self->_match_group_end();
288 324         841 my $qr_oatr = $self->_match_optional_attributes();
289              
290             # "( group start [" or empty group like "( Group )"
291             $self->_register_handler( qr/^$qr_group_start/,
292             sub
293             {
294 44     44   82 my $self = shift;
295 44         179 my $graph = $self->{_graph};
296              
297 44 50       176 my $end = $2; $end = '' unless defined $end;
  44         128  
298              
299             # repair the start of the next node/group
300 44 100       180 $self->{replace} = '[' if $end eq '[';
301 44 100       138 $self->{replace} = '(' if $end eq '(';
302              
303             # create the new group
304 44         184 my $group = $self->_new_group($1);
305              
306 44 100       4518 if ($end eq ')')
307             {
308             # we matched an empty group like "()", or "( group name )"
309 8         25 $self->{stack} = [ $group ];
310 8 50       29 print STDERR "# Seen end of group '$group->{name}'.\n" if $self->{debug};
311             }
312             else
313             {
314             # only put the group on the stack if it is still open
315 36         71 push @{$self->{group_stack}}, $group;
  36         115  
316             }
317              
318 44         149 1;
319 324         8095 } );
320              
321             # ") { }" # group end (with optional attributes)
322             $self->_register_handler( qr/^$qr_group_end$qr_oatr/,
323             sub
324             {
325 36     36   70 my $self = shift;
326              
327 36         71 my $group = pop @{$self->{group_stack}};
  36         97  
328 36 50       122 return $self->parse_error(0) if !defined $group;
329              
330 36 50       118 print STDERR "# Seen end of group '$group->{name}'.\n" if $self->{debug};
331              
332 36   100     439 my $a1 = $self->_parse_attributes($1||'', 'group', NO_MULTIPLES);
333 36 50       285 return undef if $self->{error};
334              
335 36         190 $group->set_attributes($a1);
336              
337             # the new left side is the group itself
338 36         101 $self->{stack} = [ $group ];
339 36         118 1;
340 324         3702 } );
341              
342             }
343              
344             sub _build_match_stack
345             {
346             # put all known patterns and their handlers on the match stack
347 324     324   719 my $self = shift;
348              
349             # regexps for the different parts
350 324         1459 my $qr_node = $self->_match_node();
351 324         1570 my $qr_attr = $self->_match_attributes();
352 324         1740 my $qr_oatr = $self->_match_optional_attributes();
353 324         1581 my $qr_edge = $self->_match_edge();
354 324         1486 my $qr_comma = $self->_match_comma();
355 324         1559 my $qr_class = $self->_match_class_selector();
356              
357 324         1378 my $e = $self->{use_class}->{edge};
358              
359             # node { color: red; }
360             # node.graph { ... }
361             # .foo { ... }
362             # .foo, node, edge.red { ... }
363             $self->_register_handler( qr/^\s*$qr_class$qr_attr/,
364             sub
365             {
366 98     98   215 my $self = shift;
367 98   50     910 my $class = lc($1 || '');
368 98   50     744 my $att = $self->_parse_attributes($2 || '', $class, NO_MULTIPLES );
369              
370 98 50       328 return undef unless defined $att; # error in attributes?
371              
372 98         212 my $graph = $self->{_graph};
373 98         580 $graph->set_attributes ( $class, $att);
374              
375             # forget stacks
376 98         261 $self->{stack} = [];
377 98         426 $self->{left_edge} = undef;
378 98         209 $self->{left_stack} = [];
379 98         701 1;
380 324         5874 } );
381              
382 324         1383 $self->_add_group_match();
383              
384 324         1585 $self->_register_attribute_handler($qr_attr);
385 324         1349 $self->_register_node_attribute_handler($qr_node,$qr_oatr);
386              
387             # , [ Berlin ] { color: red; }
388             $self->_register_handler( qr/^$qr_comma$qr_node$qr_oatr/,
389             sub
390             {
391 63     63   145 my $self = shift;
392 63         150 my $graph = $self->{_graph};
393 63         174 my $n1 = $1;
394 63   100     458 my $a1 = $self->_parse_attributes($2||'');
395 63 50       286 return undef if $self->{error};
396              
397 63         106 push @{$self->{stack}},
  63         282  
398             $self->_new_node ($graph, $n1, $self->{group_stack}, $a1, $self->{stack});
399              
400 63 100       242 if (defined $self->{left_edge})
401             {
402 26         43 my ($style, $edge_label, $edge_atr, $edge_bd, $edge_un) = @{$self->{left_edge}};
  26         75  
403              
404 26         38 foreach my $node (@{$self->{left_stack}})
  26         64  
405             {
406 28         175 my $edge = $e->new( { style => $style, name => $edge_label } );
407 28         159 $edge->set_attributes($edge_atr);
408             # "<--->": bidirectional
409 28 100       86 $edge->bidirectional(1) if $edge_bd;
410 28 50       67 $edge->undirected(1) if $edge_un;
411 28         133 $graph->add_edge ( $node, $self->{stack}->[-1], $edge );
412             }
413             }
414 63         197 1;
415 324         4135 } );
416              
417             # Things like "[ Node ]" will be consumed before, so we do not need a case
418             # for "[ A ] -> [ B ]":
419             # node chain continued like "-> { ... } [ Kassel ] { ... }"
420             $self->_register_handler( qr/^$qr_edge$qr_oatr$qr_node$qr_oatr/,
421             sub
422             {
423 780     780   1923 my $self = shift;
424              
425 780 50       1271 return if @{$self->{stack}} == 0; # only match this if stack non-empty
  780         3188  
426              
427 780         2210 my $graph = $self->{_graph};
428 780         2026 my $eg = $1; # entire edge ("-- label -->" etc)
429              
430 780   66     3643 my $edge_bd = $2 || $4; # bidirectional edge ('<') ?
431 780         1257 my $edge_un = 0; # undirected edge?
432 780 100 66     2686 $edge_un = 1 if !defined $2 && !defined $5;
433              
434             # optional edge label
435 780         1798 my $edge_label = $7;
436 780   66     9461 my $ed = $3 || $5 || $1; # edge pattern/style ("--")
437              
438 780   100     3504 my $edge_atr = $11 || ''; # save edge attributes
439              
440 780         1508 my $n = $12; # node name
441 780   100     4579 my $a1 = $self->_parse_attributes($13||''); # node attributes
442              
443 780         2158 $edge_atr = $self->_parse_attributes($edge_atr, 'edge');
444 780 50       2419 return undef if $self->{error};
445              
446             # allow undefined edge labels for setting them from the class
447             # strip trailing spaces and convert \[ => [
448 780 100       3970 $edge_label = $self->_unquote($edge_label) if defined $edge_label;
449             # strip trailing spaces
450 780 100       1989 $edge_label =~ s/\s+\z// if defined $edge_label;
451              
452             # the right side node(s) (multiple in case of autosplit)
453 780         2882 my $nodes_b = [ $self->_new_node ($self->{_graph}, $n, $self->{group_stack}, $a1) ];
454              
455 780         3536 my $style = $self->_link_lists( $self->{stack}, $nodes_b,
456             $ed, $edge_label, $edge_atr, $edge_bd, $edge_un);
457              
458             # remember the left side
459 780         3852 $self->{left_edge} = [ $style, $edge_label, $edge_atr, $edge_bd, $edge_un ];
460 780         2307 $self->{left_stack} = $self->{stack};
461              
462             # forget stack and remember the right side instead
463 780         1538 $self->{stack} = $nodes_b;
464 780         6302 1;
465 324         9053 } );
466              
467 324         1008 my $qr_group_start = $self->_match_group_start();
468              
469             # Things like ")" will be consumed before, so we do not need a case
470             # for ") -> { ... } ( Group [ B ]":
471             # edge to a group like "-> { ... } ( Group ["
472             $self->_register_handler( qr/^$qr_edge$qr_oatr$qr_group_start/,
473             sub
474             {
475 6     6   12 my $self = shift;
476              
477 6 50       11 return if @{$self->{stack}} == 0; # only match this if stack non-empty
  6         25  
478              
479 6         18 my $eg = $1; # entire edge ("-- label -->" etc)
480              
481 6   33     37 my $edge_bd = $2 || $4; # bidirectional edge ('<') ?
482 6         9 my $edge_un = 0; # undirected edge?
483 6 0 33     21 $edge_un = 1 if !defined $2 && !defined $5;
484              
485             # optional edge label
486 6         14 my $edge_label = $7;
487 6   0     24 my $ed = $3 || $5 || $1; # edge pattern/style ("--")
488              
489 6   50     51 my $edge_atr = $11 || ''; # save edge attributes
490              
491 6         14 my $gn = $12;
492             # matched "-> ( Group [" or "-> ( Group ("
493 6 50 33     42 $self->{replace} = '[' if defined $13 && $13 eq '[';
494 6 50 33     36 $self->{replace} = '(' if defined $13 && $13 eq '(';
495              
496 6         26 $edge_atr = $self->_parse_attributes($edge_atr, 'edge');
497 6 50       23 return undef if $self->{error};
498              
499             # get the last group of the stack, lest the new one gets nested in it
500 6         8 pop @{$self->{group_stack}};
  6         15  
501              
502 6         22 $self->{group_stack} = [ $self->_new_group($gn) ];
503              
504             # allow undefined edge labels for setting them from the class
505 6 50       28 $edge_label = $self->_unquote($edge_label) if $edge_label;
506             # strip trailing spaces
507 6 50       37 $edge_label =~ s/\s+\z// if $edge_label;
508              
509 6         32 my $style = $self->_link_lists( $self->{stack}, $self->{group_stack},
510             $ed, $edge_label, $edge_atr, $edge_bd, $edge_un);
511              
512             # remember the left side
513 6         31 $self->{left_edge} = [ $style, $edge_label, $edge_atr, $edge_bd, $edge_un ];
514 6         17 $self->{left_stack} = $self->{stack};
515             # forget stack
516 6         14 $self->{stack} = [];
517             # matched "->()" so remember the group on the stack
518 6 50 33     97 $self->{stack} = [ $self->{group_stack}->[-1] ] if defined $13 && $13 eq ')';
519              
520 6         17 1;
521 324         6035 } );
522             }
523              
524             sub _line_insert
525             {
526             # what to insert between two lines, '' for Graph::Easy, ' ' for Graphviz;
527 962     962   7142 '';
528             }
529              
530             sub _clean_line
531             {
532             # do some cleanups on a line before handling it
533 962     962   2114 my ($self,$line) = @_;
534              
535 962         2682 chomp($line);
536              
537             # convert #808080 into \#808080, and "#fff" into "\#fff"
538 962         2525 my $sep = $self->{attr_sep};
539 962         6164 $line =~ s/$sep\s*("?)(#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))("?)/$sep $1\\$2$3/g;
540              
541             # remove comment at end of line (but leave \# alone):
542 962         6757 $line =~ s/(:[^\\]|)$self->{qr_comment}.*/$1/;
543              
544             # remove white space at end (but not at the start, to keep " ||" intact
545 962         4797 $line =~ s/\s+\z//;
546              
547             # print STDERR "# at line '$line' stack: ", join(",",@{ $self->{stack}}),"\n";
548              
549 962         5556 $line;
550             }
551              
552             sub from_text
553             {
554 440     440 1 16478 my ($self,$txt) = @_;
555              
556             # matches a multi-line comment
557 440         3147 my $o_cmt = qr#((\s*/\*.*?\*/\s*)*\s*|\s+)#;
558              
559 440 100 66     33258 if ((ref($self)||$self) eq 'Graph::Easy::Parser' &&
      100        
      66        
560             # contains "digraph GRAPH {" or something similiar
561             ( $txt =~ /^(\s*|\s*\/\*.*?\*\/\s*)(strict)?$o_cmt(di)?graph$o_cmt("[^"]*"|[\w_]+)$o_cmt\{/im ||
562             # contains "digraph {" or something similiar
563             $txt =~ /^(\s*|\s*\/\*.*?\*\/\s*)(strict)?${o_cmt}digraph$o_cmt\{/im ||
564             # contains "strict graph {" or something similiar
565             $txt =~ /^(\s*|\s*\/\*.*?\*\/\s*)strict${o_cmt}(di)?graph$o_cmt\{/im))
566             {
567 41         1318 require Graph::Easy::Parser::Graphviz;
568             # recreate ourselfes, and pass our arguments along
569 41         97 my $debug = 0;
570 41         91 my $old_self = $self;
571 41 100       240 if (ref($self))
572             {
573 40         232 $debug = $self->{debug};
574 40         165 $self->{fatal_errors} = 0;
575             }
576 41         520 $self = Graph::Easy::Parser::Graphviz->new( debug => $debug, fatal_errors => 0 );
577 41         240 $self->reset();
578 41 50       197 $self->{_old_self} = $old_self if ref($self);
579             }
580              
581 440 100 66     5308 if ((ref($self)||$self) eq 'Graph::Easy::Parser' &&
      100        
582             # contains "graph: {"
583             $txt =~ /^([\s\n\t]*|\s*\/\*.*?\*\/\s*)graph\s*:\s*\{/m)
584             {
585 13         766 require Graph::Easy::Parser::VCG;
586             # recreate ourselfes, and pass our arguments along
587 13         34 my $debug = 0;
588 13         24 my $old_self = $self;
589 13 50       69 if (ref($self))
590             {
591 13         39 $debug = $self->{debug};
592 13         35 $self->{fatal_errors} = 0;
593             }
594 13         127 $self = Graph::Easy::Parser::VCG->new( debug => $debug, fatal_errors => 0 );
595 13         58 $self->reset();
596 13 50       67 $self->{_old_self} = $old_self if ref($self);
597             }
598              
599 440 100       1697 $self = $self->new() unless ref $self;
600 440         2329 $self->reset();
601              
602 440         1334 my $graph = $self->{_graph};
603 440 100 66     4610 return $graph if !defined $txt || $txt =~ /^\s*\z/; # empty text?
604            
605 439         1873 my $uc = $self->{use_class};
606              
607             # instruct the graph to use the custom classes, too
608 439         2854 for my $o (sort keys %$uc)
609             {
610 1756 100       8457 $graph->use_class($o, $uc->{$o}) unless $o eq 'graph'; # group, node and edge
611             }
612              
613 439         8011 my @lines = split /(\r\n|\n|\r)/, $txt;
614              
615 439         1112 my $backbuffer = ''; # left over fragments to be combined with next line
616              
617 439         2313 my $qr_comment = $self->_match_commented_line();
618 439         2353 $self->{qr_comment} = $self->_match_comment();
619             # cache the value of this since it can be expensive to construct:
620 439         2896 $self->{_match_single_attribute} = $self->_match_single_attribute();
621              
622 439         2687 $self->_build_match_stack();
623              
624             ###########################################################################
625             # main parsing loop
626              
627 439         1243 my $handled = 0; # did we handle a fragment?
628 439         1091 my $line;
629              
630             # my $counts = {};
631             LINE:
632 439   100     2971 while (@lines > 0 || $backbuffer ne '')
633             {
634             # only accumulate more text if we didn't handle a fragment
635 5902 100 100     35187 if (@lines > 0 && $handled == 0)
636             {
637 3573         6098 $self->{line_nr}++;
638 3573         6841 my $curline = shift @lines;
639              
640             # discard empty lines, or completely commented out lines
641 3573 100       33216 next if $curline =~ $qr_comment;
642              
643             # convert tabs to spaces (the regexps don't expect tabs)
644 1554         6530 $curline =~ tr/\t/ /;
645              
646             # combine backbuffer, what to insert between two lines and next line:
647 1554         6495 $line = $backbuffer . $self->_line_insert() . $self->_clean_line($curline);
648             }
649              
650 3883 50 33     14242 print STDERR "# Line is '$line'\n" if $self->{debug} && $self->{debug} > 2;
651 3883 50 33     13362 print STDERR "# Backbuffer is '$backbuffer'\n" if $self->{debug} && $self->{debug} > 2;
652              
653 3883         11588 $handled = 0;
654             #debug my $count = 0;
655 3883         12265 PATTERN:
656 3883         5538 for my $entry (@{$self->{match_stack}})
657             {
658             # nothing to match against?
659 19929 100       49099 last PATTERN if $line eq '';
660              
661 18676         38129 $self->{replace} = ''; # as default just remove the matched text
662 18676         47655 my ($pattern, $handler, $replace) = @$entry;
663              
664 18676 50 33     60347 print STDERR "# Matching against $pattern\n" if $self->{debug} && $self->{debug} > 3;
665              
666 18676 100       260134 if ($line =~ $pattern)
667             {
668             #debug $counts->{$count}++;
669 2693 50 33     9882 print STDERR "# Matched, calling handler\n" if $self->{debug} && $self->{debug} > 2;
670 2693         4790 my $rc = 1;
671 2693 100       10746 $rc = &$handler($self) if defined $handler;
672 2693 100       8070 if ($rc)
673             {
674 2514 100       8245 $replace = $self->{replace} unless defined $replace;
675 2514 100       7163 $replace = &$replace($self,$line) if ref($replace);
676 2514 50 33     7919 print STDERR "# Handled it successfully.\n" if $self->{debug} && $self->{debug} > 2;
677 2514         44903 $line =~ s/$pattern/$replace/;
678 2514 50 33     9427 print STDERR "# Line is now '$line' (replaced with '$replace')\n" if $self->{debug} && $self->{debug} > 2;
679 2514         3760 $handled++; last PATTERN;
  2514         13381  
680             }
681             }
682             #debug $count ++;
683              
684             }
685              
686             #debug if ($handled == 0) { $counts->{'-1'}++; }
687             # couldn't handle that fragement, so accumulate it and try again
688 3883         7048 $backbuffer = $line;
689              
690             # stop at the very last line
691 3883 100 100     13290 last LINE if $handled == 0 && @lines == 0;
692              
693             # stop at parsing errors
694 3871 50       21044 last LINE if $self->{error};
695             }
696              
697 439 100       20729 $self->error("'$backbuffer' not recognized by " . ref($self)) if $backbuffer ne '';
698              
699             # if something was left on the stack, file ended unexpectedly
700 439 50 100     3964 $self->parse_error(7) if !$self->{error} && $self->{scope_stack} && @{$self->{scope_stack}} > 0;
  115   66     670  
701              
702 439 50 66     1760 return undef if $self->{error} && $self->{fatal_errors};
703              
704             #debug use Data::Dumper; print Dumper($counts);
705              
706 439 50       1530 print STDERR "# Parsing done.\n" if $graph->{debug};
707              
708             # Do final cleanup (for parsing Graphviz)
709 439 50       3498 $self->_parser_cleanup() if $self->can('_parser_cleanup');
710 439         2840 $graph->_drop_special_attributes();
711              
712             # turn on strict checking on returned graph
713 439         4469 $graph->strict(1);
714 439         1993 $graph->fatal_errors(1);
715              
716 439         15421 $graph;
717             }
718              
719             #############################################################################
720             # internal routines
721              
722             sub _edge_style
723             {
724 786     786   1292 my ($self, $ed) = @_;
725              
726 786         1104 my $style = undef; # default is "inherit from class"
727 786 100       2713 $style = 'double-dash' if $ed =~ /^(= )+\z/;
728 786 100       2308 $style = 'double' if $ed =~ /^=+\z/;
729 786 100       2029 $style = 'dotted' if $ed =~ /^\.+\z/;
730 786 100       2215 $style = 'dashed' if $ed =~ /^(- )+\z/;
731 786 100       1952 $style = 'dot-dot-dash' if $ed =~ /^(..-)+\z/;
732 786 100       2078 $style = 'dot-dash' if $ed =~ /^(\.-)+\z/;
733 786 100       2249 $style = 'wave' if $ed =~ /^\~+\z/;
734 786 50       2410 $style = 'bold' if $ed =~ /^#+\z/;
735              
736 786         2375 $style;
737             }
738              
739             sub _link_lists
740             {
741             # Given two node lists and an edge style, links each node from list
742             # one to list two.
743 904     904   2297 my ($self, $left, $right, $ed, $label, $edge_atr, $edge_bd, $edge_un) = @_;
744              
745 904         1551 my $graph = $self->{_graph};
746            
747 904         2858 my $style = $self->_edge_style($ed);
748 904         2467 my $e = $self->{use_class}->{edge};
749              
750             # add edges for all nodes in the left list
751 904         2153 for my $node (@$left)
752             {
753 964         1683 for my $node_b (@$right)
754             {
755 969         8795 my $edge = $e->new( { style => $style, name => $label } );
756              
757 969         11335 $graph->add_edge ( $node, $node_b, $edge );
758              
759             # 'string' => [ 'string' ]
760             # [ { hash }, 'string' ] => [ { hash }, 'string' ]
761 969 100       1610 my $e = $edge_atr; $e = [ $edge_atr ] unless ref($e) eq 'ARRAY';
  969         7957  
762              
763 969         2769 for my $a (@$e)
764             {
765 1097 100       2962 if (ref $a)
766             {
767 969         4302 $edge->set_attributes($a);
768             }
769             else
770             {
771             # deferred parsing with the object as param:
772 128         604 my $out = $self->_parse_attributes($a, $edge);
773 128 50       485 return undef if $self->{error};
774 128         814 $edge->set_attributes($out);
775             }
776             }
777              
778             # "<--->": bidirectional
779 969 100       3036 $edge->bidirectional(1) if $edge_bd;
780 969 100       4383 $edge->undirected(1) if $edge_un;
781             }
782             }
783              
784 904         3033 $style;
785             }
786              
787             sub _unquote_attribute
788             {
789 611     611   1430 my ($self,$name,$value) = @_;
790              
791 611         1446 $self->_unquote($value);
792             }
793              
794             sub _unquote
795             {
796 2286     2286   7523 my ($self, $name, $no_collapse) = @_;
797              
798 2286 50       6187 $name = '' unless defined $name;
799              
800             # unquote special chars
801 2286         4602 $name =~ s/\\([\[\(\{\}\]\)#<>\-\.\=])/$1/g;
802              
803             # collapse multiple spaces
804 2286 100       6866 $name =~ s/\s+/ /g unless $no_collapse;
805              
806 2286         10693 $name;
807             }
808              
809             sub _add_node
810             {
811             # add a node to the graph, overidable by subclasses
812 1507     1507   2633 my ($self, $graph, $name) = @_;
813              
814 1507         5922 $graph->add_node($name); # add unless exists
815             }
816              
817             sub _get_cluster_name
818             {
819             # create a unique name for an autosplit node
820 65     65   139 my ($self, $base_name) = @_;
821              
822             # Try to find a unique cluster name in case some one get's creative and names the
823             # last part "-1":
824              
825             # does work without cluster-id?
826 65 100       247 if (exists $self->{clusters}->{$base_name})
827             {
828 2         4 my $g = 1;
829 2         7 while ($g == 1)
830             {
831 5 100       7 my $base_try = $base_name; $base_try .= '-' . $self->{cluster_id} if $self->{cluster_id};
  5         16  
832 5 100       13 last if !exists $self->{clusters}->{$base_try};
833 3         7 $self->{cluster_id}++;
834             }
835 2 50       6 $base_name .= '-' . $self->{cluster_id} if $self->{cluster_id}; $self->{cluster_id}++;
  2         3  
836             }
837              
838 65         282 $self->{clusters}->{$base_name} = undef; # reserve this name
839              
840 65         201 $base_name;
841             }
842              
843             sub _set_new_basename
844             {
845             # when encountering something like:
846             # [ a | b ]
847             # { basename: foo; }
848             # the Parser will create two nodes, ab.0 and ab.1, and then later see
849             # the "basename: foo". Sowe need to rename the already created nodes
850             # due to the changed basename:
851 2     2   6 my ($self, $node, $new_basename) = @_;
852              
853             # nothing changes?
854 2 50       10 return if $node->{autosplit_basename} eq $new_basename;
855              
856 2         6 my $g = $node->{graph};
857              
858 2         4 my @parts = @{$node->{autosplit_parts}};
  2         7  
859 2         4 my $nr = 0;
860 2         6 for my $part ($node, @parts)
861             {
862 6 50       19 print STDERR "# Setting new basename $new_basename for node $part->{name}\n"
863             if $self->{debug} > 1;
864              
865 6         12 $part->{autosplit_basename} = $new_basename;
866 6         23 $part->set_attribute('basename', $new_basename);
867            
868             # delete it from the list of nodes
869 6         26 delete $g->{nodes}->{$part->{name}};
870 6         20 $part->{name} = $new_basename . '.' . $nr; $nr++;
  6         11  
871             # and re-insert it with the right name
872 6         28 $g->{nodes}->{$part->{name}} = $part;
873              
874             # we do not need to care for edges here, as they are stored with refs
875             # to the nodes and not the node names itself
876             }
877             }
878              
879             sub _autosplit_node
880             {
881             # Takes a node name like "a|b||c" and splits it into "a", "b", and "c".
882             # Returns the individual parts.
883 65     65   175 my ($self, $graph, $name, $att, $allow_empty) = @_;
884            
885             # Default is to have empty parts. Graphviz sets this to true;
886 65 100       201 $allow_empty = 1 unless defined $allow_empty;
887              
888 65         110 my @rc;
889 65         149 my $uc = $self->{use_class};
890 65         142 my $qr_clean = $self->{_qr_part_clean};
891              
892             # build base name: "A|B |C||D" => "ABCD"
893 65         121 my $base_name = $name; $base_name =~ s/\s*\|\|?\s*//g;
  65         624  
894              
895             # use user-provided base name
896 65 100       244 $base_name = $att->{basename} if exists $att->{basename};
897              
898             # strip trailing/leading spaces on basename
899 65         259 $base_name =~ s/\s+\z//;
900 65         206 $base_name =~ s/^\s+//;
901              
902             # first one gets: "ABC", second one "ABC.1" and so on
903 65         253 $base_name = $self->_get_cluster_name($base_name);
904              
905 65 50       212 print STDERR "# Parser: Autosplitting node with basename '$base_name'\n" if $graph->{debug};
906              
907 65         110 my $first_in_row; # for relative placement of new row
908 65         114 my $x = 0; my $y = 0; my $idx = 0;
  65         109  
  65         118  
909 65         118 my $remaining = $name; my $sep; my $last_sep = '';
  65         100  
  65         112  
910 65         206 my $add = 0;
911 65         249 while ($remaining ne '')
912             {
913             # XXX TODO: parsing of "\|" and "|" in one node
914 188         1142 $remaining =~ s/^((\\\||[^\|])*)(\|\|?|\z)//;
915 188   100     729 my $part = $1 || ' ';
916 188         321 $sep = $3;
917 188         501 my $port_name = '';
918              
919             # possible cleanup for this part
920 188 100       846 if ($qr_clean)
921             {
922 45         287 $part =~ s/^$qr_clean//; $port_name = $1;
  45         99  
923             }
924              
925             # fix [|G|] to have one empty part as last part
926 188 100 100     1181 if ($add == 0 && $remaining eq '' && $sep =~ /\|\|?/)
      100        
927             {
928 2         4 $add++; # only do it once
929 2         4 $remaining .= '|'
930             }
931              
932 188 50       517 print STDERR "# Parser: Found autosplit part '$part'\n" if $graph->{debug};
933              
934 188         332 my $class = $uc->{node};
935 188 100 100     980 if ($allow_empty && $part eq ' ')
    100          
936             {
937             # create an empty node with no border
938 12         27 $class .= "::Empty";
939             }
940             elsif ($part =~ /^[ ]{2,}\z/)
941             {
942             # create an empty node with border
943 10         16 $part = ' ';
944             }
945             else
946             {
947 166         586 $part =~ s/^\s+//; # rem spaces at front
948 166         509 $part =~ s/\s+\z//; # rem spaces at end
949             }
950              
951 188         484 my $node_name = "$base_name.$idx";
952              
953 188 50       688 if ($graph->{debug})
954             {
955 0         0 my $empty = '';
956 0 0       0 $empty = ' empty' if $class ne $self->{use_class}->{node};
957 0 0       0 print STDERR "# Parser: Creating$empty autosplit part '$part'\n" if $graph->{debug};
958             }
959              
960             # if it doesn't exist, add it, otherwise retrieve node object to $node
961 188 100       510 if ($class =~ /::Empty/)
962             {
963 12         57 my $node = $graph->node($node_name);
964 12 100       43 if (!defined $node)
965             {
966             # create node object from the correct class
967 11         74 $node = $class->new($node_name);
968 11         43 $graph->add_node($node);
969             }
970             }
971              
972 188         6625 my $node = $graph->add_node($node_name);
973 188         714 $node->{autosplit_label} = $part;
974             # remember these two for Graphviz
975 188         380 $node->{autosplit_portname} = $port_name;
976 188         570 $node->{autosplit_basename} = $base_name;
977              
978 188         348 push @rc, $node;
979 188 100       439 if (@rc == 1)
980             {
981             # for correct as_txt output
982 65         581 $node->{autosplit} = $name;
983 65         361 $node->{autosplit} =~ s/\s+\z//; # strip trailing spaces
984 65         275 $node->{autosplit} =~ s/^\s+//; # strip leading spaces
985 65         507 $node->{autosplit} =~ s/([^\|])\s+\|/$1 \|/g; # 'foo |' => 'foo |'
986 65         436 $node->{autosplit} =~ s/\|\s+([^\|])/\| $1/g; # '| foo' => '| foo'
987 65 100       396 $node->set_attribute('basename', $att->{basename}) if defined $att->{basename};
988             # list of all autosplit parts so as_txt() can find them easily again
989 65         229 $node->{autosplit_parts} = [ ];
990 65         129 $first_in_row = $node;
991             }
992             else
993             {
994             # second, third etc. get previous as origin
995 123         225 my ($sx,$sy) = (1,0);
996 123         323 my $origin = $rc[-2];
997 123 100       332 if ($last_sep eq '||')
998             {
999 16         33 ($sx,$sy) = (0,1); $origin = $first_in_row;
  16         29  
1000 16         25 $first_in_row = $node;
1001             }
1002 123         793 $node->relative_to($origin,$sx,$sy);
1003 123         148 push @{$rc[0]->{autosplit_parts}}, $node;
  123         344  
1004 123         203 weaken @{$rc[0]->{autosplit_parts}}[-1];
  123         491  
1005              
1006             # suppress as_txt output for other parts
1007 123         12452 $node->{autosplit} = undef;
1008             }
1009             # nec. for border-collapse
1010 188         583 $node->{autosplit_xy} = "$x,$y";
1011              
1012 188         281 $idx++; # next node ID
1013 188         231 $last_sep = $sep;
1014 188         250 $x++;
1015             # || starts a new row:
1016 188 100       875 if ($sep eq '||')
1017             {
1018 16         29 $x = 0; $y++;
  16         52  
1019             }
1020             } # end for all parts
1021              
1022 65         353 @rc; # return all created nodes
1023             }
1024              
1025             sub _new_node
1026             {
1027             # Create a new node unless it doesn't already exist. If the group stack
1028             # contains entries, the new node appears first in this/these group(s), so
1029             # add it to these groups. If the newly created node contains "|", we auto
1030             # split it up into several nodes and cluster these together.
1031 1900     1900   6949 my ($self, $graph, $name, $group_stack, $att, $stack) = @_;
1032              
1033 1900 50       8400 print STDERR "# Parser: new node '$name'\n" if $graph->{debug};
1034              
1035 1900         5349 $name = $self->_unquote($name, 'no_collapse');
1036              
1037 1900         6487 my $autosplit;
1038 1900         3473 my $uc = $self->{use_class};
1039              
1040 1900         3376 my @rc = ();
1041              
1042 1900 100 100     14691 if ($name =~ /^\s*\z/)
    100          
1043             {
1044 22 50       79 print STDERR "# Parser: Creating anon node\n" if $graph->{debug};
1045             # create a new anon node and add it to the graph
1046 22         67 my $class = $uc->{node} . '::Anon';
1047 22         172 my $node = $class->new();
1048 22         98 @rc = ( $graph->add_node($node) );
1049             }
1050             # nodes to be autosplit will be done in a sep. pass for Graphviz
1051             elsif ((ref($self) eq 'Graph::Easy::Parser') && $name =~ /[^\\]\|/)
1052             {
1053 50         112 $autosplit = 1;
1054 50         201 @rc = $self->_autosplit_node($graph, $name, $att);
1055             }
1056             else
1057             {
1058             # strip trailing and leading spaces
1059 1828         7969 $name =~ s/\s+\z//;
1060 1828         6636 $name =~ s/^\s+//;
1061              
1062             # collapse multiple spaces
1063 1828         4033 $name =~ s/\s+/ /g;
1064              
1065             # unquote \|
1066 1828         3354 $name =~ s/\\\|/\|/g;
1067              
1068 1828 50       5223 if ($self->{debug})
1069             {
1070 0 0       0 if (!$graph->node($name))
1071             {
1072 0         0 print STDERR "# Parser: Creating normal node from name '$name'.\n";
1073             }
1074             else
1075             {
1076 0         0 print STDERR "# Parser: Found node '$name' already in graph.\n";
1077             }
1078             }
1079 1828         5320 @rc = ( $self->_add_node($graph, $name) ); # add to graph, unless exists
1080             }
1081              
1082 1900 50 66     16788 $self->parse_error(5) if exists $att->{basename} && !$autosplit;
1083              
1084 1900         3480 my $b = $att->{basename};
1085 1900         3882 delete $att->{basename};
1086              
1087             # on a node list "[A],[B] { ... }" set attributes on all nodes
1088             # encountered so far, too:
1089 1900 100       4782 if (defined $stack)
1090             {
1091 94         222 for my $node (@$stack)
1092             {
1093 145         1194 $node->set_attributes ($att, 0);
1094             }
1095             }
1096 1900         3224 my $index = 0;
1097 1900         4312 my $group = $self->{group_stack}->[-1];
1098              
1099 1900         3433 for my $node (@rc)
1100             {
1101 1993 100       4683 $node->add_to_group($group) if $group;
1102 1993         8153 $node->set_attributes ($att, $index);
1103 1993         4434 $index++;
1104             }
1105            
1106 1900 100       5145 $att->{basename} = $b if defined $b;
1107              
1108             # return list of created nodes (usually one, but more for "A|B")
1109 1900         8088 @rc;
1110             }
1111              
1112             sub _match_comma
1113             {
1114             # return a regexp that matches something like " , " like in:
1115             # "[ Bonn ], [ Berlin ] => [ Hamburg ]"
1116 324     324   3189 qr/\s*,\s*/;
1117             }
1118              
1119             sub _match_comment
1120             {
1121             # match the start of a comment
1122 324     324   1564 qr/(^|[^\\])#/;
1123             }
1124              
1125             sub _match_commented_line
1126             {
1127             # match empty lines or a completely commented out line
1128 426     426   2591 qr/^\s*(#|\z)/;
1129             }
1130              
1131             sub _match_attributes
1132             {
1133             # return a regexp that matches something like " { color: red; }" and returns
1134             # the inner text without the {}
1135 324     324   1618 qr/\s*\{\s*([^\}]+?)\s*\}/;
1136             }
1137              
1138             sub _match_optional_attributes
1139             {
1140             # return a regexp that matches something like " { color: red; }" and returns
1141             # the inner text with the {}
1142 649     649   2412 qr/(\s*\{[^\}]+?\})?/;
1143             }
1144              
1145             sub _match_node
1146             {
1147             # return a regexp that matches something like " [ bonn ]" and returns
1148             # the inner text without the [] (might leave some spaces)
1149              
1150 326     326   1356 qr/\s*\[ # '[' start of the node
1151             (
1152             (?: # non-capturing group
1153             \\. # either '\]' or '\N' etc.
1154             | # or
1155             [^\]\\] # not ']' and not '\'
1156             )* # 0 times for '[]'
1157             )
1158             \]/x; # followed by ']'
1159             }
1160              
1161             sub _match_class_selector
1162             {
1163 324     324   1294 my $class = qr/(?:\.\w+|graph|(?:edge|group|node)(?:\.\w+)?)/;
1164 324         4010 qr/($class(?:\s*,\s*$class)*)/;
1165             }
1166              
1167             sub _match_single_attribute
1168             {
1169 405     405   2099 qr/\s*([^:]+?)\s*:\s*("(?:\\"|[^"])+"|(?:\\;|[^;])+?)(?:\s*;\s*|\s*\z)/; # "name: value"
1170             }
1171              
1172             sub _match_group_start
1173             {
1174             # Return a regexp that matches something like " ( group [" and returns
1175             # the text between "(" and "[". Also matches empty groups like "( group )"
1176             # or even "()":
1177 648     648   2882 qr/\s*\(\s*([^\[\)\(]*?)\s*([\[\)\(])/;
1178             }
1179              
1180             sub _match_group_end
1181             {
1182             # return a regexp that matches something like " )".
1183 324     324   1375 qr/\s*\)\s*/;
1184             }
1185              
1186             sub _match_edge
1187             {
1188             # Matches all possible edge variants like:
1189             # -->, ---->, ==> etc
1190             # <-->, <---->, <==>, <..> etc
1191             # <-- label -->, <.- label .-> etc
1192             # -- label -->, .- label .-> etc
1193              
1194             # "- " must come before "-"!
1195             # likewise, "..-" must come before ".-" must come before "."
1196              
1197             # XXX TODO: convert the first group into a non-matching group
1198              
1199 325     325   1797 qr/\s*
1200             ( # egde without label ("-->")
1201             (
1202             (=\s|=|-\s|-|\.\.-|\.-|\.|~)+> # pattern (style) of edge
1203             | # edge with label ("-- label -->")
1204             (
1205             ((=\s|=|-\s|-|\.\.-|\.-|\.|~)+) # pattern (style) of edge
1206             \s+ # followed by at least a space
1207             ((?:\\.|[^>\[\{])*?) # either \\, \[ etc, or not ">", "[", "{"
1208             (\s+\5)> # a space and pattern before ">"
1209              
1210             # inserting this needs mucking with all the code that access $5 etc
1211             # | # undirected edge (without arrows, but with label)
1212             # ((=\s|=|-\s|-|\.\.-|\.-|\.|~)+) # pattern (style) of edge
1213             # \s+ # followed by at least a space
1214             # ((?:\\.|[^>\[\{])*?) # either \\, \[ etc, or not ">", "[", "{"
1215             # (\s+\10) # a space and pattern
1216              
1217             | # undirected edge (without arrows and label)
1218             (\.\.-|\.-)+ # pattern (style) of edge (at least once)
1219             |
1220             (=\s|=|-\s|-|\.|~){2,} # these at least two times
1221             )
1222             /x;
1223             }
1224              
1225             sub _clean_attributes
1226             {
1227 2578     2578   4272 my ($self,$text) = @_;
1228              
1229 2578         5411 $text =~ s/^\s*\{\s*//; # remove left-over "{" and spaces
1230 2578         5812 $text =~ s/\s*\}\s*\z//; # remove left-over "}" and spaces
1231              
1232 2578         8359 $text;
1233             }
1234              
1235             sub _parse_attributes
1236             {
1237             # Takes a text like "attribute: value; attribute2 : value2;" and
1238             # returns a hash with the attributes. $class defaults to 'node'.
1239             # In list context, also returns a flag that is maxlevel-1 when one
1240             # of the attributes was a multiple one (aka 2 for "red|green", 1 for "red");
1241 2999     2999   6448 my ($self, $text, $object, $no_multiples) = @_;
1242              
1243 2999         4562 my $class = $object;
1244 2999 100       8595 $class = $object->{class} if ref($object);
1245 2999 100       11028 $class = 'node' unless defined $class;
1246 2999         5447 $class =~ s/\..*//; # remove subclass
1247              
1248 2999         3617 my $out;
1249 2999         5849 my $att = {};
1250 2999         4821 my $multiples = 0;
1251              
1252 2999         10382 $text = $self->_clean_attributes($text);
1253 2999         6273 my $qr_att = $self->{_match_single_attribute};
1254 2999 100       3483 my $qr_cmt; $qr_cmt = $self->_match_multi_line_comment()
  2999         17027  
1255             if $self->can('_match_multi_line_comment');
1256 2999 100       3611 my $qr_satt; $qr_satt = $self->_match_special_attribute()
  2999         12632  
1257             if $self->can('_match_special_attribute');
1258              
1259 2999 100       20004 return {} if $text =~ /^\s*\z/;
1260              
1261 666 50       3028 print STDERR "attr parsing: matching\n '$text'\n against $qr_att\n" if $self->{debug} > 3;
1262              
1263 666         2506 while ($text ne '')
1264             {
1265 877 50       2871 print STDERR "attr parsing: matching '$text'\n" if $self->{debug} > 3;
1266              
1267             # remove a possible comment
1268 877 100       3137 $text =~ s/^$qr_cmt//g if $qr_cmt;
1269              
1270             # if the last part was a comment, we end up with an empty text here:
1271 877 100       3173 last if $text =~ /^\s*\z/;
1272              
1273             # match and remove "name: value"
1274 876   100     16226 my $done = ($text =~ s/^$qr_att//) || 0;
1275              
1276             # match and remove "name" if "name: value;" didn't match
1277 876 100 100     4762 $done++ if $done == 0 && $qr_satt && ($text =~ s/^$qr_satt//);
      66        
1278              
1279 876 100       2283 return $self->error ("Error in attribute: '$text' doesn't look valid to me.")
1280             if $done == 0;
1281              
1282 872         2068 my $name = $1;
1283 872 100       1842 my $v = $2; $v = '' unless defined $v; # for special attributes w/o value
  872         2280  
1284              
1285             # unquote and store
1286 872         3893 $out->{$name} = $self->_unquote_attribute($name,$v);
1287             }
1288              
1289 662 50 33     2568 if ($self->{debug} && $self->{debug} > 1)
1290             {
1291 0         0 require Data::Dumper;
1292 0         0 print STDERR "# ", join (" ", caller),"\n";
1293 0         0 print STDERR "# Parsed attributes into:\n", Data::Dumper::Dumper($out),"\n";
1294             }
1295             # possible remap attributes (for parsing Graphviz)
1296 662 100       13327 $out = $self->_remap_attributes($out, $object) if $self->can('_remap_attributes');
1297              
1298 662         1856 my $g = $self->{_graph};
1299             # check for being valid and finally create hash with name => value pairs
1300 662         4205 for my $name (sort keys %$out)
1301             {
1302 837         5048 my ($rc, $newname, $v) = $g->validate_attribute($name,$out->{$name},$class,$no_multiples);
1303              
1304 837 100       2660 $self->error($g->{error}) if defined $rc;
1305              
1306 837 100       2143 $multiples = scalar @$v if ref($v) eq 'ARRAY';
1307              
1308 837 100       4802 $att->{$newname} = $v if defined $v; # undef => ignore attribute
1309             }
1310              
1311 662 100       6197 return $att unless wantarray;
1312              
1313 41   100     432 ($att, $multiples || 1);
1314             }
1315              
1316             sub parse_error
1317             {
1318             # take a msg number, plus params, and throws an exception
1319 3     3 1 7 my $self = shift;
1320 3         5 my $msg_nr = shift;
1321              
1322             # XXX TODO: should really use the msg nr mapping
1323 3         8 my $msg = "Found unexpected group end"; # 0
1324 3 100       13 $msg = "Error in attribute: '##param2##' is not a valid attribute for a ##param3##" # 1
1325             if $msg_nr == 1;
1326 3 100       11 $msg = "Error in attribute: '##param1##' is not a valid ##param2## for a ##param3##"
1327             if $msg_nr == 2; # 2
1328 3 100       10 $msg = "Error: Found attributes, but expected group or node start"
1329             if $msg_nr == 3; # 3
1330 3 50       8 $msg = "Error in attribute: multi-attribute '##param1##' not allowed here"
1331             if $msg_nr == 4; # 4
1332 3 50       10 $msg = "Error in attribute: basename not allowed for non-autosplit nodes"
1333             if $msg_nr == 5; # 5
1334             # for graphviz parsing
1335 3 50       10 $msg = "Error: Already seen graph start"
1336             if $msg_nr == 6; # 6
1337 3 50       8 $msg = "Error: Expected '}', but found file end"
1338             if $msg_nr == 7; # 7
1339              
1340 3         4 my $i = 1;
1341 3         16 foreach my $p (@_)
1342             {
1343 6         89 $msg =~ s/##param$i##/$p/g; $i++;
  6         19  
1344             }
1345              
1346 3         20 $self->error($msg . ' at line ' . $self->{line_nr});
1347             }
1348              
1349             sub _parser_cleanup
1350             {
1351             # After initial parsing, do a cleanup pass.
1352 324     324   592 my ($self) = @_;
1353              
1354 324         635 my $g = $self->{_graph};
1355            
1356 324         2051 for my $n (ord_values ( $g->{nodes} ))
1357             {
1358 1126 100       3326 next if $n->{autosplit};
1359 1076 50 66     5169 $self->warn("Node '" . $self->_quote($n->{name}) . "' has an offset but no origin")
1360             if (($n->attribute('offset') ne '0,0') && $n->attribute('origin') eq '');
1361             }
1362              
1363 324         1392 $self;
1364             }
1365              
1366             sub _quote
1367             {
1368             # make a node name safe for error message output
1369 0     0     my ($self,$n) = @_;
1370              
1371 0           $n =~ s/'/\\'/g;
1372              
1373 0           $n;
1374             }
1375              
1376             1;
1377             __END__