File Coverage

blib/lib/MarpaX/Languages/SVG/Parser.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package MarpaX::Languages::SVG::Parser;
2              
3 1     1   443 use strict;
  1         2  
  1         24  
4 1     1   4 use warnings;
  1         2  
  1         24  
5 1     1   4 use warnings qw(FATAL utf8); # Fatalize encoding glitches.
  1         4  
  1         46  
6              
7 1     1   339 use Encode; # For decode() and encode().
  1         13251  
  1         96  
8              
9 1     1   526 use Log::Handler;
  1         35535  
  1         5  
10              
11 1     1   901 use MarpaX::Languages::SVG::Parser::XMLHandler;
  0            
  0            
12              
13             use Moo;
14              
15             use Path::Tiny; # For path().
16              
17             use Text::CSV;
18              
19             use Types::Standard qw/Any Int Str/;
20              
# Moo attribute declarations. All attributes are read-write ('rw') and all of
# them carry a default value.

# Name of the attribute being parsed; test() passes it to run_marpa().
has attribute => (
    is       => 'rw',
    isa      => Str,
    required => 0,
    default  => sub { '' },
);

# Name of the file to parse; read by BUILD(), run() and test().
# NOTE(review): 'required => 1' is combined with a default here; presumably
# the default satisfies the requirement - confirm against the Moo docs.
has input_file_name => (
    is       => 'rw',
    isa      => Str,
    required => 1,
    default  => sub { '' },
);

# Running counter; new_item() increments it and stores it in each item.
has item_count => (
    is       => 'rw',
    isa      => Int,
    required => 0,
    default  => sub { 0 },
);

# Container for the item hashrefs.
# NOTE(review): no 'use Set::Array' is visible in this file; presumably the
# class is loaded by the XMLHandler module - confirm.
has items => (
    is       => 'rw',
    isa      => Any,
    required => 0,
    default  => sub { Set::Array->new },
);

# Logger object. When still undef at construction time, BUILD() creates a
# Log::Handler object.
has logger => (
    is       => 'rw',
    isa      => Any,
    required => 0,
    default  => sub { undef },
);

# Passed as 'maxlevel' to the screen logger which BUILD() may create.
has maxlevel => (
    is       => 'rw',
    isa      => Str,
    required => 0,
    default  => sub { 'notice' },
);

# Passed as 'minlevel' to the screen logger which BUILD() may create.
has minlevel => (
    is       => 'rw',
    isa      => Str,
    required => 0,
    default  => sub { 'error' },
);

# Name of the CSV file written by save(); a false value means 'write nothing'.
has output_file_name => (
    is       => 'rw',
    isa      => Str,
    required => 0,
    default  => sub { '' },
);

our $VERSION = '1.09';
86              
87             # ------------------------------------------------
88              
# Post-construction hook.
#
# When no logger has been supplied (logger() is undef), creates a Log::Handler
# object which writes to the screen, using this object's maxlevel() and
# minlevel(). A defined-but-false logger (e.g. '') is left untouched, and
# log() then stays silent.
#
# Always finishes by logging the input file name at level 'debug'.
sub BUILD {
    my ($self) = @_;

    unless (defined $self->logger) {
        $self->logger(Log::Handler->new);

        my %screen_options = (
            maxlevel       => $self->maxlevel,
            message_layout => '%m',
            minlevel       => $self->minlevel,
            utf8           => 1,
        );

        $self->logger->add(screen => \%screen_options);
    }

    $self->log(debug => 'Input file: ' . $self->input_file_name);
}
111              
112             # --------------------------------------------------
113              
# Forward a message to the logger, if there is one.
#
# $level - name of the logger method to invoke; defaults to 'notice'.
# $s     - the message; defaults to ''.
#
# The call made is $self->logger->$level($s). Nothing is called when
# logger() returns a false value.
sub log {
    my ($self, $level, $s) = @_;

    $level = 'notice' unless defined $level;
    $s     = ''       unless defined $s;

    $self->logger and $self->logger->$level($s);
}
123              
124             # --------------------------------------------------
125              
# Append one item to the collection held by items().
#
# item_count() is incremented first, and the new count is stored in the item
# along with the given $type, $name and $value.
sub new_item {
    my ($self, $type, $name, $value) = @_;

    my %item = (
        name  => $name,
        type  => $type,
        value => $value,
    );

    $self->item_count(1 + $self->item_count);

    # Read the counter back after updating it, as the stored count.
    $item{count} = $self->item_count;

    $self->items->push(\%item);
}
140              
141             # --------------------------------------------------
142              
# Log a table of all items at level 'info'.
#
# A header line is logged first, then one line per item returned by
# items()->print. Each item's value is passed through decode('utf-8', ...)
# just before it is formatted.
sub report {
    my ($self)   = @_;
    my $template = '%6s %-10s %-20s %s';
    my @headings = ('Count', 'Type', 'Name', 'Value');

    $self->log(info => sprintf($template, @headings));

    # Decode and log one row at a time, so that rows logged before a failing
    # decode are still emitted.
    for my $row ($self->items->print) {
        my ($count, $type, $name) = @{$row}{qw(count type name)};
        my $line = sprintf($template, $count, $type, $name, decode('utf-8', $row->{value}));

        $self->log(info => $line);
    }
}
156              
157             # ------------------------------------------------
158              
# Process the file named by input_file_name().
#
# Creates an XMLHandler object from this object's logger and input file name,
# copies the handler's items into items(), and then calls save() and
# report().
# NOTE(review): no parse method is called explicitly here; presumably the
# handler's constructor does the parsing - confirm in XMLHandler.
#
# %args is accepted but not used.
#
# Returns 0. No code path here returns 1; failures presumably surface as
# exceptions.
sub run {
    my ($self, %args) = @_;

    my %handler_options = (
        logger          => $self->logger,
        input_file_name => $self->input_file_name,
    );
    my $xml_handler = MarpaX::Languages::SVG::Parser::XMLHandler->new(%handler_options);
    my $parsed      = $xml_handler->items->print;

    $self->items->push(@$parsed);
    $self->save;
    $self->report;

    # Return 0 for success and 1 for failure.

    return 0;
}
177              
178             # ------------------------------------------------
179              
# Write the items to a CSV file, but only if an output file name was given.
#
# Does nothing when output_file_name() is false (e.g. the default, '').
# Otherwise writes a header row (Count, Type, Name, Value) followed by one
# row per item returned by items()->print, with each value passed through
# decode('utf-8', ...), and then logs the file name at level 'debug'.
#
# Dies if the output file cannot be opened or closed. Previously these
# failures were ignored, so a bad path produced no file and yet still
# logged 'Wrote ...'.
sub save
{
    my($self)             = @_;
    my($output_file_name) = $self -> output_file_name;

    if ($output_file_name)
    {
        my($csv) = Text::CSV -> new({binary => 1, eol => $/});

        open(my $fh, '>', $output_file_name)
            or die "Can't open(> $output_file_name): $!\n";

        $csv -> print($fh, ['Count', 'Type', 'Name', 'Value']);

        for my $item ($self -> items -> print)
        {
            $csv -> print($fh, [$$item{count}, $$item{type}, $$item{name}, decode('utf-8', $$item{value})]);
        }

        # Buffered write errors only surface when the handle is closed.

        close($fh)
            or die "Can't close($output_file_name): $!\n";

        $self -> log(debug => "Wrote $output_file_name");
    }

} # End of save.
204              
205             # ------------------------------------------------
206              
# Parse the text file named by input_file_name() as the value of a single
# attribute.
#
# Lines starting with '#' are discarded. The remaining lines are joined and
# handed to the XMLHandler's run_marpa(), together with attribute(). The
# handler's items are then copied into items() and report() is called.
#
# %args is accepted but not used.
#
# Returns 0. No code path here returns 1; failures presumably surface as
# exceptions.
sub test {
    my ($self, %args) = @_;

    # Remove comment lines.

    my @kept_lines  = grep { !/^#/ } path($self->input_file_name)->lines_utf8;
    my $xml_handler = MarpaX::Languages::SVG::Parser::XMLHandler->new(logger => $self->logger);

    $xml_handler->run_marpa($self->attribute, join('', @kept_lines));

    my $parsed = $xml_handler->items->print;

    $self->items->push(@$parsed);
    $self->report;

    # Return 0 for success and 1 for failure.

    return 0;
}
227              
228             #-------------------------------------------------
229              
230             1;
231              
232             =pod
233              
234             =head1 NAME
235              
236             C<MarpaX::Languages::SVG::Parser> - A nested SVG parser, using XML::SAX and Marpa::R2
237              
238             =head1 Synopsis
239              
240             #!/usr/bin/env perl
241              
242             use strict;
243             use warnings;
244              
245             use MarpaX::Languages::SVG::Parser;
246              
247             # ---------------------------------
248              
249             my(%option) =
250             (
251             input_file_name => 'data/ellipse.01.svg',
252             );
253             my($parser) = MarpaX::Languages::SVG::Parser -> new(%option);
254             my($result) = $parser -> run;
255              
256             die "Parse failed\n" if ($result == 1);
257              
258             for my $item (@{$parser -> items -> print})
259             {
260             print sprintf "%-16s %-16s %s\n", $$item{type}, $$item{name}, $$item{value};
261             }
262              
263             This script ships as scripts/synopsis.pl. Run it as:
264              
265             shell> perl -Ilib scripts/synopsis.pl
266              
267             See also scripts/parse.file.pl for code which takes command line parameters. For help, run:
268              
269             shell> perl -Ilib scripts/parse.file.pl -h
270              
271             =head1 Description
272              
273             C<MarpaX::Languages::SVG::Parser> uses L<XML::SAX> and L<Marpa::R2> to parse SVG into an array of
274             hashrefs.
275              
276             L<XML::SAX> parses the input file, and then certain tags' attribute values are parsed by L<Marpa::R2>.
277             The attribute values treated specially each have their own BNFs. This is why it's called nested parsing.
278              
279             Examples of these special cases are the path's 'd' attribute and the 'transform' attribute of various tags.
280              
281             The SVG versions of the attribute-specific BNFs are
282             L.
283              
284             See the L for details.
285              
286             =head1 Installation
287              
288             Install C as you would for any C module:
289              
290             Run:
291              
292             cpanm MarpaX::Languages::SVG::Parser
293              
294             or run:
295              
296             sudo cpan MarpaX::Languages::SVG::Parser
297              
298             or unpack the distro, and then either:
299              
300             perl Build.PL
301             ./Build
302             ./Build test
303             sudo ./Build install
304              
305             or:
306              
307             perl Makefile.PL
308             make (or dmake or nmake)
309             make test
310             make install
311              
312             =head1 Constructor and Initialization
313              
314             C<new()> is called as C<< my($parser) = MarpaX::Languages::SVG::Parser -> new(k1 => v1, k2 => v2, ...) >>.
315              
316             It returns a new object of type C<MarpaX::Languages::SVG::Parser>.
317              
318             Key-value pairs accepted in the parameter list (see also the corresponding methods
319             [e.g. L]):
320              
321             =over 4
322              
323             =item o input_file_name => $string
324              
325             The name of the input file to be parsed.
326              
327             When calling L</run(%args)> this is an SVG file (e.g. data/*.svg).
328              
329             But when calling L</test(%args)>, this is a text file (e.g. data/*.dat).
330              
331             This option is mandatory.
332              
333             Default: ''.
334              
335             =item o logger => aLog::HandlerObject
336              
337             By default, an object of type L<Log::Handler> is created which prints to STDOUT,
338             but given the default setting (maxlevel => 'notice'), nothing is actually printed.
339              
340             See C<maxlevel> and C<minlevel> below.
341              
342             Set C<logger> to '' (the empty string) to stop a logger being created.
343              
344             Default: undef.
345              
346             =item o maxlevel => logOption1
347              
348             This option affects L objects.
349              
350             See the L docs.
351              
352             Since the L</report()> method is always called and outputs at log level C<info>, the first of these produces no output,
353             whereas the second lists all the parse results. The third adds a tiny bit to the output.
354              
355             shell> perl -Ilib scripts/parse.file.pl -i data/ellipse.01.svg
356             shell> perl -Ilib scripts/parse.file.pl -i data/ellipse.01.svg -max info
357             shell> perl -Ilib scripts/parse.file.pl -i data/ellipse.01.svg -max debug
358              
359             The extra output produced by C<debug> includes the input file name and the string which L<Marpa::R2> is trying to parse.
360             This helps debug the BNFs themselves.
361              
362             Default: 'notice'.
363              
364             =item o minlevel => logOption2
365              
366             This option affects L object.
367              
368             See the L docs.
369              
370             Default: 'error'.
371              
372             No lower levels are used.
373              
374             =item o output_file_name => $string
375              
376             The name of the CSV file to be written.
377              
378             Note: This name is only used when calling L</run(%args)>. It is of course ignored when calling L</test(%args)>.
379              
380             If not set, nothing is written.
381              
382             See data/circle.01.csv and data/utf8.01.csv, which were created by running:
383              
384             shell> perl -Ilib scripts/parse.file.pl -i data/circle.01.svg -o data/circle.01.csv
385             shell> perl -Ilib scripts/parse.file.pl -i data/utf8.01.svg -o data/utf8.01.csv
386              
387             Default: ''.
388              
389             =back
390              
391             =head1 Methods
392              
393             =head2 attribute($attribute)
394              
395             Get or set the name of the attribute being processed.
396              
397             This is only used in testing, in calls from scripts/test.file.pl and (indirectly) scripts/test.fileset.pl.
398              
399             It is needed because the test files, data/*.dat, do not contain tag/attribute names, and hence the code needs
400             to be told explicitly which attribute it is parsing.
401              
402             Note: C<attribute> is a parameter to new().
403              
404             =head2 input_file_name([$string])
405              
406             Here, the [] indicate an optional parameter.
407              
408             Get or set the name of the file to parse.
409              
410             When calling L</run(%args)> this is an SVG file (e.g. data/*.svg).
411              
412             But when calling L</test(%args)>, this is a text file (e.g. data/*.dat).
413              
414             Note: C<input_file_name> is a parameter to new().
415              
416             =head2 item_count([$new_value])
417              
418             Here, the [] indicate an optional parameter.
419              
420             Get or set the counter used to populate the C<count> key in the hashref in the array of parsed tokens.
421              
422             Used internally.
423              
424             See the L for details.
425              
426             =head2 items()
427              
428             Returns the instance of L<Set::Array> which manages the array of hashrefs holding the parsed tokens.
429              
430             $object -> items -> print returns an array ref.
431              
432             See L for sample code.
433              
434             See also L.
435              
436             =head2 log($level, $s)
437              
438             Calls $self -> logger -> $level($s) if ($self -> logger). $level defaults to 'notice' and $s defaults to ''.
439              
440             =head2 logger([$log_object])
441              
442             Here, the [] indicate an optional parameter.
443              
444             Get or set the log object.
445              
446             C<$log_object> must be a L<Log::Handler>-compatible object.
447              
448             To disable logging, just set logger to the empty string.
449              
450             Note: C<logger> is a parameter to new().
451              
452             =head2 maxlevel([$string])
453              
454             Here, the [] indicate an optional parameter.
455              
456             Get or set the value used by the logger object.
457              
458             This option is only used if an object of type L is created. See L.
459              
460             Note: C<maxlevel> is a parameter to new().
461              
462             =head2 minlevel([$string])
463              
464             Here, the [] indicate an optional parameter.
465              
466             Get or set the value used by the logger object.
467              
468             This option is only used if an object of type L is created. See L.
469              
470             Note: C<minlevel> is a parameter to new().
471              
472             =head2 new()
473              
474             This method is auto-generated by L<Moo>.
475              
476             =head2 new_item($type, $name, $value)
477              
478             Pushes another hashref onto the stack managed by $self -> items.
479              
480             See the L for details.
481              
482             =head2 output_file_name([$string])
483              
484             Here, the [] indicate an optional parameter.
485              
486             Get or set the name of the (optional) CSV file to write.
487              
488             Note: C<output_file_name> is a parameter to new().
489              
490             =head2 report()
491              
492             Prints a nicely-formatted report of the C<items> array via the logger.
493              
494             =head2 run(%args)
495              
496             The method which does all the work.
497              
498             C<%args> is a hash which is currently not used.
499              
500             Returns 0 for a successful parse and 1 for failure.
501              
502             The code dies if L<Marpa::R2> itself can't parse the given string.
503              
504             See also L.
505              
506             =head2 save()
507              
508             Save the parsed tokens to a CSV file, but only if an output file name was provided in the call to L</new()>
509             or to L</output_file_name([$string])>.
510              
511             =head2 test(%args)
512              
513             This method is used by scripts/test.fileset.pl, since that calls scripts/test.file.pl, to run tests.
514              
515             C<%args> is a hash which is currently not used.
516              
517             Returns 0 for a successful parse and 1 for failure.
518              
519             See also L.
520              
521             =head1 Files Shipped with this Module
522              
523             =head2 Data Files
524              
525             These are all shipped in the data/ directory.
526              
527             =over 4
528              
529             =item o *.log
530              
531             The logs of running this on each *.svg file:
532              
533             shell> perl -Ilib scripts/parse.file.pl -i data/ellipse.02.svg -max debug > data/ellipse.02.log
534              
535             The *.log files are generated by scripts/svg2log.pl.
536              
537             =item o circle.01.csv
538              
539             Output from scripts/parse.file.pl
540              
541             =item o circle.01.svg
542              
543             Test data for scripts/parse.file.pl
544              
545             =item o d.bnf
546              
547             This is the grammar for the 'd' attribute of the 'path' tag.
548              
549             Note: The module does not read this file. A copy of the grammar is stored at the end of the source code for
550             L, and read by L.
551              
552             =item o d.*.dat
553              
554             Fake data to test d.bnf.
555              
556             Input for scripts/test.file.pl.
557              
558             =item o html/d.svg
559              
560             This is the graph of the grammar d.bnf.
561              
562             It was generated by scripts/bnf2graph.pl.
563              
564             =item o ellipse.*.svg
565              
566             Test data for scripts/parse.file.pl
567              
568             =item o line.01.svg
569              
570             Test data for scripts/parse.file.pl
571              
572             =item o points.bnf
573              
574             This grammar is for both the polygon and polyline 'points' attributes.
575              
576             =item o points.*.dat
577              
578             Fake data to test points.bnf.
579              
580             Input for scripts/test.file.pl.
581              
582             =item o polygon.01.svg
583              
584             Test data for scripts/parse.file.pl
585              
586             =item o polyline.01.svg
587              
588             Test data for scripts/parse.file.pl
589              
590             =item o preserveAspectRatio.bnf
591              
592             This grammar is for the 'preserveAspectRatio' attribute of various tags.
593              
594             =item o preserveAspectRatio.*.dat
595              
596             Fake data to test preserveAspectRatio.bnf.
597              
598             Input for scripts/test.file.pl.
599              
600             =item o preserveAspectRatio.01.svg
601              
602             Test data for scripts/parse.file.pl
603              
604             =item o html/preserveAspectRatio.svg
605              
606             This is the graph of the grammar preserveAspectRatio.bnf.
607              
608             It was generated by scripts/bnf2graph.sh.
609              
610             =item o rect.*.svg
611              
612             Test data for scripts/parse.file.pl
613              
614             =item o transform.bnf
615              
616             This grammar is for the 'transform' attribute of various tags.
617              
618             =item o transform.*.dat
619              
620             Fake data to test transform.bnf.
621              
622             Input for scripts/test.file.pl.
623              
624             =item o utf8.01.csv
625              
626             Output from scripts/parse.file.pl
627              
628             =item o utf8.01.log
629              
630             The log of running:
631              
632             shell> perl -Ilib scripts/parse.file.pl -i data/utf8.01.svg -max debug > data/utf8.01.log
633              
634             =item o utf8.01.svg
635              
636             Test data for scripts/parse.file.pl
637              
638             =item o viewBox.bnf
639              
640             This grammar is for the 'viewBox' attribute of various tags.
641              
642             =item o viewBox.*.dat
643              
644             Fake data to test viewBox.bnf.
645              
646             Input for scripts/test.file.pl.
647              
648             =item o html/viewBox.svg
649              
650             This is the graph of the grammar viewBox.bnf.
651              
652             It was generated by scripts/bnf2graph.sh.
653              
654             =back
655              
656             =head2 Scripts
657              
658             These are all shipped in the scripts/ directory.
659              
660             =over 4
661              
662             =item o bnf2graph.pl
663              
664             Finds all data/*.bnf files and converts them into html/*.svg.
665              
666             shell> perl -Ilib scripts/bnf2graph.pl
667              
668             Requires L.
669              
670             =item o copy.config.pl
671              
672             This is for use by the author. It just copies the config file out of the distro, so the script generate.demo.pl
673             (which uses HTML template stuff) can find it.
674              
675             =item o find.config.pl
676              
677             This cross-checks the output of copy.config.pl.
678              
679             =item o float.pl
680              
681             This was posted by Jean-Damien Durand on the L,
682             as a demonstration of a grammar for parsing floats and hex numbers.
683              
684             =item o generate.demo.pl
685              
686             Run by generate.demo.sh.
687              
688             Input files are data/*.bnf and html/*.svg. Output file is html/*.html.
689              
690             =item o generate.demo.sh
691              
692             Runs generate.demo.pl and then copies html/* to my web server's doc dir ($DR).
693              
694             =item o number.pl
695              
696             This also was posted by Jean-Damien Durand on the L,
697             as a demonstration of a grammar for parsing floats and integers, and binary, octal and hex numbers.
698              
699             =item o parse.file.pl
700              
701             This is the script you'll probably use most frequently. Run with '-h' for help.
702              
703             =item o pod2html.sh
704              
705             This lets me quickly proof-read edits to the docs.
706              
707             =item o svg2log.pl
708              
709             Runs parse.file.pl on each data/*.svg file and saves the output in data/*.log.
710              
711             =item o synopsis.pl
712              
713             The code as per the L.
714              
715             =item o t/test.fake.data.t
716              
717             A test script. It parses data/*.dat, which are not SVG files, but just contain attribute value data.
718              
719             =item o t/test.real.data.t
720              
721             A test script. It parses data/*.svg, which are SVG files, and compares them to the shipped files data/*.log.
722              
723             =item o test.file.pl
724              
725             This runs the code on a single test file (data/*.dat, I<not> an svg file). Try:
726              
727             shell> perl -Ilib scripts/test.file.pl -a d -i data/d.30.dat -max debug
728              
729             =item o test.fileset.pl
730              
731             This runs the code on a set of files (data/d.*.dat, data/points.*.dat or data/transform.*.dat). Try:
732              
733             shell> perl -Ilib scripts/test.fileset.pl -a transform -max debug
734              
735             =item o t/version.t
736              
737             A test script.
738              
739             =back
740              
741             =head1 FAQ
742              
743             See also L.
744              
745             =head2 What exactly does this module do?
746              
747             It parses SVG files (using L<XML::SAX>), and applies special parsing (using L<Marpa::R2>) to certain
748             attributes of certain tags.
749              
750             The output is an array of hashrefs, whose structure is described below.
751              
752             =head2 Which SVG attributes are treated specially by this module?
753              
754             =over 4
755              
756             =item o d
757              
758             This is the 'd' attribute of the 'path' tag.
759              
760             =item o points
761              
762             This is the 'points' attribute of both the 'polygon' and 'polyline' tags.
763              
764             =item o preserveAspectRatio
765              
766             Various tags can have the 'preserveAspectRatio' attribute.
767              
768             =item o transform
769              
770             Various tags can have the 'transform' attribute.
771              
772             =item o viewBox
773              
774             Various tags can have a 'viewBox' attribute.
775              
776             =back
777              
778             Each of these special cases has its own Marpa-style BNF.
779              
780             The SVG versions of the attribute-specific BNFs are
781             L.
782              
783             =head2 Where are the specs for SVG and the BNFs?
784              
785             L. In particular, see L and
786             L.
787              
788             The BNFs have been translated into the syntax used by L. See L for details.
789              
790             These BNFs are actually stored at the end of the source code of L,
791             and loaded one at a time into Marpa using that fine module L.
792              
793             Also, the BNFs are shipped in data/*.bnf, and in html/*.svg.
794              
795             =head2 Is the stuff at the start of the SVG file preserved in the array?
796              
797             If by 'stuff' you mean:
798              
799             <?xml version="1.0" standalone="no"?>
800             <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
801             "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
802              
803             Then, no.
804              
805             I could not get the xml_decl etc events to fire using L V 0.99 and L V 1.01.
806              
807             =head2 Why don't you capture comments?
808              
809             Because Perl instantly segfaults if I try. Code tried in SAXHandler.pm:
810              
811             sub comment
812             {
813             my($self, $element) = @_;
814             my($comment) = $$element{Data};
815              
816             $self -> log(debug => "Comment: $comment"); # Prints ok.
817             $self -> new_item('comment', '-', $comment); # Segfaults.
818              
819             } # End of comment.
820              
821             Hence - No comment.
822              
823             =head2 How do I get access to this array?
824              
825             The L contains a runnable program, which ships as scripts/synopsis.pl.
826              
827             =head2 How is the parser's output stored in RAM?
828              
829             It is stored in an array of hashrefs managed by the L<Set::Array> module.
830              
831             The hashref structure is documented in the next item.
832              
833             Using L<Set::Array> is much simpler than using an arrayref. Compare:
834              
835             $self -> items -> push
836             ({
837             count => $self -> item_count,
838             name => $name,
839             type => $type,
840             value => $value,
841             });
842              
843             With:
844              
845             $self -> items([]);
846             ...
847             my($araref) = $self -> items;
848             push @$araref,
849             {
850             count => $self -> item_count,
851             name => $name,
852             type => $type,
853             value => $value,
854             };
855             $self -> items($araref);
856              
857             =head2 What exactly is the structure of the hashrefs output by the parser?
858              
859             Firstly, since the following text may be confusing, the very next item in this FAQ,
860             L, is designed to clarify things.
861              
862             Also, it may be necessary to study data/*.log to fully grasp this structure.
863              
864             Each hashref has these (key => value) pairs:
865              
866             =over 4
867              
868             =item o count => $integer
869              
870             This simply counts the number of the hashref within the array, starting from 1.
871              
872             =item o name => $string
873              
874             =over 4
875              
876             =item o tags and attributes
877              
878             If the hashref's C<type> matches /^(attribute|tag)$/, then this is the tag name or attribute name from the SVG.
879              
880             Note: The SAX parser used, L, outputs these names with a '{}' prefix. The code strips this
881             prefix.
882              
883             However, for other items, where the '{...}' is I<not> empty, the specific string is left intact. See data/utf8.01.log
884             for this sample:
885              
886             Item Type Name Value
887             1 tag svg open
888             2 attribute {http://www.w3.org/2000/xmlns/}xlink http://www.w3.org/1999/xlink
889             ...
890              
891             You have been warned.
892              
893             =item o Parser-generated tokens
894              
895             In the case that this current array element has been generated by parsing the C<value> of the attribute,
896             the C<name> value depends on the value of the C<type> field.
897              
898             In all such cases, the array contains a hashref with the C<type> 'raw', and with the C<value> being the tag's
899             original value.
900              
901             The elements which follow the one of C<type> 'raw' are the output of Marpa parsing the value.
902              
903             =back
904              
905             =item o type => $string
906              
907             This key can take the following values:
908              
909             =over 4
910              
911             =item o attribute
912              
913             This is an attribute for the most-recently opened tag.
914              
915             The C<name> and C<value> fields are for an attribute which has I<not> been specially parsed.
916              
917             The next element in the array is necessarily another token from the SVG.
918              
919             See C<raw> for the other case (i.e. compared to C<attribute>).
920              
921             =item o boolean
922              
923             The C<value> must be 0 or 1.
924              
925             The C<name> field in this case will be a counter of parameters for the preceding C<command> (see next point).
926              
927             =item o command
928              
929             The C<name> field is the letter (Mm, ..., Zz) for the command itself. In these cases, the C<value> is '-'.
930              
931             Note: As of V 1.01, in the hashref returned by the C sub C, the C is actually an arrayref
932             of the command's parameters. In V 1.00, the C was '-' and the C was the command letter. This change
933             was made when I stopped pushing hashrefs onto a stack, and converted the return value of the sub from scalar to
934             hashref.
935              
936             =item o content
937              
938             This is the text content for the most recently opened, but still unclosed, tag. It may be the empty string.
939             Likewise, it may contain any number of newlines, since it's copied faithfully from the input *.svg file.
940              
941             It will actually be followed by an array element flagging the closing of the tag it belongs to.
942              
943             =item o float
944              
945             Any float.
946              
947             The C<name> field in this case will be a counter of parameters for the preceding C<command>.
948              
949             =item o integer
950              
951             Any integer, but probably always 0, because of the way Marpa handles the BNF.
952              
953             The C<name> field in this case will be a counter of parameters for the preceding C<command>.
954              
955             =item o raw
956              
957             The C<name> and C<value> fields are for an attribute which has been specially parsed.
958              
959             The next element in the array is necessarily I<not> another token from the SVG.
960              
961             Rather, the array elements following this one are output from the Marpa-based parse of the value in the C<raw>
962             hashref's C<value> key.
963              
964             What this means is that if you are scanning the array, and detect a C<type> of C<raw>, all elements in the array
965             (after this one), up to the next item of C<type> C<(attribute|content|raw|tag)>, must be parameters output from the parse
966             of the value in the C<raw> hashref's C<value> key.
967              
968             There is one exception to the claim that 'The next element in the array is necessarily I<not> another token from the SVG.'
969             Consider:
970              
971             972             423,301 350,250 277,301 303,215 231,161 321,161z" />
973              
974             The 'z' (which itself takes no parameters) at the end of the points is the last thing output for this tag, so the
975             close tag item will be next array element.
976              
977             See C<attribute> for the other case (i.e. compared to C<raw>).
978              
979             =item o tag
980              
981             The C<name> and C<value> fields are for a tag.
982              
983             The C<name> is the name of the tag, and the C<value> is 'open' or 'close'.
984              
985             =back
986              
987             =item o value => $string
988              
989             The interpretation of this string depends on the value of the C<type> key. Basically:
990              
991             In the case of tags, this string is either 'open' or 'close'.
992              
993             In the case of attributes, it is the attribute's value.
994              
995             In the case of parsed attributes, it is an SVG command or one of that command's parameters.
996              
997             See the next FAQ item for details.
998              
999             =back
1000              
1001             =head2 Annotated output
1002              
1003             Here is a fragment of data/ellipse.02.svg:
1004              
1005             <path d="M300,200 h-150 a150,150 0 1,0 150,-150 z"
1006             fill="red" stroke="blue" stroke-width="5" />
1007              
1008             And here is the output from the built-in reporting mechanism (see data/ellipse.02.log):
1009              
1010             Item Type Name Value
1011             1 tag svg open
1012             ...
1013             27 tag path open
1014             28 raw d M300,200 h-150 a150,150 0 1,0 150,-150 z
1015             29 command M -
1016             30 float 1 300
1017             31 float 2 200
1018             32 command h -
1019             33 float 1 -150
1020             34 command a -
1021             35 float 1 150
1022             36 float 2 150
1023             37 integer 3 0
1024             38 boolean 4 1
1025             39 boolean 5 0
1026             40 float 6 150
1027             41 float 7 -150
1028             42 command z -
1029             43 attribute fill red
1030             44 attribute stroke blue
1031             45 attribute stroke-width 5
1032             46 content path
1033             47 tag path close
1034             ...
1035             66 tag svg close
1036              
1037             Let's go thru it:
1038              
1039             =over 4
1040              
1041             =item o Item 27 is the open tag for the path
1042              
1043             Type: tag
1044             Name: path
1045             Value: open
1046              
1047             =item o Item 28 is the path's 1st attribute, 'd'
1048              
1049             Type: raw
1050             Name: d
1051             Value: M300,200 h-150 a150,150 0 1,0 150,-150 z
1052              
1053             But since the C<type> is C<raw> we know both that it's an attribute, and that it must be followed by the parsed
1054             output of that value.
1055              
1056             Note: Attributes are reported in sorted order, but the parameters after parsing the attributes' values cannot be,
1057             because drawing the coordinates of the value is naturally order-dependent.
1058              
1059             =item o Item 29
1060              
1061             Type: command
1062             Name: M
1063             Values: '-'
1064              
1065             This in turn is followed by its respective parameters, if any.
1066              
1067             Note: 'Z' and 'z' have no parameters.
1068              
1069             =item o Item 30 .. 31
1070              
1071             Two floats. Commas are discarded in the parsing of all special values.
1072              
1073             Also, you'll notice they are numbered for your convenience by the C<name> key in their hashrefs.
1074              
1075             =item o Item 32
1076              
1077             Type: command
1078             Name: h
1079             Values: '-'
1080              
1081             =item o Item 33
1082              
1083             This is the float which belongs to 'h'.
1084              
1085             =item o Item 34
1086              
1087             Type: command
1088             Name: a
1089             Values: '-'
1090              
1091             =item o Items 35 .. 41
1092              
1093             The 7 parameters of the 'a' command. You'll notice the parser calls 0 an integer rather than a float.
1094             SVG does not care, and neither should you. But, since the code knows it is, it might as well tell you.
1095              
1096             The two Boolean flags are picked up explicitly, and the code tells you that, too.
1097              
1098             =item o Item 42
1099              
1100             Type: command
1101             Name: z
1102             Values: '-'
1103              
1104             As stated, it has no following parameters.
1105              
1106             =item o Items 43 .. 46
1107              
1108             The remaining attributes of the 'path' (items 43 .. 45) and its content (item 46). None of these are treated specially.
1109              
1110             =item o Item 47 is the close tag for the path
1111              
1112             Type: tag
1113             Name: path
1114             Value: close
1115              
1116             And, yes, this does mean self-closing tags, such as 'path', have 2 items in the array, with C<value>s of 'open'
1117             and 'close'. This allows code scanning the array to know absolutely where the data for the tag finishes.
1118              
1119             =back
1120              
1121             =head2 Why did you use L to parse the SVG?
1122              
1123             I find the SAX mechanism for handling XML particularly easy to work with.
1124              
1125             I did start with L, a great module, for the debugging of the BNFs, but the problem is that too many tags
1126             shared attributes (see 'transform' etc above), which made the code awkward.
1127              
1128             Also, that module triggers a callback for closing a tag before triggering the call to process the attributes defined
1129             by the opening of that tag. This adds yet more complexity.
1130              
1131             =head2 How are file encodings handled?
1132              
1133             I let L choose the encoding.
1134              
1135             For output, scripts/parse.file.pl uses the pragma:
1136              
1137             use open qw(:std :utf8); # Undeclared streams in UTF-8.
1138              
1139             This is needed if reading files encoded in utf-8, such as data/utf8.01.svg, and at the same time trying to print the
1140             parsed results to the screen by calling L with C<$string> set to C or C.
1141              
1142             Without this pragma, data/utf8.01.svg gives you the dread 'Wide character in print...' message.
1143              
1144             The pragma is not in the module because it's global, and the end user's program may not want it at all.
1145              
1146             Lastly, I have unilaterally set the utf8 attribute used by L<Log::Handler>. This is harmless for non-utf-8 files,
1147             and is vital for data/utf8.01.svg and similar end-user files. It allows the log output (STDOUT) to be redirected.
1148             And indeed, this is what some of the tests do.
1149              
1150             =head1 TODO
1151              
1152             This lists some possibly nice-to-have items, but none of them are important:
1153              
1154             =over 4
1155              
1156             =item o Store BNF's in an array
1157              
1158             This could be done by reading them once using L, in L,
1159             and caching them, rather than re-reading them each time a BNF is required.
1160              
1161             =item o Re-write grammars to do left-recursion
1162              
1163             Well, Jeffrey suggested this, but I don't have the skills (yet).
1164              
1165             =back
1166              
1167             =head1 Machine-Readable Change Log
1168              
1169             The file Changes was converted into Changelog.ini by L<Module::Metadata::Changes>.
1170              
1171             =head1 Version Numbers
1172              
1173             Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
1174              
1175             =head1 Repository
1176              
1177             L
1178              
1179             =head1 Support
1180              
1181             Email the author, or log a bug on RT:
1182              
1183             L.
1184              
1185             =head1 Credits
1186              
1187             The BNFs are partially based on the L, and partially (for numbers) on
1188             2 programs posted by Jean-Damien Durand to L.
1189             The thread is titled 'Space (\s) problems with my grammar'.
1190              
1191             Note: Some posts (as of 2013-10-16) in that thread can't be displayed. This may be a temporary issue.
1192             See scripts/float.pl and scripts/number.pl for Jean-Damien's original code, which were of considerable help to me.
1193              
1194             Specifically, I use number.pl for integers and floats, with these adjustments:
1195              
1196             =over 4
1197              
1198             =item o The code did not handle negative numbers, but an optional sign was already defined, so that was easy
1199              
1200             =item o The code did not handle 0
1201              
1202             =item o The code included hex and octal and binary numbers, which I did not need
1203              
1204             =back
1205              
1206             =head1 Author
1207              
1208             L<MarpaX::Languages::SVG::Parser> was written by Ron Savage I<E<lt>ron@savage.net.auE<gt>> in 2013.
1209              
1210             Home page: L.
1211              
1212             =head1 Copyright
1213              
1214             Australian copyright (c) 2013, Ron Savage.
1215              
1216             All Programs of mine are 'OSI Certified Open Source Software';
1217             you can redistribute them and/or modify them under the terms of
1218             The Artistic License 2.0, a copy of which is available at:
1219             http://www.opensource.org/licenses/index.html
1220              
1221             =cut