File Coverage

blib/lib/Data/Edit/Xml/Lint.pm
Criterion Covered Total %
statement 43 313 13.7
branch 3 120 2.5
condition 0 26 0.0
subroutine 14 37 37.8
pod 13 21 61.9
total 73 517 14.1


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             #-I/home/phil/z/perl/cpan/DataEditXml/lib -I/home/phil/z/perl/cpan/DataTableText/lib
3             #-------------------------------------------------------------------------------
4             # Lint xml files in parallel using xmllint and report the failure rate
5             # Philip R Brenan at gmail dot com, Appa Apps Ltd, 2016
6             #-------------------------------------------------------------------------------
7             # Report should allow the integration of other statistics into its summary besides the one it produces itself
8             # Record reused in other project
9             # podDocumentation
10              
11             package Data::Edit::Xml::Lint;
12             require v5.16.0;
13 1     1   521 use warnings FATAL => qw(all);
  1         2  
  1         31  
14 1     1   4 use strict;
  1         1  
  1         20  
15 1     1   4 use Carp;
  1         1  
  1         59  
16 1     1   422 use Data::Table::Text qw(:all);
  1         18529  
  1         266  
17 1     1   349 use Digest::SHA qw(sha256_hex);
  1         2069  
  1         69  
18 1     1   349 use Encode;
  1         7151  
  1         2511  
19             our $VERSION = 20170802;
20              
21             #1 Constructor # Construct a new linter
22              
23             sub new # Create a new xml linter - call this method statically as in L
24 0     0 1 0 {bless {} # Create xml linter
25             }
26              
27             #2 Attributes # Attributes describing a lint
28              
29             genLValueScalarMethods(qw(author)); # Optional author of the xml - only needed if you want to generate an SDL file map
30             genLValueScalarMethods(qw(catalog)); # Optional catalog file containing the locations of the DTDs used to validate the xml
31             genLValueScalarMethods(qw(ditaType)); # Optional Dita topic type(concept|task|troubleshooting|reference) of the xml - only needed if you want to generate an SDL file map
32             genLValueScalarMethods(qw(docType)); # The second line: the document type extracted from the L
33             genLValueScalarMethods(qw(dtds)); # Optional directory containing the DTDs used to validate the xml
34             genLValueScalarMethods(qw(errors)); # Number of lint errors detected by xmllint
35             genLValueScalarMethods(qw(file)); # File that the xml will be written to and read from by L, L or L
36             genLValueScalarMethods(qw(guid)); # Guid for outermost tag - only required if you want to generate an SD file map
37             genLValueScalarMethods(qw(header)); # The first line: the xml header extracted from L
38             genLValueScalarMethods(qw(idDefs)); # {id} = count - the number of times this id is defined in the xml contained in this L
39             genLValueScalarMethods(qw(labelDefs)); # {label or id} = id - the id of the node containing a L defined on the xml
40             genLValueScalarMethods(qw(labels)); # Optional parse tree to supply L for the current L as the labels are present in the parse tree not in the string representing the parse tree
41             genLValueScalarMethods(qw(linted)); # Date the lint was performed by L
42             genLValueScalarMethods(qw(processes)); # Maximum number of xmllint processes to run in parallel - 8 by default
43             genLValueScalarMethods(qw(project)); # Optional L name to allow error counts to be aggregated by L and to allow L to be scoped to the L contained in each L
44             genLValueArrayMethods(qw(reusedInProject)); # List of projects in which this file is reused
45             genLValueScalarMethods(qw(sha256)); # Sha256 hash of the string containing the xml processed by L or L
46             genLValueScalarMethods(qw(source)); # The source Xml to be linted
47             genLValueScalarMethods(qw(title)); # Optional title of the xml - only needed if you want to generate an SDL file map
48              
49             #1 Lint # Lint xml L in parallel
50              
51             my @pids; # Lint pids
52              
53             sub lint($@) # Store some xml in a L, apply xmllint in parallel and update the source file with the results
54 0     0 1 0 {my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
55 0         0 &lintOP(1, @_);
56             }
57              
58             sub lintNOP($@) # Store some xml in a L, apply xmllint in single and update the source file with the results
59 0     0 1 0 {my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
60 0         0 &lintOP(0, @_);
61             }
62              
63             sub lintOP($$@) #P Store some xml in a L, apply xmllint in parallel or single and update the source file with the results
64 0     0 1 0 {my ($inParallel, $lint, %attributes) = @_; # In parallel or not, Linter, attributes to be recorded as xml comments
65              
66 0 0       0 $lint->source or confess "Use the source() method to provide the source xml"; # Check that we have some source
67 0 0       0 $lint->file or confess "Use the ->file method to provide the target file"; # Check that we have an output file
68              
69 0 0       0 if ($inParallel) # Process in parallel if possible
70 0   0     0 {my $processes = $lint->processes // 8; # Maximum number of processes
71 0         0 &waitProcessing; # Wait until enough sub processes have completed
72 0 0       0 if (my $pid = fork()) # Perform lints in parallel
73 0         0 {push @pids, $pid;
74 0         0 return;
75             }
76             }
77              
78 0         0 $lint->source = $lint->source =~ s/\s+\Z//gsr; # Xml text to be written minus trailing blanks
79 0         0 my @lines = split /\n/, $lint->source; # Split source into lines
80              
81 0         0 my $file = $lint->file; # File to be written to
82 0 0       0 confess "File name contains a new line:\n$file\n" if $file =~ m/\n/s; # Complain if the source file contains a new line
83              
84 0         0 for(qw(author catalog ditaType dtds file guid project title)) # Map parameters to attributes
85 0         0 {my $a = $lint->$_;
86 0 0       0 $attributes{$_} = $a if $a;
87             }
88              
89 0         0 $attributes{docType} = $lines[1]; # Source details
90 0         0 $attributes{header} = $lines[0];
91 0         0 $attributes{sha256} = sha256_hex(encode("ascii", $lint->source)); # Digest of source string
92              
93 0         0 my $time = "\n"; # Time stamp marks the start of the added comments
94 0         0 my $attr = &formatAttributes({%attributes}); # Attributes to be recorded with the xml
95             my $labels = sub # Process any labels in the parse tree
96 0 0   0   0 {return '' unless $lint->labels;
97 0         0 my $s = '';
98             $lint->labels->by(sub # Search the supplied parse tree for any id or label definitions
99 0         0 {my ($o) = @_;
100              
101 0 0       0 if (my $i = $o->id) # Id for this node but no labels
102 0         0 {$s .= "\n"; # Id definition
103 0   0     0 my $d = $lint->idDefs //= {}; # Id definitions for this file
104 0         0 $d->{$i} = $i; # Record id definition
105             }
106              
107 0 0       0 if (my @labels = $o->getLabels) # Labels for this node
108 0         0 {my $i = $o->id; # Id for this node
109 0 0       0 $i or confess "No id for node with labels:\n".$o->prettyString;
110 0         0 $s .= "\n";
111 0   0     0 my $l = $lint->labelDefs //= {}; # Labels for this file
112 0         0 $l->{$_} = $i for @labels; # Link each label to its primary id
113             }
114 0         0 });
115 0         0 $s
116 0         0 }->();
117              
118 0         0 writeFile($file, my $source = $lint->source."\n$time\n$attr\n$labels"); # Write xml to file
119              
120 0 0       0 if (my $v = qx(xmllint --version 2>&1)) # Check xmllint is present
121 0 0       0 {unless ($v =~ m(\Axmllint)is)
122 0         0 {confess "xmllint missing, install with:\nsudo apt-get xmllint";
123             }
124             }
125              
126             my $c = sub # Lint command
127 0     0   0 {my $d = $lint->dtds; # Optional dtd to use
128 0         0 my $f = $file; # File name
129 0 0       0 return "xmllint --path \"$d\" --noout --valid \"$f\" 2>&1" if $d; # Lint against DTDs
130 0         0 my $c = $lint->catalog; # Optional dtd catalog to use
131 0 0       0 return qq(xmllint --noout - < '$f' 2>&1) unless $c; # Normal lint
132 0         0 qq(export XML_CATALOG_FILES='$c' && xmllint --noout --valid - < '$f' 2>&1) # Catalog lint
133 0         0 }->();
134              
135 0 0       0 if (my @errors = qx($c)) # Perform lint and add errors as comments
136 0         0 {my $s = readFile($file);
137 0         0 my $e = join '', map {chomp; "\n"} @errors;
  0         0  
  0         0  
138 0         0 my $n = $lint->errors = int @errors / 3; # Three lines per error message
139              
140 0         0 my $t = "";
141              
142 0         0 writeFile($file, "$source\n$time$e\n$t"); # Update xml file with errors
143             }
144             else # No errors detected
145 0         0 {$lint->errors = 0;
146             }
147 0 0       0 exit if $inParallel;
148             } # lint
149              
150             sub nolint($@) # Store just the attributes in a file so that they can be retrieved later to process non xml objects referenced in the xml - like images
151 0     0 1 0 {my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
152 0 0       0 !$lint->source or confess "Source specified for nolint(), use lint()"; # Source not permitted for nolint()
153 0         0 my $file = $lint->file; # File to be written to
154 0 0       0 $file or confess "Use the ->file method to provide the target file"; # Check that we have an output file
155              
156 0         0 for(qw(author ditaType file guid project)) # Map parameters to attributes
157 0         0 {my $a = $lint->$_;
158 0 0       0 $attributes{$_} = $a if $a;
159             }
160              
161 0         0 my $time = "\n"; # Time stamp marks the start of the added comments
162 0         0 my $attr = &formatAttributes({%attributes}); # Attributes to be recorded with the xml
163              
164 0         0 writeFile($file, "\n$time\n$attr"); # Write attributes to file
165             } # nolint
166              
167             sub formatAttributes(%) #P Format the attributes section of the output file
168 0     0 1 0 {my ($attributes) = @_; # Hash of attributes
169 0         0 my @s;
170 0         0 for(sort keys %$attributes)
171 0         0 {my $v = $attributes->{$_}; # Attribute value
172 0 0       0 defined($v) or confess "Attribute $_ has no value";
173 0 0       0 $v =~ s/--/__/gs if /title/; # Replace -- with __ as -- will upset the use of xml comments to hold the data in a greppable form - but only for title - for files we need to see an error message
174 0 0       0 $v =~ m/--/s and confess "-- in value of $_=>$v"; # Confess if -- present in attribute value as this will mess up the xml comments
175 0         0 push @s, ""; # Place attribute inside a comment
176             }
177 0         0 join "\n", @s
178             }
179              
180             sub read($) # Reread a linted xml L and extract the L associated with the L
181 0     0 1 0 {my ($file) = @_; # File containing xml
182 0         0 my $s = readFile($file); # Read xml from file
183 0         0 my %a = $s =~ m//igs; # Get attributes
184 0         0 my @a = split m/\n/, $s; # Split into lines
185              
186 0         0 my $l = {}; # Reconstructed labels
187 0         0 for(@a) # Each source line
188 0 0       0 {if (//gs) # Labels line
189 0         0 {my ($w) = my @w = split /\s+/, $1; # Id, labels
190 0         0 $l->{$_} = $w for @w; # Associate each id and label with the id
191             }
192             }
193              
194 0         0 my $d = {}; # Id definitions
195 0         0 for(@a) # Each source line
196 0 0       0 {if (//gs) # Definition
197 0         0 {$d->{$1}++; # Record definition
198 0         0 $l->{$1} = $1; # An id also defines a label
199             }
200             }
201              
202 0         0 my $r = {}; # Reused in project
203 0         0 for(@a) # Each source line
204 0 0       0 {if (//gs) # Definition
205 0         0 {$r->{$1}++; # Record definition
206             }
207             }
208              
209 0         0 my $S = $s =~ s/\s+