File Coverage

blib/lib/XML/Tidy.pm
Criterion Covered Total %
statement 12 12 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 16 16 100.0


line stmt bran cond sub pod time code
1             # 4C3JHOH1: XML::Tidy.pm by Pip Stuart to tidy indenting of XML documents as parsed XML::XPath objects;
2             package XML::Tidy;
3 6     6   39599 use strict;use warnings;use utf8;
  6     6   17  
  6     6   175  
  6         38  
  6         13  
  6         182  
  6         3498  
  6         88  
  6         38  
4             require Exporter;
5             require XML::XPath;
6 6     6   318 use base qw( XML::XPath Exporter );
  6         18  
  6         3194  
7             use vars qw( $AUTOLOAD @EXPORT );
8             use XML::XPath::XMLParser;
9             use Carp;
10             use Math::BaseCnv qw(:b64);
11             our $VERSION='1.20';our $d8VS='H79M9hU8';
# Names exported into every caller: the DOM-style node-type constants below
# plus STANDARD_XML_DECL.
@EXPORT = qw(
    UNKNOWN_NODE
    ELEMENT_NODE
    ATTRIBUTE_NODE
    TEXT_NODE
    CDATA_SECTION_NODE
    ENTITY_REFERENCE_NODE
    ENTITY_NODE
    PROCESSING_INSTRUCTION_NODE
    COMMENT_NODE
    DOCUMENT_NODE
    DOCUMENT_TYPE_NODE
    DOCUMENT_FRAGMENT_NODE
    NOTATION_NODE
    ELEMENT_DECL_NODE
    ATT_DEF_NODE
    XML_DECL_NODE
    ATTLIST_DECL_NODE
    NAMESPACE_NODE
    STANDARD_XML_DECL
);

# Node-type codes.  The empty prototype lets each name be used as a bare
# constant in expressions such as `$node->getNodeType() == ELEMENT_NODE`.
sub UNKNOWN_NODE                () {  0 }
sub ELEMENT_NODE                () {  1 }
sub ATTRIBUTE_NODE              () {  2 }
sub TEXT_NODE                   () {  3 }
sub CDATA_SECTION_NODE          () {  4 }
sub ENTITY_REFERENCE_NODE       () {  5 }
sub ENTITY_NODE                 () {  6 }
sub PROCESSING_INSTRUCTION_NODE () {  7 }
sub COMMENT_NODE                () {  8 }
sub DOCUMENT_NODE               () {  9 }
sub DOCUMENT_TYPE_NODE          () { 10 }
sub DOCUMENT_FRAGMENT_NODE      () { 11 }
sub NOTATION_NODE               () { 12 }
# Non core DOM stuff here down
sub ELEMENT_DECL_NODE           () { 13 }
sub ATT_DEF_NODE                () { 14 }
sub XML_DECL_NODE               () { 15 }
sub ATTLIST_DECL_NODE           () { 16 }
sub NAMESPACE_NODE              () { 17 }

# Standard XML Declaration.  This file-level lexical is shared by every sub in
# the file and is prepended to all generated XML; new() may overwrite it with
# a well-formed declaration found in the input.  The literal must be a real
# declaration terminated by a newline: a bare "\n" would make every written
# document start with an empty line and no declaration.
my $xmld = qq(<?xml version="1.0" encoding="utf-8"?>\n);

# Returns the XML declaration currently in effect (read at call time, so it
# reflects any replacement made by new()).
sub STANDARD_XML_DECL () { $xmld }
# new(@args) -- construct an XML::Tidy object.
#
# Accepted argument forms, as handled below:
#   'binary' => $file   expand a readable file written by bcompress()
#   $file ending .xtb   same, recognised by extension
#   anything else       passed unchanged to XML::XPath->new(@args)
#
# For the XML::XPath forms the input is also scanned for an XML declaration;
# if one is found and is well-formed (version + encoding, optional standalone)
# it replaces the file-level $xmld, which is shared by ALL objects and is what
# reload()/tidy()/write() emit.  Otherwise the existing default is kept.
#
# Returns the XPath object blessed into $clas.  Croaks if the source file
# cannot be opened/closed or if no object could be built.
sub new {
    my $clas = shift();
    my $xpob = undef;
    my $dtst = '';                               # candidate declaration text
    my $arg0 = defined($_[0]) ? $_[0] : '';      # avoid undef warnings below

    if (lc($arg0) eq 'binary' && @_ > 1 && defined($_[1]) && length($_[1]) && -r $_[1]) {
        # bexpand() takes (invocant, filename); without the invocant the
        # filename was consumed as $self and 'default.xtb' was read instead.
        $xpob = bexpand($clas, $_[1]);
    }
    elsif ($arg0 =~ /\.xtb$/i) {
        $xpob = bexpand($clas, $_[0]);
    }
    else {
        $xpob = XML::XPath->new(@_);
        shift(@_) if $arg0 eq 'filename';
        $arg0 = defined($_[0]) ? $_[0] : '';

        if ($arg0 !~ /\n/ && -r "$arg0") {
            # Special-case loading an XML file with a non-standard declaration
            # (inline XML data is handled below; IO refs are not handled).
            open(my $xmlf, '<', $arg0)
                or croak "!*EROR*! Cannot open XML file $arg0! $!\n";
            while (my $line = <$xmlf>) {
                # Accumulate lines until a declaration has formed (should
                # almost always be just the first line), then stop reading.
                $dtst .= $line;
                last if $dtst =~ /<\?xml\s+version=.+?\?>/i;
            }
            close($xmlf) or croak "!*EROR*! Cannot close XML file $arg0! $!\n";
            # Drop whatever followed the declaration on its line so the strict
            # check below can match.  (This trim used to be applied to $xmld,
            # where it could never change anything.)
            $dtst =~ s/(\?>).*/$1\n/s;
        }
        elsif ($arg0 eq 'xml' && @_ > 1 && defined($_[1]) && length($_[1])) {
            $dtst = $1 if $_[1] =~ /(<\?xml\s+version=.+?\?>\s*?\n)/i;
        }
        elsif ($arg0 =~ /(<\?xml\s+version=.+?\?>\s*?\n)/i) {
            $dtst = $1;
        }

        # If the provided declaration doesn't seem well-formed, just leave the
        # default in place as a good standard.
        $xmld = $dtst
            if $dtst =~ /^<\?xml\s+version="[^"]+"\s+encoding="[^"]+"(\s+standalone="(yes|no)")?\s*\?>\n$/i;
    }

    croak "!*EROR*! new() could not build an object from the given arguments!\n"
        unless ref($xpob);
    # self is just a new XPath object blessed into the Tidy class
    return bless($xpob, $clas);
}
# reload() -- serialise the current tree to text and parse it again so that
# every node is re-indexed cleanly.  The text is the file-level XML
# declaration followed by each child of the document node.  Does nothing when
# called without an invocant.
sub reload {
    my $self = shift();
    if (defined($self)) {
        my ($docnode) = $self->findnodes('/');
        my $xmltext   = join('', $xmld, map { $_->toString() } $docnode->getChildNodes());
        $self->set_xml($xmltext);
        my $parser = XML::XPath::XMLParser->new('xml' => $xmltext);
        $self->set_context($parser->parse());
    }
}
# strip() -- empty every whitespace-only text node that has at least one
# sibling (i.e. the indentation text of mixed content), then reload() so the
# emptied nodes disappear from the re-parsed tree.  Does nothing when called
# without an invocant.
sub strip {
    my $self = shift();
    if (defined($self)) {
        for my $elem ($self->findnodes('//*')) {
            next unless $elem->getNodeType() eq ELEMENT_NODE();
            my @children = $elem->getChildNodes();
            for my $child (@children) {
                next unless $child->getNodeType() eq TEXT_NODE();
                next unless @children > 1;                  # an only child is real data
                next unless $child->getValue() =~ /^\s*$/;
                $child->setValue('');                       # empty them all out
            }
        }
        $self->reload();   # reload all XML as text to re-index nodes
    }
}
# tidy([$indent]) -- re-indent the whole document.
#
# $indent is the string repeated once per nesting level.  A value matching
# /tab/i selects "\t"; a value matching /spac/i (or a false/missing value)
# selects ' '.
#
# The document is strip()ped, the root element is rebuilt by _rectidy(), and
# the result (declaration + nodes before the root + root + nodes after the
# root) is re-parsed into this object.  Does nothing without an invocant or
# when the document has no root element.
sub tidy {
    my $self = shift();
    my $ndnt = shift() || ' ';
    $ndnt = "\t" if $ndnt =~ /tab/i;    # allow some indent_type descriptions
    $ndnt = ' '  if $ndnt =~ /spac/i;

    if (defined($self)) {
        $self->strip();                 # strip text nodes from mixed content
        my $dpth = 0;                   # element nest depth
        my $orte = 0;                   # old root element
        my $prre = '';                  # text of nodes before the root element
        my $pore = '';                  # text of nodes after the root element

        my ($docu) = $self->findnodes('/');
        for my $kid ($docu->getChildNodes()) {
            if    ($kid->getNodeType() == ELEMENT_NODE()) { $orte  = $kid;             }
            elsif (!$orte)                                { $prre .= $kid->toString(); }
            else                                          { $pore .= $kid->toString(); }
        }
        ($orte) = $self->findnodes('/*') unless $orte;

        # Without a root element (e.g. after it was pruned) there is nothing
        # to indent; previously this died calling a method on undef.
        return unless $orte;

        # Start from the existing root so a childless root is emitted as-is;
        # the replacement used to stay 0 in that case and ->toString() on it
        # was fatal.
        my $nrte = $orte;
        if ($orte->getChildNodes()) {   # recursively tidy children
            $nrte = $self->_rectidy($orte, ($dpth + 1), $ndnt);
        }

        my $data = $xmld . $prre . $nrte->toString() . $pore;
        $self->set_xml($data);
        my $prsr = XML::XPath::XMLParser->new('xml' => $data);
        $self->set_context($prsr->parse());
    }
}
# _rectidy($node, $dpth, $ndnt) -- build and return an indented copy of
# element $node.
#
# The copy gets $node's attributes and namespaces, then each child in order.
# A "\n" + ($ndnt x $dpth) text node is inserted before a child whenever
# neither that child nor the previously appended one is a text node.  Child
# elements whose first or second child is not a text node are rebuilt
# recursively one level deeper.  A final "\n" + ($ndnt x ($dpth - 1)) text
# node lines up the closing tag.
sub _rectidy {
    my ($self, $node, $dpth, $ndnt) = @_;

    my $copy = XML::XPath::Node::Element->new($node->getName());
    $copy->appendAttribute($_) for $node->findnodes('@*');
    $copy->appendNamespace($_) for $node->getNamespaces();

    my @kids = $node->getChildNodes();
    my $prev;                                   # last child appended to $copy
    for my $kid (@kids) {
        my $placed = $kid;

        my $prev_is_text = $prev && $prev->getNodeType() eq TEXT_NODE();
        if ($placed->getNodeType() ne TEXT_NODE() && !$prev_is_text) {
            $copy->appendChild(XML::XPath::Node::Text->new("\n" . ($ndnt x $dpth)));
        }

        if ($placed->getNodeType() eq ELEMENT_NODE()) {
            my @gkdz = $placed->getChildNodes();
            my $has_structure = @gkdz
                && (   $gkdz[0]->getNodeType() ne TEXT_NODE()
                    || (@gkdz > 1 && $gkdz[1]->getNodeType() ne TEXT_NODE()));
            if ($has_structure) {               # recursively tidy
                $placed = $self->_rectidy($placed, ($dpth + 1), $ndnt);
            }
        }

        $copy->appendChild($placed);
        $prev = $placed;
    }

    $copy->appendChild(XML::XPath::Node::Text->new("\n" . ($ndnt x ($dpth - 1))));
    return($copy);
}
# compress([$flgz]) -- replace names and tokens in the tree with short
# look-up keys and append a comment holding the look-up tables.
#
# $flgz selects what to compress; each letter is a node kind and is active
# unless its upper-case form appears after it in the string:
#   e element names    a attribute names    v attribute-value tokens
#   t text tokens      c comment tokens     (n is accepted but unused here)
# Default is 'ea'; 'all' means 'eatvnc'.
#
# Keys are a kind letter followed by b64(index into that kind's table).  A
# token already present in an earlier table reuses that table's key.  Tokens
# containing any of ( ) [ ] { } / * + ? are left untouched.
#
# Croaks if the document already carries a compress() table comment.
sub compress {
    my $self = shift();
    my $flgz = shift();                  # options of node types to include
    my (@elut, @alut, @vlut, @tlut, @clut);   # look-up tables per kind
    my (%efou, %afou, %vfou, %tfou, %cfou);   # token => index already found
    my $cstr = "XML::Tidy::compress v$VERSION";
    my $ntok = qr/[\(\)\[\]\{\}\/\*\+\?]/;    # non-token quoted regex

    $flgz = 'ea'     unless defined($flgz);   # default: just elemz && attrz
    $flgz = 'eatvnc' if $flgz eq 'all';
    $self->strip();                           # remove non-data text nodes

    my ($root) = $self->findnodes('/');
    for my $cmnt ($root->findnodes('//comment()')) {
        my $text = $cmnt->getNodeValue();
        # The third version component is optional: the header written below
        # uses $VERSION, which may have only two parts, and a pattern that
        # insists on three would never notice an already-compressed document.
        if (defined($text)
            && $text =~ /^XML::Tidy::compress v\d+\.\d+(?:\.[0-9A-Za-z._]{7})?/) {
            croak "!*EROR*! compress() cannot be performed twice on the same object!\n";
        }
    }

    if ($flgz =~ /e[^E]*$/) {                 # elements
        for my $elem ($root->findnodes('//*')) {
            my $name = $elem->getName();
            unless (exists($efou{$name})) {
                push(@elut, $name);
                $efou{$name} = $#elut;
            }
            # 5 is the index of XML::XPath::Node::Element's node_name field
            ${$elem}->[5] = 'e' . b64($efou{$name});
        }
        $cstr .= "\ne:@elut" if @elut;
    }

    if ($flgz =~ /(a[^A]*|v[^V]*)$/) {        # attributes (keys or values)
        for my $attr ($root->findnodes('//@*')) {
            if ($flgz =~ /a[^A]*$/) {         # attribute keys
                my $name = $attr->getName();
                # 4 is the index of XML::XPath::Node::Attribute's node_key field
                if (exists($efou{$name})) {   # reuse element keys matching attributes
                    ${$attr}->[4] = 'e' . b64($efou{$name});
                }
                else {
                    unless (exists($afou{$name})) {
                        push(@alut, $name);
                        $afou{$name} = $#alut;
                    }
                    ${$attr}->[4] = 'a' . b64($afou{$name});
                }
            }
            if ($flgz =~ /v[^V]*$/) {         # attribute values
                my $wval = $attr->getNodeValue();
                $wval = '' unless defined($wval);
                # split(' ') skips leading whitespace, so no empty token is
                # ever entered into a table.
                for my $valu (split(' ', $wval)) {
                    next if $valu =~ $ntok;
                    my $repl;
                    if    (exists($efou{$valu})) { $repl = 'e' . b64($efou{$valu}); }
                    elsif (exists($afou{$valu})) { $repl = 'a' . b64($afou{$valu}); }
                    else {
                        unless (exists($vfou{$valu})) {
                            push(@vlut, $valu);
                            $vfou{$valu} = $#vlut;
                        }
                        $repl = 'v' . b64($vfou{$valu});
                    }
                    # 5 is the index of XML::XPath::Node::Attribute's node_value field.
                    # \Q..\E: the token is data, not a pattern.
                    ${$attr}->[5] =~ s/(^|\s+)\Q$valu\E(\s+|$)/$1$repl$2/g;
                }
            }
        }
        $cstr .= "\na:@alut" if @alut;
        $cstr .= "\nv:@vlut" if @vlut;
    }

    if ($flgz =~ /t[^T]*$/) {                 # text
        for my $tnod ($root->findnodes('//text()')) {
            my $wtxt = $tnod->getNodeValue();
            $wtxt = '' unless defined($wtxt);
            for my $text (split(' ', $wtxt)) {
                next if $text =~ $ntok;
                my $repl;
                if    (exists($efou{$text})) { $repl = 'e' . b64($efou{$text}); }
                elsif (exists($afou{$text})) { $repl = 'a' . b64($afou{$text}); }
                # was a second test of %afou, which made this branch unreachable
                elsif (exists($vfou{$text})) { $repl = 'v' . b64($vfou{$text}); }
                else {
                    unless (exists($tfou{$text})) {
                        push(@tlut, $text);
                        $tfou{$text} = $#tlut;
                    }
                    $repl = 't' . b64($tfou{$text});
                }
                # 3 is the index of XML::XPath::Node::Text's node_text field
                ${$tnod}->[3] =~ s/(^|\s+)\Q$text\E(\s+|$)/$1$repl$2/g;
            }
        }
        $cstr .= "\nt:@tlut" if @tlut;
    }

    if ($flgz =~ /c[^C]*$/) {                 # comment
        for my $cnod ($root->findnodes('//comment()')) {
            my $wcmt = $cnod->getNodeValue();
            $wcmt = '' unless defined($wcmt);
            for my $cmnt (split(' ', $wcmt)) {
                next if $cmnt =~ $ntok;
                my $repl;
                if    (exists($efou{$cmnt})) { $repl = 'e' . b64($efou{$cmnt}); }
                elsif (exists($afou{$cmnt})) { $repl = 'a' . b64($afou{$cmnt}); }
                # was a second test of %afou, which made this branch unreachable
                elsif (exists($vfou{$cmnt})) { $repl = 'v' . b64($vfou{$cmnt}); }
                elsif (exists($tfou{$cmnt})) { $repl = 't' . b64($tfou{$cmnt}); }
                else {
                    unless (exists($cfou{$cmnt})) {
                        push(@clut, $cmnt);
                        $cfou{$cmnt} = $#clut;
                    }
                    $repl = 'c' . b64($cfou{$cmnt});
                }
                # 3 is the index of XML::XPath::Node::Comment's node_comment field
                ${$cnod}->[3] =~ s/(^|\s+)\Q$cmnt\E(\s+|$)/$1$repl$2/g;
            }
        }
        $cstr .= "\nc:@clut" if @clut;
    }

    # Store the tables as a trailing comment, then re-index the renamed tree.
    $root->appendChild($self->c($cstr));
    $self->reload();
}
# expand([$flgz]) -- undo compress(): read the look-up tables from the
# "XML::Tidy::compress v..." comment, remove that comment, and replace every
# look-up key in the tree with the token it stands for.  Finishes with
# reload() and tidy().
#
# $flgz is accepted for symmetry with compress() but is not consulted; which
# node kinds are expanded is decided by which tables the comment contains.
#
# Croaks when a name or token that should be a look-up key cannot be
# resolved against the tables.
sub expand {
    my $self = shift();
    my $flgz = shift();                       # unused, see above
    my (@elut, @alut, @vlut, @tlut, @clut);   # look-up tables per kind
    my %lutf = (e => \@elut, a => \@alut, v => \@vlut, t => \@tlut, c => \@clut);
    my $ntok = qr/[\(\)\[\]\{\}\/\*\+\?]/;    # non-token quoted regex

    # Resolve a key: $kndz is the set of kind letters acceptable in this
    # context.  Returns the original token, or undef when the key has no
    # acceptable kind letter or its index lies outside that kind's table.
    my $rslv = sub {
        my ($tokn, $kndz) = @_;
        return undef unless $tokn =~ /^([$kndz])(.*)$/s;
        my $ltbl = $lutf{$1};
        my $indx = b10($2);
        return $indx < @{$ltbl} ? $ltbl->[$indx] : undef;
    };

    my ($root) = $self->findnodes('/');
    for my $cnod ($root->findnodes('//comment()')) {
        my $text = $cnod->getNodeValue();
        next unless defined($text);
        # The third version component is optional: compress() writes
        # "v$VERSION", which may have only two parts; a pattern insisting on
        # three would never find the tables and nothing would be expanded.
        if ($text =~ s/^XML::Tidy::compress v\d+\.\d+(?:\.[0-9A-Za-z._]{7})?//) {
            while ($text =~ s/^\n([eatvnc]):([^\n]+)//) {
                my ($ntyp, $lutd) = ($1, $2);
                # 'n' (namespace) tables are recognised but not used
                push(@{ $lutf{$ntyp} }, split(/\s+/, $lutd)) if exists($lutf{$ntyp});
            }
            $root->removeChild($cnod);
        }
    }

    if (@elut) {
        for my $elem ($root->findnodes('//*')) {
            my $name = $elem->getName();
            my $orig = $rslv->($name, 'e');
            croak "!*EROR*! expand() cannot find look-up element:$name!\n"
                unless defined($orig);
            # 5 is the index of XML::XPath::Node::Element's node_name field
            ${$elem}->[5] = $orig;
        }
    }

    if (@alut) {
        for my $attr ($root->findnodes('//@*')) {
            my $name = $attr->getName();
            my $orig = $rslv->($name, 'ea');
            croak "!*EROR*! expand() cannot find look-up attribute key:$name!\n"
                unless defined($orig);
            # 4 is the index of XML::XPath::Node::Attribute's node_key field
            ${$attr}->[4] = $orig;

            next unless @vlut;
            my $wval = $attr->getNodeValue();
            $wval = '' unless defined($wval);
            # split(' ') skips leading whitespace, so no empty token is looked up
            for my $valu (split(' ', $wval)) {
                next if $valu =~ $ntok;
                my $repl = $rslv->($valu, 'eav');
                croak "!*EROR*! expand() cannot find look-up attribute value:$valu!\n"
                    unless defined($repl);
                # 5 is the index of XML::XPath::Node::Attribute's node_value field.
                # \Q..\E: the key is data, not a pattern.
                ${$attr}->[5] =~ s/(^|\s+)\Q$valu\E(\s+|$)/$1$repl$2/g;
            }
        }
    }

    if (@tlut) {
        for my $tnod ($root->findnodes('//text()')) {
            my $wtxt = $tnod->getNodeValue();
            $wtxt = '' unless defined($wtxt);
            for my $text (split(' ', $wtxt)) {
                next if $text =~ $ntok;
                my $repl = $rslv->($text, 'eatv');
                croak "!*EROR*! expand() cannot find look-up text token:$text!\n"
                    unless defined($repl);
                # 3 is the index of XML::XPath::Node::Text's node_text field
                ${$tnod}->[3] =~ s/(^|\s+)\Q$text\E(\s+|$)/$1$repl$2/g;
            }
        }
    }

    if (@clut) {
        for my $cnod ($root->findnodes('//comment()')) {
            my $wcmt = $cnod->getNodeValue();
            $wcmt = '' unless defined($wcmt);
            for my $cmnt (split(' ', $wcmt)) {
                next if $cmnt =~ $ntok;
                my $repl = $rslv->($cmnt, 'eavtc');
                croak "!*EROR*! expand() cannot find look-up comment token:$cmnt!\n"
                    unless defined($repl);
                # 3 is the index of XML::XPath::Node::Comment's node_comment field
                ${$cnod}->[3] =~ s/(^|\s+)\Q$cmnt\E(\s+|$)/$1$repl$2/g;
            }
        }
    }

    $self->reload();
    $self->tidy();
}
# _append_node($strz, $flut, $intz, $fltz, $ndty, $node) -- record one node
# (and, for elements, everything beneath it) for bcompress().
#
#   $strz, $intz, $fltz  array refs collecting distinct string / integer /
#                        float tokens
#   $flut                hash ref: token => its index tag (plain number for
#                        strings, 'l'.index for ints, 'd'.index for floats)
#   $ndty                array ref receiving the node stream
#   $node                reference to the node object
#
# Stream entries per node: index tag of the token, node type, and for
# attributes and processing instructions the index tag of the value/data.
# An empty token or value is recorded as index 1.  Elements are followed by
# their namespaces, attributes and children, then a 0 as element-close mark.
sub _append_node {
    my ($strz, $flut, $intz, $fltz, $ndty, $node) = @_;
    my $type = ${$node}->getNodeType();
    my $tokn = '';        # token key string
    my $aval = undef;     # attribute value / PI data string

    if ($type == ELEMENT_NODE()) {
        $tokn = ${$node}->getName();
    }
    elsif ($type == ATTRIBUTE_NODE()) {
        $tokn = ${$node}->getName();          # attribute keys
        $aval = ${$node}->getNodeValue();     # attribute values
        $aval = '' unless defined($aval);
    }
    elsif ($type == NAMESPACE_NODE()) {
        $tokn = ${$node}->toString();         # namespace prefix && expanded
    }
    elsif ($type == PROCESSING_INSTRUCTION_NODE()) {
        $tokn = ${$node}->getTarget();        # PI target
        $aval = ${$node}->getData();          # PI data
        $aval = '' unless defined($aval);
    }
    else {                                    # text, comment
        $tokn = ${$node}->getNodeValue();
    }

    # Enter a not-yet-seen, non-empty token into the table matching its shape.
    my $ntrn = sub {
        my $strg = shift;
        return unless defined($strg) && length($strg);
        return if exists($flut->{$strg});
        if ($strg =~ /^([+-]?\d+)$/ &&        # fits a signed 32-bit integer
            -2147483648 <= $strg && $strg <= 2147483647) {
            push(@{$intz}, $strg);
            $flut->{$strg} = 'l' . $#{$intz};
        }
        elsif ($strg =~ /^[+-]?\d+\.\d+$/) {  # plain decimal => float
            push(@{$fltz}, $strg);
            $flut->{$strg} = 'd' . $#{$fltz};
        }
        else {
            push(@{$strz}, $strg);
            $flut->{$strg} = $#{$strz};
        }
        return;
    };
    $ntrn->($tokn);
    $ntrn->($aval);

    if (defined($tokn)) {
        push(@{$ndty}, (length($tokn) ? $flut->{$tokn} : 1), $type);
    }
    if (defined($aval)) {
        push(@{$ndty}, (length($aval) ? $flut->{$aval} : 1));
    }

    if ($type == ELEMENT_NODE()) {
        # load namespaces, attributes && then children recursively ...
        for my $nspc (${$node}->getNamespaces()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$nspc);
        }
        for my $attr (${$node}->getAttributes()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$attr);
        }
        for my $kidd (${$node}->getChildNodes()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$kidd);
        }
        # ... before adding an element-close mark to the node order
        push(@{$ndty}, 0);
    }
}
# bcompress([$dstf]) -- write a binary representation of the document to
# $dstf (default 'default.xtb').
#
# File layout, in order:
#   "XML::Tidy::bcompress v$VERSION\0"
#   1 byte   index width in bytes (1, 2 or 4)
#   index    number of strings, then each string NUL-terminated
#   index    number of ints,    then each packed 'l'
#   index    number of floats,  then each packed 'd'
#   node stream: index [, 1-byte node type [, value index for attribute/PI]]
#                where index 0 marks an element close
# Indices address strings first, then ints, then floats.  The 'S', 'L', 'l'
# and 'd' pack formats use the writing machine's native byte order and sizes.
#
# The XML declaration is not stored.  Croaks if the file cannot be opened or
# closed.
sub bcompress {
    my $self = shift();
    my $dstf = shift() || 'default.xtb';      # destination binary filename
    my $bstr = "XML::Tidy::bcompress v$VERSION\0";
    my @strz = ('', '');                      # slots 0/1: element-close && empty-string
    my @intz = ();                            # array of ints
    my @fltz = ();                            # array of floats
    my @ndty = ();                            # list of index tags && node types
    my %flut = ('' => 1);                     # found string lookup-table
    my $bsiz = 1;
    my $bpak = 'C';

    $self->strip();                           # remove non-data text nodes
    my ($root) = $self->findnodes('/');
    for my $kidd ($root->getChildNodes()) {
        _append_node(\@strz, \%flut, \@intz, \@fltz, \@ndty, \$kidd);
    }

    # Pick the index width from the length of the node stream.
    my $nndx = @ndty;
    while ($nndx >= 256) { $nndx /= 256.0; $bsiz++; }
    if    ($bsiz == 2) { $bpak = 'S';            }
    elsif ($bsiz >  2) { $bpak = 'L'; $bsiz = 4; }

    # Lexical handle instead of a package-global bareword.
    open(my $dsth, '>', $dstf)
        or croak "!*EROR*! Cannot open binary DSTF: $dstf for writing! $!\n";
    binmode($dsth);
    print {$dsth} $bstr;

    shift(@strz); shift(@strz);               # the two reserved slots are implied
    print {$dsth} pack("C$bpak", $bsiz, scalar(@strz));
    print {$dsth} "$_\0"        for @strz;
    print {$dsth} pack($bpak, scalar(@intz));
    print {$dsth} pack('l', $_) for @intz;
    print {$dsth} pack($bpak, scalar(@fltz));
    print {$dsth} pack('d', $_) for @fltz;

    # Turn an index tag into an absolute index; + 2 restores the two reserved
    # string slots shifted off above.
    my $andx = sub {
        my $indx = shift;
        return scalar(@strz) + scalar(@intz) + $indx + 2 if $indx =~ s/^d//;
        return scalar(@strz)                 + $indx + 2 if $indx =~ s/^l//;
        return $indx;
    };

    while (@ndty) {
        my $indx = shift(@ndty);
        if (defined($indx) && $indx) {
            my $type = shift(@ndty);
            if (defined($type) && $type) {
                print {$dsth} pack($bpak, $andx->($indx));
                print {$dsth} pack('C', $type);
                if ($type == ATTRIBUTE_NODE() ||
                    $type == PROCESSING_INSTRUCTION_NODE()) {
                    print {$dsth} pack($bpak, $andx->(shift(@ndty)));
                }
            }
        }
        else {                                # element close
            print {$dsth} pack($bpak, 0);
        }
    }

    # Buffered write errors only surface here, so the result must be checked.
    close($dsth)
        or croak "!*EROR*! Cannot close binary DSTF: $dstf after writing! $!\n";
}
# bexpand([$srcf]) -- rebuild an XML::Tidy object from a binary file written
# by bcompress() ($srcf defaults to 'default.xtb').
#
# The first argument is the invocant and is not otherwise used.  The file is
# decoded into its string / int / float tables and node stream, XML text is
# generated from the stream (prefixed with the file-level XML declaration),
# and that text is parsed with XML::Tidy->new('xml' => ...).
#
# Returns the new object, or nothing when $srcf is not readable.  Croaks if
# the file cannot be opened or its string table is truncated.
sub bexpand {
    my $self = shift();
    my $srcf = shift() || 'default.xtb';      # source binary filename
    my $bstr = "XML::Tidy::bcompress v$VERSION\0";
    my @strz = ('', '');                      # slots 0/1: element-close && empty-string
    my @intz = ();                            # array of ints
    my @fltz = ();                            # array of floats
    my @ndty = ();                            # list of indices && node types
    my @elst = ();                            # stack of open element names
    my $bsiz = 1;
    my $bpak = 'C';

    return unless -r $srcf;

    open(my $srch, '<', $srcf)
        or croak "!*EROR*! Cannot open binary SRCF: $srcf for reading! $!\n";
    binmode($srch);
    my $srcd = join('', <$srch>);             # slurp the whole file
    close($srch);

    # Skip the header.  NOTE(review): it is not compared with $bstr, and its
    # length is assumed to equal this version's header length.
    substr($srcd, 0, length($bstr), '');

    $bsiz = unpack('C', substr($srcd, 0, 1, ''));
    if    ($bsiz == 2) { $bpak = 'S';            }
    elsif ($bsiz >  2) { $bpak = 'L'; $bsiz = 4; }

    # String table: count, then NUL-terminated strings.
    my $coun = unpack($bpak, substr($srcd, 0, $bsiz, ''));
    while ($coun--) {
        my $nulp = index($srcd, "\0");
        # A missing terminator used to spin forever reading empty chunks.
        croak "!*EROR*! bexpand() found a truncated string table in: $srcf!\n"
            if $nulp < 0;
        push(@strz, substr($srcd, 0, $nulp, ''));
        substr($srcd, 0, 1, '');              # drop the terminator
    }
    # Integer table.
    $coun = unpack($bpak, substr($srcd, 0, $bsiz, ''));
    while ($coun--) {
        push(@intz, unpack('l', substr($srcd, 0, 4, '')));
    }
    # Float table.
    $coun = unpack($bpak, substr($srcd, 0, $bsiz, ''));
    while ($coun--) {
        push(@fltz, unpack('d', substr($srcd, 0, 8, '')));
    }
    # Node stream: index [, type [, value index for attribute / PI]].
    while (length($srcd)) {
        push(@ndty, unpack($bpak, substr($srcd, 0, $bsiz, '')));
        if ($ndty[-1]) {
            push(@ndty, unpack('C', substr($srcd, 0, 1, '')));
            if ($ndty[-1] == ATTRIBUTE_NODE() ||
                $ndty[-1] == PROCESSING_INSTRUCTION_NODE()) {
                push(@ndty, unpack($bpak, substr($srcd, 0, $bsiz, '')));
            }
        }
    }

    # Fetch a token by absolute index: strings first, then ints, then floats.
    my $tokn = sub {
        my $indx = shift;
        return $fltz[$indx - scalar(@strz) - scalar(@intz)]
            if $indx >= (scalar(@strz) + scalar(@intz));
        return $intz[$indx - scalar(@strz)]
            if $indx >= scalar(@strz);
        return $strz[$indx];
    };

    my $opfl = 0;                             # true while a start tag is still open
    my $gxml = $xmld;                         # generated XML for the new object
    while (@ndty) {
        my $indx = shift(@ndty);
        my $type = ELEMENT_NODE();            # index 0 == element close
        $type = shift(@ndty) if $indx;

        if ($type == ELEMENT_NODE()) {
            $gxml .= '>' if $opfl;
            if ($indx == 0) {                 # close the innermost open element
                $gxml .= '</' . pop(@elst) . '>';
                $opfl  = 0;
            }
            else {
                push(@elst, $strz[$indx]);
                $gxml .= '<' . $strz[$indx];
                $opfl  = 1;
            }
        }
        elsif ($type == ATTRIBUTE_NODE()) {
            my $vndx = shift(@ndty);
            if ($opfl) {                      # only valid inside a start tag
                $gxml .= ' ' . $tokn->($indx) . '="' . $tokn->($vndx) . '"';
            }
        }
        elsif ($type == TEXT_NODE()) {
            if ($opfl) { $gxml .= '>'; $opfl = 0; }
            $gxml .= $tokn->($indx);
        }
        elsif ($type == COMMENT_NODE()) {
            if ($opfl) { $gxml .= '>'; $opfl = 0; }
            $gxml .= '<!--' . $tokn->($indx) . '-->';
        }
        elsif ($type == PROCESSING_INSTRUCTION_NODE()) {
            if ($opfl) { $gxml .= '>'; $opfl = 0; }
            my $vndx = shift(@ndty);
            $gxml .= '<?' . $tokn->($indx) . ' ' . $tokn->($vndx) . '?>';
        }
        elsif ($type == NAMESPACE_NODE()) {
            $gxml .= ' ' . $strz[$indx] if $opfl;
        }
    }

    # don't force a tidy() even if it's most likely desired
    my $nslf = XML::Tidy->new('xml' => "$gxml");
    return($nslf);
}
# prune($xplc) or prune(xpath_loc => $xplc) -- detach every node matched by
# the XPath location from its parent.
#
# Returns immediately when no (or a false) location is given, and does
# nothing for '/', so the root node cannot be pruned.  The object is
# reload()ed first so the search runs against freshly indexed nodes.
sub prune {
    my $self = shift();
    my $xplc = shift();
    return() unless $xplc;                        # can't prune root node

    # Named-parameter form: the real location is the following argument.
    if ($xplc =~ /^[-_]?(xplc$|xpath_loc)/) {
        $xplc = shift() || undef;
    }

    if (defined($self) && defined($xplc) && length($xplc) && $xplc ne '/') {
        $self->reload();   # update all nodes && internal XPath indexing before find
        for my $targ ($self->findnodes($xplc)) {
            my $prnt = $targ->getParentNode();
            $prnt->removeChild($targ) if defined($prnt);
        }
    }
}
sub write{ # write out an XML file to disk from a Tidy object
  # Accepted call forms (arguments after the object itself):
  #   write()                                 reuse the object's own filename
  #   write('Out.xml')                        positional filename
  #   write('Out.xml', '/xpath/loc')          positional filename then XPath location
  #   write('filename'  => 'Out.xml', ...)    named filename
  #   write('xpath_loc' => '/xpath/loc', ...) named XPath location (may come first)
  # When an XPath location is given, only its first matching node is written.
  # Returns true on success; croaks when no filename is available, when the
  # XPath location matches nothing, or when the file can't be opened / closed.
  my $self = shift(); my $root; my $xplc;
  my $flnm = shift() || $self->get_filename();
  if(defined($flnm) && $flnm){
    if($flnm =~ /^[-_]?(xplc$|xpath_loc)/){
      $xplc = shift() || undef;
      $flnm = shift() || $self->get_filename();
    }
    # $flnm may have just become undef above, so guard the match
    if(defined($flnm) && $flnm =~ /^[-_]?(flnm|filename)$/){
      $flnm = shift() || $self->get_filename();
    }
  }
  unless(defined($xplc) && $xplc){
    $xplc = shift() || undef;
  }
  if(defined($xplc) && $xplc && $xplc =~ /^[-_]?(xplc$|xpath_loc)/){
    $xplc = shift() || undef;
  }
  unless(defined($self) && defined($flnm)){
    croak("!*EROR*! No filename could be found to write() to!\n");
  }
  if(defined($xplc) && $xplc){
    my($rtnd)= $self->findnodes($xplc);
    # fail clearly (and before touching the file) when nothing matches
    croak("!*EROR*! No node matched XPath location: $xplc for write()!\n") unless(defined($rtnd));
    $root = XML::XPath::Node::Element->new();
    $root->appendChild($rtnd);
  }else{
    ($root)= $self->findnodes('/');
  }
  croak("!*EROR*! No root node could be found to write()!\n") unless(defined($root));
  # serialize everything first so a failure can't leave a truncated file behind
  my $xmls = $xmld;
  $xmls .= $_->toString() . "\n" for($root->getChildNodes());
  open(my $flhn,'>:encoding(UTF-8)',$flnm) or croak "!*EROR*! Cannot open FILE: $flnm for writing! $!\n";
  print $flhn $xmls;
  # buffered write errors only surface at close, so check it
  close($flhn) or croak "!*EROR*! Cannot close FILE: $flnm after writing! $!\n";
  return(1);
}
sub toString{ # return XML string from a Tidy object
  # Call forms: toString(), toString('/xpath/loc') or
  # toString('xpath_loc' => '/xpath/loc'). With a location, only the first
  # matching node is serialized. The declaration held in $xmld is prepended
  # and one trailing newline is stripped from the result.
  # Croaks when called without an object or when the location matches nothing.
  my $self = shift(); my $root;
  my $xplc = shift(); my $xmls = $xmld;
  if(defined($xplc) && $xplc && $xplc =~ /^[-_]?(xplc$|xpath_loc)/){
    $xplc = shift() || undef;
  }
  unless(defined($self)){
    croak("!*EROR*! No XML::Tidy could be found for toString()!\n");
  }
  if(defined($xplc) && $xplc){
    my($rtnd)= $self->findnodes($xplc);
    # appendChild(undef) would die obscurely inside XML::XPath, so fail clearly here
    croak("!*EROR*! No node matched XPath location: $xplc for toString()!\n") unless(defined($rtnd));
    $root = XML::XPath::Node::Element->new();
    $root->appendChild($rtnd);
  }else{
    ($root)= $self->findnodes('/'); # every call to this XPath findnodes('/') leaks memory! I think it's not my fault?
  }
  if(defined($root)){
    $xmls .= $_->toString() . "\n" for($root->getChildNodes());
  }
  $xmls=~ s/\n$//; # strip final newline (maybe should check at load if there was a just newline text-node after root element close to leave it in?)
  return($xmls);
}
sub AUTOLOAD{ # methods (created as necessary)
  # Lazily installs the createNode wrappers: a called method whose own name
  # (optionally prefixed by 'new' or 'create') starts with one of e/a/c/t/p/n
  # is defined on first use as a constructor for the corresponding
  # XML::XPath::Node::* class and then invoked. Anything else croaks.
  no strict 'refs';
  my $self = shift();
  # Match only the final component of the fully-qualified name. An unanchored
  # case-insensitive match would also hit the 'T' of the 'Tidy' package
  # component, silently turning every unknown method into a Text constructor.
  if($AUTOLOAD =~ /::(?:new|create)?([eactpn])[^:]*\z/i){ # createNode Wrappers
    my %ndcl = (
      'e' => 'XML::XPath::Node::Element',
      'a' => 'XML::XPath::Node::Attribute',
      'c' => 'XML::XPath::Node::Comment',
      't' => 'XML::XPath::Node::Text',
      'p' => 'XML::XPath::Node::PI',
      'n' => 'XML::XPath::Node::Namespace',
    );
    my $clas = $ndcl{lc($1)};
    *{$AUTOLOAD} = sub{ # add called sub to function table
      my $self = shift(); # the invocant is not passed on to the node constructor
      return($clas->new(@_));
    };
    return($self->$AUTOLOAD(@_));
  }else{croak "No such method: $AUTOLOAD\n";}
}
628             sub DESTROY{} # intentionally empty: defined so object destruction is not dispatched to AUTOLOAD (which also calms test warnings)
629             8;
630              
631             =encoding utf8
632              
633             =head1 NAME
634              
635             XML::Tidy - tidy indenting of XML documents
636              
637             =head1 VERSION
638              
639             This documentation refers to version 1.20 of XML::Tidy, which was released on
640             Sun Jul 9 09:43:30:08 -0500 2017.
641              
642             =head1 SYNOPSIS
643              
644             #!/usr/bin/perl
645             use strict;use warnings;
646             use utf8;use XML::Tidy;
647              
648             # create new XML::Tidy object by loading: MainFile.xml
649             my $tidy_obj = XML::Tidy->new('filename' => 'MainFile.xml');
650              
651             # tidy up the indenting
652             $tidy_obj->tidy();
653              
654             # write out changes back to MainFile.xml
655             $tidy_obj->write();
656              
657             =head1 DESCRIPTION
658              
659             This module creates XML document objects (with inheritance from
660             L<XML::XPath>) to tidy mixed-content (i.e., non-data) text node
661             indenting. There are also some other handy member functions to
662             compress and expand your XML document object (into either a
663             compact XML representation or a binary one).
664              
665             =head1 USAGE
666              
667             =head2 new()
668              
669             This is the standard Tidy object constructor. Except for the added
670             'binary' option, it can take the same parameters as an L<XML::XPath>
671             object constructor to initialize the XML document object. These can
672             be any one of:
673              
674             'filename' => 'SomeFile.xml'
675             'binary' => 'SomeBinaryFile.xtb'
676             'xml' => $variable_which_holds_a_bunch_of_XML_data
677             'ioref' => $file_InputOutput_reference
678             'context' => $existing_node_at_specified_context_to_become_new_obj
679              
680             =head2 reload()
681              
682             The reload() member function causes the latest data contained in
683             a Tidy object to be re-parsed (which re-indexes all nodes).
684              
685             This can be necessary after modifications have been made to nodes
686             which impact the tree node hierarchy because L<XML::XPath>'s find()
687             member preserves state information which can get out-of-sync.
688              
689             reload() is probably rarely useful by itself but it is needed by
690             strip() and prune() so it is exposed as a method in case it comes in
691             handy for other uses.
692              
693             =head2 strip()
694              
695             The strip() member function searches the Tidy object for all
696             mixed-content (i.e., non-data) text nodes and empties them out.
697             This will basically unformat any markup indenting.
698              
699             strip() is used by compress() and tidy() but it is exposed because it
700             is also worthwhile by itself.
701              
702             =head2 tidy()
703              
704             The tidy() member function can take a single optional parameter as
705             the string that should be inserted for each indent level. Some
706             examples:
707              
708             # Tidy up indenting with default two (2) spaces per indent level
709             $tidy_obj->tidy();
710              
711             # Tidy up indenting with four (4) spaces per indent level
712             $tidy_obj->tidy('    ');
713              
714             # Tidy up indenting with one (1) tab per indent level
715             $tidy_obj->tidy('tab' );
716              
717             # Tidy up indenting with two (2) tabs per indent level
718             $tidy_obj->tidy("\t\t");
719              
720             The default behavior is to use two (2) spaces for each indent
721             level. The Tidy object gets all mixed-content (i.e., non-data)
722             text nodes reformatted to appropriate indent levels according to
723             tree nesting depth.
724              
725             NOTE: tidy() disturbs some XML escapes in whatever ways L<XML::XPath>
726             does. It has been brought to my attention that these modules also strip
727             CDATA tags from XML files / data they operate on. Even though
728             CDATA tags don't seem very common, I would very much like for them to
729             work smoothly too. Hopefully the vast majority of files will work fine
730             and future support for any of the more rare types can be added later.
731              
732             Additionally, please take notice that every call to tidy() (as well as
733             reload, strip, and most other XML::Tidy functions) leaks some memory due
734             to its usage of XPath's findnodes command. This issue was described
735             helpfully at L.
736             Thanks to Jozef!
737              
738             =head2 compress()
739              
740             The compress() member function calls strip() on the Tidy object
741             then creates an encoded comment which contains the names of elements
742             and attributes as they occurred in the original document. Their
743             respective element and attribute names are replaced with just the
744             appropriate index throughout the document.
745              
746             compress() can accept a parameter describing which node types to
747             attempt to shrink down as abbreviations. This parameter should be
748             a string of just the first letters of each node type you wish to
749             include as in the following mapping:
750              
751             e = elements
752             a = attribute keys
753             v = attribute values *EXPERIMENTAL*
754             t = text nodes *EXPERIMENTAL*
755             c = comment nodes *EXPERIMENTAL*
756             n = namespace nodes *not-yet-implemented*
757              
758             Attribute values ('v') and text nodes ('t') both seem to work fine
759             with current tokenization. I've still labeled them EXPERIMENTAL
760             because they seem more likely to cause problems than valid element
761             or attribute key names. I have some bugs in the comment node
762             compression which I haven't been able to find yet so that one should
763             be avoided for now. Since these three node types ('vtc')
764             all require tokenization, they are not included in default compression
765             ('ea'). An example call which includes values and text would be:
766              
767             $tidy_obj->compress('eavt');
768              
769             The original document structure (i.e., node hierarchy) is preserved.
770             compress() significantly reduces the file size of most XML documents
771             for when size matters more than immediate human readability.
772             expand() performs the opposite conversion.
773              
774             =head2 expand()
775              
776             The expand() member function reads any XML::Tidy::compress comments
777             from the Tidy object and uses them to reconstruct the document
778             that was passed to compress().
779              
780             =head2 bcompress('BinaryOutputFilename.xtb')
781              
782             The bcompress() member function stores a binary representation of
783             any Tidy object. The format consists of:
784              
785             0) a null-terminated version string
786             1) a byte specifying how many bytes later indices will be
787             2) the number of bytes from 1 above to designate the total string count
788             3) the number of null-terminated strings from 2 above
789             4) the number of bytes from 1 above to designate the total integer count
790             5) the number of 4-byte integers from 4 above
791             6) the number of bytes from 1 above to designate the total float count
792             7) the number of 8-byte (double-precision) floats from 6 above
793             8) node index sets until the end of the file
794              
795             Normal node index sets consist of two values. The first is an index
796             (again the number of bytes long comes from 1) into the three lists as if
797             they were all linear. The second is a single-byte integer identifying the
798             node type (using standard DOM node type enumerations).
799              
800             A few special cases exist in node index sets though. If the index is
801             null, it is interpreted as a close-element tag (so no accompanying type
802             value is read). On the other end, when the index is non-zero, the type
803             value is always read. In the event that the type corresponds to an
804             attribute or a processing instruction, the next index is read (without
805             another accompanying type value) in order to complete the data fields
806             required by those node types.
807              
808             NOTE: Please bear in mind that the encoding of binary integers and floats
809             only works properly if the values are not surrounded by spaces or other
810             delimiters and each is contained in its own single node. This is
811             necessary to enable thorough reconstruction of whitespace from the
812             original document. I recommend storing every numerical value as an
813             isolated attribute value or text node without any surrounding whitespace.
814              
815             # Examples which encode all numbers as binary:
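816             <friend name="goodguy" category="15">
817               <hitpoints>31.255</hitpoints>
818               <location>
819                 <x>-15.65535</x>
820                 <y>16383.7</y>
821                 <z>-1023.63</z>
822               </location>
823             </friend>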
824              
825             # Examples which encode all numbers as strings:
826             <enemy name="badguy" category=" 666 ">
827               <hitpoints> 2.0 </hitpoints>
828               <location> 4.0 -2.0 4.0 </location>
829             </enemy>
830              
831             The default file extension is .xtb (for XML::Tidy Binary).
832              
833             =head2 bexpand('BinaryInputFilename.xtb')
834              
835             The bexpand() member function reads a binary file which was
836             previously written from bcompress(). bexpand() is an XML::Tidy
837             object constructor like new() so it can be called like:
838              
839             my $xtbo = XML::Tidy->bexpand('BinaryInputFilename.xtb');
840              
841             =head2 prune()
842              
843             The prune() member function takes an XPath location to remove (along
844             with all attributes and child nodes) from the Tidy object. For
845             example, to remove all comments:
846              
847             $tidy_obj->prune('//comment()');
848              
849             or to remove the third baz (XPath indexing is 1-based):
850              
851             $tidy_obj->prune('/foo/bar/baz[3]');
852              
853             Pruning your XML tree is a form of tidying too so it snuck in here. =)
854              
855             =head2 write()
856              
857             The write() member function can take an optional filename parameter
858             to write out any changes to the Tidy object. If no parameters
859             are given, write() overwrites the original XML document file (if
860             a 'filename' parameter was given to the constructor).
861              
862             write() will croak() if no filename can be found to write to.
863              
864             write() can also take a secondary parameter which specifies an XPath
865             location to be written out as the new root element instead of the
866             Tidy object's root. Only the first matching element is written.
867              
868             =head2 toString()
869              
870             The toString() member function is almost identical to write() except
871             that it takes no filename parameter (only an optional XPath location)
872             and simply returns the equivalent XML string as a scalar. It is a little
873             weird because normally only L<XML::XPath::Node> objects have a toString()
874             member but I figure it makes sense to extend the same syntax to the
875             parent object as well, since it is a useful option.
876              
877             =head1 createNode Wrappers
878              
879             The following are just aliases to Node constructors. They'll work with
880             just the unique portion of the node type as the member function name.
881              
882             =head2 e() or el() or elem() or createElement()
883              
884             wrapper for XML::XPath::Node::Element->new()
885              
886             =head2 a() or at() or attr() or createAttribute()
887              
888             wrapper for XML::XPath::Node::Attribute->new()
889              
890             =head2 c() or cm() or cmnt() or createComment()
891              
892             wrapper for XML::XPath::Node::Comment->new()
893              
894             =head2 t() or tx() or text() or createTextNode()
895              
896             wrapper for XML::XPath::Node::Text->new()
897              
898             =head2 p() or pi() or proc() or createProcessingInstruction()
899              
900             wrapper for XML::XPath::Node::PI->new()
901              
902             =head2 n() or ns() or nspc() or createNamespace()
903              
904             wrapper for XML::XPath::Node::Namespace->new()
905              
906             =head1 EXPORTED CONSTANTS
907              
908             Since they are sometimes needed to compare against, XML::Tidy also
909             exports the same node constants as L<XML::XPath::Node> (which
910             correspond to DOM values). These include:
911              
912             =head2 UNKNOWN_NODE
913              
914             =head2 ELEMENT_NODE
915              
916             =head2 ATTRIBUTE_NODE
917              
918             =head2 TEXT_NODE
919              
920             =head2 CDATA_SECTION_NODE
921              
922             =head2 ENTITY_REFERENCE_NODE
923              
924             =head2 ENTITY_NODE
925              
926             =head2 PROCESSING_INSTRUCTION_NODE
927              
928             =head2 COMMENT_NODE
929              
930             =head2 DOCUMENT_NODE
931              
932             =head2 DOCUMENT_TYPE_NODE
933              
934             =head2 DOCUMENT_FRAGMENT_NODE
935              
936             =head2 NOTATION_NODE
937              
938             =head2 ELEMENT_DECL_NODE
939              
940             =head2 ATT_DEF_NODE
941              
942             =head2 XML_DECL_NODE
943              
944             =head2 ATTLIST_DECL_NODE
945              
946             =head2 NAMESPACE_NODE
947              
948             XML::Tidy also exports:
949              
950             =head2 STANDARD_XML_DECL
951              
952             which returns a reasonable default XML declaration string
953             (assuming typical "utf-8" encoding).
954              
955             =head1 TODO
956              
957             =over 2
958              
959             =item - fix reload() from messing up Unicode escaped &XYZ; components like Copyright &copy; and Registered &reg; (probably needs pre and post processing)
960              
961             =item - write many better UTF-8 tests
962              
963             =item - support namespaces
964              
965             =item - handle CDATA
966              
967             =back
968              
969             =head1 CHANGES
970              
971             Revision history for Perl extension XML::Tidy:
972              
973             =over 2
974              
975             =item - 1.20 H79M9hU8 Sun Jul 9 09:43:30:08 -0500 2017
976              
977             * removed broken Build.PL to resolve L. (Thank you, Slaven.)
978              
979             =item - 1.18 H78M5qm1 Sat Jul 8 05:52:48:01 -0500 2017
980              
981             * fixed new() to check file or xml to detect standalone in declaration, from L (Thanks Alex!)
982              
983             * traced tidy() memory leak from L (Thanks Jozef!) which seems to come from every XPath->findnodes() call
984              
985             * aligned synopsis comments
986              
987             * updated write() to use output encoding UTF-8 since that's what almost all XML should rely on (with thanks to RJBS for teaching me much from his great talk at
988             L)
989              
990             * collapsed trailing curly braces on code blocks
991              
992             * added croak for any failed file open attempt
993              
994             =item - 1.16 G6LM4EST Tue Jun 21 04:14:28:29 -0500 2016
995              
996             * stopped using my old fragile package generation and manually updated all distribution files (though L should let me generate much again)
997              
998             * updated license to GPLv3+
999              
1000             * fixed 00pod.t and 01podc.t to eval the Test modules from issue and patch: L (Thanks again MichielB.)
1001              
1002             * replaced all old '&&' with 'and' in POD
1003              
1004             =item - 1.14 G6JMERCY Sun Jun 19 14:27:12:34 -0500 2016
1005              
1006             * separated old PT from VERSION to fix non-numeric issue: L (Thanks to Slaven.)
1007              
1008             * removed Unicode from POD but added encoding utf8 anyway to pass tests and resolve issues: L and
1009             L (Thanks to Sudhanshu and MichielB.)
1010              
1011             =item - 1.12.B55J2qn Thu May 5 19:02:52:49 2011
1012              
1013             * made "1.0" float binarize as float again, rather than just "1" int
1014              
1015             * cleaned up POD and fixed EXPORTED CONSTANTS heads blocking together
1016              
1017             =item - 1.10.B52FpLx Mon May 2 15:51:21:59 2011
1018              
1019             * added tests for undefined non-standard XML declaration to suppress warnings
1020              
1021             =item - 1.8.B2AMvdl Thu Feb 10 22:57:39:47 2011
1022              
1023             * aligned .t code
1024              
1025             * added test for newline before -r to try to resolve: L (Thanks, Leandro.)
1026              
1027             * fixed off-by-one error when new gets a readable (non-newline) filename (that's not "filename" without a pre-'filename' param) to resolve:
1028             L (Thanks, Simone.)
1029              
1030             =item - 1.6.A7RJKwl Tue Jul 27 19:20:58:47 2010
1031              
1032             * added head2 POD for EXPORTED CONSTANTS to try to pass t/00podc.t
1033              
1034             =item - 1.4.A7QCvHw Mon Jul 26 12:57:17:58 2010
1035              
1036             * hacked a little test for non-UTF-8 decl str to resolve FrankGoss' need for ISO-8859-1 decl encoding to persist through tidying
1037              
1038             * made sure META.yml is being generated correctly for the CPAN
1039              
1040             * updated license to GPLv3
1041              
1042             =item - 1.2.75BACCB Fri May 11 10:12:12:11 2007
1043              
1044             * made "1.0" float binarize as just "1" int
1045              
1046             * made ints signed and bounds checked
1047              
1048             * added new('binary' => 'BinFilename.xtb') option
1049              
1050             =item - 1.2.54HJnFa Sun Apr 17 19:49:15:36 2005
1051              
1052             * fixed tidy() processing instruction stripping problem
1053              
1054             * added support for binary ints and floats in bcompress()
1055              
1056             * tightened up binary format and added pod
1057              
1058             =item - 1.2.54HDR1G Sun Apr 17 13:27:01:16 2005
1059              
1060             * added bcompress() and bexpand()
1061              
1062             * added compress() and expand()
1063              
1064             * added toString()
1065              
1066             =item - 1.2.4CKBHxt Mon Dec 20 11:17:59:55 2004
1067              
1068             * added exporting of XML::XPath::Node (DOM) constants
1069              
1070             * added node object creation wrappers (like LibXML)
1071              
1072             =item - 1.2.4CCJW4G Sun Dec 12 19:32:04:16 2004
1073              
1074             * added optional 'xpath_loc' => to prune()
1075              
1076             =item - 1.0.4CAJna1 Fri Dec 10 19:49:36:01 2004
1077              
1078             * added optional 'filename' => to write()
1079              
1080             =item - 1.0.4CAAf5B Fri Dec 10 10:41:05:11 2004
1081              
1082             * removed 2nd param from tidy() so that 1st param is just indent string
1083              
1084             * fixed pod errors
1085              
1086             =item - 1.0.4C9JpoP Thu Dec 9 19:51:50:25 2004
1087              
1088             * added xplc option to write()
1089              
1090             * added prune()
1091              
1092             =item - 1.0.4C8K1Ah Wed Dec 8 20:01:10:43 2004
1093              
1094             * inherited from XPath so that those methods can be called directly
1095              
1096             * original version (separating Tidy.pm from Merge.pm)
1097              
1098             =back
1099              
1100             =head1 INSTALL
1101              
1102             From the command shell, please run:
1103              
1104             `perl -MCPAN -e "install XML::Tidy"`
1105              
1106             or uncompress the package and run the standard:
1107              
1108             `perl Makefile.PL; make; make test; make install`
1109              
1110             =head1 FILES
1111              
1112             XML::Tidy requires:
1113              
1114             L<Carp> to allow errors to croak() from calling sub
1115              
1116             L<XML::XPath> to use XPath statements to query and update XML
1117              
1118             L<XML::XPath::XMLParser> to parse XML documents into XPath objects
1119              
1120             L<Math::BaseCnv> to handle base-64 indexing for compress() and expand()
1121              
1122             =head1 BUGS
1123              
1124             Please report any bugs or feature requests to bug-XML-Tidy at RT.CPAN.Org, or through the web interface at
1125             L<HTTPS://RT.CPAN.Org/NoAuth/Bugs.html?Dist=XML-Tidy>. I will be notified, and then you can be updated of progress on your bug
1126             as I address fixes.
1127              
1128             =head1 SUPPORT
1129              
1130             You can find documentation for this module (after it is installed) with the perldoc command.
1131              
1132             `perldoc XML::Tidy`
1133              
1134             You can also look for information at:
1135              
1136             RT: CPAN's Request Tracker
1137              
1138             HTTPS://RT.CPAN.Org/NoAuth/Bugs.html?Dist=XML-Tidy
1139              
1140             AnnoCPAN: Annotated CPAN documentation
1141              
1142             HTTP://AnnoCPAN.Org/dist/XML-Tidy
1143              
1144             CPAN Ratings
1145              
1146             HTTPS://CPANRatings.Perl.Org/d/XML-Tidy
1147              
1148             Search CPAN
1149              
1150             HTTP://Search.CPAN.Org/dist/XML-Tidy
1151              
1152             =head1 LICENSE
1153              
1154             Most source code should be Free! Code I have lawful authority over is and shall be!
1155             Copyright: (c) 2004-2017, Pip Stuart.
1156             Copyleft : This software is licensed under the GNU General Public License
1157             (version 3 or later). Please consult L
1158             for important information about your freedom. This is Free Software: you
1159             are free to change and redistribute it. There is NO WARRANTY, to the
1160             extent permitted by law. See L for further information.
1161              
1162             =head1 AUTHOR
1163              
1164             Pip Stuart
1165              
1166             =cut