File Coverage

blib/lib/XML/Tidy.pm
Criterion Covered Total %
statement 9 9 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 12 12 100.0


line stmt bran cond sub pod time code
1             # 4C3HOH1: XML::Tidy.pm by Pip Stuart to tidy XML documents as parsed XML::XPath objects;
2             package XML::Tidy;
3 5     5   27348 use strict;use warnings;
  5     5   6  
  5         106  
  5         15  
  5         3  
  5         138  
4             require XML::XPath;
5 5     5   14 use base qw( XML::XPath Exporter );
  5         5  
  5         2162  
6             use vars qw( $AUTOLOAD @EXPORT );
7             use Carp;
8             use Exporter;
9             use Math::BaseCnv qw(:b64);
10             use XML::XPath::XMLParser;
11             our $VERSION = '1.16';our $d8VS='G6LM4EST';
12             @EXPORT = qw(
13             UNKNOWN_NODE
14             ELEMENT_NODE
15             ATTRIBUTE_NODE
16             TEXT_NODE
17             CDATA_SECTION_NODE
18             ENTITY_REFERENCE_NODE
19             ENTITY_NODE
20             PROCESSING_INSTRUCTION_NODE
21             COMMENT_NODE
22             DOCUMENT_NODE
23             DOCUMENT_TYPE_NODE
24             DOCUMENT_FRAGMENT_NODE
25             NOTATION_NODE
26             ELEMENT_DECL_NODE
27             ATT_DEF_NODE
28             XML_DECL_NODE
29             ATTLIST_DECL_NODE
30             NAMESPACE_NODE
31             STANDARD_XML_DECL
32             );
# Node-type codes; every name below also appears in the @EXPORT list above.
# Each is a sub with an empty prototype whose body is one integer literal.
33             sub UNKNOWN_NODE () { 0;}
34             sub ELEMENT_NODE () { 1;}
35             sub ATTRIBUTE_NODE () { 2;}
36             sub TEXT_NODE () { 3;}
37             sub CDATA_SECTION_NODE () { 4;}
38             sub ENTITY_REFERENCE_NODE () { 5;}
39             sub ENTITY_NODE () { 6;}
40             sub PROCESSING_INSTRUCTION_NODE () { 7;}
41             sub COMMENT_NODE () { 8;}
42             sub DOCUMENT_NODE () { 9;}
43             sub DOCUMENT_TYPE_NODE () {10;}
44             sub DOCUMENT_FRAGMENT_NODE () {11;}
45             sub NOTATION_NODE () {12;}
46             sub ELEMENT_DECL_NODE () {13;} # codes from 13 up are not part of the core DOM set
47             sub ATT_DEF_NODE () {14;}
48             sub XML_DECL_NODE () {15;}
49             sub ATTLIST_DECL_NODE () {16;}
50             sub NAMESPACE_NODE () {17;}
# XML declaration written in front of every serialized document.  It is a
# file-scoped lexical, so a declaration picked up by new() applies to every
# object of this package from then on.
# NOTE(review): the literal was restored from the pattern new() validates
# against (version + encoding); confirm the exact text against the module.
my $xmld = qq(<?xml version="1.0" encoding="utf-8"?>\n); # Standard XML Declaration
sub STANDARD_XML_DECL () {$xmld;}

# new(@args) -- constructor; returns an object blessed into the calling class.
#   ('binary' => $file), $file readable : object is rebuilt by bexpand($file)
#   first argument ending in ".xtb"      : object is rebuilt by bexpand() too
#   anything else                        : @args go unchanged to XML::XPath->new()
# For a plain file source the first line of the file is read; when it is a
# declaration of the form <?xml version="..." encoding="..."?> it replaces
# $xmld, otherwise $xmld is reset to the standard declaration.
# Croaks when the binary source yields no object.
sub new {
    my $clas = shift();
    my $xpob = undef;
    my $frst = defined($_[0]) ? $_[0] : ''; # no undef warnings when called without arguments
    if (lc($frst) eq 'binary' && @_ > 1 && defined($_[1]) && length($_[1]) && -r $_[1]) {
        # bexpand() shifts an invocant off before the filename, so the class
        # has to be passed first or the filename is swallowed as the invocant
        $xpob = bexpand($clas, $_[1]);
    }
    elsif ($frst =~ /\.xtb$/i) {
        $xpob = bexpand($clas, $frst);
    }
    else {
        $xpob = XML::XPath->new(@_);
        my $keyd = ($frst eq 'filename');
        shift(@_) if ($keyd);
        # only a lone argument or the value of 'filename' names a file; the key
        # of any other pair (e.g. 'xml') must not be probed as a path
        my $srcf = ($keyd || @_ == 1) ? $_[0] : undef;
        if (defined($srcf) && $srcf !~ /\n/ && -r $srcf) { # doesn't handle inline XML data or IORef yet
            my $decl = undef;
            if (open(my $xmlf, '<', $srcf)) {
                $decl = <$xmlf>; # the declaration, if any, is on the first line
                close($xmlf);
            }
            $decl =~ s/(\?>).*/$1\n/ if (defined($decl));
            if (defined($decl) && $decl =~ /^<\?xml version="[^"]+" encoding="[^"]+" *\?>\n$/) {
                $xmld = $decl;
            }
            else { # provided declaration doesn't seem well-formed, so reset to Standard
                $xmld = qq(<?xml version="1.0" encoding="utf-8"?>\n);
            }
        }
    }
    croak "!*EROR*! new() could not build an object from: $frst!\n" unless (ref($xpob));
    my $self = bless($xpob, $clas); # self just a new XPath obj blessed into Tidy class
    return($self);
}
# reload() -- serialize the document to text (current XML declaration first,
# then every child of the document node) and parse that text again, so the
# object's stored XML and its context are rebuilt from scratch.
sub reload {
    my $self = shift();
    if (defined($self)) {
        my ($docu) = $self->findnodes('/');
        my $text = join('', $xmld, map { scalar($_->toString()) } $docu->getChildNodes());
        $self->set_xml($text);
        my $prsr = XML::XPath::XMLParser->new('xml' => $text);
        $self->set_context($prsr->parse());
    }
}
# strip() -- blank out whitespace-only text nodes that share their parent
# element with at least one sibling, then reload() so the emptied nodes
# disappear from the re-parsed tree.  A whitespace-only text node that is an
# element's only child is left alone.
sub strip {
    my $self = shift();
    if (defined($self)) {
        for my $elem ($self->findnodes('//*')) {
            next unless ($elem->getNodeType() eq ELEMENT_NODE);
            my @kids = $elem->getChildNodes();
            next unless (@kids > 1); # an only child is never emptied
            for my $kid (@kids) {
                next unless ($kid->getNodeType() eq TEXT_NODE);
                $kid->setValue('') if ($kid->getValue() =~ /^\s*$/);
            }
        }
        $self->reload(); # reload all XML as text to re-index nodes
    }
}
# tidy($indent) -- re-indent the whole document.
#   $indent : string repeated once per nesting level; a value containing
#             "tab" selects a tab character and one containing "spac" selects
#             the default space indent.
# The document is strip()ped, the root element is rebuilt by _rectidy(), and
# the result (declaration + nodes before the root + root + nodes after it) is
# stored and re-parsed.
sub tidy {
    my $self = shift();
    my $ndnt = shift() || ' ';
    $ndnt = "\t" if ($ndnt =~ /tab/i ); # allow some indent_type descriptions
    $ndnt = ' ' if ($ndnt =~ /spac/i);
    if (defined($self)) {
        $self->strip(); # empties whitespace-only text nodes in mixed content
        my $dpth = 0;   # element nest depth of the document level
        my $orte = 0;   # original root element
        my $prre = '';  # serialized nodes found before the root element
        my $pore = '';  # serialized nodes found after the root element
        my ($docu) = $self->findnodes('/');
        for my $kid ($docu->getChildNodes()) {
            if    ($kid->getNodeType == ELEMENT_NODE) { $orte = $kid; }
            elsif (!$orte)                            { $prre .= $kid->toString(); }
            else                                      { $pore .= $kid->toString(); }
        }
        ($orte) = $self->findnodes('/*') unless ($orte);
        # Start from the original root so that a root reporting no children is
        # serialized unchanged; previously the placeholder 0 was used as an
        # object in that case and the toString() call below died.
        my $nrte = $orte;
        if ($orte->getChildNodes()) { # recursively tidy children
            $nrte = $self->_rectidy($orte, ($dpth + 1), $ndnt);
        }
        my $data = $xmld . $prre . $nrte->toString() . $pore;
        $self->set_xml($data);
        my $prsr = XML::XPath::XMLParser->new('xml' => $data);
        $self->set_context($prsr->parse());
    }
}
# _rectidy($node, $depth, $indent) -- build and return a new element that
# mirrors $node (same name, attributes and namespaces) with indentation text
# nodes inserted between its children.
#   $depth  : nesting level of $node's children
#   $indent : string repeated once per level
sub _rectidy {
    my ($self, $node, $dpth, $ndnt) = @_;
    my $copy = XML::XPath::Node::Element->new($node->getName());
    $copy->appendAttribute($_) for ($node->findnodes('@*'));
    $copy->appendNamespace($_) for ($node->getNamespaces());
    my $prev; # child appended on the previous pass
    for my $orig ($node->getChildNodes()) {
        my $kid = $orig;
        # a line break plus indent goes in front of a child unless the child
        # is text or directly follows text
        unless ($kid->getNodeType() eq TEXT_NODE ||
                ($prev && $prev->getNodeType() eq TEXT_NODE)) {
            $copy->appendChild(XML::XPath::Node::Text->new("\n" . ($ndnt x $dpth)));
        }
        if ($kid->getNodeType() eq ELEMENT_NODE) {
            my @gkid = $kid->getChildNodes();
            my $nest = 0; # set when either of the first two grandchildren is not text
            if (@gkid) {
                if    ($gkid[0]->getNodeType() ne TEXT_NODE)              { $nest = 1; }
                elsif (@gkid > 1 && $gkid[1]->getNodeType() ne TEXT_NODE) { $nest = 1; }
            }
            $kid = $self->_rectidy($kid, ($dpth + 1), $ndnt) if ($nest);
        }
        $copy->appendChild($kid);
        $prev = $kid;
    }
    # the closing tag sits one level shallower than the children
    $copy->appendChild(XML::XPath::Node::Text->new("\n" . ($ndnt x ($dpth - 1))));
    return($copy);
}
# compress($flags) -- replace names and whitespace-delimited tokens by short
# codes (a type letter followed by a base-64 index) and append a comment that
# holds the look-up tables needed to reverse the substitution.
#   $flags : letters selecting what to encode -- e(lement names), a(ttribute
#            names), v (attribute values), t(ext), c(omments); defaults to
#            'ea', and 'all' means 'eatvnc'.
# Croaks when the document already carries a compress() table comment.
sub compress {
    my $self = shift();
    my $flgz = shift();             # options of node types to include
    my @elut = (); my %efou = ();   # element   look-up table && found flags
    my @alut = (); my %afou = ();   # attribute key
    my @vlut = (); my %vfou = ();   # attribute value
    my @tlut = (); my %tfou = ();   # text
    my @clut = (); my %cfou = ();   # comment
    my $cstr = "XML::Tidy::compress v$VERSION";
    my $ntok = qr/[\(\)\[\]\{\}\/\*\+\?]/; # tokens holding any of these are never encoded
    $flgz = 'ea'     unless (defined($flgz)); # Default flags: just elemz && attrz
    $flgz = 'eatvnc' if     ($flgz eq 'all'); # comment encoding is the least tested
    $self->strip(); # remove non-data text nodes
    my ($root) = $self->findnodes('/');
    for my $cmnt ($root->findnodes('//comment()')) {
        my $text = $cmnt->getNodeValue();
        # the third version component is optional so that the marker written
        # for a two-part $VERSION is recognized as well
        if (defined($text) && $text =~ /^XML::Tidy::compress v\d+\.\d+(?:\.[0-9A-Za-z._]{7})?/) {
            croak "!*EROR*! compress() cannot be performed twice on the same object!\n";
        }
    }
    # Encode every token of one node's value in place.
    #   $node, $fldx : node && index of the field of its array holding the value
    #   $pfix, $lutr, $four : code letter, table && found flags for new tokens
    #   @prio : [letter, \%found] pairs of earlier tables, reused when they
    #           already know the token
    my $squeeze = sub {
        my ($node, $fldx, $pfix, $lutr, $four, @prio) = @_;
        my $whol = $node->getNodeValue();
        $whol = '' unless (defined($whol));
        for my $tokn (split(/\s+/, $whol)) {
            next if ($tokn =~ $ntok);
            my $repl = undef;
            for my $pair (@prio) {
                my ($ppfx, $pfou) = @{$pair};
                if (exists($pfou->{$tokn})) {
                    $repl = $ppfx . b64($pfou->{$tokn});
                    last;
                }
            }
            unless (defined($repl)) {
                unless (exists($four->{$tokn})) {
                    push(@{$lutr}, $tokn);
                    $four->{$tokn} = $#{$lutr};
                }
                $repl = $pfix . b64($four->{$tokn});
            }
            # \Q..\E: the token is data, not a pattern ('.', '|', '^' ... are literal)
            ${$node}->[$fldx] =~ s/(^|\s+)\Q$tokn\E(\s+|$)/$1$repl$2/g;
        }
    };
    if ($flgz =~ /e[^E]*$/) { # elements
        for my $elem ($root->findnodes('//*')) {
            my $name = $elem->getName();
            unless (exists($efou{$name})) {
                push(@elut, $name);
                $efou{$name} = $#elut;
            }
            # 5 is the index of XML::XPath::Node::Element's node_name field
            ${$elem}->[5] = 'e' . b64($efou{$name});
        }
        $cstr .= "\ne:@elut" if (@elut);
    }
    if ($flgz =~ /(a[^A]*|v[^V]*)$/) { # attributes (keys or values)
        my $keyz = ($flgz =~ /a[^A]*$/) ? 1 : 0;
        my $valz = ($flgz =~ /v[^V]*$/) ? 1 : 0;
        for my $attr ($root->findnodes('//@*')) {
            if ($keyz) { # attribute keys
                my $name = $attr->getName();
                # 4 is the index of XML::XPath::Node::Attribute's node_key field
                if (exists($efou{$name})) { # reuse element keys matching attributes
                    ${$attr}->[4] = 'e' . b64($efou{$name});
                } else {
                    unless (exists($afou{$name})) {
                        push(@alut, $name);
                        $afou{$name} = $#alut;
                    }
                    ${$attr}->[4] = 'a' . b64($afou{$name});
                }
            }
            if ($valz) { # attribute values
                # 5 is the index of XML::XPath::Node::Attribute's node_value field
                $squeeze->($attr, 5, 'v', \@vlut, \%vfou, ['e', \%efou], ['a', \%afou]);
            }
        }
        $cstr .= "\na:@alut" if (@alut);
        $cstr .= "\nv:@vlut" if (@vlut);
    }
    if ($flgz =~ /t[^T]*$/) { # text
        for my $text ($root->findnodes('//text()')) {
            # 3 is the index of XML::XPath::Node::Text's node_text field;
            # attribute values are now really reused (the old test looked at
            # the attribute-key flags twice, so the 'v' branch never ran)
            $squeeze->($text, 3, 't', \@tlut, \%tfou,
                       ['e', \%efou], ['a', \%afou], ['v', \%vfou]);
        }
        $cstr .= "\nt:@tlut" if (@tlut);
    }
    if ($flgz =~ /c[^C]*$/) { # comment
        for my $cmnt ($root->findnodes('//comment()')) {
            # 3 is the index of XML::XPath::Node::Comment's node_comment field
            $squeeze->($cmnt, 3, 'c', \@clut, \%cfou,
                       ['e', \%efou], ['a', \%afou], ['v', \%vfou], ['t', \%tfou]);
        }
        $cstr .= "\nc:@clut" if (@clut);
    }
    $root->appendChild($self->c($cstr)); # table comment goes last in the document
    $self->reload();
}
# expand() -- reverse compress(): read the look-up tables from the table
# comment, remove that comment, put the original names and tokens back, then
# reload() and tidy() the document.
# Croaks when a name or token cannot be resolved through the tables.
# NOTE(review): attribute, text and comment passes still only run when the
# matching table (a / t / c) is non-empty, exactly as before; a document whose
# tokens were all covered by earlier tables is therefore left encoded -- confirm
# whether that gating is intended before relaxing it.
sub expand {
    my $self = shift();
    my $flgz = shift(); # accepted but not used by this method
    my @elut = (); my @alut = (); # element && attribute look-up-tables
    my @vlut = (); my @tlut = (); # attribute value && text
    my @clut = ();                # comment
    my $ntok = qr/[\(\)\[\]\{\}\/\*\+\?]/; # tokens holding any of these were never encoded
    my %lutf = ('e' => \@elut, 'a' => \@alut, 't' => \@tlut, 'v' => \@vlut, 'c' => \@clut);
    my ($root) = $self->findnodes('/');
    for my $cmnt ($root->findnodes('//comment()')) {
        my $text = $cmnt->getNodeValue();
        next unless (defined($text));
        # the third version component is optional: with a two-part $VERSION the
        # marker written by compress() has none, and requiring it meant no
        # table was ever loaded
        next unless ($text =~ s/^XML::Tidy::compress v\d+\.\d+(?:\.[0-9A-Za-z._]{7})?//);
        while ($text =~ s/^\n([eatvnc]):([^\n]+)//) {
            my ($ntyp, $lutd) = ($1, $2);
            # 'n' (namespace) tables are recognized but not loaded
            push(@{$lutf{$ntyp}}, split(/\s+/, $lutd)) if (exists($lutf{$ntyp}));
        }
        $root->removeChild($cmnt);
    }
    # Resolve one code through a list of [letter, \@table] pairs; returns the
    # original string, or undef when no table holds it.  Every table is tried
    # against the untouched code (the prefix is stripped from a copy).
    my $lookup = sub {
        my ($tokn, @tabz) = @_;
        for my $pair (@tabz) {
            my ($pfix, $lutr) = @{$pair};
            my $coun = $tokn;
            next unless ($coun =~ s/^\Q$pfix\E//);
            my $indx = b10($coun);
            return($lutr->[$indx]) if ($indx < @{$lutr});
        }
        return();
    };
    # Decode every token of one node's value in place.
    #   $node, $fldx : node && index of the field of its array holding the value
    #   $what        : wording used in the error message
    my $swell = sub {
        my ($node, $fldx, $what, @tabz) = @_;
        my $whol = $node->getNodeValue();
        $whol = '' unless (defined($whol));
        for my $tokn (split(/\s+/, $whol)) {
            next if ($tokn =~ $ntok);
            my $full = $lookup->($tokn, @tabz);
            croak "!*EROR*! expand() cannot find look-up $what:$tokn!\n" unless (defined($full));
            # \Q..\E: base-64 indices may contain '.', which must match literally
            ${$node}->[$fldx] =~ s/(^|\s+)\Q$tokn\E(\s+|$)/$1$full$2/g;
        }
    };
    if (@elut) {
        for my $elem ($root->findnodes('//*')) {
            my $name = $elem->getName();
            my $full = $lookup->($name, ['e', \@elut]);
            croak "!*EROR*! expand() cannot find look-up element:$name!\n" unless (defined($full));
            # 5 is the index of XML::XPath::Node::Element's node_name field
            ${$elem}->[5] = $full;
        }
    }
    if (@alut) {
        for my $attr ($root->findnodes('//@*')) {
            my $name = $attr->getName();
            my $full = $lookup->($name, ['e', \@elut], ['a', \@alut]);
            croak "!*EROR*! expand() cannot find look-up attribute key:$name!\n" unless (defined($full));
            # 4 is the index of XML::XPath::Node::Attribute's node_key field
            ${$attr}->[4] = $full;
            if (@vlut) {
                # 5 is the index of XML::XPath::Node::Attribute's node_value field
                $swell->($attr, 5, 'attribute value',
                         ['e', \@elut], ['a', \@alut], ['v', \@vlut]);
            }
        }
    }
    if (@tlut) {
        for my $text ($root->findnodes('//text()')) {
            # 3 is the index of XML::XPath::Node::Text's node_text field
            $swell->($text, 3, 'text token',
                     ['e', \@elut], ['a', \@alut], ['t', \@tlut], ['v', \@vlut]);
        }
    }
    if (@clut) {
        for my $cmnt ($root->findnodes('//comment()')) {
            # 3 is the index of XML::XPath::Node::Comment's node_comment field
            $swell->($cmnt, 3, 'comment token',
                     ['e', \@elut], ['a', \@alut], ['v', \@vlut], ['t', \@tlut], ['c', \@clut]);
        }
    }
    $self->reload();
    $self->tidy();
}
# _intern_token($strz, $flut, $intz, $fltz, $tokn) -- make sure $tokn has an
# entry in the found-table %$flut, adding it to one of the value tables first
# when it is new.  The entry is 'l<n>' for ints, 'd<n>' for floats and a plain
# index for strings.
# A token only goes to a numeric table when its text is exactly what the number
# prints as; "007", "+5" or "1.50" stay strings, because storing them as
# numbers would bring them back as "7", "5" and "1.5".
sub _intern_token {
    my ($strz, $flut, $intz, $fltz, $tokn) = @_;
    return() if (exists($flut->{$tokn}));
    if ($tokn =~ /\A(?:0|-?[1-9]\d*)\z/ &&
        -2147483648 <= $tokn && $tokn <= 2147483647) { # fits a signed 32-bit pack('l')
        push(@{$intz}, $tokn);
        $flut->{$tokn} = 'l' . $#{$intz};
    } elsif ($tokn =~ /\A-?(?:0|[1-9]\d*)\.\d+\z/ && $tokn eq (0 + $tokn)) {
        push(@{$fltz}, $tokn);                         # survives a pack('d') round trip
        $flut->{$tokn} = 'd' . $#{$fltz};
    } else {
        push(@{$strz}, $tokn);
        $flut->{$tokn} = $#{$strz};
    }
    return();
}

# _append_node($strz, $flut, $intz, $fltz, $ndty, \$node) -- append the
# description of $node (and, for elements, of everything below it) to the
# node list @$ndty used by bcompress().
#   $strz, $intz, $fltz : array refs collecting distinct strings, ints, floats
#   $flut               : hash ref mapping a token to its table entry
#   $ndty               : array ref receiving (token entry, node type) pairs,
#                         a further value entry after attributes and
#                         processing instructions, and 0 to close an element
#   \$node              : reference to the node object
# An empty token or value is recorded as entry 1.
sub _append_node {
    my ($strz, $flut, $intz, $fltz, $ndty, $node) = @_;
    my $type = ${$node}->getNodeType();
    my $tokn = '';    # token key string
    my $aval = undef; # attribute value or PI data string
    if ($type == ELEMENT_NODE) {
        $tokn = ${$node}->getName();
    } elsif ($type == ATTRIBUTE_NODE) {
        $tokn = ${$node}->getName();      # attribute keys
        $aval = ${$node}->getNodeValue(); # attribute values
        $aval = '' unless (defined($aval));
    } elsif ($type == NAMESPACE_NODE) {
        $tokn = ${$node}->toString();     # namespace prefix && expanded
    } elsif ($type == PROCESSING_INSTRUCTION_NODE) {
        $tokn = ${$node}->getTarget();    # PI target
        $aval = ${$node}->getData();      # PI data
        $aval = '' unless (defined($aval));
    } else {                              # text, comment
        $tokn = ${$node}->getNodeValue();
    }
    for my $strg ($tokn, $aval) {
        _intern_token($strz, $flut, $intz, $fltz, $strg) if (defined($strg) && length($strg));
    }
    if (defined($tokn)) {
        push(@{$ndty}, (length($tokn) ? $flut->{$tokn} : 1), $type);
    }
    if (defined($aval)) {
        push(@{$ndty}, (length($aval) ? $flut->{$aval} : 1));
    }
    if ($type == ELEMENT_NODE) {
        # namespaces first, then attributes, then child nodes recursively ...
        for my $nmsp (${$node}->getNamespaces()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$nmsp);
        }
        for my $attr (${$node}->getAttributes()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$attr);
        }
        for my $kid (${$node}->getChildNodes()) {
            _append_node($strz, $flut, $intz, $fltz, $ndty, \$kid);
        }
        push(@{$ndty}, 0); # ... before the element-close marker
    }
}
# bcompress($file) -- write a binary representation of the object to $file
# (default 'default.xtb').
# Layout written: header string ending in a NUL byte; one byte giving the index
# width; the string count followed by the NUL-terminated strings; the int count
# followed by the ints (pack 'l'); the float count followed by the floats
# (pack 'd'); then the node list.  Counts and indices use the pack letter chosen
# from the index width (C, S or L).  Indices address strings first, then ints,
# then floats; 0 closes an element and 1 stands for the empty string.
# Dies when the file cannot be opened, written or closed.
sub bcompress {
    my $self = shift();
    my $dstf = shift() || 'default.xtb'; # destination binary filename
    my $bstr = "XML::Tidy::bcompress v$VERSION\0";
    my @strz = ('', ''); # array of strings; slots 0 && 1 are the implied entries
    my @intz = ();       # array of ints
    my @fltz = ();       # array of floats
    my @ndty = ();       # list of table entries && node types
    my %flut = ('' => 1); # found string lookup-table
    $self->strip(); # remove non-data text nodes
    my ($root) = $self->findnodes('/');
    for my $kid ($root->getChildNodes()) {
        _append_node(\@strz, \%flut, \@intz, \@fltz, \@ndty, \$kid);
    }
    # index width is derived from the length of the node list
    my $bsiz = 1; my $bpak = 'C';
    my $nndx = @ndty;
    while ($nndx >= 256) { $nndx /= 256.0; $bsiz++; }
    if    ($bsiz == 2) { $bpak = 'S'; }
    elsif ($bsiz >  2) { $bpak = 'L'; $bsiz = 4; }
    # assume default XML declaration
    open(my $dsth, '>', $dstf) or die "!*EROR*! Can't open binary DSTF: $dstf!\n";
    binmode($dsth);
    shift(@strz); shift(@strz); # element-close && empty-string are implied
    # turn a table entry into the index stored in the file
    my $slot = sub {
        my $ntry = shift();
        if    ($ntry =~ s/^d//) { return(scalar(@strz) + scalar(@intz) + $ntry + 2); }
        elsif ($ntry =~ s/^l//) { return(scalar(@strz) + $ntry + 2); }
        return($ntry);
    };
    my $data = $bstr;
    $data .= pack("C$bpak", $bsiz, scalar(@strz));
    $data .= "$_\0" for (@strz);
    $data .= pack("$bpak", scalar(@intz));
    $data .= pack('l', $_) for (@intz);
    $data .= pack("$bpak", scalar(@fltz));
    $data .= pack('d', $_) for (@fltz);
    while (@ndty) {
        my $indx = shift(@ndty);
        if (defined($indx) && $indx) {
            my $type = shift(@ndty);
            if (defined($type) && $type) {
                $data .= pack("$bpak", $slot->($indx));
                $data .= pack('C', $type);
                if ($type == ATTRIBUTE_NODE ||
                    $type == PROCESSING_INSTRUCTION_NODE) { # these carry a value entry too
                    $data .= pack("$bpak", $slot->(shift(@ndty)));
                }
            }
        } else {
            $data .= pack("$bpak", 0);
        }
    }
    # errors on a buffered write handle may only surface at close time
    print {$dsth} $data or die "!*EROR*! Can't write binary DSTF: $dstf!\n";
    close($dsth)        or die "!*EROR*! Can't close binary DSTF: $dstf!\n";
}
# bexpand: rebuild an object from a binary file and return it.
#   The first shifted argument is taken as the invocant (it is not referenced
#   again in the lines shown); the second is the source filename, defaulting to
#   'default.xtb'.
#   When the file is readable: the header string is cut off the front of the
#   data (it is stored in $cstr but not compared with $bstr), then the index
#   width, the string / int / float tables and the node list are unpacked, XML
#   text is generated from the node list starting with $xmld, and the object
#   made by XML::Tidy->new('xml' => ...) from that text is returned.
#   There is no explicit return when the file is not readable.
# NOTE(review): this listing appears to have lost text during extraction --
#   see "join('',)" at 565, the empty or unterminated string literals at 608,
#   647 and 658, and the jump from listing line 647 to 656.  Consult the module
#   source before changing any code here.
549             sub bexpand { # uncompress a binary file back into an XML::Tidy object
550             my $self = shift();
551             my $srcf = shift() || 'default.xtb'; # source binary filename
552             my $srcd = undef;
553             my $cstr = undef;
554             my $bstr = "XML::Tidy::bcompress v$VERSION\0";
555             my $gxml = ''; # generated XML for new object
556             my @strz = ('', ''); # array of strings
557             my @intz = ( ); # array of ints
558             my @fltz = ( ); # array of floats
559             my @ndty = (); # list of @strz indices && node types
560             my @elst = (); # element stack to track tree reconstruction
561             my $bsiz = 1; my $bpak = 'C'; my $rnam = ''; my $coun = 0;
562             if(-r $srcf) {
563             open(SRCF,'<',$srcf);
564             binmode(SRCF);
565             $srcd = join('',);
566             close(SRCF);
567             $cstr = substr($srcd, 0, length($bstr), ''); # 4-arg substr removes the header from $srcd
568             $bsiz = unpack('C', substr($srcd, 0, 1, '')); # one byte: width of every index that follows
569             if ($bsiz == 2) { $bpak = 'S'; }
570             elsif($bsiz > 2) { $bpak = 'L'; $bsiz = 4; }
571             $coun = unpack("$bpak", substr($srcd, 0, $bsiz, '')); # number of strings
572             while($coun--) {
573             push(@strz, '');
574             my $char = unpack('a', substr($srcd, 0, 1, ''));
575             while($char ne "\0") { # each string runs up to its NUL byte
576             $strz[-1] .= $char;
577             $char = unpack('a', substr($srcd, 0, 1, ''));
578             }
579             }
580             $coun = unpack("$bpak", substr($srcd, 0, $bsiz, '')); # number of ints
581             while($coun--) {
582             push(@intz, unpack('l', substr($srcd, 0, 4, '')));
583             }
584             $coun = unpack("$bpak", substr($srcd, 0, $bsiz, '')); # number of floats
585             while($coun--) {
586             push(@fltz, unpack('d', substr($srcd, 0, 8, '')));
587             #$fltz[-1] .= '.0' if($fltz[-1] !~ /\./); # mk floats look like floats?
588             }
589             while(length($srcd)) { # the rest of the data is the node list
590             push(@ndty, unpack("$bpak", substr($srcd, 0, $bsiz, '')));
591             if($ndty[-1]) { # a non-zero index is followed by a one-byte node type
592             push(@ndty, unpack('C' , substr($srcd, 0, 1, '')));
593             $rnam = $strz[$ndty[-2]] if(!length($rnam) && $ndty[-1] == ELEMENT_NODE);
594             if($ndty[-1] == ATTRIBUTE_NODE ||
595             $ndty[-1] == PROCESSING_INSTRUCTION_NODE) { # these two types carry a second index
596             push(@ndty, unpack("$bpak", substr($srcd, 0, $bsiz, '')));
597             }
598             }
599             }
600             my $opfl = 0; # set while an element's start tag has not been closed with '>' yet
601             @elst = (); $gxml = $xmld;
602             while(@ndty) {
603             my $indx = shift(@ndty); my $vndx;
604             my $type = ELEMENT_NODE; $type = shift(@ndty) if($indx); # index 0 has no type entry
605             if ($type == ELEMENT_NODE) {
606             $gxml .= '>' if($opfl);
607             if($indx == 0) { # close element
608             $gxml .= '';
609             $opfl = 0;
610             } else {
611             push(@elst, $strz[$indx]);
612             $gxml .= '<' . $strz[$indx];
613             $opfl = 1;
614             }
615             } elsif($type == ATTRIBUTE_NODE) {
616             $vndx = shift(@ndty);
617             if($opfl) {
618             $gxml .= ' ';
619             if ($indx >= (scalar(@strz) + scalar(@intz))) { # indices past the strings && ints address floats
620             $gxml .= $fltz[($indx - scalar(@strz) - scalar(@intz))];
621             } elsif($indx >= scalar(@strz) ) { # indices past the strings address ints
622             $gxml .= $intz[($indx - scalar(@strz))];
623             } else {
624             $gxml .= $strz[$indx];
625             }
626             $gxml .= '="';
627             if ($vndx >= (scalar(@strz) + scalar(@intz))) {
628             $gxml .= $fltz[($vndx - scalar(@strz) - scalar(@intz))];
629             } elsif($vndx >= scalar(@strz) ) {
630             $gxml .= $intz[($vndx - scalar(@strz))];
631             } else {
632             $gxml .= $strz[$vndx];
633             }
634             $gxml .= '"';
635             }
636             } elsif($type == TEXT_NODE) {
637             if($opfl) { $gxml .= '>'; $opfl = 0; }
638             if ($indx >= (scalar(@strz) + scalar(@intz))) {
639             $gxml .= $fltz[($indx - scalar(@strz) - scalar(@intz))];
640             } elsif($indx >= scalar(@strz) ) {
641             $gxml .= $intz[($indx - scalar(@strz))];
642             } else {
643             $gxml .= $strz[$indx];
644             }
645             } elsif($type == COMMENT_NODE) {
646             if($opfl) { $gxml .= '>'; $opfl = 0; }
647             $gxml .= '';
656             } elsif($type == PROCESSING_INSTRUCTION_NODE) {
657             if($opfl) { $gxml .= '>'; $opfl = 0; }
658             $gxml .= '
659             if ($indx >= (scalar(@strz) + scalar(@intz))) {
660             $gxml .= $fltz[($indx - scalar(@strz) - scalar(@intz))];
661             } elsif($indx >= scalar(@strz) ) {
662             $gxml .= $intz[($indx - scalar(@strz))];
663             } else {
664             $gxml .= $strz[$indx];
665             }
666             $gxml .= ' ';
667             $vndx = shift(@ndty);
668             if ($vndx >= (scalar(@strz) + scalar(@intz))) {
669             $gxml .= $fltz[($vndx - scalar(@strz) - scalar(@intz))];
670             } elsif($vndx >= scalar(@strz) ) {
671             $gxml .= $intz[($vndx - scalar(@strz))];
672             } else {
673             $gxml .= $strz[$vndx];
674             }
675             $gxml .= '?>';
676             } elsif($type == NAMESPACE_NODE) {
677             $gxml .= ' ' . $strz[$indx] if($opfl);
678             }
679             }
680             my $nslf = XML::Tidy->new('xml' => "$gxml");
681             #$nslf->tidy(); # don't force a tidy() even if it's likely desired
682             return($nslf);
683             }
684             }
sub prune { # remove a section of the tree at the xpath location parameter
    # Usage:
    #   $tidy->prune($xpath_location);
    #   $tidy->prune('xpath_loc' => $xpath_location);
    # Every node matched by the location is detached from its parent.
    # A missing/false location, an empty string, or the root ('/') is a no-op.
    my $self     = shift();
    my $location = shift() || return(); # can't prune root node

    # A leading key ('xplc', or anything starting with 'xpath_loc', optionally
    # prefixed by '-' or '_') means the real location is the next argument.
    if($location =~ /^[-_]?(xplc$|xpath_loc)/) {
        $location = shift() || undef;
    }

    my $prunable = defined($self)   && defined($location)
                && length($location) && $location ne '/';
    if($prunable) {
        $self->reload(); # update all nodes and internal XPath indexing before find
        foreach my $node ($self->findnodes($location)) {
            my $parent = $node->getParentNode();
            next unless(defined($parent)); # no parent, so nothing to detach it from
            $parent->removeChild($node);
        }
    }
}
sub write { # write out an XML file to disk from a Tidy object
    # Accepted call forms (positional and keyed arguments may be mixed):
    #   $tidy->write();                          # file from $self->get_filename()
    #   $tidy->write($filename);
    #   $tidy->write($filename, $xpath_location);
    #   $tidy->write('filename'  => $filename, 'xpath_loc' => $xpath_location);
    #   $tidy->write('xpath_loc' => $xpath_location, 'filename' => $filename);
    # Keys may be abbreviated ('flnm', 'xplc') and prefixed with '-' or '_'.
    # When an XPath location is given, only the first matching node is written,
    # wrapped in a fresh element so it can be serialized like a document root.
    # Croaks if no filename can be determined, or if the file cannot be
    # opened or completely written.
    my $self = shift(); my $root; my $xplc;
    my $flnm = shift() || $self->get_filename();
    if(defined($flnm) && $flnm) {
        if($flnm =~ /^[-_]?(xplc$|xpath_loc)/) {
            $xplc = shift() || undef;
            $flnm = shift() || $self->get_filename();
        }
        # $flnm may have become undef above when no filename is known
        if(defined($flnm) && $flnm =~ /^[-_]?(flnm|filename)$/) {
            $flnm = shift() || $self->get_filename();
        }
    }
    unless(defined($xplc) && $xplc) {
        $xplc = shift() || undef;
    }
    if(defined($xplc) && $xplc && $xplc =~ /^[-_]?(xplc$|xpath_loc)/) {
        $xplc = shift() || undef;
    }
    if(defined($self) && defined($flnm)) {
        if(defined($xplc) && $xplc) {
            $root = XML::XPath::Node::Element->new();
            my($rtnd)= $self->findnodes($xplc);
            $root->appendChild($rtnd);
        } else {
            ($root)= $self->findnodes('/');
        }
        # Lexical handle + checked open: a failed open used to discard the
        # whole document silently.
        open(my $fh, '>', $flnm)
            or croak("!*EROR*! Could not open '$flnm' for writing: $!\n");
        # $xmld is a file-level variable defined outside this block
        # (presumably the XML declaration text -- TODO confirm).
        print {$fh} $xmld;
        print {$fh} $_->toString() , "\n" for($root->getChildNodes());
        # Buffered write errors (e.g. full disk) only surface at close.
        close($fh)
            or croak("!*EROR*! Could not finish writing '$flnm': $!\n");
    } else {
        croak("!*EROR*! No filename could be found to write() to!\n");
    }
}
sub toString { # return XML string from a Tidy object
    # Usage:
    #   $tidy->toString();
    #   $tidy->toString($xpath_location);
    #   $tidy->toString('xpath_loc' => $xpath_location);
    # Returns $xmld (a file-level variable defined outside this block) followed
    # by each child of the chosen root serialized on its own line.
    my $self     = shift();
    my $location = shift();

    # A leading key ('xplc' / 'xpath_loc...', optionally prefixed by '-' or '_')
    # means the real location is the next argument.
    if($location && $location =~ /^[-_]?(xplc$|xpath_loc)/) {
        $location = shift() || undef;
    }

    unless(defined($self)) {
        croak("!*EROR*! No XML::Tidy could be found for toString()!\n");
    }

    my $top;
    if($location) {
        # Wrap the first matching node in a fresh element so it can be
        # serialized the same way as a document root.
        $top = XML::XPath::Node::Element->new();
        my($match) = $self->findnodes($location);
        $top->appendChild($match);
    } else {
        ($top) = $self->findnodes('/');
    }

    my $xml_text = $xmld;
    $xml_text .= join('', map { $_->toString() . "\n" } $top->getChildNodes());
    return($xml_text);
}
sub AUTOLOAD { # methods (created as necessary)
    # createNode wrappers: any undefined method whose name (after an optional
    # 'new' or 'create' prefix) starts with one of the letters e, a, c, t, p, n
    # is installed as a constructor wrapper for the matching
    # XML::XPath::Node::* class and then invoked. Any other name croaks.
    no strict 'refs';
    my $self = shift();
    my %class_for = (
        'e' => 'XML::XPath::Node::Element',
        'a' => 'XML::XPath::Node::Attribute',
        'c' => 'XML::XPath::Node::Comment',
        't' => 'XML::XPath::Node::Text',
        'p' => 'XML::XPath::Node::PI',
        'n' => 'XML::XPath::Node::Namespace',
    );
    # Anchor the match to the final name component. The previous unanchored
    # pattern could backtrack onto the 'T' of the package name "Tidy", so every
    # unknown method silently became a Text-node constructor and the croak
    # below was unreachable.
    if($AUTOLOAD =~ /::(?:new|create)?([eactpn])[^:]*\z/i) { # createNode Wrappers
        my $class = $class_for{lc($1)};
        *{$AUTOLOAD} = sub { # add called sub to function table
            my $self = shift();
            return($class->new(@_));
        };
        return($self->$AUTOLOAD(@_));
    } else {
        croak "No such method: $AUTOLOAD\n";
    }
}
771             sub DESTROY { } # intentionally empty: defining DESTROY keeps object destruction from being routed through AUTOLOAD (and calms test warnings)
772             8;
773              
774             =encoding utf8
775              
776             =head1 NAME
777              
778             XML::Tidy - tidy indenting of XML documents
779              
780             =head1 VERSION
781              
782             This documentation refers to version 1.16 of XML::Tidy, which was released on
783             Tue Jun 21 04:14:28:29 -0500 2016.
784              
785             =head1 SYNOPSIS
786              
787             use XML::Tidy;
788              
789             # create new XML::Tidy object from MainFile.xml
790             my $tidy_obj = XML::Tidy->new('filename' => 'MainFile.xml');
791              
792             # Tidy up the indenting
793             $tidy_obj->tidy();
794              
795             # Write out changes back to MainFile.xml
796             $tidy_obj->write();
797              
798             =head1 DESCRIPTION
799              
800             This module creates XML document objects (with inheritance from
801             L<XML::XPath>) to tidy mixed-content (i.e., non-data) text node
802             indenting. There are also some other handy member functions to
803             compress and expand your XML document object (into either a
804             compact XML representation or a binary one).
805              
806             =head1 TODO
807              
808             =over 2
809              
810             =item - maybe add to bcompress support for binary char, wide UTF-8, short ints, and single or quad-precision floats
811              
812             =item - maybe store in compress recurring patterns of node index sets in a new array that can be indexed itself
813              
814             =item - fix reload() from messing up Unicode escaped &XYZ; components like Copyright © and Registered ®
815              
816             =item - What else does Tidy need?
817              
818             =back
819              
820             =head1 USAGE
821              
822             =head2 new()
823              
824             This is the standard Tidy object constructor. Except for the new
825             'binary' option, it can take the same parameters as an L<XML::XPath>
826             object constructor to initialize the XML document object. These can
827             be any one of:
828              
829             'filename' => 'SomeFile.xml'
830             'binary' => 'SomeBinaryFile.xtb'
831             'xml' => $variable_which_holds_a_bunch_of_XML_data
832             'ioref' => $file_InputOutput_reference
833             'context' => $existing_node_at_specified_context_to_become_new_obj
834              
835             =head2 reload()
836              
837             The reload() member function causes the latest data contained in
838             a Tidy object to be re-parsed (which re-indexes all nodes).
839              
840             This can be necessary after modifications have been made to nodes
841             which impact the tree node hierarchy because L<XML::XPath>'s find()
842             member preserves state information which can get out-of-sync.
843              
844             reload() is probably rarely useful by itself but it is needed by
845             strip() and prune() so it is exposed as a method in case it comes in
846             handy for other uses.
847              
848             =head2 strip()
849              
850             The strip() member function searches the Tidy object for all
851             mixed-content (i.e., non-data) text nodes and empties them out.
852             This will basically unformat any markup indenting.
853              
854             strip() is used by compress() and tidy() but it is exposed because it
855             could be worthwhile by itself.
856              
857             =head2 tidy()
858              
859             The tidy() member function can take a single optional parameter as
860             the string that should be inserted for each indent level. Some
861             examples:
862              
863             # Tidy up indenting with default two (2) spaces per indent level
864             $tidy_obj->tidy();
865              
866             # Tidy up indenting with four (4) spaces per indent level
867             $tidy_obj->tidy('    ');
868              
869             # Tidy up indenting with one (1) tab per indent level
870             $tidy_obj->tidy("\t");
871              
872             The default behavior is to use two (2) spaces for each indent
873             level. The Tidy object gets all mixed-content (i.e., non-data)
874             text nodes reformatted to appropriate indent levels according to
875             tree nesting depth.
876              
877             NOTE: tidy() disturbs some XML escapes in whatever ways L<XML::XPath>
878             does. It has been brought to my attention that these modules also strip
879             CDATA tags from XML files / data they operate on. Even though
880             CDATA tags don't seem very common, I wish they could work smoothly too.
881             Hopefully the vast majority of files will work fine and support for
882             other types can be added later.
883              
884             =head2 compress()
885              
886             The compress() member function calls strip() on the Tidy object
887             then creates an encoded comment which contains the names of elements
888             and attributes as they occurred in the original document. Their
889             respective element and attribute names are replaced with just the
890             appropriate index throughout the document.
891              
892             compress() can accept a parameter describing which node types to
893             attempt to shrink down as abbreviations. This parameter should be
894             a string of just the first letters of each node type you wish to
895             include as in the following mapping:
896              
897             e = elements
898             a = attribute keys
899             v = attribute values *EXPERIMENTAL*
900             t = text nodes *EXPERIMENTAL*
901             c = comment nodes *EXPERIMENTAL*
902             n = namespace nodes *not-yet-implemented*
903              
904             Attribute values ('v') and text nodes ('t') both seem to work fine
905             with current tokenization. I've still labeled them EXPERIMENTAL
906             because they seem more likely to cause problems than valid element
907             or attribute key names. I have some bugs in the comment node
908             compression which I haven't been able to find yet so that one should
909             be avoided for now. Since these three node types ('vtc')
910             all require tokenization, they are not included in default compression
911             ('ea'). An example call which includes values and text would be:
912              
913             $tidy_obj->compress('eavt');
914              
915             The original document structure (i.e., node hierarchy) is preserved.
916             compress() significantly reduces the file size of most XML documents
917             for when size matters more than immediate human readability.
918             expand() performs the opposite conversion.
919              
920             =head2 expand()
921              
922             The expand() member function reads any XML::Tidy::compress comments
923             from the Tidy object and uses them to reconstruct the document
924             that was passed to compress().
925              
926             =head2 bcompress('BinaryOutputFilename.xtb')
927              
928             The bcompress() member function stores a binary representation of
929             any Tidy object. The format consists of:
930              
931             0) a null-terminated version string
932             1) a byte specifying how many bytes later indices will be
933             2) the number of bytes from 1 above to designate the total string count
934             3) the number of null-terminated strings from 2 above
935             4) the number of bytes from 1 above to designate the total integer count
936             5) the number of 4-byte integers from 4 above
937             6) the number of bytes from 1 above to designate the total float count
938             7) the number of 8-byte (double-precision) floats from 6 above
939             8) node index sets until the end of the file
940              
941             Normal node index sets consist of two values. The first is an index
942             (again the number of bytes long comes from 1) into the three lists as if
943             they were all linear. The second is a single-byte integer identifying the
944             node type (using standard DOM node type enumerations).
945              
946             A few special cases exist in node index sets though. If the index is
947             null, it is interpreted as a close-element tag (so no accompanying type
948             value is read). On the other end, when the index is non-zero, the type
949             value is always read. In the event that the type corresponds to an
950             attribute or a processing instruction, the next index is read (without
951             another accompanying type value) in order to complete the data fields
952             required by those node types.
953              
954             NOTE: Please bear in mind that the encoding of binary integers and floats
955             only works properly if the values are not surrounded by spaces or other
956             delimiters and each is contained in its own single node. This is
957             necessary to enable thorough reconstruction of whitespace from the
958             original document. I recommend storing every numerical value as an
959             isolated attribute value or text node without any surrounding whitespace.
960              
961             # Examples which encode all numbers as binary:
962            
963             31.255
964            
965             -15.65535
966             16383.7
967             -1023.63
968            
969            
970              
971             # Examples which encode all numbers as strings:
972            
973             2.0
974             4.0 -2.0 4.0
975            
976              
977             The default file extension is .xtb (for XML::Tidy binary).
978              
979             =head2 bexpand('BinaryInputFilename.xtb')
980              
981             The bexpand() member function reads a binary file which was
982             previously written from bcompress(). bexpand() is an XML::Tidy
983             object constructor like new() so it can be called like:
984              
985             my $xtbo = XML::Tidy->bexpand('BinaryInputFilename.xtb');
986              
987             =head2 prune()
988              
989             The prune() member function takes an XPath location to remove (along
990             with all attributes and child nodes) from the Tidy object. For
991             example, to remove all comments:
992              
993             $tidy_obj->prune('//comment()');
994              
995             or to remove the third baz (XPath indexing is 1-based):
996              
997             $tidy_obj->prune('/foo/bar/baz[3]');
998              
999             Pruning your XML tree is a form of tidying too so it snuck in here. =)
1000              
1001             =head2 write()
1002              
1003             The write() member function can take an optional filename parameter
1004             to write out any changes to the Tidy object. If no parameters
1005             are given, write() overwrites the original XML document file (if
1006             a 'filename' parameter was given to the constructor).
1007              
1008             write() will croak() if no filename can be found to write to.
1009              
1010             write() can also take a secondary parameter which specifies an XPath
1011             location to be written out as the new root element instead of the
1012             Tidy object's root. Only the first matching element is written.
1013              
1014             =head2 toString()
1015              
1016             The toString() member function is almost identical to write() except
1017             that it takes no filename (only an optional XPath location) and returns
1018             the equivalent XML string as a scalar. It is a little weird because normally only
1019             L<XML::XPath::Node> objects have a toString() member but I figure it
1020             makes sense to extend the same syntax to the parent object as well
1021             since it is a useful option.
1022              
1023             =head1 createNode Wrappers
1024              
1025             The following are just aliases to Node constructors. They'll work with
1026             just the unique portion of the node type as the member function name.
1027              
1028             =head2 e() or el() or elem() or createElement()
1029              
1030             wrapper for XML::XPath::Node::Element->new()
1031              
1032             =head2 a() or at() or attr() or createAttribute()
1033              
1034             wrapper for XML::XPath::Node::Attribute->new()
1035              
1036             =head2 c() or cm() or cmnt() or createComment()
1037              
1038             wrapper for XML::XPath::Node::Comment->new()
1039              
1040             =head2 t() or tx() or text() or createTextNode()
1041              
1042             wrapper for XML::XPath::Node::Text->new()
1043              
1044             =head2 p() or pi() or proc() or createProcessingInstruction()
1045              
1046             wrapper for XML::XPath::Node::PI->new()
1047              
1048             =head2 n() or ns() or nspc() or createNamespace()
1049              
1050             wrapper for XML::XPath::Node::Namespace->new()
1051              
1052             =head1 EXPORTED CONSTANTS
1053              
1054             XML::Tidy also exports the same node constants as L<XML::XPath::Node>
1055             (which correspond to DOM values). These include:
1056              
1057             =head2 UNKNOWN_NODE
1058              
1059             =head2 ELEMENT_NODE
1060              
1061             =head2 ATTRIBUTE_NODE
1062              
1063             =head2 TEXT_NODE
1064              
1065             =head2 CDATA_SECTION_NODE
1066              
1067             =head2 ENTITY_REFERENCE_NODE
1068              
1069             =head2 ENTITY_NODE
1070              
1071             =head2 PROCESSING_INSTRUCTION_NODE
1072              
1073             =head2 COMMENT_NODE
1074              
1075             =head2 DOCUMENT_NODE
1076              
1077             =head2 DOCUMENT_TYPE_NODE
1078              
1079             =head2 DOCUMENT_FRAGMENT_NODE
1080              
1081             =head2 NOTATION_NODE
1082              
1083             =head2 ELEMENT_DECL_NODE
1084              
1085             =head2 ATT_DEF_NODE
1086              
1087             =head2 XML_DECL_NODE
1088              
1089             =head2 ATTLIST_DECL_NODE
1090              
1091             =head2 NAMESPACE_NODE
1092              
1093             XML::Tidy also exports:
1094              
1095             =head2 STANDARD_XML_DECL
1096              
1097             which returns a reasonable default XML declaration string.
1098              
1099             =head1 CHANGES
1100              
1101             Revision history for Perl extension XML::Tidy:
1102              
1103             =over 2
1104              
1105             =item - 1.16 G6LM4EST Tue Jun 21 04:14:28:29 -0500 2016
1106              
1107             * stopped using my old fragile package generation and manually updated all distribution files (though L should let me generate much again)
1108              
1109             * updated license to GPLv3+
1110              
1111             * fixed 00pod.t and 01podc.t to eval the Test modules from issue and patch: HTTPS://RT.CPAN.Org/Public/Bug/Display.html?id=85592 (Thanks again MichielB.)
1112              
1113             * replaced all old '&&' with 'and' in POD
1114              
1115             =item - 1.14 G6JMERCY Sun Jun 19 14:27:12:34 -0500 2016
1116              
1117             * separated old PT from VERSION to fix non-numeric issue: HTTPS://RT.CPAN.Org/Public/Bug/Display.html?id=56073 (Thanks to Slaven.)
1118              
1119             * removed Unicode from POD but added encoding utf8 anyway to pass tests and resolve issues: HTTPS://RT.CPAN.Org/Public/Bug/Display.html?id=92434 and HTTPS://RT.CPAN.Org/Public/Bug/Display.html?id=85592 (Thanks to Sudhanshu and MichielB.)
1120              
1121             =item - 1.12.B55J2qn Thu May 5 19:02:52:49 2011
1122              
1123             * made "1.0" float binarize as float again, rather than just "1" int
1124              
1125             * cleaned up POD and fixed EXPORTED CONSTANTS heads blocking together
1126              
1127             =item - 1.10.B52FpLx Mon May 2 15:51:21:59 2011
1128              
1129             * added tests for undefined non-standard XML declaration to suppress warnings
1130              
1131             =item - 1.8.B2AMvdl Thu Feb 10 22:57:39:47 2011
1132              
1133             * aligned .t code
1134              
1135             * added test for newline before -r to try to resolve: HTTPS://RT.CPAN.Org/Ticket/Display.html?id=65471 (Thanks, Leandro.)
1136              
1137             * fixed off-by-one error when new gets a readable (non-newline) filename (that's not "filename" without a pre-'filename' param) to resolve: HTTPS://RT.CPAN.Org/Ticket/Display.html?id=65151 (Thanks, Simone.)
1138              
1139             =item - 1.6.A7RJKwl Tue Jul 27 19:20:58:47 2010
1140              
1141             * added head2 POD for EXPORTED CONSTANTS to try to pass t/00podc.t
1142              
1143             =item - 1.4.A7QCvHw Mon Jul 26 12:57:17:58 2010
1144              
1145             * hacked a little test for non-UTF-8 decl str to resolve FrankGoss' need for ISO-8859-1 decl encoding to persist through tidying
1146              
1147             * made sure META.yml is being generated correctly for the CPAN
1148              
1149             * updated license to GPLv3
1150              
1151             =item - 1.2.75BACCB Fri May 11 10:12:12:11 2007
1152              
1153             * made "1.0" float binarize as just "1" int
1154              
1155             * made ints signed and bounds checked
1156              
1157             * added new('binary' => 'BinFilename.xtb') option
1158              
1159             =item - 1.2.54HJnFa Sun Apr 17 19:49:15:36 2005
1160              
1161             * fixed tidy() processing instruction stripping problem
1162              
1163             * added support for binary ints and floats in bcompress()
1164              
1165             * tightened up binary format and added pod
1166              
1167             =item - 1.2.54HDR1G Sun Apr 17 13:27:01:16 2005
1168              
1169             * added bcompress() and bexpand()
1170              
1171             * added compress() and expand()
1172              
1173             * added toString()
1174              
1175             =item - 1.2.4CKBHxt Mon Dec 20 11:17:59:55 2004
1176              
1177             * added exporting of XML::XPath::Node (DOM) constants
1178              
1179             * added node object creation wrappers (like LibXML)
1180              
1181             =item - 1.2.4CCJW4G Sun Dec 12 19:32:04:16 2004
1182              
1183             * added optional 'xpath_loc' => to prune()
1184              
1185             =item - 1.0.4CAJna1 Fri Dec 10 19:49:36:01 2004
1186              
1187             * added optional 'filename' => to write()
1188              
1189             =item - 1.0.4CAAf5B Fri Dec 10 10:41:05:11 2004
1190              
1191             * removed 2nd param from tidy() so that 1st param is just indent string
1192              
1193             * fixed pod errors
1194              
1195             =item - 1.0.4C9JpoP Thu Dec 9 19:51:50:25 2004
1196              
1197             * added xplc option to write()
1198              
1199             * added prune()
1200              
1201             =item - 1.0.4C8K1Ah Wed Dec 8 20:01:10:43 2004
1202              
1203             * inherited from XPath so that those methods can be called directly
1204              
1205             * original version (separating Tidy.pm from Merge.pm)
1206              
1207             =back
1208              
1209             =head1 INSTALL
1210              
1211             From the command shell, please run:
1212              
1213             `perl -MCPAN -e "install XML::Tidy"`
1214              
1215             or uncompress the package and run the standard:
1216              
1217             `perl Makefile.PL; make; make test; make install`
1218              
1219             =head1 FILES
1220              
1221             XML::Tidy requires:
1222              
1223             L<Carp> to allow errors to croak() from calling sub
1224              
1225             L<XML::XPath> to use XPath statements to query and update XML
1226              
1227             L<XML::XPath::XMLParser> to parse XML documents into XPath objects
1228              
1229             L<Math::BaseCnv> to handle base-64 indexing for compress() and expand()
1230              
1231             =head1 LICENSE
1232              
1233             Most source code should be Free!
1234             Code I have lawful authority over is and shall be!
1235             Copyright: (c) 2004-2016, Pip Stuart.
1236             Copyleft : This software is licensed under the GNU General Public License
1237             (version 3 or later). Please consult HTTP://GNU.Org/licenses/gpl-3.0.txt
1238             for important information about your freedom. This is Free Software: you
1239             are free to change and redistribute it. There is NO WARRANTY, to the
1240             extent permitted by law. See HTTP://FSF.Org for further information.
1241              
1242             =head1 AUTHOR
1243              
1244             Pip Stuart
1245              
1246             =cut