|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package XML::Minify;  | 
| 
2
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
2411016
 | 
 use 5.010000;  | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
403
 | 
    | 
| 
3
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
134
 | 
 use strict;  | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
494
 | 
    | 
| 
4
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
106
 | 
 use warnings;  | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1144
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = "1.00";  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
16464
 | 
 use XML::LibXML; # To be installed from CPAN : sudo cpanm XML::LibXML   | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1402441
 | 
    | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
192
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # CPAN rules !  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
3712
 | 
 use Exporter 'import';  | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
    | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39078
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT = qw(minify);  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = qw(minify);  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %do_not_remove_blanks;  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $we_have_infos_from_dtd;  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %opt = ();  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $doc;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $tree;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $root;  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $parser;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $output;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub traverse($$);  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub minify($%) {  | 
| 
28
 | 
143
 | 
 
 | 
 
 | 
  
143
  
 | 
  
0
  
 | 
66348
 | 
 	my $string = shift;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
308
 | 
 	%do_not_remove_blanks = ();  | 
| 
31
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
252
 | 
 	$we_have_infos_from_dtd = 0;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
143
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
374
 | 
 	if(not defined $string) {  | 
| 
35
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		return undef;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
139
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
366
 | 
 	if($string eq "") {  | 
| 
39
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		return "";  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
465
 | 
 	%opt = @_;  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Reinit output  | 
| 
45
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
269
 | 
 	$output = "";  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# remove_indent is an alias  | 
| 
48
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
318
 | 
 	if($opt{remove_indent}) {  | 
| 
49
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$opt{remove_spaces_line_start} = 1;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Accept "aggressive" and "agressive" (for people making typos... like me :D)  | 
| 
53
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
295
 | 
 	if($opt{agressive}) {  | 
| 
54
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		$opt{aggressive} = 1;  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Insane is more than destructive (and aggressive)   | 
| 
58
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
290
 | 
 	if($opt{insane}) {  | 
| 
59
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 		$opt{destructive} = 1;  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Destructive is more than aggressive  | 
| 
63
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
289
 | 
 	if($opt{destructive}) {  | 
| 
64
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 		$opt{aggressive} = 1;  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Aggressive but relatively soft   | 
| 
68
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
331
 | 
 	if($opt{aggressive}) {  | 
| 
69
 | 
35
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
124
 | 
 		(defined $opt{remove_empty_text}) or $opt{remove_empty_text} = 1;             # a bit aggressive  | 
| 
70
 | 
35
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
82
 | 
 		(defined $opt{remove_blanks_start}) or $opt{remove_blanks_start} = 1;         # aggressive  | 
| 
71
 | 
35
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
78
 | 
 		(defined $opt{remove_blanks_end}) or $opt{remove_blanks_end} = 1;             # aggressive  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Remove indent and pseudo invisible characters   | 
| 
75
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
456
 | 
 	if($opt{destructive}) {  | 
| 
76
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
61
 | 
 		(defined $opt{remove_spaces_line_start}) or $opt{remove_spaces_line_start} = 1;         # very aggressive  | 
| 
77
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 		(defined $opt{remove_spaces_line_end}) or $opt{remove_spaces_line_end} = 1;             # very aggressive  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Densify text nodes but clearly change your data    | 
| 
81
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
310
 | 
 	if($opt{insane}) {  | 
| 
82
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 		(defined $opt{remove_spaces_everywhere}) or $opt{remove_spaces_everywhere} = 1;         # very very aggressive  | 
| 
83
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 		(defined $opt{remove_cr_lf_everywhere}) or $opt{remove_cr_lf_everywhere} = 1;           # very very aggressive   | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Configurable with expand_entities  | 
| 
87
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
657
 | 
 	$parser = XML::LibXML->new(expand_entities => $opt{expand_entities});  | 
| 
88
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11304
 | 
 	$tree = $parser->parse_string($string);  | 
| 
89
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
84415
 | 
 	if($opt{process_xincludes}) {  | 
| 
90
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		$parser->process_xincludes($tree);  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1247
 | 
 	$root = $tree->getDocumentElement;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# I disable automatic xml declaration as :   | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# - It would be printed too late (after pi and subset) and produce broken output  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# - I want to have full control on it  | 
| 
98
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2290
 | 
 	$XML::LibXML::skipXMLDeclaration = 1;  | 
| 
99
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1284
 | 
 	$doc = XML::LibXML::Document->new();  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Configurable with no_prolog : do not put prolog (a bit aggressive for readers)   | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# version=1.0 encoding=UTF-8 : choose values  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # TODO : standalone ?  | 
| 
104
 | 
137
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
621
 | 
 	my $version = $opt{version} // "1.0";  | 
| 
105
 | 
137
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
456
 | 
 	my $encoding = $opt{encoding} // "UTF-8";  | 
| 
106
 | 
137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
407
 | 
 	$opt{no_prolog} or $output  .= "";  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
208
 | 
 	my $rootnode;  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Parsing first level   | 
| 
112
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
487
 | 
 	foreach my $flc ($tree->childNodes()) {  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
160
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2994
 | 
 		if(($flc->nodeType eq XML_DTD_NODE) or ($flc->nodeType eq XML_DOCUMENT_TYPE_NODE)) { # second is synonym but deprecated  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_dtd   | 
| 
116
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
311
 | 
 			my $str = $flc->toString();  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# alternative : my $internaldtd = $tree->internalSubset(); my $str = $internaldtd->toString();  | 
| 
118
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
955
 | 
 			$str =~ s/\R*//g;  | 
| 
119
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
54
 | 
 			$opt{keep_dtd} and $output .= $str;  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ELEMENT_DECL  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ATTRIBUTE_DECL  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ENTITY_DECL   | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_NOTATION_DECL  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# I need to manually (yuck) parse the node as XML::LibXML does not provide (wrap) such function  | 
| 
127
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
 			if($opt{ignore_dtd}) {  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Do not try to get infos from DTD  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			} else {  | 
| 
130
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 				foreach my $dc ($flc->childNodes()) {  | 
| 
131
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
109
 | 
 					if($dc->nodeType == XML_ELEMENT_DECL) {  | 
| 
132
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 						$we_have_infos_from_dtd = "We can remove empty text in leafs if not protected by DTD";  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						# The .* could appear weak (and match towards other ELEMENTS), but we are working on ONLY ONE   | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						# (because we are in one child node)  | 
| 
135
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
120
 | 
 						if($dc->toString() =~ //) {  | 
| 
136
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 							$do_not_remove_blanks{$1} = "Not ignorable due to DTD declaration !";  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						}  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					}  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Some notes :   | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If I iterate over attributes of the childs of DTD (so ELEMENT, ATTLIST etc..) I get a segfault  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Probable bug from XML::LibXML similar to https://rt.cpan.org/Public/Bug/Display.html?id=71076  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If I try to access the content of XML_ENTITY_REF_DECL with getValue I get correct result, but on XML_ELEMENT_DECL I get empty string  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Seems like there's no function to play with DTD  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# I guess we need to write the perl binding for xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, const xmlChar * name)  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If I try to iterate over childNodes, I never see XML_NOTATION_DECL (why?!)  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# One word about DTD and XML::LibXML :   | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# DTD validation works like a charm of course...   | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# But reading from one xml and set to another with experimental function seems just broken or works very weirdly  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Segfault when reading big external subset, weird message "can't import dtd" when trying to add DTD...  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($flc->nodeType eq XML_PI_NODE) {  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_pi  | 
| 
159
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
157
 | 
 			my $str = $flc->toString();  | 
| 
160
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 			$opt{keep_pi} and $output .= $str;  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($flc->nodeType eq XML_COMMENT_NODE) {  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_comments  | 
| 
163
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $str = $flc->toString();  | 
| 
164
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$opt{keep_comments} and $output .= $str;  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($flc->nodeType eq XML_ELEMENT_NODE) { # Actually document node as if we do getDocumentNode  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# "main" tree, only one (parser is protecting us)  | 
| 
167
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
337
 | 
 			$rootnode = traverse($root, $doc);  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ATTRIBUTE_NODE  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_TEXT_NODE  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ENTITY_REF_NODE  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_COMMENT_NODE  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_CDATA_SECTION_NODE  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Ignore   | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_XINCLUDE_START  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_XINCLUDE_END  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Will stay hidden in any case  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_NAMESPACE_DECL  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Not Applicable   | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_DOCUMENT_NODE   | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_DOCUMENT_FRAG_NODE  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_HTML_DOCUMENT_NODE  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# What is it ?  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# XML_ENTITY_NODE  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} else {  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Should I print these unattended things ?  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Should it be configurable ?  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ELEMENT_NODE            => 1  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :  or   | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ATTRIBUTE_NODE          => 2  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :   | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_TEXT_NODE               => 3  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : This is a piece of text  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_CDATA_SECTION_NODE      => 4  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : John Smith]]>  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# CDATA section (not for parsers)  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ENTITY_REF_NODE         => 5  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Entities like &entity;  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ENTITY_NODE             => 6  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_PI_NODE                 => 7   | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Processing Instructions like    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_COMMENT_NODE            => 8  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Comments like   | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_DOCUMENT_NODE           => 9  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# The document itself  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_DOCUMENT_TYPE_NODE      => 10  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : Deprecated, use XML_DOCUMENT_TYPE_NODE  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_DOCUMENT_FRAG_NODE      => 11  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : Never read, for use, should be created as element node  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_NOTATION_NODE           => 12  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :  seems not working  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_HTML_DOCUMENT_NODE      => 13  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :   | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# In HTML context, for us, should be treated as a document node  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_DTD_NODE                => 14  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : 
 | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ELEMENT_DECL            => 15  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :   | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ATTRIBUTE_DECL          => 16  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :   | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_ENTITY_DECL             => 17  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. : Entity">  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_NAMESPACE_DECL          => 18  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# E.G. :    | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_XINCLUDE_START          => 19  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  if we process includes  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# XML_XINCLUDE_END            => 20  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#  if we process includes  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
256
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
749133
 | 
 	$doc->setDocumentElement($rootnode);  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12926
 | 
 	$output .= $doc->toString();  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21196
 | 
 	return $output;  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Traverse the document  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub traverse($$) {  | 
| 
265
 | 
111420
 | 
 
 | 
 
 | 
  
111420
  
 | 
  
0
  
 | 
145926
 | 
         my $node = shift;  | 
| 
266
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
133121
 | 
         my $outnode = shift;  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
272193
 | 
 	my $name = $node->getName();  | 
| 
270
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
382332
 | 
 	my $newnode = $doc->createElement($name);  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
111420
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
233816
 | 
 	if($outnode) {  | 
| 
273
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
618111
 | 
 		$outnode->addChild($newnode);  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
220763
 | 
 	$outnode = $newnode;  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1070139
 | 
         my @as = $node->attributes ;  | 
| 
279
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
665065
 | 
         foreach my $a (@as) {   | 
| 
280
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1593
 | 
                 $outnode->setAttribute($a->nodeName, $a->value);   | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
283
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
184053
 | 
         foreach my $child ($node->childNodes) {  | 
| 
284
 | 
111820
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3153016
 | 
 		if($child->nodeType eq XML_TEXT_NODE) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1592
 | 
 			my $str = $child->data;  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
288
 | 
473
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2154
 | 
 			if($do_not_remove_blanks{$child->parentNode->getName()}) {  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# DO NOT REMOVE, PROTECTED BY DTD ELEMENT DECL	  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			} else {  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# All these substitutions aim to remove indentation that people tend to put in xml files...  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# ...Or just clean on demand (default behavior keeps these blanks)  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Blanks are several things like spaces, tabs, lf, cr, vertical space...  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_blanks_start : remove extra space/lf/cr at the start of the string  | 
| 
298
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1556
 | 
 				$opt{remove_blanks_start} and $str =~ s/\A\s*//g;  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_blanks_end : remove extra space/lf/cr at the end of the string  | 
| 
300
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1414
 | 
 				$opt{remove_blanks_end} and $str =~ s/\s*\Z//g;  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Only CR and LF  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_cr_lf_everywhere : remove extra lf/cr everywhere  | 
| 
306
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1077
 | 
 				$opt{remove_cr_lf_everywhere} and $str =~ s/\R*//g;  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Spaces are 2 things : space and tabs  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_spaces_line_start : remove extra spaces or tabs at the start of each line  | 
| 
312
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1343
 | 
 				$opt{remove_spaces_line_start} and $str =~ s/^( |\t)*//mg;  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_spaces_line_end : remove extra spaces or tabs at the end of each line  | 
| 
314
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1267
 | 
 				$opt{remove_spaces_line_end} and $str =~ s/( |\t)*$//mg;  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_spaces_everywhere : remove extra spaces everywhere  | 
| 
316
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1114
 | 
 				$opt{remove_spaces_everywhere} and $str =~ s/( |\t)*//g;  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Configurable with remove_empty_text : remove text nodes that contains only space/lf/cr  | 
| 
319
 | 
467
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1053
 | 
 				$opt{remove_empty_text} and $str =~ s/\A\s*\Z//g;  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Let me explain, we could have text nodes basically everywhere, and we don't know if whitespaces are ignorable or not.   | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# As we want to minify the xml, we can't just keep all blanks, because it is generally indentation or spaces that could be ignored.  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Here is the strategy :   | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# A. If we have     we should keep it anyway (unless forced with argument)  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# B. If we have     we should *maybe* remove (in this case parent node contains more than one child node : text node + element node)  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# C. If we have     we should *maybe* remove it (in this case parent node contains more than one child node : text node + element node)  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# D. If we have     we should *maybe* remove it (in this case parent node contains more than one child node : text node + element node)  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# B, C, D : remove... unless explicitely declared in DTD as potential #PCDATA container OR unless it contains something...  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# *something* is a comment (not removed), some other text not empty, some cdata.  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Imagine     some text  then we don't want to remove spaces in the first text node  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Same with         | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# But if comments are removed then the latter piece of code will become   | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
335
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1205
 | 
 			my $empty = 1;  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
337
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4794
 | 
 			my $childbak = $child;  | 
| 
338
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
684
 | 
 			my @siblings = ();  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# We want to inspect siblings to the right until we reach an element  | 
| 
340
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1964
 | 
 			while($child = $child->nextSibling) {  | 
| 
341
 | 
396
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2426
 | 
 				if($child->nodeType eq XML_ELEMENT_NODE) {  | 
| 
342
 | 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
368
 | 
 					last;  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
344
 | 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
970
 | 
 				push @siblings, $child;  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
346
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
767
 | 
 			$child = $childbak;  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# We inspect to the left also  | 
| 
348
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3109
 | 
 			while($child = $child->previousSibling) {  | 
| 
349
 | 
400
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2311
 | 
 				if($child->nodeType eq XML_ELEMENT_NODE) {  | 
| 
350
 | 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
308
 | 
 					last;  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
352
 | 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
561
 | 
 				push @siblings, $child;  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Then we will look at each siling to check  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If it is an empty text node or not  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If it is something that will be removed or not  | 
| 
358
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
836
 | 
 			foreach my $child (@siblings) {  | 
| 
359
 | 
245
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
562
 | 
 				if($child->nodeType eq XML_TEXT_NODE) {  | 
| 
360
 | 
90
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
351
 | 
 					if($child->data =~ m/[^ \t\r\n]/) {  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						# Not empty  | 
| 
362
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 						$empty = 0;  | 
| 
363
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 						last;  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					}  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
366
 | 
241
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
671
 | 
 				if($child->nodeType eq XML_COMMENT_NODE and $opt{keep_comments}) {  | 
| 
367
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 					$empty = 0;  | 
| 
368
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 					last;  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
370
 | 
214
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
596
 | 
 				if($child->nodeType eq XML_CDATA_SECTION_NODE and $opt{keep_cdata}) {  | 
| 
371
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 					$empty = 0;  | 
| 
372
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 					last;  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
374
 | 
194
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
458
 | 
 				if($child->nodeType eq XML_PI_NODE and $opt{keep_pi}) {  | 
| 
375
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 					$empty = 0;  | 
| 
376
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 					last;  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Entity refs : we can choose to expand or not... but not to drop them  | 
| 
379
 | 
186
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
811
 | 
 				if($child->nodeType eq XML_ENTITY_REF_NODE) {  | 
| 
380
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 					$empty = 0;  | 
| 
381
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 					last;  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
746
 | 
 			$child = $childbak;  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Were all siblings empty ?   | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Are we alone ? (count child nodes from parent instead of filtered siblings)  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# If there is a DTD, probably we can remove even in the leafs (I'm not doing this at the moment)   | 
| 
391
 | 
473
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2860
 | 
 			if($we_have_infos_from_dtd) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Only trust DTD, no need to consider if we are in a leaf or node  | 
| 
393
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
157
 | 
 				if($do_not_remove_blanks{$child->parentNode->getName()}) {  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# DO NOT REMOVE, PROTECTED BY DTD ELEMENT DECL	  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				} else {  | 
| 
396
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
99
 | 
 					$str =~ s/\A\R*\Z//mg;  | 
| 
397
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
 					$str =~ s/\A\s*\Z//mg;  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
399
 | 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1120
 | 
 			} elsif($empty and @{$child->parentNode->childNodes()} > 1) {  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# Should it be configurable ?   | 
| 
401
 | 
256
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
68073
 | 
 				if($do_not_remove_blanks{$child->parentNode->getName()}) {  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# DO NOT REMOVE, PROTECTED BY DTD ELEMENT DECL	  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				} else {  | 
| 
404
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1331
 | 
 					$str =~ s/\A\R*\Z//mg;  | 
| 
405
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1088
 | 
 					$str =~ s/\A\s*\Z//mg;  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
408
 | 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3062
 | 
 			$outnode->appendText($str);  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($child->nodeType eq XML_ENTITY_REF_NODE) {  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configuration will be done above when creating document  | 
| 
411
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 			my $er = $doc->createEntityReference($child->getName());  | 
| 
412
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 			$outnode->addChild($er);   | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($child->nodeType eq XML_COMMENT_NODE) {  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_comments  | 
| 
415
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
 			my $com = $doc->createComment($child->getData());  | 
| 
416
 | 
32
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
163
 | 
 			$opt{keep_comments} and $outnode->addChild($com);   | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($child->nodeType eq XML_CDATA_SECTION_NODE) {  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_cdata  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#my $cdata = $child->cloneNode(1);  | 
| 
420
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
 			my $cdata = $doc->createCDATASection($child->getData());  | 
| 
421
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
118
 | 
 			$opt{keep_cdata} and $outnode->addChild($cdata);  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($child->nodeType eq XML_PI_NODE) {  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Configurable with keep_pi  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#my $pi = $child->cloneNode(1);  | 
| 
425
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
 			my $pi = $doc->createPI($child->nodeName, $child->getData());  | 
| 
426
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 			$opt{keep_pi} and $outnode->addChild($pi);  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} elsif($child->nodeType eq XML_ELEMENT_NODE) {  | 
| 
428
 | 
111283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177944
 | 
 			$outnode->addChild(traverse($child, $outnode));   | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}   | 
| 
431
 | 
111420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1593787
 | 
 	return $outnode;  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |