| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package simpleXMLParse; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Perl Module: simpleXMLParse |
|
4
|
|
|
|
|
|
|
# Author: Daniel Edward Graham |
|
5
|
|
|
|
|
|
|
# Copyright (c) Daniel Edward Graham 2008-2018 |
|
6
|
|
|
|
|
|
|
# Date: 01/01/2018 |
|
7
|
|
|
|
|
|
|
# License: LGPL 3.0 |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
require Exporter; |
|
11
|
1
|
|
|
1
|
|
4164
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
59
|
|
|
12
|
1
|
|
|
1
|
|
472
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
5307
|
|
|
|
1
|
|
|
|
|
82
|
|
|
13
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# This allows declaration use simpleXMLParse ':all'; |
|
16
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
|
17
|
|
|
|
|
|
|
# will save memory. |
|
18
|
|
|
|
|
|
|
%EXPORT_TAGS = ( 'all' => [ qw( |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
) ] ); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
@EXPORT = qw( |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
$VERSION = '3.1'; |
|
29
|
|
|
|
|
|
|
|
|
30
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
40
|
|
|
31
|
1
|
|
|
1
|
|
4
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
16
|
|
|
32
|
1
|
|
|
1
|
|
3
|
no warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
2639
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#use open ':encoding(utf8)'; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my @cdata; |
|
37
|
|
|
|
|
|
|
my $cdataInd = 0; |
|
38
|
|
|
|
|
|
|
my $MAXIND = 10000; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Constructor: read an XML file, parse it, and return the parser object.
#
# Accepted call forms:
#   simpleXMLParse->new($filename)
#   simpleXMLParse->new({ input => $filename, style => '2' })
#   simpleXMLParse->new(input => $filename, style => '2')
#
# Arguments:
#   input - path of the XML file to read.
#   style - when it is the string '2', attributes are folded into their
#           element's hash by _convertToStyle() after parsing.
#
# The object is a hash with two slots: "xml" (the decoded file text) and
# "data" (the structure built by _ParseXML, also returned by parse()).
#
# Croaks when the file cannot be opened.
sub new {
    my $class = shift;
    my %args = (@_ == 1) ? ((ref($_[0]) eq 'HASH') ? %{$_[0]} : (input => $_[0])) : @_;

    my $fn       = $args{"input"};
    my $altstyle = (defined $args{"style"} && $args{"style"} eq '2') ? 1 : 0;

    my $self = { "xml" => undef, "data" => undef };

    # Look at the first three raw bytes to detect a byte-order mark.
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and the handle cannot leak.
    open(my $probe, '<', $fn) or croak "Unable to process [$fn] $! \n";
    binmode($probe);
    my $head = '';
    read($probe, $head, 3);
    close($probe);
    $head = '' unless defined $head;

    my $is_utf16     = ($head =~ /\A(?:\xFE\xFF|\xFF\xFE)/) ? 1 : 0;
    my $has_utf8_bom = (!$is_utf16 && $head =~ /\A\xEF\xBB\xBF/) ? 1 : 0;

    # UTF-16 files are decoded by the UTF-16 layer; everything else is
    # read as UTF-8, with or without a BOM.
    my $layer = $is_utf16 ? '<:encoding(UTF-16)' : '<:encoding(UTF-8)';
    open(my $in, $layer, $fn) or croak "Unable to process [$fn] $!\n";
    my $text = join '', <$in>;
    close($in);

    if ($has_utf8_bom) {
        # The UTF-8 layer leaves the decoded BOM character in the text.
        $text =~ s/^\x{fffe}//g;
        $text =~ s/^\x{feff}//g;
    }

    $self->{"xml"}  = $text;
    $self->{"data"} = _ParseXML( $self->{"xml"}, $altstyle );

    # Two-argument bless so that subclasses get objects of their own class.
    my $ret = bless $self, (ref($class) || $class);
    if ($altstyle) {
        $ret->_convertToStyle();
    }

    # Keep the shared CDATA slot counter within its bound.
    $cdataInd = $cdataInd % $MAXIND;
    return $ret;
}
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Accessor: hand back whatever is stored in the object's "data" slot
# (new() fills it with the parsed structure).
sub parse {
    my ($self) = @_;
    return $self->{data};
}
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Walk the structure in $self->{"data"} and fold attribute entries into the
# element they belong to.
#
# Attribute entries are hash keys of the form "<attr>_<tag>_<n>_attr".
# "0x0" inside <attr> or <tag> stands for an underscore and is turned back
# into "_" here.  For each such key the value is moved under the sibling
# key <tag>:
#   - <tag> is an array:  into element <n>, which becomes a hash first if it
#     was plain text (non-blank text is kept under "content");
#   - <tag> is a hash:    into that hash;
#   - anything else:      <tag> becomes a hash (non-blank text kept under
#     "content") and receives the attribute.
# When the attribute name is already taken in an existing hash, a numeric
# suffix (1, 2, ...) is appended until a free key is found.
# The attribute entry itself is deleted afterwards.
sub _convertToStyle {
    my $self = shift;
    my @pending = ($self->{"data"});
    while (@pending) {
        my $node = pop @pending;
        my $kind = ref($node);

        if ($kind eq "ARRAY") {
            push @pending, @$node;
            next;
        }
        next unless $kind eq "HASH";

        # keys() is evaluated once, so deleting/adding entries below is safe.
        foreach my $key (keys %$node) {
            my ($attrnm, $tagnm, $cnt) = ($key =~ /^(.*?)\_(.*?)\_([0-9]+)\_attr$/);
            unless (defined $cnt) {
                # Not an attribute entry: descend into it later.
                push @pending, $node->{$key};
                next;
            }

            $attrnm =~ s/0x0/_/gs;
            $tagnm  =~ s/0x0/_/gs;
            my $target_kind = ref($node->{$tagnm});

            if ($target_kind eq "ARRAY") {
                my $elements = $node->{$tagnm};
                if (ref($elements->[$cnt]) eq '') {
                    my $text = $elements->[$cnt];
                    $elements->[$cnt] = { };
                    $elements->[$cnt]->{content} = $text if $text !~ /^\s*$/;
                }
                # undef suffix means "no suffix"; ++ turns it into 1, 2, ...
                my $suffix = undef;
                $suffix++ while defined($elements->[$cnt]->{$attrnm.$suffix});
                $elements->[$cnt]->{$attrnm.$suffix} = $node->{$key};
            }
            elsif ($target_kind eq "HASH") {
                my $suffix = undef;
                $suffix++ while defined($node->{$tagnm}->{$attrnm.$suffix});
                $node->{$tagnm}->{$attrnm.$suffix} = $node->{$key};
            }
            else {
                my $text = $node->{$tagnm};
                $node->{$tagnm} = { };
                $node->{$tagnm}->{content} = $text if $text !~ /^\s*$/;
                $node->{$tagnm}->{$attrnm} = $node->{$key};
            }
            delete $node->{$key};
        }
    }
}
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Store one CDATA payload in the file-scoped @cdata list and return the
# placeholder text ("0x0CDATA0x0<slot>0x0") that takes its place in the XML.
sub _cdatasub {
    my ($payload) = @_;
    my $slot = $cdataInd;
    $cdataInd++;
    $cdata[$slot] = $payload;
    return "0x0CDATA0x0" . $slot . "0x0";
}
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# Look up the CDATA payload stored in slot $slot of the file-scoped @cdata.
sub _cdatasubout {
    my ($slot) = @_;
    return $cdata[$slot];
}
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Remove the backslash in front of each regex metacharacter that _ParseXML
# escapes in a tag name.  The characters are processed one after another,
# in this fixed order, each with one global substitution:
#   \ * | $ ? { } ( ) + [ ] . ^ -
sub _unescp {
    my ($name) = @_;
    foreach my $ch ('\\', '*', '|', '$', '?', '{', '}', '(', ')', '+', '[', ']', '.', '^', '-') {
        # Pattern for "backslash followed by this character", taken literally.
        my $escaped = quotemeta('\\' . $ch);
        $name =~ s/$escaped/$ch/gs;
    }
    return $name;
}
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Convert the hexadecimal digits of an XML character reference ("41" from
# "&#x41;") into the character with that code point.
#
# The previous pack("H*") form produced the raw bytes spelled by the hex
# string, which is only right for exactly two digits: "A" (from "&#xA;")
# became "\xA0" and "263A" became the two characters "&:".
#
# Returns the empty string for an undefined or empty argument.
sub hconv {
    my $arg = $_[0];
    return '' unless defined $arg && length $arg;
    return chr(hex($arg));
}
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Decode XML entity and character references in $text and return the result.
#
# Handled: the five predefined entities (&lt; &gt; &amp; &apos; &quot;),
# hexadecimal references (&#xHH;) and decimal references (&#DD;).
# Anything else starting with "&" is left untouched.
#
# Decoding is done in a single pass so that the output of one replacement
# is never decoded again: "&amp;quot;" yields "&quot;", not a quote.
sub _entity {
    my $text = shift;
    my %named = (
        lt   => '<',
        gt   => '>',
        amp  => '&',
        apos => "'",
        quot => '"',
    );
    $text =~ s{&(?:(lt|gt|amp|apos|quot)|\#x([0-9a-fA-F]+)|\#([0-9]+));}{
          defined $1 ? $named{$1}
        : defined $2 ? chr(hex($2))
        :              chr($3)
    }ge;
    return $text;
}
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Split the attribute portion of a start tag into a flat list
# (name1, value1, name2, value2, ...).  Surrounding quotes are removed from
# each value and the value is passed through _entity().
sub _attr_pairs {
    my ($attr) = @_;
    my @pairs;
    while ( $attr =~ s/^[\s\n]*([^\s\=\n]+)\s*\=\s*(\".*?\"|\'.*?\')(.*)$/$3/gs ) {
        my ($name, $val) = ($1, $2);
        $val =~ s/^\'(.*)\'$/$1/gs;
        $val =~ s/^\"(.*)\"$/$1/gs;
        push @pairs, $name, _entity($val);
    }
    return @pairs;
}

# Store attribute pairs in $rethash under keys "<attr>_<tag>_<n>_attr".
# $firsttag is the regex-escaped tag name, $attrcnt the occurrence index of
# the element.  In alternate style, underscores in the attribute and tag
# names are written as "0x0" so the key can be split again later.
sub _store_attrs {
    my ($rethash, $firsttag, $attrcnt, $altstyle, @pairs) = @_;
    # Iterate on the list length, not on the truth of the name, so a
    # false-valued name cannot leave stale entries behind.
    while (@pairs) {
        my ($name, $value) = splice(@pairs, 0, 2);
        my $tag = $firsttag;
        if ($altstyle) {
            $name =~ s/_/0x0/gs;
            $tag  =~ s/_/0x0/gs;
        }
        $rethash->{ $name . "_" . _unescp($tag) . "_" . $attrcnt . "_attr" } = $value;
    }
    return;
}

# Recursively turn an XML fragment into a Perl structure.
#
# Arguments:
#   $xml      - XML text (document or fragment).
#   $altstyle - true when attribute keys must use the "0x0" underscore
#               encoding expected by _convertToStyle().
#
# Returns:
#   - for a fragment that does not start with an element: the text with
#     entities decoded and CDATA placeholders expanded, or {} when empty;
#   - otherwise a hash reference keyed by tag name.  A tag that occurs once
#     maps to its parsed content, a repeated tag maps to an array reference
#     of the parsed contents.  Attributes are stored next to the tag under
#     "<attr>_<tag>_<n>_attr" keys.
#
# Dies with an "Invalid XML ..." message when a closing tag needed to
# complete a nested same-name element cannot be found.
sub _ParseXML {
    my ($xml, $altstyle) = @_;

    # Park CDATA payloads behind placeholders, then drop comments,
    # processing instructions and DTD declarations.
    $xml =~ s/\<\!\[CDATA\[(.*?)\]\]\>/_cdatasub($1)/egs;
    $xml =~ s/\<\!\-\-.*?\-\-\>//gs;
    $xml =~ s/\<\?xml.*?\?\>//gs;
    $xml =~ s/\<\?[^\>]*?\?\>//gs;
    $xml =~ s/\<\!\-\-[^\>]*?\-\-\>//gs;
    $xml =~ s/\<\!ELEMENT[^\>]*?\>//gs;
    $xml =~ s/\<\!ENTITY[^\>]*?\>//gs;
    $xml =~ s/\<\!ATTLIST[^\>]*?\>//gs;
    $xml =~ s/\<\!DOCTYPE[^\>]*?\>//gs;

    my $rethash = {};
    my @retarr;

    # $tagname is the literal name of the first element; $firsttag is the
    # same name with regex metacharacters backslash-escaped, for use inside
    # patterns.  Text spliced back into the XML must use $tagname.
    my $tagname = $xml;
    $tagname =~ s/^[\s\n]*\<([^\s\>\n\/]*).*$/$1/gs;
    my $firsttag = $tagname;
    $firsttag =~ s/([\\\*\|\$\?\{\}\(\)\+\[\]\.\^\-])/\\$1/gs;

    my ( $attr, $innerxml, $xmlfragment );
    if ( $xml =~ /^[\s\n]*\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s ) {
        # <tag ...> ... </tag> rest
        ( $attr, $innerxml, $xmlfragment ) = ( $1, $2, $3 );
        $attr =~ s/\>$//gs;
    }
    elsif ( $xml =~ /^[\s\n]*\<${firsttag}(\/\>|[\s\n][^\>]*\/\>)(.*)$/s ) {
        # <tag ... /> rest
        ( $attr, $innerxml, $xmlfragment ) = ( $1, "", $2 );
        $attr =~ s/\/\>$//gs;
    }
    else {
        # No leading element: this is leaf text.
        if (!ref($xml)) {
            $xml = _entity($xml);
            $xml =~ s/0x0CDATA0x0(\d+?)0x0/_cdatasubout($1)/egs;
        }
        return {} if $xml eq '';
        return $xml;
    }

    # The non-greedy match above stops at the first closing tag.  For every
    # same-name start tag inside the content, that closing tag belonged to
    # the nested element, so put it back and pull in text up to the next
    # closing tag.
    my $ixml = $innerxml;
    while ($ixml =~ /^.*?\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)$/s) {
        $ixml = $2;
        $innerxml .= "</${tagname}>";
        if ($xmlfragment =~ /^(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s) {
            my $ix = $1;
            $innerxml .= $ix;
            $ixml .= $ix;
            $xmlfragment = $2;
        } else {
            die "Invalid XML innerxml: $innerxml\nixml: $ixml\nxmlfragment: $xmlfragment\n";
        }
    }

    my $nextparse = _ParseXML($innerxml, $altstyle);
    $rethash->{ _unescp($firsttag) } = $nextparse;

    my $attrcnt = 0;
    _store_attrs($rethash, $firsttag, $attrcnt, $altstyle, _attr_pairs($attr));
    $attrcnt++;

    # Collect further elements with the same tag name from the remainder.
    my $retflag = 0;
    my ( $xmlfragment1, $xmlfragment2 );
    while (1) {
        if ( $xmlfragment =~
            /^(.*?)\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s )
        {
            ( $xmlfragment1, $attr, $innerxml, $xmlfragment2 ) = ( $1, $2, $3, $4 );
        }
        elsif ( $xmlfragment =~ /^(.*?)\<${firsttag}(\/\>|[\s\n][^\>]*\/\>)(.*)$/s ) {
            ( $xmlfragment1, $attr, $innerxml, $xmlfragment2 ) = ( $1, $2, "", $3 );
        }
        else {
            last;
        }

        # First repeat found: the element parsed above becomes entry 0.
        push @retarr, $nextparse unless $retflag;
        $retflag = 1;

        $attr =~ s/\/\>$//gs;
        $attr =~ s/\>$//gs;

        # Count start and end tags in the text before the match to find out
        # whether the match sits inside another, still open element.
        my ( %opening, %closing );
        my $frag = $xmlfragment1;
        while ($frag =~ /^(.*?)\<([^\s\n\/\>]+)(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*)$/s) {
            my $tg = $2;
            $frag = $4;
            $opening{$tg}++;
        }
        $frag = $xmlfragment1;
        while ($frag =~ /^(.*?)\<\/([^\s\n\>]+)[\s\n]*\>(.*)$/s) {
            my $tg = $2;
            $frag = $3;
            $closing{$tg}++;
        }
        $frag = $xmlfragment1;
        while ($frag =~ /^(.*?)\<([^\s\n\/\>]+)[^\>]*?\/\>(.*)$/s) {
            my $tg = $2;
            $frag = $3;
            $opening{$tg}++;
            $closing{$tg}++;
        }

        my $nested = 0;
        foreach my $k (keys %opening) {
            if ($opening{$k} > $closing{$k}) {
                $nested = 1;
                last;
            }
        }
        if ($nested) {
            # Not a sibling: rename it to "<tag>0x0" so this loop skips it;
            # the name is restored before the remainder is parsed.
            $xmlfragment = $xmlfragment1 . "<${tagname}0x0 ${attr}>${innerxml}</${tagname}0x0>". $xmlfragment2;
            next;
        }

        # Same nested-same-name completion as for the first element.
        my $ixml = $innerxml;
        while ($ixml =~ /.*?\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)$/s) {
            $ixml = $2;
            $innerxml .= "</${tagname}>";
            if ($xmlfragment2 =~ /(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s) {
                my $ix = $1;
                $innerxml .= $ix;
                $ixml .= $ix;
                $xmlfragment2 = $2;
            } else {
                die "Invalid XML";
            }
        }

        # Remove the consumed element from the remainder.
        $xmlfragment = $xmlfragment1 . $xmlfragment2;

        _store_attrs($rethash, $firsttag, $attrcnt, $altstyle, _attr_pairs($attr));
        $attrcnt++;

        $nextparse = _ParseXML($innerxml, $altstyle);
        push @retarr, $nextparse;
    }

    if (@retarr) {
        $rethash->{ _unescp($firsttag) } = (@retarr == 1) ? $retarr[0] : \@retarr;
    }

    # Give deferred nested elements their real name back, then parse what
    # is left and merge its entries into the result.
    $xmlfragment =~ s/${firsttag}0x0/${tagname}/gs;
    my $remainderparse = _ParseXML($xmlfragment, $altstyle);
    if ( ref($remainderparse) eq "HASH" ) {
        foreach my $key ( keys %{$remainderparse} ) {
            my $plain = _unescp($key);
            $rethash->{$plain} = $remainderparse->{$plain};
        }
    }

    if ( keys %{$rethash} ) {
        return $rethash;
    }
    else {
        return {};
    }
}
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
1; |
|
409
|
|
|
|
|
|
|
__END__ |