| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# Petal::Canonicalizer::XHTML - Builds an XHTML canonical Petal file |
|
3
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
4
|
|
|
|
|
|
|
# Author: Jean-Michel Hiver |
|
5
|
|
|
|
|
|
|
# Description: This modules mainly implements the XML::Parser |
|
6
|
|
|
|
|
|
|
# 'Stream' interface. It receives XML events and builds Petal |
|
7
|
|
|
|
|
|
|
# canonical data, i.e. |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# Hello |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# Might be canonicalized to something like |
|
12
|
|
|
|
|
|
|
# |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
# Hello |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# |
|
17
|
|
|
|
|
|
|
# On top of that, Petal::Canonicalizer::XHTML will self close certain |
|
18
|
|
|
|
|
|
|
# XHTML specific tags, like or |
|
19
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
20
|
|
|
|
|
|
|
package Petal::Canonicalizer::XHTML; |
|
21
|
77
|
|
|
77
|
|
585
|
use strict; |
|
|
77
|
|
|
|
|
174
|
|
|
|
77
|
|
|
|
|
2362
|
|
|
22
|
77
|
|
|
77
|
|
439
|
use warnings; |
|
|
77
|
|
|
|
|
159
|
|
|
|
77
|
|
|
|
|
2604
|
|
|
23
|
77
|
|
|
77
|
|
416
|
use base qw /Petal::Canonicalizer::XML/; |
|
|
77
|
|
|
|
|
129
|
|
|
|
77
|
|
|
|
|
113270
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# http://lists.webarch.co.uk/pipermail/petal/2002-August/000074.html |
|
27
|
|
|
|
|
|
|
# here's a list[1] of empty elements (which need careful handling): |
|
28
|
|
|
|
|
|
|
# |
|
29
|
|
|
|
|
|
|
# ---8<--- |
|
30
|
|
|
|
|
|
|
# area |
|
31
|
|
|
|
|
|
|
# base |
|
32
|
|
|
|
|
|
|
# basefont |
|
33
|
|
|
|
|
|
|
# br |
|
34
|
|
|
|
|
|
|
# col |
|
35
|
|
|
|
|
|
|
# frame |
|
36
|
|
|
|
|
|
|
# hr |
|
37
|
|
|
|
|
|
|
# img |
|
38
|
|
|
|
|
|
|
# input |
|
39
|
|
|
|
|
|
|
# isindex |
|
40
|
|
|
|
|
|
|
# link |
|
41
|
|
|
|
|
|
|
# meta |
|
42
|
|
|
|
|
|
|
# param |
|
43
|
|
|
|
|
|
|
# --->8--- |
|
44
|
|
|
|
|
|
|
# |
|
45
|
|
|
|
|
|
|
# [1] http://www.w3.org/TR/html401/index/elements.html |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# $class->StartTag(); |
|
49
|
|
|
|
|
|
|
# ------------------- |
|
50
|
|
|
|
|
|
|
# Called for every start tag with a second parameter of the element type. |
|
51
|
|
|
|
|
|
|
# It will check for special PETAL attributes like petal:if, petal:loop, etc... |
|
52
|
|
|
|
|
|
|
# and rewrite the start tag into @Petal::Canonicalizer::XML::Result accordingly. |
|
53
|
|
|
|
|
|
|
# |
|
54
|
|
|
|
|
|
|
# For example |
|
55
|
|
|
|
|
|
|
# |
|
56
|
|
|
|
|
|
|
# |
|
57
|
|
|
|
|
|
|
# Is rewritten |
|
58
|
|
|
|
|
|
|
# ... |
|
59
|
|
|
|
|
|
|
sub StartTag |
|
60
|
|
|
|
|
|
|
{ |
|
61
|
116
|
|
|
116
|
0
|
338
|
Petal::load_code_generator(); # we will use it later |
|
62
|
|
|
|
|
|
|
|
|
63
|
116
|
|
|
|
|
167
|
my $class = shift; |
|
64
|
116
|
|
|
|
|
209
|
push @Petal::Canonicalizer::XML::NodeStack, {}; |
|
65
|
116
|
100
|
|
|
|
363
|
return if ($class->_is_inside_content_or_replace()); |
|
66
|
|
|
|
|
|
|
|
|
67
|
112
|
|
|
|
|
171
|
my $tag = $_; |
|
68
|
112
|
|
|
|
|
599
|
($tag) = $tag =~ /^<\s*((?:\w|\:|\-)*)/; |
|
69
|
112
|
|
|
|
|
337
|
my $att = { %_ }; |
|
70
|
|
|
|
|
|
|
|
|
71
|
112
|
|
|
|
|
505
|
$class->_use_macro ($tag, $att); |
|
72
|
112
|
|
|
|
|
341
|
$class->_on_error ($tag, $att); |
|
73
|
112
|
|
|
|
|
333
|
$class->_define ($tag, $att); |
|
74
|
112
|
|
|
|
|
345
|
$class->_define_slot ($tag, $att); |
|
75
|
112
|
|
|
|
|
384
|
$class->_condition ($tag, $att); |
|
76
|
112
|
|
|
|
|
364
|
$class->_repeat ($tag, $att); |
|
77
|
112
|
100
|
66
|
|
|
302
|
$class->_is_xinclude ($tag) and $class->_xinclude ($tag, $att) and return; |
|
78
|
111
|
|
|
|
|
342
|
$class->_replace ($tag, $att); |
|
79
|
|
|
|
|
|
|
|
|
80
|
111
|
|
|
|
|
242
|
my $petal = quotemeta ($Petal::NS); |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# if a petal:replace attribute was set, then at this point _is_inside_content_or_replace() |
|
83
|
|
|
|
|
|
|
# should return TRUE and this code should not be executed |
|
84
|
111
|
100
|
|
|
|
270
|
unless ($class->_is_inside_content_or_replace()) |
|
85
|
|
|
|
|
|
|
{ |
|
86
|
|
|
|
|
|
|
# for every attribute which is not a petal: attribute, |
|
87
|
|
|
|
|
|
|
# we need to convert $variable into |
|
88
|
102
|
|
|
|
|
138
|
foreach my $key (keys %{$att}) |
|
|
102
|
|
|
|
|
398
|
|
|
89
|
|
|
|
|
|
|
{ |
|
90
|
53
|
100
|
|
|
|
368
|
next if ($key =~ /^$petal:/); |
|
91
|
30
|
|
|
|
|
82
|
my $text = $att->{$key}; |
|
92
|
30
|
|
|
|
|
44
|
my $token_re = $Petal::Hash::String::TOKEN_RE; |
|
93
|
30
|
|
|
|
|
341
|
my @vars = $text =~ /$token_re/gsm; |
|
94
|
30
|
|
|
|
|
74
|
my %vars = map { $_ => 1 } @vars; |
|
|
0
|
|
|
|
|
0
|
|
|
95
|
30
|
|
|
|
|
62
|
@vars = sort { length ($b) <=> length ($a) } keys %vars; |
|
|
0
|
|
|
|
|
0
|
|
|
96
|
30
|
|
|
|
|
62
|
foreach my $var (@vars) |
|
97
|
|
|
|
|
|
|
{ |
|
98
|
0
|
|
|
|
|
0
|
my $command = $var; |
|
99
|
0
|
|
|
|
|
0
|
$command =~ s/^\$//; |
|
100
|
0
|
|
|
|
|
0
|
$command =~ s/^\{//; |
|
101
|
0
|
|
|
|
|
0
|
$command =~ s/\}$//; |
|
102
|
0
|
|
|
|
|
0
|
$command = $class->_encode_backslash_semicolon ($command); |
|
103
|
0
|
|
|
|
|
0
|
$command = ""; |
|
104
|
0
|
|
|
|
|
0
|
$text =~ s/\Q$var\E/$command/g; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
30
|
|
|
|
|
83
|
$att->{$key} = $text; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
102
|
|
|
|
|
356
|
$class->_attributes ($tag, $att); |
|
110
|
|
|
|
|
|
|
|
|
111
|
102
|
|
|
|
|
208
|
my @att_str = (); |
|
112
|
102
|
|
|
|
|
131
|
foreach my $key (keys %{$att}) |
|
|
102
|
|
|
|
|
257
|
|
|
113
|
|
|
|
|
|
|
{ |
|
114
|
46
|
100
|
|
|
|
255
|
next if ($key =~ /^$petal:/); |
|
115
|
31
|
|
|
|
|
70
|
my $value = $att->{$key}; |
|
116
|
31
|
100
|
|
|
|
96
|
if ($value =~ /^<\?attr/) |
|
117
|
|
|
|
|
|
|
{ |
|
118
|
8
|
|
|
|
|
25
|
push @att_str, $value; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
else |
|
121
|
|
|
|
|
|
|
{ |
|
122
|
23
|
|
|
|
|
80
|
my $tokens = Petal::CodeGenerator->_tokenize (\$value); |
|
123
|
|
|
|
|
|
|
my @res = map { |
|
124
|
|
|
|
|
|
|
($_ =~ /$Petal::CodeGenerator::PI_RE/s) ? |
|
125
|
|
|
|
|
|
|
$_ : |
|
126
|
23
|
50
|
|
|
|
314
|
do { |
|
127
|
23
|
|
|
|
|
46
|
$_ =~ s/\&/&/g; |
|
128
|
23
|
|
|
|
|
36
|
$_ =~ s/\</g; |
|
129
|
23
|
|
|
|
|
35
|
$_ =~ s/\>/>/g; |
|
130
|
23
|
|
|
|
|
31
|
$_ =~ s/\"/"/g; |
|
131
|
23
|
|
|
|
|
64
|
$_; |
|
132
|
|
|
|
|
|
|
}; |
|
133
|
23
|
|
|
|
|
39
|
} @{$tokens}; |
|
|
23
|
|
|
|
|
50
|
|
|
134
|
23
|
|
|
|
|
106
|
push @att_str, $key . '="' . (join '', @res) . '"'; |
|
135
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
} |
|
137
|
102
|
|
|
|
|
258
|
my $att_str = join " ", @att_str; |
|
138
|
|
|
|
|
|
|
|
|
139
|
102
|
100
|
33
|
|
|
2137
|
if ( (uc ($tag) eq 'AREA') or |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
140
|
|
|
|
|
|
|
(uc ($tag) eq 'BASE') or |
|
141
|
|
|
|
|
|
|
(uc ($tag) eq 'BASEFONT') or |
|
142
|
|
|
|
|
|
|
(uc ($tag) eq 'BR') or |
|
143
|
|
|
|
|
|
|
(uc ($tag) eq 'COL') or |
|
144
|
|
|
|
|
|
|
(uc ($tag) eq 'FRAME') or |
|
145
|
|
|
|
|
|
|
(uc ($tag) eq 'HR') or |
|
146
|
|
|
|
|
|
|
(uc ($tag) eq 'IMG') or |
|
147
|
|
|
|
|
|
|
(uc ($tag) eq 'INPUT') or |
|
148
|
|
|
|
|
|
|
(uc ($tag) eq 'ISINDEX') or |
|
149
|
|
|
|
|
|
|
(uc ($tag) eq 'LINK') or |
|
150
|
|
|
|
|
|
|
(uc ($tag) eq 'META') or |
|
151
|
|
|
|
|
|
|
(uc ($tag) eq 'PARAM') ) |
|
152
|
|
|
|
|
|
|
{ |
|
153
|
7
|
50
|
|
|
|
22
|
if (defined $att->{"$petal:omit-tag"}) |
|
154
|
|
|
|
|
|
|
{ |
|
155
|
0
|
|
0
|
|
|
0
|
my $expression = $att->{"$petal:omit-tag"} || 'string:1'; |
|
156
|
0
|
|
|
|
|
0
|
$Petal::Canonicalizer::XML::NodeStack[$#Petal::Canonicalizer::XML::NodeStack]->{'omit-tag'} = $expression; |
|
157
|
0
|
0
|
0
|
|
|
0
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? |
|
158
|
|
|
|
|
|
|
"<$tag $att_str />" : |
|
159
|
|
|
|
|
|
|
"<$tag />"; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
else |
|
162
|
|
|
|
|
|
|
{ |
|
163
|
7
|
100
|
66
|
|
|
41
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? "<$tag $att_str />" : "<$tag />"; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
else |
|
167
|
|
|
|
|
|
|
{ |
|
168
|
95
|
100
|
|
|
|
352
|
if (defined $att->{"$petal:omit-tag"}) |
|
169
|
|
|
|
|
|
|
{ |
|
170
|
3
|
|
100
|
|
|
13
|
my $expression = $att->{"$petal:omit-tag"} || 'string:1'; |
|
171
|
3
|
|
|
|
|
8
|
$Petal::Canonicalizer::XML::NodeStack[$#Petal::Canonicalizer::XML::NodeStack]->{'omit-tag'} = $expression; |
|
172
|
3
|
50
|
33
|
|
|
23
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? |
|
173
|
|
|
|
|
|
|
"<$tag $att_str>" : |
|
174
|
|
|
|
|
|
|
"<$tag>"; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
else |
|
177
|
|
|
|
|
|
|
{ |
|
178
|
92
|
100
|
66
|
|
|
536
|
push @Petal::Canonicalizer::XML::Result, (defined $att_str and $att_str) ? "<$tag $att_str>" : "<$tag>"; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
|
|
182
|
102
|
|
|
|
|
413
|
$class->_content ($tag, $att); |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# $class->EndTag(); |
|
188
|
|
|
|
|
|
|
# ----------------- |
|
189
|
|
|
|
|
|
|
# Called for every end tag with a second parameter of the element type. |
|
190
|
|
|
|
|
|
|
# It will check in the @Petal::Canonicalizer::XML::NodeStack to see if this end-tag also needs to close |
|
191
|
|
|
|
|
|
|
# some 'condition' or 'repeat' statements, i.e. |
|
192
|
|
|
|
|
|
|
# |
|
193
|
|
|
|
|
|
|
# |
|
194
|
|
|
|
|
|
|
# |
|
195
|
|
|
|
|
|
|
# Could be rewritten |
|
196
|
|
|
|
|
|
|
# |
|
197
|
|
|
|
|
|
|
# |
|
198
|
|
|
|
|
|
|
# |
|
199
|
|
|
|
|
|
|
# If the starting LI used a loop, i.e. |
|
200
|
|
|
|
|
|
|
sub EndTag |
|
201
|
|
|
|
|
|
|
{ |
|
202
|
116
|
|
|
116
|
0
|
184
|
my $class = shift; |
|
203
|
116
|
100
|
|
|
|
289
|
return if ($class->_is_inside_content_or_replace ( 'endtag' )); |
|
204
|
108
|
|
|
|
|
575
|
my ($tag) = $_ =~ /^<\/\s*((?:\w|\:|\-)*)/; |
|
205
|
108
|
|
|
|
|
196
|
my $node = pop (@Petal::Canonicalizer::XML::NodeStack); |
|
206
|
|
|
|
|
|
|
|
|
207
|
108
|
100
|
|
|
|
279
|
return if ($class->_is_xinclude ($tag)); |
|
208
|
|
|
|
|
|
|
|
|
209
|
107
|
50
|
66
|
|
|
2399
|
if ( (not (defined $node->{replace} and $node->{replace})) and |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
210
|
|
|
|
|
|
|
(uc ($tag) ne 'AREA') and |
|
211
|
|
|
|
|
|
|
(uc ($tag) ne 'BASE') and |
|
212
|
|
|
|
|
|
|
(uc ($tag) ne 'BASEFONT') and |
|
213
|
|
|
|
|
|
|
(uc ($tag) ne 'BR') and |
|
214
|
|
|
|
|
|
|
(uc ($tag) ne 'COL') and |
|
215
|
|
|
|
|
|
|
(uc ($tag) ne 'FRAME') and |
|
216
|
|
|
|
|
|
|
(uc ($tag) ne 'HR') and |
|
217
|
|
|
|
|
|
|
(uc ($tag) ne 'IMG') and |
|
218
|
|
|
|
|
|
|
(uc ($tag) ne 'INPUT') and |
|
219
|
|
|
|
|
|
|
(uc ($tag) ne 'ISINDEX') and |
|
220
|
|
|
|
|
|
|
(uc ($tag) ne 'LINK') and |
|
221
|
|
|
|
|
|
|
(uc ($tag) ne 'META') and |
|
222
|
|
|
|
|
|
|
(uc ($tag) ne 'PARAM') ) |
|
223
|
|
|
|
|
|
|
{ |
|
224
|
95
|
100
|
|
|
|
249
|
if (defined $node->{'omit-tag'}) |
|
225
|
|
|
|
|
|
|
{ |
|
226
|
3
|
|
|
|
|
6
|
my $expression = $node->{'omit-tag'}; |
|
227
|
3
|
|
|
|
|
13
|
push @Petal::Canonicalizer::XML::Result, "$tag>"; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
else |
|
230
|
|
|
|
|
|
|
{ |
|
231
|
92
|
|
|
|
|
284
|
push @Petal::Canonicalizer::XML::Result, "$tag>"; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
107
|
|
100
|
|
|
358
|
my $repeat = $node->{repeat} || '0'; |
|
236
|
107
|
|
100
|
|
|
288
|
my $condition = $node->{condition} || '0'; |
|
237
|
107
|
|
50
|
|
|
446
|
my $define_slot = $node->{define_slot} || '0'; |
|
238
|
107
|
|
|
|
|
286
|
push @Petal::Canonicalizer::XML::Result, map { '' } 1 .. ($repeat+$condition+$define_slot); |
|
|
21
|
|
|
|
|
52
|
|
|
239
|
|
|
|
|
|
|
|
|
240
|
107
|
50
|
|
|
|
377
|
if (exists $node->{'on-error'}) |
|
241
|
|
|
|
|
|
|
{ |
|
242
|
0
|
|
|
|
|
|
my $expression = $node->{'on-error'}; |
|
243
|
0
|
|
|
|
|
|
push @Petal::Canonicalizer::XML::Result, ""; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
1; |