| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package dirtyRSS; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
6948
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
46
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
8547
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
@dirtyRSS::ISA = qw[Exporter]; |
|
9
|
|
|
|
|
|
|
@dirtyRSS::EXPORT = qw[&parse &disptree]; |
|
10
|
|
|
|
|
|
|
$dirtyRSS::VERSION = '0.3'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %htmlescapes = ( |
|
13
|
|
|
|
|
|
|
'quot' => 34, |
|
14
|
|
|
|
|
|
|
'amp' => 38, |
|
15
|
|
|
|
|
|
|
'apos' => 39, |
|
16
|
|
|
|
|
|
|
'lt' => 60, |
|
17
|
|
|
|
|
|
|
'gt' => 62, |
|
18
|
|
|
|
|
|
|
'nbsp' => 32, # Was 160, but we make it a normal space |
|
19
|
|
|
|
|
|
|
'iexcl' => 161, |
|
20
|
|
|
|
|
|
|
'cent' => 162, |
|
21
|
|
|
|
|
|
|
'pound' => 163, |
|
22
|
|
|
|
|
|
|
'curren' => 164, |
|
23
|
|
|
|
|
|
|
'yen' => 165, |
|
24
|
|
|
|
|
|
|
'brvbar' => 166, |
|
25
|
|
|
|
|
|
|
'sect' => 167, |
|
26
|
|
|
|
|
|
|
'uml' => 168, |
|
27
|
|
|
|
|
|
|
'copy' => 169, |
|
28
|
|
|
|
|
|
|
'ordf' => 170, |
|
29
|
|
|
|
|
|
|
'laquo' => 171, |
|
30
|
|
|
|
|
|
|
'not' => 172, |
|
31
|
|
|
|
|
|
|
'shy' => 173, |
|
32
|
|
|
|
|
|
|
'reg' => 174, |
|
33
|
|
|
|
|
|
|
'macr' => 175, |
|
34
|
|
|
|
|
|
|
'deg' => 176, |
|
35
|
|
|
|
|
|
|
'plusmn' => 177, |
|
36
|
|
|
|
|
|
|
'sup2' => 178, |
|
37
|
|
|
|
|
|
|
'sup3' => 179, |
|
38
|
|
|
|
|
|
|
'acute' => 180, |
|
39
|
|
|
|
|
|
|
'micro' => 181, |
|
40
|
|
|
|
|
|
|
'para' => 182, |
|
41
|
|
|
|
|
|
|
'middot' => 183, |
|
42
|
|
|
|
|
|
|
'cedil' => 184, |
|
43
|
|
|
|
|
|
|
'sup1' => 185, |
|
44
|
|
|
|
|
|
|
'ordm' => 186, |
|
45
|
|
|
|
|
|
|
'raquo' => 187, |
|
46
|
|
|
|
|
|
|
'frac14' => 188, |
|
47
|
|
|
|
|
|
|
'frac12' => 189, |
|
48
|
|
|
|
|
|
|
'frac34' => 190, |
|
49
|
|
|
|
|
|
|
'iquest' => 191, |
|
50
|
|
|
|
|
|
|
'agrave' => 192, |
|
51
|
|
|
|
|
|
|
'aacute' => 193, |
|
52
|
|
|
|
|
|
|
'acirc' => 194, |
|
53
|
|
|
|
|
|
|
'atilde' => 195, |
|
54
|
|
|
|
|
|
|
'auml' => 196, |
|
55
|
|
|
|
|
|
|
'aring' => 197, |
|
56
|
|
|
|
|
|
|
'aelig' => 198, |
|
57
|
|
|
|
|
|
|
'ccedil' => 199, |
|
58
|
|
|
|
|
|
|
'egrave' => 200, |
|
59
|
|
|
|
|
|
|
'eacute' => 201, |
|
60
|
|
|
|
|
|
|
'ecirc' => 202, |
|
61
|
|
|
|
|
|
|
'euml' => 203, |
|
62
|
|
|
|
|
|
|
'igrave' => 204, |
|
63
|
|
|
|
|
|
|
'iacute' => 205, |
|
64
|
|
|
|
|
|
|
'icirc' => 206, |
|
65
|
|
|
|
|
|
|
'iuml' => 207, |
|
66
|
|
|
|
|
|
|
'eth' => 208, |
|
67
|
|
|
|
|
|
|
'ntilde' => 209, |
|
68
|
|
|
|
|
|
|
'ograve' => 210, |
|
69
|
|
|
|
|
|
|
'oacute' => 211, |
|
70
|
|
|
|
|
|
|
'ocirc' => 212, |
|
71
|
|
|
|
|
|
|
'otilde' => 213, |
|
72
|
|
|
|
|
|
|
'ouml' => 214, |
|
73
|
|
|
|
|
|
|
'times' => 215, |
|
74
|
|
|
|
|
|
|
'oslash' => 216, |
|
75
|
|
|
|
|
|
|
'ugrave' => 217, |
|
76
|
|
|
|
|
|
|
'uacute' => 218, |
|
77
|
|
|
|
|
|
|
'ucirc' => 219, |
|
78
|
|
|
|
|
|
|
'uuml' => 220, |
|
79
|
|
|
|
|
|
|
'yacute' => 221, |
|
80
|
|
|
|
|
|
|
'thorn' => 222, |
|
81
|
|
|
|
|
|
|
'szlig' => 223, |
|
82
|
|
|
|
|
|
|
'agrave' => 224, |
|
83
|
|
|
|
|
|
|
'aacute' => 225, |
|
84
|
|
|
|
|
|
|
'acirc' => 226, |
|
85
|
|
|
|
|
|
|
'atilde' => 227, |
|
86
|
|
|
|
|
|
|
'auml' => 228, |
|
87
|
|
|
|
|
|
|
'aring' => 229, |
|
88
|
|
|
|
|
|
|
'aelig' => 230, |
|
89
|
|
|
|
|
|
|
'ccedil' => 231, |
|
90
|
|
|
|
|
|
|
'egrave' => 232, |
|
91
|
|
|
|
|
|
|
'eacute' => 233, |
|
92
|
|
|
|
|
|
|
'ecirc' => 234, |
|
93
|
|
|
|
|
|
|
'euml' => 235, |
|
94
|
|
|
|
|
|
|
'igrave' => 236, |
|
95
|
|
|
|
|
|
|
'iacute' => 237, |
|
96
|
|
|
|
|
|
|
'icirc' => 238, |
|
97
|
|
|
|
|
|
|
'iuml' => 239, |
|
98
|
|
|
|
|
|
|
'eth' => 240, |
|
99
|
|
|
|
|
|
|
'ntilde' => 241, |
|
100
|
|
|
|
|
|
|
'ograve' => 242, |
|
101
|
|
|
|
|
|
|
'oacute' => 243, |
|
102
|
|
|
|
|
|
|
'ocirc' => 244, |
|
103
|
|
|
|
|
|
|
'otilde' => 245, |
|
104
|
|
|
|
|
|
|
'ouml' => 246, |
|
105
|
|
|
|
|
|
|
'divide' => 247, |
|
106
|
|
|
|
|
|
|
'oslash' => 248, |
|
107
|
|
|
|
|
|
|
'ugrave' => 249, |
|
108
|
|
|
|
|
|
|
'uacute' => 250, |
|
109
|
|
|
|
|
|
|
'ucirc' => 251, |
|
110
|
|
|
|
|
|
|
'uuml' => 252, |
|
111
|
|
|
|
|
|
|
'yacute' => 253, |
|
112
|
|
|
|
|
|
|
'thorn' => 254, |
|
113
|
|
|
|
|
|
|
'yuml' => 255 |
|
114
|
|
|
|
|
|
|
); |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# These are typical HTML tags, which should be omitted. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
our %ignore_tags = ( |
|
119
|
|
|
|
|
|
|
'img' => 1, |
|
120
|
|
|
|
|
|
|
'a' => 1, |
|
121
|
|
|
|
|
|
|
'p' => 1, |
|
122
|
|
|
|
|
|
|
'br' => 1, |
|
123
|
|
|
|
|
|
|
'div' => 1, |
|
124
|
|
|
|
|
|
|
'span' => 1, |
|
125
|
|
|
|
|
|
|
'b' => 1, |
|
126
|
|
|
|
|
|
|
'i' => 1, |
|
127
|
|
|
|
|
|
|
'u' => 1, |
|
128
|
|
|
|
|
|
|
'body' => 1, |
|
129
|
|
|
|
|
|
|
'center' => 1, |
|
130
|
|
|
|
|
|
|
'code' => 1, |
|
131
|
|
|
|
|
|
|
'font' => 1, |
|
132
|
|
|
|
|
|
|
'form' => 1, |
|
133
|
|
|
|
|
|
|
'h1' => 1, |
|
134
|
|
|
|
|
|
|
'h2' => 1, |
|
135
|
|
|
|
|
|
|
'h3' => 1, |
|
136
|
|
|
|
|
|
|
'h4' => 1, |
|
137
|
|
|
|
|
|
|
'head' => 1, |
|
138
|
|
|
|
|
|
|
'hr' => 1, |
|
139
|
|
|
|
|
|
|
'html' => 1, |
|
140
|
|
|
|
|
|
|
'li' => 1, |
|
141
|
|
|
|
|
|
|
'ul' => 1, |
|
142
|
|
|
|
|
|
|
'ol' => 1, |
|
143
|
|
|
|
|
|
|
'pre' => 1, |
|
144
|
|
|
|
|
|
|
'style' => 1, |
|
145
|
|
|
|
|
|
|
'sub' => 1, |
|
146
|
|
|
|
|
|
|
'sup' => 1, |
|
147
|
|
|
|
|
|
|
'script' => 1, |
|
148
|
|
|
|
|
|
|
'small' => 1, |
|
149
|
|
|
|
|
|
|
'big' => 1, |
|
150
|
|
|
|
|
|
|
'table' => 1, |
|
151
|
|
|
|
|
|
|
'td' => 1, |
|
152
|
|
|
|
|
|
|
'tr' => 1, |
|
153
|
|
|
|
|
|
|
'th' => 1, |
|
154
|
|
|
|
|
|
|
'textarea'=> 1, |
|
155
|
|
|
|
|
|
|
'strong' => 1, |
|
156
|
|
|
|
|
|
|
'strike' => 1, |
|
157
|
|
|
|
|
|
|
'blockquote' => 1, |
|
158
|
|
|
|
|
|
|
); |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
our %ns = ( |
|
161
|
|
|
|
|
|
|
# RSS 2.0 tags |
|
162
|
|
|
|
|
|
|
'xml' => 'xml', |
|
163
|
|
|
|
|
|
|
'rss' => 'rss', |
|
164
|
|
|
|
|
|
|
'rdf' => 'rdf', |
|
165
|
|
|
|
|
|
|
'item' => 'item', |
|
166
|
|
|
|
|
|
|
'channel' => 'channel', |
|
167
|
|
|
|
|
|
|
'image' => 'image', |
|
168
|
|
|
|
|
|
|
'title' => 'title', |
|
169
|
|
|
|
|
|
|
'link' => 'link', |
|
170
|
|
|
|
|
|
|
'description' => 'description', |
|
171
|
|
|
|
|
|
|
'language' => 'language', |
|
172
|
|
|
|
|
|
|
'copyright' => 'copyright', |
|
173
|
|
|
|
|
|
|
'pubdate' => 'pubdate', |
|
174
|
|
|
|
|
|
|
'lastbuilddate'=> 'lastbuilddate', |
|
175
|
|
|
|
|
|
|
'category' => 'category', |
|
176
|
|
|
|
|
|
|
'generator' => 'generator', |
|
177
|
|
|
|
|
|
|
'ttl' => 'ttl', |
|
178
|
|
|
|
|
|
|
'url' => 'url', |
|
179
|
|
|
|
|
|
|
'width' => 'width', |
|
180
|
|
|
|
|
|
|
'height' => 'height', |
|
181
|
|
|
|
|
|
|
'version' => 'version', |
|
182
|
|
|
|
|
|
|
'encoding' => 'encoding', |
|
183
|
|
|
|
|
|
|
'guid' => 'guid', |
|
184
|
|
|
|
|
|
|
'enclosure' => 'enclosure', |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# RSS 1.0 tags translated to RSS 2.0 |
|
187
|
|
|
|
|
|
|
'subject' => 'category', |
|
188
|
|
|
|
|
|
|
'rights' => 'copyright', |
|
189
|
|
|
|
|
|
|
'modified' => 'lastbuilddate', |
|
190
|
|
|
|
|
|
|
'date' => 'pubdate', |
|
191
|
|
|
|
|
|
|
'resource' => 'resource', # 1.0 specific! |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Atom 1.0 tags translated to RSS 2.0 |
|
194
|
|
|
|
|
|
|
'feed' => 'channel', |
|
195
|
|
|
|
|
|
|
'summary' => 'description', |
|
196
|
|
|
|
|
|
|
'content' => 'description', |
|
197
|
|
|
|
|
|
|
'subtitle' => 'description', |
|
198
|
|
|
|
|
|
|
'lang' => 'language', |
|
199
|
|
|
|
|
|
|
'published' => 'pubdate', |
|
200
|
|
|
|
|
|
|
'updated' => 'lastbuilddate', |
|
201
|
|
|
|
|
|
|
'logo' => 'image', |
|
202
|
|
|
|
|
|
|
'entry' => 'item', |
|
203
|
|
|
|
|
|
|
'href' => 'link', |
|
204
|
|
|
|
|
|
|
); |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# Note that %specials refer to the *right* side of %ns, so only one |
|
207
|
|
|
|
|
|
|
# entry is needed for each functional tag or its alias |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# TRUE means array type |
|
210
|
|
|
|
|
|
|
our %specials = ( |
|
211
|
|
|
|
|
|
|
'item' => 1, |
|
212
|
|
|
|
|
|
|
'channel' => 1, |
|
213
|
|
|
|
|
|
|
'image' => 1, |
|
214
|
|
|
|
|
|
|
'xml' => 0, |
|
215
|
|
|
|
|
|
|
'rss' => 0, |
|
216
|
|
|
|
|
|
|
'rdf' => 0, |
|
217
|
|
|
|
|
|
|
); |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub parse { |
|
220
|
1
|
|
|
1
|
0
|
102
|
my ($in, $debug) = @_; |
|
221
|
|
|
|
|
|
|
|
|
222
|
1
|
|
|
|
|
5
|
$in =~ s///gs; # Remove comments |
|
223
|
|
|
|
|
|
|
|
|
224
|
1
|
|
|
|
|
90
|
my @segs = map { /^[ \n\r\t]*(.*?)[ \n\r\t]*$/s } ($in =~ /(|<[^>]+?>|[^<]+)/gs); |
|
|
90
|
|
|
|
|
309
|
|
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# Strip off CDATAs. Added a prefix space to avoid accidental tag hits |
|
227
|
1
|
50
|
|
|
|
13
|
@segs = map { /^$/s ? " $1" : $_ } @segs; |
|
|
90
|
|
|
|
|
172
|
|
|
228
|
|
|
|
|
|
|
|
|
229
|
1
|
|
|
|
|
11
|
@segs = grep { length > 0 } @segs; |
|
|
90
|
|
|
|
|
136
|
|
|
230
|
|
|
|
|
|
|
|
|
231
|
1
|
|
|
|
|
5
|
my @stack = (); |
|
232
|
1
|
|
|
|
|
2
|
my @valstack = (); |
|
233
|
1
|
|
|
|
|
3
|
my %tree = (); |
|
234
|
1
|
|
|
|
|
2
|
my $here = \%tree; |
|
235
|
1
|
|
|
|
|
2
|
my @parent = (); |
|
236
|
1
|
|
|
|
|
2
|
my $lastval = ""; |
|
237
|
|
|
|
|
|
|
|
|
238
|
1
|
|
|
|
|
3
|
foreach my $elem (@segs) { |
|
239
|
63
|
|
|
|
|
328
|
my ($modifier, $tag, $attr, $empty) = ($elem =~ /^<([!?\#]{0,1})[ \n\r\t]*([^ \n\r\t]*[^ \/\n\r\t])[ \n\r\t]*(.*?)[ \n\r\t]*(\/{0,1})>$/s); |
|
240
|
|
|
|
|
|
|
|
|
241
|
63
|
100
|
|
|
|
126
|
$empty = 1 if ($modifier); |
|
242
|
|
|
|
|
|
|
|
|
243
|
63
|
100
|
|
|
|
99
|
if (defined $tag) { |
|
244
|
45
|
|
|
|
|
61
|
$tag = lc $tag; # We're case-insensitive |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Note that the regex below removes "dc:"-like namespace prefices |
|
247
|
45
|
|
|
|
|
43
|
my $closing; |
|
248
|
45
|
|
|
|
|
185
|
($closing, $tag) = ($tag =~ /^(\/{0,1}).*?:{0,1}([^:]*)$/); |
|
249
|
|
|
|
|
|
|
|
|
250
|
45
|
50
|
|
|
|
125
|
if ($ignore_tags{$tag}) { |
|
251
|
0
|
|
|
|
|
0
|
htmltags($here, unescape($elem)); |
|
252
|
0
|
|
|
|
|
0
|
next; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
|
|
255
|
45
|
100
|
|
|
|
76
|
unless ($closing) { # Opening tags... |
|
256
|
23
|
|
|
|
|
35
|
push @stack, $tag; |
|
257
|
|
|
|
|
|
|
|
|
258
|
23
|
|
|
|
|
34
|
my $alias = $ns{$tag}; |
|
259
|
|
|
|
|
|
|
|
|
260
|
23
|
50
|
|
|
|
35
|
if (defined $alias) { |
|
261
|
23
|
|
|
|
|
37
|
push @valstack, $lastval; |
|
262
|
23
|
|
|
|
|
57
|
$lastval = ""; |
|
263
|
|
|
|
|
|
|
|
|
264
|
23
|
100
|
|
|
|
55
|
if (defined $specials{$alias}) { |
|
265
|
5
|
|
|
|
|
7
|
push @parent, $here; |
|
266
|
5
|
|
|
|
|
7
|
$here = {}; |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Note that attributes may pollute the parent hash. This is |
|
270
|
|
|
|
|
|
|
# necessary to support Atom 1.0 |
|
271
|
|
|
|
|
|
|
|
|
272
|
23
|
|
|
|
|
56
|
my @pairs = ($attr =~ /([^ \n\r\t]+?=\'[^\']*?\'|[^ \n\r\t]+?=\"[^\"]*?\"|[^ \n\r\t]+?=[^ \n\r\t]*)/g); |
|
273
|
|
|
|
|
|
|
|
|
274
|
23
|
|
|
|
|
117
|
foreach my $p (@pairs) { |
|
275
|
4
|
|
|
|
|
19
|
my ($k, $v) = ($p =~ /(.+?)=(.*)/); |
|
276
|
|
|
|
|
|
|
|
|
277
|
4
|
|
|
|
|
8
|
$k = lc $k; |
|
278
|
|
|
|
|
|
|
|
|
279
|
4
|
50
|
33
|
|
|
35
|
$v = $1 |
|
280
|
|
|
|
|
|
|
if (($v =~ /^\'(.*)\'$/s) || ($v =~ /^\"(.*)\"$/s)); |
|
281
|
|
|
|
|
|
|
|
|
282
|
4
|
|
|
|
|
16
|
($k) = ($k =~ /([^:]*)$/); # Remove namespace prefix if present |
|
283
|
|
|
|
|
|
|
|
|
284
|
4
|
|
|
|
|
9
|
my $alias = $ns{$k}; |
|
285
|
|
|
|
|
|
|
|
|
286
|
4
|
100
|
|
|
|
8
|
if (defined $alias) { |
|
287
|
3
|
|
|
|
|
8
|
$here->{$alias} = unescape($v); |
|
288
|
|
|
|
|
|
|
} else { |
|
289
|
1
|
50
|
|
|
|
7
|
warn "Ignored attribute $k=$v\n" |
|
290
|
|
|
|
|
|
|
if $debug; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
} else { |
|
294
|
0
|
0
|
|
|
|
0
|
warn "Ignored tag $tag\n" |
|
295
|
|
|
|
|
|
|
if $debug; |
|
296
|
|
|
|
|
|
|
} |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
45
|
100
|
100
|
|
|
154
|
if ($closing || $empty) { # Closing tags, or close an empty opening tag |
|
300
|
23
|
|
|
|
|
33
|
my $p = pop @stack; |
|
301
|
|
|
|
|
|
|
|
|
302
|
23
|
50
|
|
|
|
50
|
return "Bad XML tag nesting. Expected end tag for '$p', got '/$tag'" |
|
303
|
|
|
|
|
|
|
unless ($p eq $tag); |
|
304
|
|
|
|
|
|
|
|
|
305
|
23
|
|
|
|
|
40
|
my $alias = $ns{$tag}; |
|
306
|
|
|
|
|
|
|
|
|
307
|
23
|
50
|
|
|
|
42
|
if (defined $alias) { |
|
308
|
23
|
|
|
|
|
27
|
my $thislastval = $lastval; |
|
309
|
23
|
|
|
|
|
29
|
$lastval = pop @valstack; |
|
310
|
|
|
|
|
|
|
|
|
311
|
23
|
100
|
|
|
|
62
|
if (defined $specials{$alias}) { |
|
312
|
5
|
|
|
|
|
6
|
my $parent = pop @parent; |
|
313
|
|
|
|
|
|
|
|
|
314
|
5
|
100
|
|
|
|
34
|
if ($specials{$alias}) { # Array type |
|
315
|
3
|
50
|
33
|
|
|
27
|
$parent->{$alias} = [] |
|
316
|
|
|
|
|
|
|
unless ((ref $parent->{$alias}) && |
|
317
|
|
|
|
|
|
|
(ref $parent->{$alias}) eq 'ARRAY'); |
|
318
|
3
|
|
|
|
|
7
|
push @{$parent->{$alias}}, $here; |
|
|
3
|
|
|
|
|
8
|
|
|
319
|
|
|
|
|
|
|
} else { |
|
320
|
2
|
|
|
|
|
6
|
$parent->{$alias} = $here; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# UGLY HACK ALERT: |
|
324
|
|
|
|
|
|
|
# Just before leaving a tree node, we clean the 'description' |
|
325
|
|
|
|
|
|
|
# from possible HTML tags, and harvest the relevant values, |
|
326
|
|
|
|
|
|
|
# if applicable. This is because some feeds think that the |
|
327
|
|
|
|
|
|
|
# description should be rendered on a browser as is (cross |
|
328
|
|
|
|
|
|
|
# scripting, anybody?) |
|
329
|
|
|
|
|
|
|
|
|
330
|
5
|
100
|
|
|
|
20
|
$here->{'description'} =~ s/(<.*?>)/htmltags($here, $1)/ges |
|
|
0
|
|
|
|
|
0
|
|
|
331
|
|
|
|
|
|
|
if (defined $here->{'description'}); |
|
332
|
|
|
|
|
|
|
|
|
333
|
5
|
|
|
|
|
14
|
$here = $parent; |
|
334
|
|
|
|
|
|
|
} else { |
|
335
|
18
|
0
|
33
|
|
|
61
|
$here->{$alias} = unescape($thislastval) |
|
|
|
|
33
|
|
|
|
|
|
336
|
|
|
|
|
|
|
unless ((length($thislastval) == 0) && |
|
337
|
|
|
|
|
|
|
(defined $here->{$alias}) && |
|
338
|
|
|
|
|
|
|
(length $here->{$alias})); |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
} |
|
342
|
|
|
|
|
|
|
} else { |
|
343
|
18
|
50
|
|
|
|
42
|
$lastval = (length $lastval) ? "$lastval $elem" : $elem; |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} |
|
347
|
1
|
50
|
|
|
|
5
|
return("Bad XML nesting: There were unclosed tags at EOF") |
|
348
|
|
|
|
|
|
|
if (@stack); |
|
349
|
|
|
|
|
|
|
|
|
350
|
1
|
|
|
|
|
13
|
return \%tree; |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub htmltags { |
|
355
|
0
|
|
|
0
|
0
|
0
|
my ($here, $seg) = @_; |
|
356
|
|
|
|
|
|
|
|
|
357
|
0
|
|
|
|
|
0
|
my ($tag, $attr) = ($seg =~ /^<[ \n\r\t]*([^ \n\r\t]+)[ \n\r\t]*(.*?)[ \n\r\t]*>$/s); |
|
358
|
|
|
|
|
|
|
|
|
359
|
0
|
0
|
|
|
|
0
|
return "" unless (defined $tag); |
|
360
|
|
|
|
|
|
|
|
|
361
|
0
|
|
|
|
|
0
|
$tag = lc $tag; |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Respect HTML line breaks, even though the renderer won't |
|
364
|
0
|
0
|
0
|
|
|
0
|
return "\n" |
|
365
|
|
|
|
|
|
|
if (($tag eq 'p') || ($tag eq 'br')); |
|
366
|
|
|
|
|
|
|
|
|
367
|
0
|
0
|
0
|
|
|
0
|
if (($tag eq 'img') && !(defined $here->{'altimage'})) { |
|
|
|
0
|
0
|
|
|
|
|
|
368
|
0
|
|
|
|
|
0
|
my $new = {}; |
|
369
|
0
|
|
|
|
|
0
|
$here->{'altimage'} = $new; |
|
370
|
0
|
|
|
|
|
0
|
$here = $new; |
|
371
|
|
|
|
|
|
|
} elsif (($tag eq 'a') && !(defined $here->{'altlink'})) { |
|
372
|
0
|
|
|
|
|
0
|
my $new = {}; |
|
373
|
0
|
|
|
|
|
0
|
$here->{'altlink'} = $new; |
|
374
|
0
|
|
|
|
|
0
|
$here = $new; |
|
375
|
0
|
|
|
|
|
0
|
} else { return ""; } |
|
376
|
|
|
|
|
|
|
|
|
377
|
0
|
|
|
|
|
0
|
my @pairs = ($attr =~ /([^ \n\r\t]+?=\'[^\']*?\'|[^ \n\r\t]+?=\"[^\"]*?\"|[^ \n\r\t]+?=[^ \n\r\t]*)/g); |
|
378
|
|
|
|
|
|
|
|
|
379
|
0
|
|
|
|
|
0
|
foreach my $p (@pairs) { |
|
380
|
0
|
|
|
|
|
0
|
my ($k, $v) = ($p =~ /(.+?)=(.*)/); |
|
381
|
|
|
|
|
|
|
|
|
382
|
0
|
|
|
|
|
0
|
$k = lc $k; |
|
383
|
|
|
|
|
|
|
|
|
384
|
0
|
0
|
0
|
|
|
0
|
$v = $1 |
|
385
|
|
|
|
|
|
|
if (($v =~ /^\'(.*)\'$/s) || ($v =~ /^\"(.*)\"$/s)); |
|
386
|
|
|
|
|
|
|
|
|
387
|
0
|
|
|
|
|
0
|
$here->{$k} = $v; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
|
|
390
|
0
|
|
|
|
|
0
|
return ""; # This makes the function useful in substitutions |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub single_unescape { |
|
394
|
0
|
|
|
0
|
0
|
0
|
my ($ent) = @_; |
|
395
|
|
|
|
|
|
|
|
|
396
|
0
|
|
|
|
|
0
|
my $ord = $htmlescapes{lc($ent)}; |
|
397
|
0
|
0
|
|
|
|
0
|
return chr($ord) if defined $ord; |
|
398
|
0
|
|
|
|
|
0
|
return ""; # Conversion failed, return nothing |
|
399
|
|
|
|
|
|
|
} |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub unescape { |
|
402
|
|
|
|
|
|
|
# Note! Unicode characters are escaped to space! |
|
403
|
21
|
|
|
21
|
0
|
24
|
my ($x) = @_; |
|
404
|
|
|
|
|
|
|
# For now, we go wild, and convert all escape markers |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# Run twice, because of double-nested markups :-O |
|
407
|
21
|
|
|
|
|
50
|
for (my $i=0; $i<2; $i++) { |
|
408
|
42
|
|
|
|
|
48
|
$x =~ s/&(\w+);/single_unescape($1)/ge; |
|
|
0
|
|
|
|
|
0
|
|
|
409
|
42
|
0
|
|
|
|
48
|
$x =~ s/&\#(\d+);/chr($1 < 256 ? $1 : 32)/ge; |
|
|
0
|
|
|
|
|
0
|
|
|
410
|
42
|
0
|
|
|
|
87
|
$x =~ s/&\#x([0-9a-fA-F]+);/chr(hex($1) < 256 ? hex($1) : 32)/ige; |
|
|
0
|
|
|
|
|
0
|
|
|
411
|
|
|
|
|
|
|
} |
|
412
|
21
|
|
|
|
|
105
|
return $x; |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub disptree { |
|
416
|
0
|
|
|
0
|
0
|
|
my ($what, $s) = @_; |
|
417
|
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
foreach my $k (sort keys %{$what}) { |
|
|
0
|
|
|
|
|
|
|
|
419
|
0
|
|
|
|
|
|
my $v = $what->{$k}; |
|
420
|
|
|
|
|
|
|
|
|
421
|
0
|
0
|
|
|
|
|
if ((ref $v) eq 'HASH') { |
|
422
|
0
|
|
|
|
|
|
print " "x$s."$k\n"; |
|
423
|
0
|
|
|
|
|
|
disptree($v, $s+2); |
|
424
|
0
|
|
|
|
|
|
next; |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
|
|
427
|
0
|
0
|
|
|
|
|
if ((ref $v) eq 'ARRAY') { |
|
428
|
0
|
|
|
|
|
|
my $count; |
|
429
|
0
|
|
|
|
|
|
for ($count=0; $count<=$#{$v}; $count++) { |
|
|
0
|
|
|
|
|
|
|
|
430
|
0
|
|
|
|
|
|
print " "x$s.$k."[$count]\n"; |
|
431
|
0
|
|
|
|
|
|
disptree($v->[$count], $s+2); |
|
432
|
|
|
|
|
|
|
} |
|
433
|
0
|
|
|
|
|
|
next; |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
0
|
|
|
|
|
|
print " "x$s."$k => $v\n"; |
|
437
|
|
|
|
|
|
|
} |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
1; |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
__END__ |