line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include |
2
|
|
|
|
|
|
|
#include |
3
|
|
|
|
|
|
|
#include |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#include "houdini.h" |
6
|
|
|
|
|
|
|
#include "html_unescape.h" |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
static inline void |
9
|
2
|
|
|
|
|
|
gh_buf_put_utf8(gh_buf *ob, int c) |
10
|
|
|
|
|
|
|
{ |
11
|
|
|
|
|
|
|
unsigned char unichar[4]; |
12
|
|
|
|
|
|
|
|
13
|
2
|
50
|
|
|
|
|
if (c < 0x80) { |
14
|
2
|
|
|
|
|
|
gh_buf_putc(ob, c); |
15
|
|
|
|
|
|
|
} |
16
|
0
|
0
|
|
|
|
|
else if (c < 0x800) { |
17
|
0
|
|
|
|
|
|
unichar[0] = 192 + (c / 64); |
18
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c % 64); |
19
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 2); |
20
|
|
|
|
|
|
|
} |
21
|
0
|
0
|
|
|
|
|
else if (c - 0xd800u < 0x800) { |
22
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '?'); |
23
|
|
|
|
|
|
|
} |
24
|
0
|
0
|
|
|
|
|
else if (c < 0x10000) { |
25
|
0
|
|
|
|
|
|
unichar[0] = 224 + (c / 4096); |
26
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c / 64) % 64; |
27
|
0
|
|
|
|
|
|
unichar[2] = 128 + (c % 64); |
28
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 3); |
29
|
|
|
|
|
|
|
} |
30
|
0
|
0
|
|
|
|
|
else if (c < 0x110000) { |
31
|
0
|
|
|
|
|
|
unichar[0] = 240 + (c / 262144); |
32
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c / 4096) % 64; |
33
|
0
|
|
|
|
|
|
unichar[2] = 128 + (c / 64) % 64; |
34
|
0
|
|
|
|
|
|
unichar[3] = 128 + (c % 64); |
35
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 4); |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
else { |
38
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '?'); |
39
|
|
|
|
|
|
|
} |
40
|
2
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
static size_t |
43
|
16
|
|
|
|
|
|
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size) |
44
|
|
|
|
|
|
|
{ |
45
|
|
|
|
|
|
|
size_t i = 0; |
46
|
|
|
|
|
|
|
|
47
|
16
|
100
|
|
|
|
|
if (size > 3 && src[0] == '#') { |
|
|
100
|
|
|
|
|
|
48
|
|
|
|
|
|
|
int codepoint = 0; |
49
|
|
|
|
|
|
|
|
50
|
2
|
50
|
|
|
|
|
if (_isdigit(src[1])) { |
51
|
6
|
50
|
|
|
|
|
for (i = 1; i < size && _isdigit(src[i]); ++i) |
|
|
100
|
|
|
|
|
|
52
|
4
|
|
|
|
|
|
codepoint = (codepoint * 10) + (src[i] - '0'); |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
0
|
0
|
|
|
|
|
else if (src[1] == 'x' || src[1] == 'X') { |
56
|
0
|
0
|
|
|
|
|
for (i = 2; i < size && _isxdigit(src[i]); ++i) |
|
|
0
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
2
|
50
|
|
|
|
|
if (i < size && src[i] == ';' && codepoint) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
61
|
2
|
|
|
|
|
|
gh_buf_put_utf8(ob, codepoint); |
62
|
2
|
|
|
|
|
|
return i + 1; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
else { |
67
|
14
|
100
|
|
|
|
|
if (size > MAX_WORD_LENGTH) |
68
|
|
|
|
|
|
|
size = MAX_WORD_LENGTH; |
69
|
|
|
|
|
|
|
|
70
|
22
|
50
|
|
|
|
|
for (i = MIN_WORD_LENGTH; i < size; ++i) { |
71
|
22
|
50
|
|
|
|
|
if (src[i] == ' ') |
72
|
|
|
|
|
|
|
break; |
73
|
|
|
|
|
|
|
|
74
|
22
|
100
|
|
|
|
|
if (src[i] == ';') { |
75
|
14
|
|
|
|
|
|
const struct html_ent *entity = find_entity((char *)src, i); |
76
|
|
|
|
|
|
|
|
77
|
14
|
50
|
|
|
|
|
if (entity != NULL) { |
78
|
14
|
|
|
|
|
|
gh_buf_put(ob, entity->utf8, entity->utf8_len); |
79
|
14
|
|
|
|
|
|
return i + 1; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
break; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '&'); |
88
|
0
|
|
|
|
|
|
return 0; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
int |
92
|
6
|
|
|
|
|
|
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size) |
93
|
|
|
|
|
|
|
{ |
94
|
|
|
|
|
|
|
size_t i = 0, org; |
95
|
|
|
|
|
|
|
|
96
|
22
|
100
|
|
|
|
|
while (i < size) { |
97
|
|
|
|
|
|
|
org = i; |
98
|
78
|
100
|
|
|
|
|
while (i < size && src[i] != '&') |
|
|
100
|
|
|
|
|
|
99
|
59
|
|
|
|
|
|
i++; |
100
|
|
|
|
|
|
|
|
101
|
19
|
100
|
|
|
|
|
if (likely(i > org)) { |
102
|
12
|
100
|
|
|
|
|
if (unlikely(org == 0)) { |
103
|
3
|
50
|
|
|
|
|
if (i >= size) |
104
|
|
|
|
|
|
|
return 0; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
9
|
|
|
|
|
|
gh_buf_put(ob, src + org, i - org); |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/* escaping */ |
113
|
16
|
50
|
|
|
|
|
if (i >= size) |
114
|
|
|
|
|
|
|
break; |
115
|
|
|
|
|
|
|
|
116
|
16
|
|
|
|
|
|
i++; |
117
|
16
|
|
|
|
|
|
i += unescape_ent(ob, src + i, size - i); |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
return 1; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|