|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #include   | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #include   | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #include   | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #include "houdini.h"  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #include "html_unescape.h"  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static inline void  | 
| 
9
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 gh_buf_put_utf8(gh_buf *ob, int c)  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	unsigned char unichar[4];  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (c < 0x80) {  | 
| 
14
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_putc(ob, c);  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
16
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else if (c < 0x800) {  | 
| 
17
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[0] = 192 + (c / 64);  | 
| 
18
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[1] = 128 + (c % 64);  | 
| 
19
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_put(ob, unichar, 2);  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
21
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else if (c - 0xd800u < 0x800) {  | 
| 
22
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_putc(ob, '?');  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
24
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else if (c < 0x10000) {  | 
| 
25
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[0] = 224 + (c / 4096);  | 
| 
26
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[1] = 128 + (c / 64) % 64;  | 
| 
27
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[2] = 128 + (c % 64);  | 
| 
28
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_put(ob, unichar, 3);  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
30
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else if (c < 0x110000) {  | 
| 
31
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[0] = 240 + (c / 262144);  | 
| 
32
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[1] = 128 + (c / 4096) % 64;  | 
| 
33
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[2] = 128 + (c / 64) % 64;  | 
| 
34
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		unichar[3] = 128 + (c % 64);  | 
| 
35
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_put(ob, unichar, 4);  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
38
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		gh_buf_putc(ob, '?');  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
40
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static size_t  | 
| 
43
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 unescape_ent(gh_buf *ob, const uint8_t *src, size_t size)  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	size_t i = 0;  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
16
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (size > 3 && src[0] == '#') {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		int codepoint = 0;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (_isdigit(src[1])) {  | 
| 
51
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			for (i = 1; i < size && _isdigit(src[i]); ++i)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				codepoint = (codepoint * 10) + (src[i] - '0');  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else if (src[1] == 'x' || src[1] == 'X') {  | 
| 
56
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			for (i = 2; i < size && _isxdigit(src[i]); ++i)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (i < size && src[i] == ';' && codepoint) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			gh_buf_put_utf8(ob, codepoint);  | 
| 
62
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return i + 1;  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
67
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (size > MAX_WORD_LENGTH)  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			size = MAX_WORD_LENGTH;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
22
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		for (i = MIN_WORD_LENGTH; i < size; ++i) {  | 
| 
71
 | 
22
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			if (src[i] == ' ')  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				break;  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			if (src[i] == ';') {  | 
| 
75
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				const struct html_ent *entity = find_entity((char *)src, i);  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				if (entity != NULL) {  | 
| 
78
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					gh_buf_put(ob, entity->utf8, entity->utf8_len);  | 
| 
79
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					return i + 1;  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				break;  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	gh_buf_putc(ob, '&');  | 
| 
88
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return 0;  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 int  | 
| 
92
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size)  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	size_t  i = 0, org;  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	while (i < size) {  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		org = i;  | 
| 
98
 | 
78
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		while (i < size && src[i] != '&')  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			i++;  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
19
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (likely(i > org)) {  | 
| 
102
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			if (unlikely(org == 0)) {  | 
| 
103
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				if (i >= size)  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					return 0;  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			gh_buf_put(ob, src + org, i - org);  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		/* escaping */  | 
| 
113
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (i >= size)  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			break;  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		i++;  | 
| 
117
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		i += unescape_ent(ob, src + i, size - i);  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return 1;  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    |