File Coverage

hoedown/src/escape.c
Criterion Covered Total %
statement 19 32 59.3
branch 17 26 65.3
condition n/a
subroutine n/a
pod n/a
total 36 58 62.0


line stmt bran cond sub pod time code
1             #include "escape.h"
2              
3             #include <assert.h>
4             #include <stdio.h>
5             #include <string.h>
6              
7              
8             #define likely(x) __builtin_expect((x),1)
9             #define unlikely(x) __builtin_expect((x),0)
10              
11              
12             /*
13             * The following characters will not be escaped:
14             *
15             * -_.+!*'(),%#@?=;:/,+&$ alphanum
16             *
17             * Note that this character set is the addition of:
18             *
19             * - The characters which are safe to be in a URL
20             * - The characters which are *not* safe to be in
21             * a URL because they are RESERVED characters.
22             *
23             * We assume (lazily) that any RESERVED char that
24             * appears inside a URL is actually meant to
25             * have its native function (i.e. as a URL
26             * component/separator) and hence needs no escaping.
27             *
28             * There are two exceptions: the characters & (amp)
29             * and ' (single quote) do not appear in the table.
30             * They are meant to appear in the URL as components,
31             * yet they require special HTML-entity escaping
32             * to generate valid HTML markup.
33             *
34             * All other characters will be escaped to %XX.
35             *
36             */
37             static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
38             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40             0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
41             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
42             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
44             0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
46             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54             };
55              
56             void
57 1           hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
58             {
59             static const char hex_chars[] = "0123456789ABCDEF";
60             size_t i = 0, mark;
61             char hex_str[3];
62              
63 1           hex_str[0] = '%';
64              
65 1 50         while (i < size) {
66             mark = i;
67 15 100         while (i < size && HREF_SAFE[data[i]]) i++;
    50          
68              
69             /* Optimization for cases where there's nothing to escape */
70 1 50         if (mark == 0 && i >= size) {
71 1           hoedown_buffer_put(ob, data, size);
72 1           return;
73             }
74              
75 0 0         if (likely(i > mark)) {
76 0           hoedown_buffer_put(ob, data + mark, i - mark);
77             }
78              
79             /* escaping */
80 0 0         if (i >= size)
81             break;
82              
83 0           switch (data[i]) {
84             /* amp appears all the time in URLs, but needs
85             * HTML-entity escaping to be inside an href */
86             case '&':
87 0           HOEDOWN_BUFPUTSL(ob, "&amp;");
88 0           break;
89              
90             /* the single quote is a valid URL character
91             * according to the standard; it needs HTML
92             * entity escaping too */
93             case '\'':
94 0           HOEDOWN_BUFPUTSL(ob, "&#x27;");
95 0           break;
96              
97             /* the space can be escaped to %20 or a plus
98             * sign. we're going with the generic escape
99             * for now. the plus thing is more commonly seen
100             * when building GET strings */
101             #if 0
102             case ' ':
103             hoedown_buffer_putc(ob, '+');
104             break;
105             #endif
106              
107             /* every other character goes with a %XX escaping */
108             default:
109 0           hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
110 0           hex_str[2] = hex_chars[data[i] & 0xF];
111 0           hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
112             }
113              
114 0           i++;
115             }
116             }
117              
118              
119             /**
120             * According to the OWASP rules:
121             *
122             * & --> &amp;
123             * < --> &lt;
124             * > --> &gt;
125             * " --> &quot;
126             * ' --> &#x27; &apos; is not recommended
127             * / --> &#x2F; forward slash is included as it helps end an HTML entity
128             *
129             */
130             static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
131             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
132             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
133             0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
134             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
135             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
142             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
144             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147             };
148              
149             static const char *HTML_ESCAPES[] = {
150             "",
151             "&quot;",
152             "&amp;",
153             "&#39;",
154             "&#47;",
155             "&lt;",
156             "&gt;"
157             };
158              
159             void
160 50           hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
161             {
162             size_t i = 0, mark;
163              
164             while (1) {
165             mark = i;
166 118 100         while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
    100          
167              
168             /* Optimization for cases where there's nothing to escape */
169 27 100         if (mark == 0 && i >= size) {
170 24           hoedown_buffer_put(ob, data, size);
171 24           return;
172             }
173              
174 3 100         if (likely(i > mark))
175 2           hoedown_buffer_put(ob, data + mark, i - mark);
176              
177 3 100         if (i >= size) break;
178              
179             /* The forward slash is only escaped in secure mode */
180 2 50         if (!secure && data[i] == '/') {
    50          
181 2           hoedown_buffer_putc(ob, '/');
182             } else {
183 0           hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
184             }
185              
186 2           i++;
187 2           }
188             }