File Coverage

houdini/houdini_href_e.c
Criterion Covered Total %
statement 16 20 80.0
branch 14 14 100.0
condition n/a
subroutine n/a
pod n/a
total 30 34 88.2


line stmt bran cond sub pod time code
1             #include <assert.h>
2             #include <stdio.h>
3             #include <string.h>
4              
5             #include "houdini.h"
6              
7             /*
8             * The following characters will not be escaped:
9             *
10             * -_.+!*'(),%#@?=;:/,+&$ alphanum
11             *
12             * Note that this character set is the addition of:
13             *
14             * - The characters which are safe to be in a URL
15             * - The characters which are *not* safe to be in
16             * a URL because they are RESERVED characters.
17             *
18             * We assume (lazily) that any RESERVED char that
19             * appears inside a URL is actually meant to
20             * have its native function (i.e. as a URL
21             * component/separator) and hence needs no escaping.
22             *
23             * There are two exceptions: the characters & (amp)
24             * and ' (single quote) do not appear in the table.
25             * They are meant to appear in the URL as components,
26             * yet they require special HTML-entity escaping
27             * to generate valid HTML markup.
28             *
29             * All other characters will be escaped to %XX.
30             *
31             */
32             static const char HREF_SAFE[] = {
33             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35             0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
36             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
37             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
39             0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
41             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49             };
50              
51             int
52 4           houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
53             {
54             static const uint8_t hex_chars[] = "0123456789ABCDEF";
55             size_t i = 0, org;
56             uint8_t hex_str[3];
57              
58 4           hex_str[0] = '%';
59              
60 10 100         while (i < size) {
61             org = i;
62 51 100         while (i < size && HREF_SAFE[src[i]] != 0)
    100          
63 42           i++;
64              
65 9 100         if (likely(i > org)) {
66 6 100         if (unlikely(org == 0)) {
67 3 100         if (i >= size)
68             return 0;
69              
70 2           gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
71             }
72              
73 5           gh_buf_put(ob, src + org, i - org);
74             }
75              
76             /* escaping */
77 8 100         if (i >= size)
78             break;
79              
80 6           switch (src[i]) {
81             /* amp appears all the time in URLs, but needs
82             * HTML-entity escaping to be inside an href */
83             case '&':
84 0           gh_buf_PUTS(ob, "&amp;");
85 0           break;
86              
87             /* the single quote is a valid URL character
88             * according to the standard; it needs HTML
89             * entity escaping too */
90             case '\'':
91 0           gh_buf_PUTS(ob, "&#x27;");
92 0           break;
93            
94             /* the space can be escaped to %20 or a plus
95             * sign. we're going with the generic escape
96             * for now. the plus thing is more commonly seen
97             * when building GET strings */
98             #if 0
99             case ' ':
100             gh_buf_putc(ob, '+');
101             break;
102             #endif
103              
104             /* every other character goes with a %XX escaping */
105             default:
106 6           hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
107 6           hex_str[2] = hex_chars[src[i] & 0xF];
108 6           gh_buf_put(ob, hex_str, 3);
109             }
110              
111 6           i++;
112             }
113              
114             return 1;
115             }