| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
2
|
|
|
|
|
|
|
#include "perl.h" |
|
3
|
|
|
|
|
|
|
#include "XSUB.h" |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
/* ------------------------------------------------------------------ */ |
|
6
|
|
|
|
|
|
|
/* Unity build: the parser sources live in src/ and are pulled in below |
|
7
|
|
|
|
|
|
|
* so the per-xs Makefile doesn't need extra OBJECT entries. Order |
|
8
|
|
|
|
|
|
|
* matters: headers first, then .c bodies. All symbols here are static |
|
9
|
|
|
|
|
|
|
* to the compilation unit except mds_render_html_to_sv, which the XS |
|
10
|
|
|
|
|
|
|
* glue at the bottom of this file calls. |
|
11
|
|
|
|
|
|
|
*/ |
|
12
|
|
|
|
|
|
|
#define MDS_UNITY_BUILD 1 |
|
13
|
|
|
|
|
|
|
#include "../../src/mds_arena.c" |
|
14
|
|
|
|
|
|
|
#include "../../src/mds_buf.c" |
|
15
|
|
|
|
|
|
|
#include "../../src/mds_linkref.c" |
|
16
|
|
|
|
|
|
|
#include "../../src/mds_footnote.c" |
|
17
|
|
|
|
|
|
|
#include "../../src/mds_block.c" |
|
18
|
|
|
|
|
|
|
#include "../../src/mds_inline.c" |
|
19
|
|
|
|
|
|
|
#include "../../src/mds_render_html.c" |
|
20
|
|
|
|
|
|
|
#include "../../src/mds_gfm.c" |
|
21
|
|
|
|
|
|
|
#include "../../src/mds.c" |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* SIMD foundation: always build the scalar + dispatch units. The |
|
24
|
|
|
|
|
|
|
* AVX2/SSE2/NEON files compile only when their feature macro is set. */ |
|
25
|
|
|
|
|
|
|
#include "../../src/simd/mds_simd_scalar.c" |
|
26
|
|
|
|
|
|
|
#ifdef MDS_HAVE_SSE2 |
|
27
|
|
|
|
|
|
|
# include "../../src/simd/mds_simd_sse2.c" |
|
28
|
|
|
|
|
|
|
#endif |
|
29
|
|
|
|
|
|
|
#ifdef MDS_HAVE_AVX2 |
|
30
|
|
|
|
|
|
|
# include "../../src/simd/mds_simd_avx2.c" |
|
31
|
|
|
|
|
|
|
#endif |
|
32
|
|
|
|
|
|
|
#ifdef MDS_HAVE_NEON |
|
33
|
|
|
|
|
|
|
# include "../../src/simd/mds_simd_neon.c" |
|
34
|
|
|
|
|
|
|
#endif |
|
35
|
|
|
|
|
|
|
#include "../../src/simd/mds_simd_dispatch.c" |
|
36
|
|
|
|
|
|
|
#include "../../src/simd/mds_dispatch.c" |
|
37
|
|
|
|
|
|
|
/* ------------------------------------------------------------------ */ |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
/* Special-byte table used by strip_markdown_except_lists_tables to skip |
|
40
|
|
|
|
|
|
|
* over runs of ordinary text quickly. Bytes here are the ones that can |
|
41
|
|
|
|
|
|
|
* START a markdown construct in that pass. */ |
|
42
|
|
|
|
|
|
|
static unsigned char mds_special[256]; |
|
43
|
|
|
|
|
|
|
static int mds_special_init = 0; |
|
44
|
8
|
|
|
|
|
|
static void mds_build_special(void) { |
|
45
|
8
|
100
|
|
|
|
|
if (mds_special_init) return; |
|
46
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'#'] = 1; |
|
47
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'`'] = 1; |
|
48
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'*'] = 1; |
|
49
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'_'] = 1; |
|
50
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'~'] = 1; |
|
51
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'['] = 1; |
|
52
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'!'] = 1; |
|
53
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'|'] = 1; |
|
54
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'\n'] = 1; |
|
55
|
2
|
|
|
|
|
|
mds_special[(unsigned char)'\r'] = 1; |
|
56
|
2
|
|
|
|
|
|
mds_special_init = 1; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
8
|
|
|
|
|
|
static SV* strip_markdown_except_lists_tables(const char* input) { |
|
60
|
8
|
|
|
|
|
|
SV* out = newSVpv("", 0); |
|
61
|
8
|
|
|
|
|
|
const char* p = input; |
|
62
|
|
|
|
|
|
|
|
|
63
|
8
|
|
|
|
|
|
mds_build_special(); |
|
64
|
|
|
|
|
|
|
|
|
65
|
112
|
100
|
|
|
|
|
while (*p) { |
|
66
|
|
|
|
|
|
|
// Unordered lists: keep marker, remove space |
|
67
|
104
|
100
|
|
|
|
|
if ((p == input || *(p-1) == '\n') && (*p == '-' || *p == '*' || *p == '+') && *(p+1) == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
68
|
4
|
|
|
|
|
|
sv_catpvn(out, p, 1); |
|
69
|
4
|
|
|
|
|
|
sv_catpvn(out, " ", 1); // Keep space after marker |
|
70
|
4
|
|
|
|
|
|
p += 2; |
|
71
|
|
|
|
|
|
|
// Task list [ ] or [x] immediately after bullet — strip the box. |
|
72
|
4
|
100
|
|
|
|
|
if (*p == '[' && (*(p+1) == ' ' || *(p+1) == 'x' || *(p+1) == 'X') && *(p+2) == ']') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
73
|
2
|
|
|
|
|
|
p += 3; |
|
74
|
2
|
50
|
|
|
|
|
if (*p == ' ') p++; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
4
|
|
|
|
|
|
continue; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
// Tables: just copy everything (including pipes and dashes) |
|
79
|
100
|
100
|
|
|
|
|
if (*p == '|') { |
|
80
|
12
|
|
|
|
|
|
sv_catpvn(out, p, 1); |
|
81
|
12
|
|
|
|
|
|
p++; |
|
82
|
12
|
|
|
|
|
|
continue; |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
// Table separator row (---): just copy dashes and pipes |
|
85
|
88
|
100
|
|
|
|
|
if (*p == '-' && ((p > input && *(p-1) == '|') || (p == input))) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
86
|
3
|
|
|
|
|
|
sv_catpvn(out, p, 1); |
|
87
|
3
|
|
|
|
|
|
p++; |
|
88
|
3
|
|
|
|
|
|
continue; |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
// Remove bold (** or __) |
|
91
|
85
|
100
|
|
|
|
|
if ((*p == '*' && *(p+1) == '*') || (*p == '_' && *(p+1) == '_')) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
92
|
10
|
|
|
|
|
|
p += 2; |
|
93
|
10
|
|
|
|
|
|
continue; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
// Remove italic (* or _) |
|
96
|
75
|
50
|
|
|
|
|
if (*p == '*' || *p == '_') { |
|
|
|
100
|
|
|
|
|
|
|
97
|
4
|
|
|
|
|
|
p++; |
|
98
|
4
|
|
|
|
|
|
continue; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
// Remove strikethrough (~~) |
|
101
|
71
|
50
|
|
|
|
|
if (*p == '~' && *(p+1) == '~') { |
|
|
|
0
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
p += 2; |
|
103
|
0
|
|
|
|
|
|
continue; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
// Remove inline code (`) and fenced code (```) — keep content, drop fences |
|
106
|
71
|
100
|
|
|
|
|
if (*p == '`') { |
|
107
|
3
|
100
|
|
|
|
|
if (*(p+1) == '`' && *(p+2) == '`') { |
|
|
|
50
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
// Fenced block: drop the opening fence line entirely |
|
109
|
|
|
|
|
|
|
// (but preserve one newline so surrounding text stays |
|
110
|
|
|
|
|
|
|
// on its own line). |
|
111
|
|
|
|
|
|
|
const char* fence; |
|
112
|
|
|
|
|
|
|
const char* body_end; |
|
113
|
1
|
|
|
|
|
|
p += 3; |
|
114
|
1
|
50
|
|
|
|
|
while (*p && *p != '\n') p++; |
|
|
|
50
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
// keep the '\n' that terminated the fence line in output |
|
116
|
1
|
50
|
|
|
|
|
if (*p == '\n') { sv_catpvn(out, "\n", 1); p++; } |
|
117
|
1
|
|
|
|
|
|
fence = strstr(p, "```"); |
|
118
|
1
|
50
|
|
|
|
|
body_end = fence ? fence : p + strlen(p); |
|
119
|
1
|
50
|
|
|
|
|
if (body_end > p) |
|
120
|
1
|
|
|
|
|
|
sv_catpvn(out, p, (STRLEN)(body_end - p)); |
|
121
|
1
|
50
|
|
|
|
|
if (fence) { |
|
122
|
1
|
|
|
|
|
|
p = fence + 3; |
|
123
|
|
|
|
|
|
|
// drop the rest of the closing fence line |
|
124
|
1
|
50
|
|
|
|
|
while (*p && *p != '\n') p++; |
|
|
|
50
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
} else { |
|
126
|
0
|
|
|
|
|
|
p = body_end; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
} else { |
|
129
|
|
|
|
|
|
|
const char* code_start; |
|
130
|
1
|
|
|
|
|
|
p++; |
|
131
|
1
|
|
|
|
|
|
code_start = p; |
|
132
|
6
|
50
|
|
|
|
|
while (*p && *p != '`') p++; |
|
|
|
100
|
|
|
|
|
|
|
133
|
1
|
50
|
|
|
|
|
if (p > code_start) |
|
134
|
1
|
|
|
|
|
|
sv_catpvn(out, code_start, (STRLEN)(p - code_start)); |
|
135
|
1
|
50
|
|
|
|
|
if (*p == '`') p++; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
2
|
|
|
|
|
|
continue; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
// Remove images  |
|
140
|
69
|
100
|
|
|
|
|
if (*p == '!' && *(p+1) == '[') { |
|
|
|
50
|
|
|
|
|
|
|
141
|
1
|
|
|
|
|
|
p += 2; |
|
142
|
2
|
50
|
|
|
|
|
while (*p && *p != ']') p++; |
|
|
|
100
|
|
|
|
|
|
|
143
|
1
|
50
|
|
|
|
|
if (*p == ']') p++; |
|
144
|
1
|
50
|
|
|
|
|
if (*p == '(') { |
|
145
|
1
|
|
|
|
|
|
p++; |
|
146
|
6
|
50
|
|
|
|
|
while (*p && *p != ')') p++; |
|
|
|
100
|
|
|
|
|
|
|
147
|
1
|
50
|
|
|
|
|
if (*p == ')') p++; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
1
|
|
|
|
|
|
continue; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
// Remove links [text](url), keep text |
|
152
|
68
|
100
|
|
|
|
|
if (*p == '[') { |
|
153
|
|
|
|
|
|
|
const char* text_start; |
|
154
|
|
|
|
|
|
|
int text_len; |
|
155
|
1
|
|
|
|
|
|
p++; |
|
156
|
1
|
|
|
|
|
|
text_start = p; |
|
157
|
11
|
50
|
|
|
|
|
while (*p && *p != ']') p++; |
|
|
|
100
|
|
|
|
|
|
|
158
|
1
|
|
|
|
|
|
text_len = (int)(p - text_start); |
|
159
|
1
|
50
|
|
|
|
|
if (text_len > 0) |
|
160
|
1
|
|
|
|
|
|
sv_catpvn(out, text_start, text_len); |
|
161
|
1
|
50
|
|
|
|
|
if (*p == ']') p++; |
|
162
|
1
|
50
|
|
|
|
|
if (*p == '(') { |
|
163
|
1
|
|
|
|
|
|
p++; |
|
164
|
9
|
50
|
|
|
|
|
while (*p && *p != ')') p++; |
|
|
|
100
|
|
|
|
|
|
|
165
|
1
|
50
|
|
|
|
|
if (*p == ')') p++; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
1
|
|
|
|
|
|
continue; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
// Remove headers (#) |
|
170
|
67
|
100
|
|
|
|
|
if (*p == '#') { |
|
171
|
6
|
100
|
|
|
|
|
while (*p == '#') p++; |
|
172
|
3
|
50
|
|
|
|
|
if (*p == ' ') p++; |
|
173
|
3
|
|
|
|
|
|
continue; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
// Remove task list [ ] or [x] |
|
176
|
64
|
50
|
|
|
|
|
if (*p == '[' && (*(p+1) == ' ' || *(p+1) == 'x' || *(p+1) == 'X') && *(p+2) == ']') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
177
|
0
|
|
|
|
|
|
p += 3; |
|
178
|
0
|
0
|
|
|
|
|
if (*p == ' ') p++; |
|
179
|
0
|
|
|
|
|
|
continue; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
// Default: emit a single special byte, OR batch a run of non-special. |
|
182
|
|
|
|
|
|
|
/* Special set for strip is the markdown special set PLUS '|', '-', '+', |
|
183
|
|
|
|
|
|
|
* and ASCII digits (all line-start-conditional in this function). */ |
|
184
|
64
|
100
|
|
|
|
|
if (mds_special[(unsigned char)*p] |
|
185
|
44
|
50
|
|
|
|
|
|| *p == '|' || *p == '-' || *p == '+' |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
186
|
32
|
100
|
|
|
|
|
|| (*p >= '1' && *p <= '9')) { |
|
|
|
100
|
|
|
|
|
|
|
187
|
36
|
|
|
|
|
|
sv_catpvn(out, p, 1); |
|
188
|
36
|
|
|
|
|
|
p++; |
|
189
|
|
|
|
|
|
|
} else { |
|
190
|
28
|
|
|
|
|
|
const char* run = p; |
|
191
|
131
|
100
|
|
|
|
|
do { p++; } while (*p && !mds_special[(unsigned char)*p] |
|
192
|
111
|
50
|
|
|
|
|
&& *p != '|' && *p != '-' && *p != '+' |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
193
|
248
|
100
|
|
|
|
|
&& !(*p >= '1' && *p <= '9')); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
194
|
28
|
|
|
|
|
|
sv_catpvn(out, run, (STRLEN)(p - run)); |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
} |
|
197
|
8
|
|
|
|
|
|
return out; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
/* Shared options-hash decoder used by both the procedural |
|
201
|
|
|
|
|
|
|
* markdown_to_html entry point and the persistent session render path. |
|
202
|
|
|
|
|
|
|
* GFM is the default; an `hv` of NULL returns the GFM preset unchanged. */ |
|
203
|
2225
|
|
|
|
|
|
static unsigned mds_flags_from_hv(pTHX_ HV* h) { |
|
204
|
2225
|
|
|
|
|
|
unsigned flags = MDS_FLAGS_GFM; |
|
205
|
|
|
|
|
|
|
SV** v; |
|
206
|
2225
|
100
|
|
|
|
|
if (!h) return flags; |
|
207
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "gfm", 3, 0)) && !SvTRUE(*v)) flags = MDS_FLAGS_COMMONMARK; |
|
|
|
100
|
|
|
|
|
|
|
208
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "tables", 6, 0))) flags = SvTRUE(*v) ? (flags | MDS_FLAG_TABLES) : (flags & ~MDS_FLAG_TABLES); |
|
|
|
50
|
|
|
|
|
|
|
209
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "strikethrough", 13, 0))) flags = SvTRUE(*v) ? (flags | MDS_FLAG_STRIKE) : (flags & ~MDS_FLAG_STRIKE); |
|
|
|
50
|
|
|
|
|
|
|
210
|
2070
|
50
|
|
|
|
|
if ((v = hv_fetch(h, "tasklist", 8, 0))) flags = SvTRUE(*v) ? (flags | MDS_FLAG_TASKLIST) : (flags & ~MDS_FLAG_TASKLIST); |
|
|
|
0
|
|
|
|
|
|
|
211
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "autolink", 8, 0))) flags = SvTRUE(*v) ? (flags | MDS_FLAG_AUTOLINK) : (flags & ~MDS_FLAG_AUTOLINK); |
|
|
|
50
|
|
|
|
|
|
|
212
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "disallow_raw_html", 17, 0))) flags = SvTRUE(*v) ? (flags | MDS_FLAG_DISALLOW_RAW_HTML) : (flags & ~MDS_FLAG_DISALLOW_RAW_HTML); |
|
|
|
50
|
|
|
|
|
|
|
213
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "hard_breaks", 11, 0)) && SvTRUE(*v)) flags |= MDS_FLAG_HARD_BREAKS; |
|
|
|
50
|
|
|
|
|
|
|
214
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "unsafe", 6, 0)) && SvTRUE(*v)) flags |= MDS_FLAG_UNSAFE; |
|
|
|
50
|
|
|
|
|
|
|
215
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "no_simd", 7, 0)) && SvTRUE(*v)) flags |= MDS_FLAG_NO_SIMD; |
|
|
|
50
|
|
|
|
|
|
|
216
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "strict_utf8", 11, 0)) && SvTRUE(*v)) flags |= MDS_FLAG_STRICT_UTF8; |
|
|
|
50
|
|
|
|
|
|
|
217
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "headers", 7, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_HEADINGS; |
|
|
|
50
|
|
|
|
|
|
|
218
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "italic", 6, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_EMPH; |
|
|
|
50
|
|
|
|
|
|
|
219
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "bold", 4, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_STRONG; |
|
|
|
50
|
|
|
|
|
|
|
220
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "code", 4, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_CODE; |
|
|
|
50
|
|
|
|
|
|
|
221
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "links", 5, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_LINKS; |
|
|
|
50
|
|
|
|
|
|
|
222
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "images", 6, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_IMAGES; |
|
|
|
50
|
|
|
|
|
|
|
223
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "ordered_lists", 13, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_ORDERED_LISTS; |
|
|
|
50
|
|
|
|
|
|
|
224
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "unordered_lists", 15, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_UNORDERED_LISTS; |
|
|
|
50
|
|
|
|
|
|
|
225
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "blockquote", 10, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_QUOTES; |
|
|
|
50
|
|
|
|
|
|
|
226
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "thematic_break", 14, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_THEMATIC_BREAK; |
|
|
|
50
|
|
|
|
|
|
|
227
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "fenced_code", 11, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_FENCED_CODE; |
|
|
|
50
|
|
|
|
|
|
|
228
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "indented_code", 13, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_INDENTED_CODE; |
|
|
|
50
|
|
|
|
|
|
|
229
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "html", 4, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_HTML; |
|
|
|
50
|
|
|
|
|
|
|
230
|
2070
|
100
|
|
|
|
|
if ((v = hv_fetch(h, "references", 10, 0)) && !SvTRUE(*v)) flags |= MDS_FLAG_NO_REFERENCES; |
|
|
|
50
|
|
|
|
|
|
|
231
|
2070
|
|
|
|
|
|
return flags; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
/* Persistent session struct. The arena's head page is kept warm between |
|
235
|
|
|
|
|
|
|
* render() calls by mds_arena_reset, eliminating the per-parse malloc |
|
236
|
|
|
|
|
|
|
* for sub-page workloads, and the block-scanner scratch buffers are |
|
237
|
|
|
|
|
|
|
* persisted alongside so realloc traffic is amortised. */ |
|
238
|
|
|
|
|
|
|
typedef struct mds_session { |
|
239
|
|
|
|
|
|
|
mds_arena arena; |
|
240
|
|
|
|
|
|
|
mds_block_scratch scratch; |
|
241
|
|
|
|
|
|
|
unsigned flags; |
|
242
|
|
|
|
|
|
|
} mds_session; |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
/* Magic glue: a Markdown::Simple object is a blessed SVRV whose IV slot |
|
245
|
|
|
|
|
|
|
* holds an mds_session* pointer. The pointer is owned by an |
|
246
|
|
|
|
|
|
|
* ext-magic record attached to that IV, so the session is released |
|
247
|
|
|
|
|
|
|
* automatically when the SV is destroyed (covering scope exit, undef, |
|
248
|
|
|
|
|
|
|
* and global destruction without a Perl-level DESTROY method). |
|
249
|
|
|
|
|
|
|
*/ |
|
250
|
3
|
|
|
|
|
|
static int mds_session_mg_free(pTHX_ SV* sv, MAGIC* mg) { |
|
251
|
|
|
|
|
|
|
mds_session* s; |
|
252
|
|
|
|
|
|
|
PERL_UNUSED_ARG(sv); |
|
253
|
3
|
|
|
|
|
|
s = (mds_session*)mg->mg_ptr; |
|
254
|
3
|
50
|
|
|
|
|
if (s) { |
|
255
|
3
|
|
|
|
|
|
mds_arena_free(&s->arena); |
|
256
|
3
|
|
|
|
|
|
mds_block_scratch_free(&s->scratch); |
|
257
|
3
|
|
|
|
|
|
free(s); |
|
258
|
3
|
|
|
|
|
|
mg->mg_ptr = NULL; |
|
259
|
|
|
|
|
|
|
} |
|
260
|
3
|
|
|
|
|
|
return 0; |
|
261
|
|
|
|
|
|
|
} |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
static MGVTBL mds_session_mg_vtbl = { |
|
264
|
|
|
|
|
|
|
NULL, /* get */ |
|
265
|
|
|
|
|
|
|
NULL, /* set */ |
|
266
|
|
|
|
|
|
|
NULL, /* len */ |
|
267
|
|
|
|
|
|
|
NULL, /* clear */ |
|
268
|
|
|
|
|
|
|
mds_session_mg_free, /* free */ |
|
269
|
|
|
|
|
|
|
NULL, /* copy */ |
|
270
|
|
|
|
|
|
|
NULL, /* dup */ |
|
271
|
|
|
|
|
|
|
NULL /* local */ |
|
272
|
|
|
|
|
|
|
}; |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
/* Look up the session pointer from a blessed SVRV ($self). Croaks on |
|
275
|
|
|
|
|
|
|
* mismatch so misuse fails loudly rather than dereferencing garbage. */ |
|
276
|
221
|
|
|
|
|
|
static mds_session* mds_session_from_self(pTHX_ SV* self, const char* who) { |
|
277
|
|
|
|
|
|
|
SV* iv; |
|
278
|
|
|
|
|
|
|
MAGIC* mg; |
|
279
|
221
|
50
|
|
|
|
|
if (!self || !SvROK(self)) |
|
|
|
50
|
|
|
|
|
|
|
280
|
0
|
|
|
|
|
|
croak("%s: invalid invocant (expected a Markdown::Simple object)", who); |
|
281
|
221
|
|
|
|
|
|
iv = SvRV(self); |
|
282
|
221
|
50
|
|
|
|
|
for (mg = SvMAGIC(iv); mg; mg = mg->mg_moremagic) { |
|
283
|
221
|
50
|
|
|
|
|
if (mg->mg_type == PERL_MAGIC_ext && mg->mg_virtual == &mds_session_mg_vtbl) |
|
|
|
50
|
|
|
|
|
|
|
284
|
221
|
|
|
|
|
|
return (mds_session*)mg->mg_ptr; |
|
285
|
|
|
|
|
|
|
} |
|
286
|
0
|
|
|
|
|
|
croak("%s: invocant has no Markdown::Simple session attached", who); |
|
287
|
|
|
|
|
|
|
return NULL; /* not reached */ |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
MODULE = Markdown::Simple PACKAGE = Markdown::Simple |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
SV* |
|
293
|
|
|
|
|
|
|
strip_markdown(input) |
|
294
|
|
|
|
|
|
|
const char* input |
|
295
|
|
|
|
|
|
|
CODE: |
|
296
|
8
|
|
|
|
|
|
RETVAL = strip_markdown_except_lists_tables(input); |
|
297
|
|
|
|
|
|
|
OUTPUT: |
|
298
|
|
|
|
|
|
|
RETVAL |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
SV* |
|
301
|
|
|
|
|
|
|
markdown_to_html(input, ...) |
|
302
|
|
|
|
|
|
|
SV* input; |
|
303
|
|
|
|
|
|
|
CODE: |
|
304
|
|
|
|
|
|
|
{ |
|
305
|
|
|
|
|
|
|
STRLEN n; |
|
306
|
2222
|
|
|
|
|
|
const char* s = SvPV(input, n); |
|
307
|
2222
|
|
|
|
|
|
SV* out = newSVpv("", 0); |
|
308
|
2068
|
50
|
|
|
|
|
HV* h = (items > 1 && SvOK(ST(1)) && SvROK(ST(1)) && SvTYPE(SvRV(ST(1))) == SVt_PVHV) |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
309
|
4290
|
100
|
|
|
|
|
? (HV*)SvRV(ST(1)) : NULL; |
|
310
|
2222
|
|
|
|
|
|
unsigned flags = mds_flags_from_hv(aTHX_ h); |
|
311
|
2222
|
|
|
|
|
|
mds_render_html_to_sv(aTHX_ s, n, flags, out); |
|
312
|
2222
|
100
|
|
|
|
|
if (SvCUR(out) == 0 && (flags & MDS_FLAG_STRICT_UTF8) && n) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
/* Distinguish "empty input" from "rejected as malformed UTF-8". */ |
|
314
|
2
|
|
|
|
|
|
const mds_simd_ops* ops = (flags & MDS_FLAG_NO_SIMD) |
|
315
|
1
|
50
|
|
|
|
|
? mds_simd_ops_scalar() : mds_simd_get(); |
|
316
|
1
|
50
|
|
|
|
|
if (!ops->validate_utf8(s, n)) |
|
317
|
1
|
|
|
|
|
|
croak("markdown_to_html: input is not valid UTF-8"); |
|
318
|
|
|
|
|
|
|
} |
|
319
|
2221
|
|
|
|
|
|
RETVAL = out; |
|
320
|
|
|
|
|
|
|
} |
|
321
|
|
|
|
|
|
|
OUTPUT: |
|
322
|
|
|
|
|
|
|
RETVAL |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# ---- Persistent session (reusable arena) ------------------------------- |
|
325
|
|
|
|
|
|
|
# Markdown::Simple->new(\%opts) -> blessed object owning a warm parser |
|
326
|
|
|
|
|
|
|
# $self->render($markdown) -> SV with rendered HTML |
|
327
|
|
|
|
|
|
|
# $self->flags -> integer flag mask (read-only) |
|
328
|
|
|
|
|
|
|
# |
|
329
|
|
|
|
|
|
|
# The session is released by SV magic when the object goes out of scope; |
|
330
|
|
|
|
|
|
|
# no explicit DESTROY method is required. |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
SV* |
|
333
|
|
|
|
|
|
|
new(class, opts = NULL) |
|
334
|
|
|
|
|
|
|
SV* class; |
|
335
|
|
|
|
|
|
|
SV* opts; |
|
336
|
|
|
|
|
|
|
PREINIT: |
|
337
|
3
|
100
|
|
|
|
|
HV* h = NULL; |
|
338
|
|
|
|
|
|
|
HV* stash; |
|
339
|
|
|
|
|
|
|
mds_session* s; |
|
340
|
|
|
|
|
|
|
SV* iv; |
|
341
|
|
|
|
|
|
|
SV* rv; |
|
342
|
|
|
|
|
|
|
const char* klass; |
|
343
|
|
|
|
|
|
|
CODE: |
|
344
|
|
|
|
|
|
|
{ |
|
345
|
3
|
100
|
|
|
|
|
if (opts && SvOK(opts)) { |
|
|
|
50
|
|
|
|
|
|
|
346
|
2
|
50
|
|
|
|
|
if (!SvROK(opts) || SvTYPE(SvRV(opts)) != SVt_PVHV) |
|
|
|
50
|
|
|
|
|
|
|
347
|
0
|
|
|
|
|
|
croak("Markdown::Simple::new: options must be a HASH reference"); |
|
348
|
2
|
|
|
|
|
|
h = (HV*)SvRV(opts); |
|
349
|
|
|
|
|
|
|
} |
|
350
|
3
|
50
|
|
|
|
|
if (!SvOK(class)) |
|
351
|
0
|
|
|
|
|
|
croak("Markdown::Simple::new: missing class name"); |
|
352
|
3
|
50
|
|
|
|
|
klass = SvROK(class) ? sv_reftype(SvRV(class), 1) : SvPV_nolen(class); |
|
353
|
3
|
|
|
|
|
|
stash = gv_stashpv(klass, GV_ADD); |
|
354
|
|
|
|
|
|
|
|
|
355
|
3
|
|
|
|
|
|
s = (mds_session*)malloc(sizeof(mds_session)); |
|
356
|
3
|
50
|
|
|
|
|
if (!s) croak("Markdown::Simple::new: out of memory"); |
|
357
|
3
|
|
|
|
|
|
mds_arena_init(&s->arena); |
|
358
|
3
|
|
|
|
|
|
memset(&s->scratch, 0, sizeof s->scratch); |
|
359
|
3
|
|
|
|
|
|
s->flags = mds_flags_from_hv(aTHX_ h); |
|
360
|
|
|
|
|
|
|
|
|
361
|
3
|
|
|
|
|
|
iv = newSViv(0); |
|
362
|
3
|
|
|
|
|
|
sv_magicext(iv, NULL, PERL_MAGIC_ext, &mds_session_mg_vtbl, (const char*)s, 0); |
|
363
|
3
|
|
|
|
|
|
rv = newRV_noinc(iv); |
|
364
|
3
|
|
|
|
|
|
sv_bless(rv, stash); |
|
365
|
3
|
|
|
|
|
|
RETVAL = rv; |
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
OUTPUT: |
|
368
|
|
|
|
|
|
|
RETVAL |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
SV* |
|
371
|
|
|
|
|
|
|
render(self, input) |
|
372
|
|
|
|
|
|
|
SV* self; |
|
373
|
|
|
|
|
|
|
SV* input; |
|
374
|
|
|
|
|
|
|
PREINIT: |
|
375
|
|
|
|
|
|
|
mds_session* s; |
|
376
|
|
|
|
|
|
|
STRLEN n; |
|
377
|
|
|
|
|
|
|
const char* in; |
|
378
|
|
|
|
|
|
|
SV* out; |
|
379
|
|
|
|
|
|
|
CODE: |
|
380
|
|
|
|
|
|
|
{ |
|
381
|
221
|
|
|
|
|
|
s = mds_session_from_self(aTHX_ self, "Markdown::Simple::render"); |
|
382
|
221
|
50
|
|
|
|
|
in = SvOK(input) ? SvPV(input, n) : (n = 0, ""); |
|
383
|
221
|
|
|
|
|
|
out = newSVpv("", 0); |
|
384
|
221
|
|
|
|
|
|
mds_render_html_to_sv_ex(aTHX_ in, n, s->flags, out, &s->arena, &s->scratch); |
|
385
|
221
|
100
|
|
|
|
|
if (SvCUR(out) == 0 && (s->flags & MDS_FLAG_STRICT_UTF8) && n) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
const mds_simd_ops* ops = (s->flags & MDS_FLAG_NO_SIMD) |
|
387
|
0
|
0
|
|
|
|
|
? mds_simd_ops_scalar() : mds_simd_get(); |
|
388
|
0
|
0
|
|
|
|
|
if (!ops->validate_utf8(in, n)) |
|
389
|
0
|
|
|
|
|
|
croak("render: input is not valid UTF-8"); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
221
|
|
|
|
|
|
RETVAL = out; |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
OUTPUT: |
|
394
|
|
|
|
|
|
|
RETVAL |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
UV |
|
397
|
|
|
|
|
|
|
flags(self) |
|
398
|
|
|
|
|
|
|
SV* self; |
|
399
|
|
|
|
|
|
|
CODE: |
|
400
|
|
|
|
|
|
|
{ |
|
401
|
0
|
|
|
|
|
|
mds_session* s = mds_session_from_self(aTHX_ self, "Markdown::Simple::flags"); |
|
402
|
0
|
0
|
|
|
|
|
RETVAL = (UV)s->flags; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
OUTPUT: |
|
405
|
|
|
|
|
|
|
RETVAL |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# ---- SIMD backend introspection ---------------------------------------- |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
SV* |
|
410
|
|
|
|
|
|
|
_simd_backend() |
|
411
|
|
|
|
|
|
|
CODE: |
|
412
|
|
|
|
|
|
|
{ |
|
413
|
5
|
|
|
|
|
|
RETVAL = newSVpv(mds_simd_backend(), 0); |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
OUTPUT: |
|
416
|
|
|
|
|
|
|
RETVAL |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
void |
|
419
|
|
|
|
|
|
|
_simd_force_scalar(on) |
|
420
|
|
|
|
|
|
|
int on; |
|
421
|
|
|
|
|
|
|
CODE: |
|
422
|
|
|
|
|
|
|
{ |
|
423
|
1394
|
|
|
|
|
|
mds_simd_force_scalar(on); |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# ---- Classifier introspection (test-only) ------------------------------ |
|
427
|
|
|
|
|
|
|
# _classify_structural($bytes) -- runs the *active* backend |
|
428
|
|
|
|
|
|
|
# _classify_structural_scalar($bytes) -- runs the scalar reference |
|
429
|
|
|
|
|
|
|
# Both return a byte string of ceil(len/8) bytes; bit i (LSB-first within |
|
430
|
|
|
|
|
|
|
# each byte) is set iff input byte i is structural. |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
SV* |
|
433
|
|
|
|
|
|
|
_classify_structural(input) |
|
434
|
|
|
|
|
|
|
SV* input; |
|
435
|
|
|
|
|
|
|
CODE: |
|
436
|
|
|
|
|
|
|
{ |
|
437
|
|
|
|
|
|
|
STRLEN n; |
|
438
|
|
|
|
|
|
|
const char* s; |
|
439
|
|
|
|
|
|
|
size_t words; |
|
440
|
|
|
|
|
|
|
uint64_t* bm; |
|
441
|
|
|
|
|
|
|
size_t out_bytes; |
|
442
|
599
|
|
|
|
|
|
s = SvPV(input, n); |
|
443
|
599
|
|
|
|
|
|
words = (n + 63) >> 6; |
|
444
|
599
|
100
|
|
|
|
|
if (!words) words = 1; |
|
445
|
599
|
|
|
|
|
|
bm = (uint64_t*)calloc(words, sizeof(uint64_t)); |
|
446
|
599
|
50
|
|
|
|
|
if (!bm) croak("oom"); |
|
447
|
599
|
|
|
|
|
|
mds_simd_get()->classify_structural(s, n, bm); |
|
448
|
599
|
|
|
|
|
|
out_bytes = (n + 7) >> 3; |
|
449
|
599
|
|
|
|
|
|
RETVAL = newSVpvn((const char*)bm, out_bytes); |
|
450
|
599
|
|
|
|
|
|
free(bm); |
|
451
|
|
|
|
|
|
|
} |
|
452
|
|
|
|
|
|
|
OUTPUT: |
|
453
|
|
|
|
|
|
|
RETVAL |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
SV* |
|
456
|
|
|
|
|
|
|
_classify_structural_scalar(input) |
|
457
|
|
|
|
|
|
|
SV* input; |
|
458
|
|
|
|
|
|
|
CODE: |
|
459
|
|
|
|
|
|
|
{ |
|
460
|
|
|
|
|
|
|
STRLEN n; |
|
461
|
|
|
|
|
|
|
const char* s; |
|
462
|
|
|
|
|
|
|
size_t words; |
|
463
|
|
|
|
|
|
|
uint64_t* bm; |
|
464
|
|
|
|
|
|
|
size_t out_bytes; |
|
465
|
600
|
|
|
|
|
|
s = SvPV(input, n); |
|
466
|
600
|
|
|
|
|
|
words = (n + 63) >> 6; |
|
467
|
600
|
100
|
|
|
|
|
if (!words) words = 1; |
|
468
|
600
|
|
|
|
|
|
bm = (uint64_t*)calloc(words, sizeof(uint64_t)); |
|
469
|
600
|
50
|
|
|
|
|
if (!bm) croak("oom"); |
|
470
|
600
|
|
|
|
|
|
mds_simd_ops_scalar()->classify_structural(s, n, bm); |
|
471
|
600
|
|
|
|
|
|
out_bytes = (n + 7) >> 3; |
|
472
|
600
|
|
|
|
|
|
RETVAL = newSVpvn((const char*)bm, out_bytes); |
|
473
|
600
|
|
|
|
|
|
free(bm); |
|
474
|
|
|
|
|
|
|
} |
|
475
|
|
|
|
|
|
|
OUTPUT: |
|
476
|
|
|
|
|
|
|
RETVAL |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# ---- UTF-8 validator + line scanner (test-only) ------------------------ |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
int |
|
481
|
|
|
|
|
|
|
_validate_utf8(input) |
|
482
|
|
|
|
|
|
|
SV* input; |
|
483
|
|
|
|
|
|
|
CODE: |
|
484
|
|
|
|
|
|
|
{ |
|
485
|
|
|
|
|
|
|
STRLEN n; |
|
486
|
|
|
|
|
|
|
const char* s; |
|
487
|
280
|
|
|
|
|
|
s = SvPV(input, n); |
|
488
|
280
|
|
|
|
|
|
RETVAL = mds_simd_get()->validate_utf8(s, n); |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
OUTPUT: |
|
491
|
|
|
|
|
|
|
RETVAL |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
int |
|
494
|
|
|
|
|
|
|
_validate_utf8_scalar(input) |
|
495
|
|
|
|
|
|
|
SV* input; |
|
496
|
|
|
|
|
|
|
CODE: |
|
497
|
|
|
|
|
|
|
{ |
|
498
|
|
|
|
|
|
|
STRLEN n; |
|
499
|
|
|
|
|
|
|
const char* s; |
|
500
|
280
|
|
|
|
|
|
s = SvPV(input, n); |
|
501
|
280
|
|
|
|
|
|
RETVAL = mds_simd_ops_scalar()->validate_utf8(s, n); |
|
502
|
|
|
|
|
|
|
} |
|
503
|
|
|
|
|
|
|
OUTPUT: |
|
504
|
|
|
|
|
|
|
RETVAL |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# Both _find_newlines variants return: |
|
507
|
|
|
|
|
|
|
# - undef when the offset table overflows the provided cap, or |
|
508
|
|
|
|
|
|
|
# - a packed string of native-endian uint32_t values (one per '\n'). |
|
509
|
|
|
|
|
|
|
SV* |
|
510
|
|
|
|
|
|
|
_find_newlines(input) |
|
511
|
|
|
|
|
|
|
SV* input; |
|
512
|
|
|
|
|
|
|
CODE: |
|
513
|
|
|
|
|
|
|
{ |
|
514
|
|
|
|
|
|
|
STRLEN n; |
|
515
|
|
|
|
|
|
|
const char* s; |
|
516
|
|
|
|
|
|
|
size_t cap; |
|
517
|
|
|
|
|
|
|
uint32_t* offs; |
|
518
|
|
|
|
|
|
|
size_t k; |
|
519
|
306
|
|
|
|
|
|
s = SvPV(input, n); |
|
520
|
306
|
100
|
|
|
|
|
cap = n ? n : 1; |
|
521
|
306
|
|
|
|
|
|
offs = (uint32_t*)malloc(cap * sizeof(uint32_t)); |
|
522
|
306
|
50
|
|
|
|
|
if (!offs) croak("oom"); |
|
523
|
306
|
|
|
|
|
|
k = mds_simd_get()->find_newlines(s, n, offs, cap); |
|
524
|
306
|
50
|
|
|
|
|
if (k == (size_t)-1) { free(offs); XSRETURN_UNDEF; } |
|
525
|
306
|
|
|
|
|
|
RETVAL = newSVpvn((const char*)offs, k * sizeof(uint32_t)); |
|
526
|
306
|
|
|
|
|
|
free(offs); |
|
527
|
|
|
|
|
|
|
} |
|
528
|
|
|
|
|
|
|
OUTPUT: |
|
529
|
|
|
|
|
|
|
RETVAL |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
SV* |
|
532
|
|
|
|
|
|
|
_find_newlines_scalar(input) |
|
533
|
|
|
|
|
|
|
SV* input; |
|
534
|
|
|
|
|
|
|
CODE: |
|
535
|
|
|
|
|
|
|
{ |
|
536
|
|
|
|
|
|
|
STRLEN n; |
|
537
|
|
|
|
|
|
|
const char* s; |
|
538
|
|
|
|
|
|
|
size_t cap; |
|
539
|
|
|
|
|
|
|
uint32_t* offs; |
|
540
|
|
|
|
|
|
|
size_t k; |
|
541
|
209
|
|
|
|
|
|
s = SvPV(input, n); |
|
542
|
209
|
100
|
|
|
|
|
cap = n ? n : 1; |
|
543
|
209
|
|
|
|
|
|
offs = (uint32_t*)malloc(cap * sizeof(uint32_t)); |
|
544
|
209
|
50
|
|
|
|
|
if (!offs) croak("oom"); |
|
545
|
209
|
|
|
|
|
|
k = mds_simd_ops_scalar()->find_newlines(s, n, offs, cap); |
|
546
|
209
|
50
|
|
|
|
|
if (k == (size_t)-1) { free(offs); XSRETURN_UNDEF; } |
|
547
|
209
|
|
|
|
|
|
RETVAL = newSVpvn((const char*)offs, k * sizeof(uint32_t)); |
|
548
|
209
|
|
|
|
|
|
free(offs); |
|
549
|
|
|
|
|
|
|
} |
|
550
|
|
|
|
|
|
|
OUTPUT: |
|
551
|
|
|
|
|
|
|
RETVAL |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
# Bounded-cap variant for testing the overflow sentinel path. |
|
554
|
|
|
|
|
|
|
SV* |
|
555
|
|
|
|
|
|
|
_find_newlines_capped(input, cap) |
|
556
|
|
|
|
|
|
|
SV* input; |
|
557
|
|
|
|
|
|
|
int cap; |
|
558
|
|
|
|
|
|
|
CODE: |
|
559
|
|
|
|
|
|
|
{ |
|
560
|
3
|
|
|
|
|
|
STRLEN n; const char* s = SvPV(input, n); |
|
561
|
3
|
|
|
|
|
|
size_t cc = cap < 0 ? 0 : (size_t)cap; |
|
562
|
3
|
50
|
|
|
|
|
uint32_t* offs = cc ? (uint32_t*)malloc(cc * sizeof(uint32_t)) : NULL; |
|
563
|
3
|
|
|
|
|
|
size_t k = mds_simd_get()->find_newlines(s, n, offs, cc); |
|
564
|
3
|
100
|
|
|
|
|
if (k == (size_t)-1) { free(offs); XSRETURN_UNDEF; } |
|
565
|
2
|
50
|
|
|
|
|
RETVAL = newSVpvn(offs ? (const char*)offs : "", k * sizeof(uint32_t)); |
|
566
|
2
|
|
|
|
|
|
free(offs); |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
OUTPUT: |
|
569
|
|
|
|
|
|
|
RETVAL |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# ---- Arena profile from the last parse --------------------------------- |
|
572
|
|
|
|
|
|
|
# Returns a hashref describing arena usage from the most recent call to |
|
573
|
|
|
|
|
|
|
# mds_render_html_to_sv. Intended for bench/profile_arena.pl; not |
|
574
|
|
|
|
|
|
|
# thread-safe (the underlying snapshot is a single static). |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
SV* |
|
577
|
|
|
|
|
|
|
_last_arena_profile() |
|
578
|
|
|
|
|
|
|
CODE: |
|
579
|
|
|
|
|
|
|
{ |
|
580
|
1
|
|
|
|
|
|
HV* h = newHV(); |
|
581
|
1
|
|
|
|
|
|
hv_stores(h, "total_alloc", newSVuv((UV)mds_last_arena_profile.total_alloc)); |
|
582
|
1
|
|
|
|
|
|
hv_stores(h, "page_count", newSVuv((UV)mds_last_arena_profile.page_count)); |
|
583
|
1
|
|
|
|
|
|
hv_stores(h, "big_count", newSVuv((UV)mds_last_arena_profile.big_count)); |
|
584
|
1
|
|
|
|
|
|
hv_stores(h, "big_bytes", newSVuv((UV)mds_last_arena_profile.big_bytes)); |
|
585
|
1
|
|
|
|
|
|
hv_stores(h, "head_used_last", newSVuv((UV)mds_last_arena_profile.head_used_last)); |
|
586
|
1
|
|
|
|
|
|
hv_stores(h, "head_cap_last", newSVuv((UV)mds_last_arena_profile.head_cap_last)); |
|
587
|
1
|
|
|
|
|
|
hv_stores(h, "page_size", newSVuv((UV)MDS_ARENA_PAGE)); |
|
588
|
1
|
|
|
|
|
|
hv_stores(h, "big_threshold", newSVuv((UV)MDS_ARENA_BIG)); |
|
589
|
1
|
|
|
|
|
|
RETVAL = newRV_noinc((SV*)h); |
|
590
|
|
|
|
|
|
|
} |
|
591
|
|
|
|
|
|
|
OUTPUT: |
|
592
|
|
|
|
|
|
|
RETVAL |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# ---- Arena + buffer self-tests ----------------------------------------- |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
SV* |
|
597
|
|
|
|
|
|
|
_arena_test() |
|
598
|
|
|
|
|
|
|
CODE: |
|
599
|
|
|
|
|
|
|
{ |
|
600
|
|
|
|
|
|
|
/* Exercise alignment, page chaining, and the big-alloc path. */ |
|
601
|
|
|
|
|
|
|
mds_arena a; |
|
602
|
1
|
|
|
|
|
|
int aligned_ok = 1; |
|
603
|
|
|
|
|
|
|
int chained_ok; |
|
604
|
|
|
|
|
|
|
void* big; |
|
605
|
|
|
|
|
|
|
int big_ok; |
|
606
|
|
|
|
|
|
|
int reset_ok; |
|
607
|
|
|
|
|
|
|
HV* h_a; |
|
608
|
|
|
|
|
|
|
int i_a; |
|
609
|
|
|
|
|
|
|
size_t si_a; |
|
610
|
1
|
|
|
|
|
|
mds_arena_init(&a); |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
/* 1. Alignment: every returned pointer is MDS_ARENA_ALIGN-aligned. */ |
|
613
|
65
|
100
|
|
|
|
|
for (i_a = 0; i_a < 64; i_a++) { |
|
614
|
64
|
|
|
|
|
|
{ void* p = mds_arena_alloc(&a, 1 + (i_a * 7)); |
|
615
|
64
|
50
|
|
|
|
|
if (((uintptr_t)p) & (MDS_ARENA_ALIGN - 1)) { aligned_ok = 0; break; } } |
|
616
|
|
|
|
|
|
|
} |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
/* 2. Page chaining: allocate enough to force a second page. */ |
|
619
|
201
|
100
|
|
|
|
|
for (si_a = 0; si_a < 200; si_a++) mds_arena_alloc(&a, 1024); |
|
620
|
1
|
50
|
|
|
|
|
chained_ok = (a.head && a.head->next != NULL); |
|
|
|
50
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
/* 3. Big-alloc: > MDS_ARENA_BIG goes to dedicated page. */ |
|
623
|
1
|
|
|
|
|
|
big = mds_arena_alloc(&a, MDS_ARENA_BIG * 2); |
|
624
|
1
|
50
|
|
|
|
|
big_ok = (big != NULL && a.big != NULL); |
|
|
|
50
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
/* 4. Reset: walks back to single warm page, no big pages. */ |
|
627
|
1
|
|
|
|
|
|
mds_arena_reset(&a); |
|
628
|
1
|
50
|
|
|
|
|
reset_ok = (a.head && a.head->next == NULL && a.big == NULL); |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
|
630
|
1
|
|
|
|
|
|
mds_arena_free(&a); |
|
631
|
|
|
|
|
|
|
|
|
632
|
1
|
|
|
|
|
|
h_a = newHV(); |
|
633
|
1
|
|
|
|
|
|
hv_stores(h_a, "aligned", newSViv(aligned_ok)); |
|
634
|
1
|
|
|
|
|
|
hv_stores(h_a, "chained", newSViv(chained_ok)); |
|
635
|
1
|
|
|
|
|
|
hv_stores(h_a, "big", newSViv(big_ok)); |
|
636
|
1
|
|
|
|
|
|
hv_stores(h_a, "reset", newSViv(reset_ok)); |
|
637
|
1
|
|
|
|
|
|
RETVAL = newRV_noinc((SV*)h_a); |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
OUTPUT: |
|
640
|
|
|
|
|
|
|
RETVAL |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
SV* |
|
643
|
|
|
|
|
|
|
_buf_test() |
|
644
|
|
|
|
|
|
|
CODE: |
|
645
|
|
|
|
|
|
|
{ |
|
646
|
|
|
|
|
|
|
/* Exercise mds_buf: grows, preserves contents, finalises SvCUR. */ |
|
647
|
|
|
|
|
|
|
SV* out; |
|
648
|
|
|
|
|
|
|
mds_buf b; |
|
649
|
|
|
|
|
|
|
int i_b; |
|
650
|
|
|
|
|
|
|
int len_ok; |
|
651
|
|
|
|
|
|
|
const char* p_b; |
|
652
|
|
|
|
|
|
|
int data_ok; |
|
653
|
|
|
|
|
|
|
HV* h_b; |
|
654
|
1
|
|
|
|
|
|
out = newSVpv("", 0); |
|
655
|
1
|
|
|
|
|
|
mds_buf_init(aTHX_ &b, out, 8); /* tiny hint to force growth */ |
|
656
|
1001
|
100
|
|
|
|
|
for (i_b = 0; i_b < 1000; i_b++) mds_buf_write(aTHX_ &b, "abcdef", 6); |
|
657
|
1
|
|
|
|
|
|
mds_buf_finalize(aTHX_ &b); |
|
658
|
|
|
|
|
|
|
|
|
659
|
1
|
|
|
|
|
|
len_ok = (SvCUR(out) == 6000); |
|
660
|
1
|
|
|
|
|
|
p_b = SvPVX(out); |
|
661
|
2
|
50
|
|
|
|
|
data_ok = (memcmp(p_b, "abcdef", 6) == 0 && |
|
662
|
1
|
50
|
|
|
|
|
memcmp(p_b + 5994, "abcdef", 6) == 0); |
|
663
|
|
|
|
|
|
|
|
|
664
|
1
|
|
|
|
|
|
h_b = newHV(); |
|
665
|
1
|
|
|
|
|
|
hv_stores(h_b, "len", newSViv(len_ok)); |
|
666
|
1
|
|
|
|
|
|
hv_stores(h_b, "data", newSViv(data_ok)); |
|
667
|
1
|
|
|
|
|
|
SvREFCNT_dec(out); |
|
668
|
1
|
|
|
|
|
|
RETVAL = newRV_noinc((SV*)h_b); |
|
669
|
|
|
|
|
|
|
} |
|
670
|
|
|
|
|
|
|
OUTPUT: |
|
671
|
|
|
|
|
|
|
RETVAL |
|
672
|
|
|
|
|
|
|
|