line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
$Id: Encode.xs,v 2.41 2017/06/10 17:23:50 dankogai Exp $ |
3
|
|
|
|
|
|
|
*/ |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT |
6
|
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
|
#include "encode.h" |
10
|
|
|
|
|
|
|
#include "def_t.h" |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# define PERLIO_MODNAME "PerlIO::encoding" |
13
|
|
|
|
|
|
|
# define PERLIO_FILENAME "PerlIO/encoding.pm" |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
/* set 1 or more to profile. t/encoding.t dumps core because of |
16
|
|
|
|
|
|
|
Perl_warner and PerlIO don't work well */ |
17
|
|
|
|
|
|
|
#define ENCODE_XS_PROFILE 0 |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
/* set 0 to disable floating point to calculate buffer size for |
20
|
|
|
|
|
|
|
encode_method(). 1 is recommended. 2 restores NI-S original */ |
21
|
|
|
|
|
|
|
#define ENCODE_XS_USEFP 1 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#define UNIMPLEMENTED(x,y) static y x (SV *sv, char *encoding) { \ |
24
|
|
|
|
|
|
|
Perl_croak_nocontext("panic_unimplemented"); \ |
25
|
|
|
|
|
|
|
PERL_UNUSED_VAR(sv); \ |
26
|
|
|
|
|
|
|
PERL_UNUSED_VAR(encoding); \ |
27
|
|
|
|
|
|
|
return (y)0; /* fool picky compilers */ \ |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
/**/ |
30
|
|
|
|
|
|
|
|
31
|
0
|
|
|
|
|
|
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
32
|
0
|
|
|
|
|
|
UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#ifndef SvIV_nomg |
35
|
|
|
|
|
|
|
#define SvIV_nomg SvIV |
36
|
|
|
|
|
|
|
#endif |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
static void |
39
|
190
|
|
|
|
|
|
Encode_XSEncoding(pTHX_ encode_t * enc) |
40
|
|
|
|
|
|
|
{ |
41
|
190
|
|
|
|
|
|
dSP; |
42
|
190
|
|
|
|
|
|
HV *stash = gv_stashpv("Encode::XS", TRUE); |
43
|
190
|
|
|
|
|
|
SV *iv = newSViv(PTR2IV(enc)); |
44
|
190
|
|
|
|
|
|
SV *sv = sv_bless(newRV_noinc(iv),stash); |
45
|
190
|
|
|
|
|
|
int i = 0; |
46
|
|
|
|
|
|
|
/* with the SvLEN() == 0 hack, PVX won't be freed. We cast away name's |
47
|
|
|
|
|
|
|
constness, in the hope that perl won't mess with it. */ |
48
|
|
|
|
|
|
|
assert(SvTYPE(iv) >= SVt_PV); assert(SvLEN(iv) == 0); |
49
|
190
|
|
|
|
|
|
SvFLAGS(iv) |= SVp_POK; |
50
|
190
|
|
|
|
|
|
SvPVX(iv) = (char*) enc->name[0]; |
51
|
190
|
50
|
|
|
|
|
PUSHMARK(sp); |
52
|
190
|
50
|
|
|
|
|
XPUSHs(sv); |
53
|
380
|
100
|
|
|
|
|
while (enc->name[i]) { |
54
|
190
|
|
|
|
|
|
const char *name = enc->name[i++]; |
55
|
190
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
56
|
|
|
|
|
|
|
} |
57
|
190
|
|
|
|
|
|
PUTBACK; |
58
|
190
|
|
|
|
|
|
call_pv("Encode::define_encoding", G_DISCARD); |
59
|
190
|
|
|
|
|
|
SvREFCNT_dec(sv); |
60
|
190
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
static void |
63
|
0
|
|
|
|
|
|
call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) |
64
|
|
|
|
|
|
|
{ |
65
|
|
|
|
|
|
|
/* Exists for breakpointing */ |
66
|
|
|
|
|
|
|
PERL_UNUSED_VAR(routine); |
67
|
|
|
|
|
|
|
PERL_UNUSED_VAR(done); |
68
|
|
|
|
|
|
|
PERL_UNUSED_VAR(dest); |
69
|
|
|
|
|
|
|
PERL_UNUSED_VAR(orig); |
70
|
0
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
static void |
73
|
23
|
|
|
|
|
|
utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) |
74
|
|
|
|
|
|
|
{ |
75
|
23
|
100
|
|
|
|
|
if (!modify) { |
76
|
15
|
|
|
|
|
|
SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen)); |
77
|
15
|
|
|
|
|
|
SvUTF8_on(tmp); |
78
|
15
|
100
|
|
|
|
|
if (SvTAINTED(*src)) |
|
|
50
|
|
|
|
|
|
79
|
0
|
0
|
|
|
|
|
SvTAINTED_on(tmp); |
80
|
15
|
|
|
|
|
|
*src = tmp; |
81
|
15
|
|
|
|
|
|
*s = (U8 *)SvPVX(*src); |
82
|
|
|
|
|
|
|
} |
83
|
23
|
50
|
|
|
|
|
if (*slen) { |
84
|
23
|
50
|
|
|
|
|
if (!utf8_to_bytes(*s, slen)) |
85
|
0
|
|
|
|
|
|
croak("Wide character"); |
86
|
23
|
|
|
|
|
|
SvCUR_set(*src, *slen); |
87
|
|
|
|
|
|
|
} |
88
|
23
|
|
|
|
|
|
SvUTF8_off(*src); |
89
|
23
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
static void |
92
|
20
|
|
|
|
|
|
utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) |
93
|
|
|
|
|
|
|
{ |
94
|
20
|
100
|
|
|
|
|
if (!modify) { |
95
|
13
|
|
|
|
|
|
SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen)); |
96
|
13
|
100
|
|
|
|
|
if (SvTAINTED(*src)) |
|
|
50
|
|
|
|
|
|
97
|
0
|
0
|
|
|
|
|
SvTAINTED_on(tmp); |
98
|
13
|
|
|
|
|
|
*src = tmp; |
99
|
|
|
|
|
|
|
} |
100
|
20
|
|
|
|
|
|
sv_utf8_upgrade_nomg(*src); |
101
|
20
|
100
|
|
|
|
|
*s = (U8 *)SvPV_nomg(*src, *slen); |
102
|
20
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" |
105
|
|
|
|
|
|
|
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" |
106
|
|
|
|
|
|
|
#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode" |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
static SV * |
109
|
268
|
|
|
|
|
|
do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) |
110
|
|
|
|
|
|
|
{ |
111
|
268
|
|
|
|
|
|
dSP; |
112
|
|
|
|
|
|
|
int argc; |
113
|
|
|
|
|
|
|
SV *retval; |
114
|
268
|
|
|
|
|
|
ENTER; |
115
|
268
|
|
|
|
|
|
SAVETMPS; |
116
|
268
|
50
|
|
|
|
|
PUSHMARK(sp); |
117
|
268
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVuv(ch))); |
118
|
268
|
|
|
|
|
|
PUTBACK; |
119
|
268
|
|
|
|
|
|
argc = call_sv(fallback_cb, G_SCALAR); |
120
|
268
|
|
|
|
|
|
SPAGAIN; |
121
|
268
|
50
|
|
|
|
|
if (argc != 1){ |
122
|
0
|
|
|
|
|
|
croak("fallback sub must return scalar!"); |
123
|
|
|
|
|
|
|
} |
124
|
268
|
|
|
|
|
|
retval = POPs; |
125
|
268
|
|
|
|
|
|
SvREFCNT_inc(retval); |
126
|
268
|
|
|
|
|
|
PUTBACK; |
127
|
268
|
50
|
|
|
|
|
FREETMPS; |
128
|
268
|
|
|
|
|
|
LEAVE; |
129
|
268
|
|
|
|
|
|
return retval; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
static SV * |
133
|
10
|
|
|
|
|
|
do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb) |
134
|
|
|
|
|
|
|
{ |
135
|
10
|
|
|
|
|
|
dSP; |
136
|
|
|
|
|
|
|
int argc; |
137
|
|
|
|
|
|
|
STRLEN i; |
138
|
|
|
|
|
|
|
SV *retval; |
139
|
10
|
|
|
|
|
|
ENTER; |
140
|
10
|
|
|
|
|
|
SAVETMPS; |
141
|
10
|
50
|
|
|
|
|
PUSHMARK(sp); |
142
|
24
|
100
|
|
|
|
|
for (i=0; i
|
143
|
14
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVuv(s[i]))); |
144
|
10
|
|
|
|
|
|
PUTBACK; |
145
|
10
|
|
|
|
|
|
argc = call_sv(fallback_cb, G_SCALAR); |
146
|
10
|
|
|
|
|
|
SPAGAIN; |
147
|
10
|
50
|
|
|
|
|
if (argc != 1){ |
148
|
0
|
|
|
|
|
|
croak("fallback sub must return scalar!"); |
149
|
|
|
|
|
|
|
} |
150
|
10
|
|
|
|
|
|
retval = POPs; |
151
|
10
|
|
|
|
|
|
SvREFCNT_inc(retval); |
152
|
10
|
|
|
|
|
|
PUTBACK; |
153
|
10
|
50
|
|
|
|
|
FREETMPS; |
154
|
10
|
|
|
|
|
|
LEAVE; |
155
|
10
|
|
|
|
|
|
return retval; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
static SV * |
159
|
58864
|
|
|
|
|
|
encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen, |
160
|
|
|
|
|
|
|
int check, STRLEN * offset, SV * term, int * retcode, |
161
|
|
|
|
|
|
|
SV *fallback_cb) |
162
|
|
|
|
|
|
|
{ |
163
|
58864
|
|
|
|
|
|
STRLEN tlen = slen; |
164
|
58864
|
|
|
|
|
|
STRLEN ddone = 0; |
165
|
58864
|
|
|
|
|
|
STRLEN sdone = 0; |
166
|
|
|
|
|
|
|
/* We allocate slen+1. |
167
|
|
|
|
|
|
|
PerlIO dumps core if this value is smaller than this. */ |
168
|
58864
|
|
|
|
|
|
SV *dst = newSV(slen+1); |
169
|
58864
|
|
|
|
|
|
U8 *d = (U8 *)SvPVX(dst); |
170
|
58864
|
|
|
|
|
|
STRLEN dlen = SvLEN(dst)-1; |
171
|
58864
|
|
|
|
|
|
int code = 0; |
172
|
58864
|
|
|
|
|
|
STRLEN trmlen = 0; |
173
|
58864
|
50
|
|
|
|
|
U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; |
|
|
0
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
58864
|
100
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
58864
|
50
|
|
|
|
|
if (offset) { |
178
|
0
|
|
|
|
|
|
s += *offset; |
179
|
0
|
0
|
|
|
|
|
if (slen > *offset){ /* safeguard against slen overflow */ |
180
|
0
|
|
|
|
|
|
slen -= *offset; |
181
|
|
|
|
|
|
|
}else{ |
182
|
0
|
|
|
|
|
|
slen = 0; |
183
|
|
|
|
|
|
|
} |
184
|
0
|
|
|
|
|
|
tlen = slen; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
58864
|
50
|
|
|
|
|
if (slen == 0){ |
188
|
0
|
|
|
|
|
|
SvCUR_set(dst, 0); |
189
|
0
|
|
|
|
|
|
SvPOK_only(dst); |
190
|
0
|
|
|
|
|
|
goto ENCODE_END; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
62145
|
100
|
|
|
|
|
while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, |
194
|
|
|
|
|
|
|
trm, trmlen)) ) |
195
|
|
|
|
|
|
|
{ |
196
|
3434
|
|
|
|
|
|
SvCUR_set(dst, dlen+ddone); |
197
|
3434
|
|
|
|
|
|
SvPOK_only(dst); |
198
|
|
|
|
|
|
|
|
199
|
3434
|
50
|
|
|
|
|
if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
200
|
|
|
|
|
|
|
code == ENCODE_FOUND_TERM) { |
201
|
|
|
|
|
|
|
break; |
202
|
|
|
|
|
|
|
} |
203
|
3323
|
|
|
|
|
|
switch (code) { |
204
|
|
|
|
|
|
|
case ENCODE_NOSPACE: |
205
|
|
|
|
|
|
|
{ |
206
|
1143
|
|
|
|
|
|
STRLEN more = 0; /* make sure you initialize! */ |
207
|
|
|
|
|
|
|
STRLEN sleft; |
208
|
1143
|
|
|
|
|
|
sdone += slen; |
209
|
1143
|
|
|
|
|
|
ddone += dlen; |
210
|
1143
|
|
|
|
|
|
sleft = tlen - sdone; |
211
|
|
|
|
|
|
|
#if ENCODE_XS_PROFILE >= 2 |
212
|
|
|
|
|
|
|
Perl_warn(aTHX_ |
213
|
|
|
|
|
|
|
"more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", |
214
|
|
|
|
|
|
|
more, sdone, sleft, SvLEN(dst)); |
215
|
|
|
|
|
|
|
#endif |
216
|
1143
|
50
|
|
|
|
|
if (sdone != 0) { /* has src ever been processed ? */ |
217
|
|
|
|
|
|
|
#if ENCODE_XS_USEFP == 2 |
218
|
|
|
|
|
|
|
more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone |
219
|
|
|
|
|
|
|
- SvLEN(dst); |
220
|
|
|
|
|
|
|
#elif ENCODE_XS_USEFP |
221
|
1143
|
|
|
|
|
|
more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); |
222
|
|
|
|
|
|
|
#else |
223
|
|
|
|
|
|
|
/* safe until SvLEN(dst) == MAX_INT/16 */ |
224
|
|
|
|
|
|
|
more = (16*SvLEN(dst)+1)/sdone/16 * sleft; |
225
|
|
|
|
|
|
|
#endif |
226
|
|
|
|
|
|
|
} |
227
|
1143
|
|
|
|
|
|
more += UTF8_MAXLEN; /* insurance policy */ |
228
|
1143
|
50
|
|
|
|
|
d = (U8 *) SvGROW(dst, SvLEN(dst) + more); |
|
|
50
|
|
|
|
|
|
229
|
|
|
|
|
|
|
/* dst need to grow need MORE bytes! */ |
230
|
1143
|
50
|
|
|
|
|
if (ddone >= SvLEN(dst)) { |
231
|
0
|
|
|
|
|
|
Perl_croak(aTHX_ "Destination couldn't be grown."); |
232
|
|
|
|
|
|
|
} |
233
|
1143
|
|
|
|
|
|
dlen = SvLEN(dst)-ddone-1; |
234
|
1143
|
|
|
|
|
|
d += ddone; |
235
|
1143
|
|
|
|
|
|
s += slen; |
236
|
1143
|
|
|
|
|
|
slen = tlen-sdone; |
237
|
1143
|
|
|
|
|
|
continue; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
case ENCODE_NOREP: |
240
|
|
|
|
|
|
|
/* encoding */ |
241
|
2180
|
100
|
|
|
|
|
if (dir == enc->f_utf8) { |
242
|
|
|
|
|
|
|
STRLEN clen; |
243
|
1577
|
|
|
|
|
|
UV ch = |
244
|
1577
|
|
|
|
|
|
utf8n_to_uvuni(s+slen, (tlen-sdone-slen), |
245
|
|
|
|
|
|
|
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); |
246
|
|
|
|
|
|
|
/* if non-representable multibyte prefix at end of current buffer - break*/ |
247
|
1577
|
50
|
|
|
|
|
if (clen > tlen - sdone - slen) break; |
248
|
1577
|
100
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR) { |
249
|
1
|
|
|
|
|
|
Perl_croak(aTHX_ ERR_ENCODE_NOMAP, |
250
|
|
|
|
|
|
|
(UV)ch, enc->name[0]); |
251
|
|
|
|
|
|
|
return &PL_sv_undef; /* never reaches but be safe */ |
252
|
|
|
|
|
|
|
} |
253
|
1576
|
100
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
254
|
129
|
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
255
|
|
|
|
|
|
|
ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); |
256
|
|
|
|
|
|
|
} |
257
|
1576
|
100
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR){ |
258
|
2
|
|
|
|
|
|
goto ENCODE_SET_SRC; |
259
|
|
|
|
|
|
|
} |
260
|
1574
|
100
|
|
|
|
|
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
261
|
|
|
|
|
|
|
STRLEN sublen; |
262
|
|
|
|
|
|
|
char *substr; |
263
|
1034
|
|
|
|
|
|
SV* subchar = |
264
|
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
265
|
|
|
|
|
|
|
? do_fallback_cb(aTHX_ ch, fallback_cb) |
266
|
1034
|
100
|
|
|
|
|
: newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
267
|
260
|
|
|
|
|
|
check & ENCODE_HTMLCREF ? "%" UVuf ";" : |
268
|
|
|
|
|
|
|
"%" UVxf ";", (UV)ch); |
269
|
1034
|
100
|
|
|
|
|
substr = SvPV(subchar, sublen); |
270
|
1034
|
100
|
|
|
|
|
if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
271
|
0
|
|
|
|
|
|
SvREFCNT_dec(subchar); |
272
|
0
|
|
|
|
|
|
croak("Wide character"); |
273
|
|
|
|
|
|
|
} |
274
|
1034
|
|
|
|
|
|
sdone += slen + clen; |
275
|
1034
|
|
|
|
|
|
ddone += dlen + sublen; |
276
|
1034
|
|
|
|
|
|
sv_catpvn(dst, substr, sublen); |
277
|
1034
|
|
|
|
|
|
SvREFCNT_dec(subchar); |
278
|
|
|
|
|
|
|
} else { |
279
|
|
|
|
|
|
|
/* fallback char */ |
280
|
540
|
|
|
|
|
|
sdone += slen + clen; |
281
|
540
|
|
|
|
|
|
ddone += dlen + enc->replen; |
282
|
1574
|
|
|
|
|
|
sv_catpvn(dst, (char*)enc->rep, enc->replen); |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
/* decoding */ |
286
|
|
|
|
|
|
|
else { |
287
|
603
|
50
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR){ |
288
|
0
|
|
|
|
|
|
Perl_croak(aTHX_ ERR_DECODE_NOMAP, |
289
|
0
|
|
|
|
|
|
enc->name[0], (UV)s[slen]); |
290
|
|
|
|
|
|
|
return &PL_sv_undef; /* never reaches but be safe */ |
291
|
|
|
|
|
|
|
} |
292
|
603
|
50
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
293
|
0
|
|
|
|
|
|
Perl_warner( |
294
|
|
|
|
|
|
|
aTHX_ packWARN(WARN_UTF8), |
295
|
|
|
|
|
|
|
ERR_DECODE_NOMAP, |
296
|
0
|
|
|
|
|
|
enc->name[0], (UV)s[slen]); |
297
|
|
|
|
|
|
|
} |
298
|
603
|
100
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR){ |
299
|
39
|
|
|
|
|
|
goto ENCODE_SET_SRC; |
300
|
|
|
|
|
|
|
} |
301
|
564
|
100
|
|
|
|
|
if (check & |
302
|
|
|
|
|
|
|
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
303
|
|
|
|
|
|
|
STRLEN sublen; |
304
|
|
|
|
|
|
|
char *substr; |
305
|
514
|
|
|
|
|
|
SV* subchar = |
306
|
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
307
|
130
|
|
|
|
|
|
? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) |
308
|
514
|
100
|
|
|
|
|
: newSVpvf("\\x%02" UVXf, (UV)s[slen]); |
309
|
514
|
50
|
|
|
|
|
substr = SvPVutf8(subchar, sublen); |
310
|
514
|
|
|
|
|
|
sdone += slen + 1; |
311
|
514
|
|
|
|
|
|
ddone += dlen + sublen; |
312
|
514
|
|
|
|
|
|
sv_catpvn(dst, substr, sublen); |
313
|
514
|
|
|
|
|
|
SvREFCNT_dec(subchar); |
314
|
|
|
|
|
|
|
} else { |
315
|
50
|
|
|
|
|
|
sdone += slen + 1; |
316
|
50
|
|
|
|
|
|
ddone += dlen + strlen(FBCHAR_UTF8); |
317
|
50
|
|
|
|
|
|
sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8)); |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
/* settle variables when fallback */ |
321
|
2138
|
|
|
|
|
|
d = (U8 *)SvEND(dst); |
322
|
2138
|
|
|
|
|
|
dlen = SvLEN(dst) - ddone - 1; |
323
|
2138
|
|
|
|
|
|
s = (U8*)SvPVX(src) + sdone; |
324
|
2138
|
|
|
|
|
|
slen = tlen - sdone; |
325
|
2138
|
|
|
|
|
|
break; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
default: |
328
|
0
|
0
|
|
|
|
|
Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
329
|
0
|
|
|
|
|
|
code, (dir == enc->f_utf8) ? "to" : "from", |
330
|
|
|
|
|
|
|
enc->name[0]); |
331
|
|
|
|
|
|
|
return &PL_sv_undef; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
ENCODE_SET_SRC: |
335
|
58863
|
100
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)){ |
|
|
100
|
|
|
|
|
|
336
|
18178
|
|
|
|
|
|
sdone = SvCUR(src) - (slen+sdone); |
337
|
18178
|
100
|
|
|
|
|
if (sdone) { |
338
|
144
|
|
|
|
|
|
sv_setpvn(src, (char*)s+slen, sdone); |
339
|
|
|
|
|
|
|
} |
340
|
18178
|
|
|
|
|
|
SvCUR_set(src, sdone); |
341
|
18178
|
100
|
|
|
|
|
SvSETMAGIC(src); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
/* warn("check = 0x%X, code = 0x%d\n", check, code); */ |
344
|
|
|
|
|
|
|
|
345
|
58863
|
|
|
|
|
|
SvCUR_set(dst, dlen+ddone); |
346
|
58863
|
|
|
|
|
|
SvPOK_only(dst); |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
#if ENCODE_XS_PROFILE |
349
|
|
|
|
|
|
|
if (SvCUR(dst) > SvCUR(src)){ |
350
|
|
|
|
|
|
|
Perl_warn(aTHX_ |
351
|
|
|
|
|
|
|
"SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", |
352
|
|
|
|
|
|
|
SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), |
353
|
|
|
|
|
|
|
(SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
#endif |
356
|
|
|
|
|
|
|
|
357
|
58863
|
50
|
|
|
|
|
if (offset) |
358
|
0
|
|
|
|
|
|
*offset += sdone + slen; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
ENCODE_END: |
361
|
58863
|
|
|
|
|
|
*SvEND(dst) = '\0'; |
362
|
58863
|
50
|
|
|
|
|
if (retcode) *retcode = code; |
363
|
58863
|
|
|
|
|
|
return dst; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
static bool |
367
|
31133
|
|
|
|
|
|
strict_utf8(pTHX_ SV* sv) |
368
|
|
|
|
|
|
|
{ |
369
|
|
|
|
|
|
|
HV* hv; |
370
|
|
|
|
|
|
|
SV** svp; |
371
|
31133
|
|
|
|
|
|
sv = SvRV(sv); |
372
|
31133
|
50
|
|
|
|
|
if (!sv || SvTYPE(sv) != SVt_PVHV) |
|
|
50
|
|
|
|
|
|
373
|
0
|
|
|
|
|
|
return 0; |
374
|
31133
|
|
|
|
|
|
hv = (HV*)sv; |
375
|
31133
|
|
|
|
|
|
svp = hv_fetch(hv, "strict_utf8", 11, 0); |
376
|
31133
|
100
|
|
|
|
|
if (!svp) |
377
|
76
|
|
|
|
|
|
return 0; |
378
|
31057
|
50
|
|
|
|
|
return SvTRUE(*svp); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
/* |
382
|
|
|
|
|
|
|
* https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126 |
383
|
|
|
|
|
|
|
*/ |
384
|
|
|
|
|
|
|
#ifndef UNICODE_IS_NONCHAR |
385
|
|
|
|
|
|
|
#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE) |
386
|
|
|
|
|
|
|
#endif |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
#ifndef UNICODE_IS_SUPER |
389
|
|
|
|
|
|
|
#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX) |
390
|
|
|
|
|
|
|
#endif |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c)) |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
#ifndef UTF_ACCUMULATION_OVERFLOW_MASK |
395
|
|
|
|
|
|
|
#ifndef CHARBITS |
396
|
|
|
|
|
|
|
#define CHARBITS CHAR_BIT |
397
|
|
|
|
|
|
|
#endif |
398
|
|
|
|
|
|
|
#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT)) |
399
|
|
|
|
|
|
|
#endif |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
/* |
402
|
|
|
|
|
|
|
* Convert non strict utf8 sequence of len >= 2 to unicode codepoint |
403
|
|
|
|
|
|
|
*/ |
404
|
|
|
|
|
|
|
static UV |
405
|
46374
|
|
|
|
|
|
convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) |
406
|
|
|
|
|
|
|
{ |
407
|
|
|
|
|
|
|
UV uv; |
408
|
46374
|
|
|
|
|
|
U8 *ptr = s; |
409
|
46374
|
|
|
|
|
|
bool overflowed = 0; |
410
|
|
|
|
|
|
|
|
411
|
46374
|
100
|
|
|
|
|
uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s)); |
412
|
|
|
|
|
|
|
|
413
|
46374
|
|
|
|
|
|
len--; |
414
|
46374
|
|
|
|
|
|
s++; |
415
|
|
|
|
|
|
|
|
416
|
138608
|
100
|
|
|
|
|
while (len--) { |
417
|
92396
|
100
|
|
|
|
|
if (!UTF8_IS_CONTINUATION(*s)) { |
418
|
162
|
|
|
|
|
|
*rlen = s-ptr; |
419
|
162
|
|
|
|
|
|
return 0; |
420
|
|
|
|
|
|
|
} |
421
|
92234
|
100
|
|
|
|
|
if (uv & UTF_ACCUMULATION_OVERFLOW_MASK) |
422
|
1
|
|
|
|
|
|
overflowed = 1; |
423
|
92234
|
|
|
|
|
|
uv = UTF8_ACCUMULATE(uv, *s); |
424
|
92234
|
|
|
|
|
|
s++; |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
46212
|
|
|
|
|
|
*rlen = s-ptr; |
428
|
|
|
|
|
|
|
|
429
|
46212
|
100
|
|
|
|
|
if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
430
|
22
|
|
|
|
|
|
return 0; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
46190
|
|
|
|
|
|
return uv; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
static U8* |
437
|
31129
|
|
|
|
|
|
process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, |
438
|
|
|
|
|
|
|
bool encode, bool strict, bool stop_at_partial) |
439
|
|
|
|
|
|
|
{ |
440
|
|
|
|
|
|
|
UV uv; |
441
|
|
|
|
|
|
|
STRLEN ulen; |
442
|
|
|
|
|
|
|
SV *fallback_cb; |
443
|
|
|
|
|
|
|
int check; |
444
|
|
|
|
|
|
|
U8 *d; |
445
|
|
|
|
|
|
|
STRLEN dlen; |
446
|
|
|
|
|
|
|
char esc[UTF8_MAXLEN * 6 + 1]; |
447
|
|
|
|
|
|
|
STRLEN i; |
448
|
|
|
|
|
|
|
|
449
|
31129
|
100
|
|
|
|
|
if (SvROK(check_sv)) { |
450
|
|
|
|
|
|
|
/* croak("UTF-8 decoder doesn't support callback CHECK"); */ |
451
|
14
|
|
|
|
|
|
fallback_cb = check_sv; |
452
|
14
|
|
|
|
|
|
check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
else { |
455
|
31115
|
|
|
|
|
|
fallback_cb = &PL_sv_undef; |
456
|
31115
|
50
|
|
|
|
|
check = SvIV_nomg(check_sv); |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
31129
|
|
|
|
|
|
SvPOK_only(dst); |
460
|
31129
|
|
|
|
|
|
SvCUR_set(dst,0); |
461
|
|
|
|
|
|
|
|
462
|
31129
|
50
|
|
|
|
|
dlen = (s && e && s < e) ? e-s+1 : 1; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
463
|
31129
|
50
|
|
|
|
|
d = (U8 *) SvGROW(dst, dlen); |
|
|
50
|
|
|
|
|
|
464
|
|
|
|
|
|
|
|
465
|
143969
|
100
|
|
|
|
|
while (s < e) { |
466
|
112929
|
100
|
|
|
|
|
if (UTF8_IS_INVARIANT(*s)) { |
467
|
66382
|
|
|
|
|
|
*d++ = *s++; |
468
|
66382
|
|
|
|
|
|
continue; |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
46547
|
|
|
|
|
|
uv = 0; |
472
|
46547
|
|
|
|
|
|
ulen = 1; |
473
|
46547
|
100
|
|
|
|
|
if (! UTF8_IS_CONTINUATION(*s)) { |
474
|
|
|
|
|
|
|
/* Not an invariant nor a continuation; must be a start byte. (We |
475
|
|
|
|
|
|
|
* can't test for UTF8_IS_START as that excludes things like \xC0 |
476
|
|
|
|
|
|
|
* which are start bytes, but always lead to overlongs */ |
477
|
|
|
|
|
|
|
|
478
|
46393
|
|
|
|
|
|
U8 skip = UTF8SKIP(s); |
479
|
46393
|
100
|
|
|
|
|
if ((s + skip) > e) { |
480
|
|
|
|
|
|
|
/* just calculate ulen, in pathological cases can be smaller then e-s */ |
481
|
56
|
100
|
|
|
|
|
if (e-s >= 2) |
482
|
37
|
|
|
|
|
|
convert_utf8_multi_seq(s, e-s, &ulen); |
483
|
|
|
|
|
|
|
else |
484
|
19
|
|
|
|
|
|
ulen = 1; |
485
|
|
|
|
|
|
|
|
486
|
56
|
100
|
|
|
|
|
if ((stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) && ulen == (STRLEN)(e-s)) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
487
|
4
|
|
|
|
|
|
break; |
488
|
|
|
|
|
|
|
|
489
|
52
|
|
|
|
|
|
goto malformed_byte; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
46337
|
|
|
|
|
|
uv = convert_utf8_multi_seq(s, skip, &ulen); |
493
|
46337
|
100
|
|
|
|
|
if (uv == 0) |
494
|
155
|
|
|
|
|
|
goto malformed_byte; |
495
|
46182
|
100
|
|
|
|
|
else if (strict && !UNICODE_IS_STRICT(uv)) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
496
|
|
|
|
|
|
|
goto malformed; |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
/* Whole char is good */ |
500
|
46152
|
|
|
|
|
|
memcpy(d, s, skip); |
501
|
46152
|
|
|
|
|
|
d += skip; |
502
|
46152
|
|
|
|
|
|
s += skip; |
503
|
46152
|
|
|
|
|
|
continue; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
/* If we get here there is something wrong with alleged UTF-8 */ |
507
|
|
|
|
|
|
|
/* uv is used only when encoding */ |
508
|
|
|
|
|
|
|
malformed_byte: |
509
|
361
|
50
|
|
|
|
|
if (uv == 0) |
510
|
361
|
|
|
|
|
|
uv = (UV)*s; |
511
|
361
|
50
|
|
|
|
|
if (encode || ulen == 0) |
|
|
50
|
|
|
|
|
|
512
|
0
|
|
|
|
|
|
ulen = 1; |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
malformed: |
515
|
391
|
100
|
|
|
|
|
if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ))) |
|
|
100
|
|
|
|
|
|
516
|
458
|
100
|
|
|
|
|
for (i=0; i
|
517
|
391
|
100
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR){ |
518
|
65
|
100
|
|
|
|
|
if (encode) |
519
|
14
|
50
|
|
|
|
|
Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); |
520
|
|
|
|
|
|
|
else |
521
|
51
|
100
|
|
|
|
|
Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); |
522
|
|
|
|
|
|
|
} |
523
|
326
|
100
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
524
|
129
|
50
|
|
|
|
|
if (encode) |
525
|
0
|
0
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
526
|
|
|
|
|
|
|
ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); |
527
|
|
|
|
|
|
|
else |
528
|
129
|
50
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
529
|
|
|
|
|
|
|
ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); |
530
|
|
|
|
|
|
|
} |
531
|
326
|
100
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR) { |
532
|
20
|
|
|
|
|
|
break; |
533
|
|
|
|
|
|
|
} |
534
|
306
|
100
|
|
|
|
|
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
535
|
|
|
|
|
|
|
STRLEN sublen; |
536
|
|
|
|
|
|
|
char *substr; |
537
|
|
|
|
|
|
|
SV* subchar; |
538
|
14
|
100
|
|
|
|
|
if (encode) { |
539
|
3
|
|
|
|
|
|
subchar = |
540
|
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
541
|
|
|
|
|
|
|
? do_fallback_cb(aTHX_ uv, fallback_cb) |
542
|
4
|
100
|
|
|
|
|
: newSVpvf(check & ENCODE_PERLQQ |
|
|
50
|
|
|
|
|
|
543
|
1
|
|
|
|
|
|
? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") |
544
|
1
|
50
|
|
|
|
|
: check & ENCODE_HTMLCREF ? "%" UVuf ";" |
|
|
0
|
|
|
|
|
|
545
|
|
|
|
|
|
|
: "%" UVxf ";", uv); |
546
|
3
|
50
|
|
|
|
|
substr = SvPV(subchar, sublen); |
547
|
3
|
50
|
|
|
|
|
if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
548
|
0
|
|
|
|
|
|
SvREFCNT_dec(subchar); |
549
|
0
|
|
|
|
|
|
croak("Wide character"); |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
} else { |
552
|
11
|
100
|
|
|
|
|
if (fallback_cb != &PL_sv_undef) { |
553
|
|
|
|
|
|
|
/* in decode mode we have sequence of wrong bytes */ |
554
|
10
|
|
|
|
|
|
subchar = do_bytes_fallback_cb(aTHX_ s, ulen, fallback_cb); |
555
|
|
|
|
|
|
|
} else { |
556
|
1
|
|
|
|
|
|
char *ptr = esc; |
557
|
|
|
|
|
|
|
/* ENCODE_PERLQQ is already stored in esc */ |
558
|
1
|
50
|
|
|
|
|
if (check & (ENCODE_HTMLCREF|ENCODE_XMLCREF)) |
559
|
0
|
0
|
|
|
|
|
for (i=0; i
|
|
|
0
|
|
|
|
|
|
560
|
1
|
|
|
|
|
|
subchar = newSVpvn(esc, strlen(esc)); |
561
|
|
|
|
|
|
|
} |
562
|
11
|
50
|
|
|
|
|
substr = SvPVutf8(subchar, sublen); |
563
|
|
|
|
|
|
|
} |
564
|
14
|
|
|
|
|
|
dlen += sublen - ulen; |
565
|
14
|
|
|
|
|
|
SvCUR_set(dst, d-(U8 *)SvPVX(dst)); |
566
|
14
|
|
|
|
|
|
*SvEND(dst) = '\0'; |
567
|
14
|
|
|
|
|
|
sv_catpvn(dst, substr, sublen); |
568
|
14
|
|
|
|
|
|
SvREFCNT_dec(subchar); |
569
|
14
|
50
|
|
|
|
|
d = (U8 *) SvGROW(dst, dlen) + SvCUR(dst); |
|
|
50
|
|
|
|
|
|
570
|
|
|
|
|
|
|
} else { |
571
|
292
|
|
|
|
|
|
STRLEN fbcharlen = strlen(FBCHAR_UTF8); |
572
|
292
|
|
|
|
|
|
dlen += fbcharlen - ulen; |
573
|
292
|
100
|
|
|
|
|
if (SvLEN(dst) < dlen) { |
574
|
42
|
|
|
|
|
|
SvCUR_set(dst, d-(U8 *)SvPVX(dst)); |
575
|
42
|
|
|
|
|
|
d = (U8 *) sv_grow(dst, dlen) + SvCUR(dst); |
576
|
|
|
|
|
|
|
} |
577
|
292
|
|
|
|
|
|
memcpy(d, FBCHAR_UTF8, fbcharlen); |
578
|
292
|
|
|
|
|
|
d += fbcharlen; |
579
|
|
|
|
|
|
|
} |
580
|
306
|
|
|
|
|
|
s += ulen; |
581
|
|
|
|
|
|
|
} |
582
|
31064
|
|
|
|
|
|
SvCUR_set(dst, d-(U8 *)SvPVX(dst)); |
583
|
31064
|
|
|
|
|
|
*SvEND(dst) = '\0'; |
584
|
|
|
|
|
|
|
|
585
|
31064
|
|
|
|
|
|
return s; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
void |
594
|
|
|
|
|
|
|
Method_decode(obj,src,check_sv = &PL_sv_no) |
595
|
|
|
|
|
|
|
SV * obj |
596
|
|
|
|
|
|
|
SV * src |
597
|
|
|
|
|
|
|
SV * check_sv |
598
|
|
|
|
|
|
|
PREINIT: |
599
|
|
|
|
|
|
|
STRLEN slen; |
600
|
|
|
|
|
|
|
U8 *s; |
601
|
|
|
|
|
|
|
U8 *e; |
602
|
|
|
|
|
|
|
SV *dst; |
603
|
30547
|
|
|
|
|
|
bool renewed = 0; |
604
|
|
|
|
|
|
|
int check; |
605
|
|
|
|
|
|
|
bool modify; |
606
|
30547
|
|
|
|
|
|
dSP; |
607
|
|
|
|
|
|
|
INIT: |
608
|
30547
|
100
|
|
|
|
|
SvGETMAGIC(src); |
|
|
50
|
|
|
|
|
|
609
|
30547
|
50
|
|
|
|
|
SvGETMAGIC(check_sv); |
|
|
0
|
|
|
|
|
|
610
|
30547
|
100
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); |
|
|
50
|
|
|
|
|
|
611
|
30547
|
100
|
|
|
|
|
modify = (check && !(check & ENCODE_LEAVE_SRC)); |
|
|
100
|
|
|
|
|
|
612
|
|
|
|
|
|
|
PPCODE: |
613
|
30547
|
100
|
|
|
|
|
if (!SvOK(src)) |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
614
|
4
|
|
|
|
|
|
XSRETURN_UNDEF; |
615
|
30543
|
100
|
|
|
|
|
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
616
|
30543
|
100
|
|
|
|
|
if (SvUTF8(src)) |
617
|
10
|
|
|
|
|
|
utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); |
618
|
30543
|
|
|
|
|
|
e = s+slen; |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
/* |
621
|
|
|
|
|
|
|
* PerlIO check -- we assume the object is of PerlIO if renewed |
622
|
|
|
|
|
|
|
*/ |
623
|
30543
|
|
|
|
|
|
ENTER; SAVETMPS; |
624
|
30543
|
50
|
|
|
|
|
PUSHMARK(sp); |
625
|
30543
|
50
|
|
|
|
|
XPUSHs(obj); |
626
|
30543
|
|
|
|
|
|
PUTBACK; |
627
|
30543
|
50
|
|
|
|
|
if (call_method("renewed",G_SCALAR) == 1) { |
628
|
30543
|
|
|
|
|
|
SPAGAIN; |
629
|
30543
|
50
|
|
|
|
|
renewed = (bool)POPi; |
630
|
30543
|
|
|
|
|
|
PUTBACK; |
631
|
|
|
|
|
|
|
#if 0 |
632
|
|
|
|
|
|
|
fprintf(stderr, "renewed == %d\n", renewed); |
633
|
|
|
|
|
|
|
#endif |
634
|
|
|
|
|
|
|
} |
635
|
30543
|
50
|
|
|
|
|
FREETMPS; LEAVE; |
636
|
|
|
|
|
|
|
/* end PerlIO check */ |
637
|
|
|
|
|
|
|
|
638
|
30543
|
100
|
|
|
|
|
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ |
639
|
30543
|
|
|
|
|
|
s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
/* Clear out translated part of source unless asked not to */ |
642
|
30492
|
100
|
|
|
|
|
if (modify) { |
643
|
45
|
|
|
|
|
|
slen = e-s; |
644
|
45
|
100
|
|
|
|
|
if (slen) { |
645
|
22
|
|
|
|
|
|
sv_setpvn(src, (char*)s, slen); |
646
|
|
|
|
|
|
|
} |
647
|
45
|
|
|
|
|
|
SvCUR_set(src, slen); |
648
|
45
|
100
|
|
|
|
|
SvSETMAGIC(src); |
649
|
|
|
|
|
|
|
} |
650
|
30492
|
|
|
|
|
|
SvUTF8_on(dst); |
651
|
30492
|
100
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
652
|
30492
|
|
|
|
|
|
ST(0) = dst; |
653
|
30496
|
|
|
|
|
|
XSRETURN(1); |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
void |
656
|
|
|
|
|
|
|
Method_encode(obj,src,check_sv = &PL_sv_no) |
657
|
|
|
|
|
|
|
SV * obj |
658
|
|
|
|
|
|
|
SV * src |
659
|
|
|
|
|
|
|
SV * check_sv |
660
|
|
|
|
|
|
|
PREINIT: |
661
|
|
|
|
|
|
|
STRLEN slen; |
662
|
|
|
|
|
|
|
U8 *s; |
663
|
|
|
|
|
|
|
U8 *e; |
664
|
|
|
|
|
|
|
SV *dst; |
665
|
|
|
|
|
|
|
int check; |
666
|
|
|
|
|
|
|
bool modify; |
667
|
|
|
|
|
|
|
INIT: |
668
|
1334
|
100
|
|
|
|
|
SvGETMAGIC(src); |
|
|
50
|
|
|
|
|
|
669
|
1334
|
50
|
|
|
|
|
SvGETMAGIC(check_sv); |
|
|
0
|
|
|
|
|
|
670
|
1334
|
100
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); |
|
|
50
|
|
|
|
|
|
671
|
1334
|
100
|
|
|
|
|
modify = (check && !(check & ENCODE_LEAVE_SRC)); |
|
|
100
|
|
|
|
|
|
672
|
|
|
|
|
|
|
PPCODE: |
673
|
1334
|
100
|
|
|
|
|
if (!SvOK(src)) |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
674
|
5
|
|
|
|
|
|
XSRETURN_UNDEF; |
675
|
1329
|
100
|
|
|
|
|
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
676
|
1329
|
|
|
|
|
|
e = s+slen; |
677
|
1329
|
100
|
|
|
|
|
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ |
678
|
1329
|
100
|
|
|
|
|
if (SvUTF8(src)) { |
679
|
|
|
|
|
|
|
/* Already encoded */ |
680
|
590
|
100
|
|
|
|
|
if (strict_utf8(aTHX_ obj)) { |
681
|
586
|
|
|
|
|
|
s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
else { |
684
|
|
|
|
|
|
|
/* trust it and just copy the octets */ |
685
|
4
|
|
|
|
|
|
sv_setpvn(dst,(char *)s,(e-s)); |
686
|
576
|
|
|
|
|
|
s = e; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
else { |
690
|
|
|
|
|
|
|
/* Native bytes - can always encode */ |
691
|
739
|
50
|
|
|
|
|
U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ |
|
|
100
|
|
|
|
|
|
692
|
1561
|
100
|
|
|
|
|
while (s < e) { |
693
|
|
|
|
|
|
|
#ifdef append_utf8_from_native_byte |
694
|
|
|
|
|
|
|
append_utf8_from_native_byte(*s, &d); |
695
|
|
|
|
|
|
|
s++; |
696
|
|
|
|
|
|
|
#else |
697
|
822
|
|
|
|
|
|
UV uv = NATIVE_TO_UNI((UV) *s); |
698
|
822
|
|
|
|
|
|
s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ |
699
|
822
|
100
|
|
|
|
|
if (UNI_IS_INVARIANT(uv)) |
700
|
813
|
|
|
|
|
|
*d++ = (U8)UTF_TO_NATIVE(uv); |
701
|
|
|
|
|
|
|
else { |
702
|
9
|
|
|
|
|
|
*d++ = (U8)UTF8_EIGHT_BIT_HI(uv); |
703
|
9
|
|
|
|
|
|
*d++ = (U8)UTF8_EIGHT_BIT_LO(uv); |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
#endif |
706
|
|
|
|
|
|
|
} |
707
|
739
|
|
|
|
|
|
SvCUR_set(dst, d- (U8 *)SvPVX(dst)); |
708
|
739
|
|
|
|
|
|
*SvEND(dst) = '\0'; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
/* Clear out translated part of source unless asked not to */ |
712
|
1315
|
100
|
|
|
|
|
if (modify) { |
713
|
11
|
|
|
|
|
|
slen = e-s; |
714
|
11
|
50
|
|
|
|
|
if (slen) { |
715
|
0
|
|
|
|
|
|
sv_setpvn(src, (char*)s, slen); |
716
|
|
|
|
|
|
|
} |
717
|
11
|
|
|
|
|
|
SvCUR_set(src, slen); |
718
|
11
|
100
|
|
|
|
|
SvSETMAGIC(src); |
719
|
|
|
|
|
|
|
} |
720
|
1315
|
|
|
|
|
|
SvPOK_only(dst); |
721
|
1315
|
|
|
|
|
|
SvUTF8_off(dst); |
722
|
1315
|
100
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
723
|
1315
|
|
|
|
|
|
ST(0) = dst; |
724
|
1320
|
|
|
|
|
|
XSRETURN(1); |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
SV * |
731
|
|
|
|
|
|
|
Method_renew(obj) |
732
|
|
|
|
|
|
|
SV * obj |
733
|
|
|
|
|
|
|
CODE: |
734
|
|
|
|
|
|
|
PERL_UNUSED_VAR(obj); |
735
|
22
|
|
|
|
|
|
RETVAL = newSVsv(obj); |
736
|
|
|
|
|
|
|
OUTPUT: |
737
|
|
|
|
|
|
|
RETVAL |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
int |
740
|
|
|
|
|
|
|
Method_renewed(obj) |
741
|
|
|
|
|
|
|
SV * obj |
742
|
|
|
|
|
|
|
CODE: |
743
|
0
|
|
|
|
|
|
RETVAL = 0; |
744
|
|
|
|
|
|
|
PERL_UNUSED_VAR(obj); |
745
|
|
|
|
|
|
|
OUTPUT: |
746
|
|
|
|
|
|
|
RETVAL |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
SV * |
749
|
|
|
|
|
|
|
Method_name(obj) |
750
|
|
|
|
|
|
|
SV * obj |
751
|
|
|
|
|
|
|
PREINIT: |
752
|
|
|
|
|
|
|
encode_t *enc; |
753
|
|
|
|
|
|
|
INIT: |
754
|
807
|
50
|
|
|
|
|
enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
755
|
|
|
|
|
|
|
CODE: |
756
|
807
|
|
|
|
|
|
RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0])); |
757
|
|
|
|
|
|
|
OUTPUT: |
758
|
|
|
|
|
|
|
RETVAL |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
bool |
761
|
|
|
|
|
|
|
Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) |
762
|
|
|
|
|
|
|
SV * obj |
763
|
|
|
|
|
|
|
SV * dst |
764
|
|
|
|
|
|
|
SV * src |
765
|
|
|
|
|
|
|
SV * off |
766
|
|
|
|
|
|
|
SV * term |
767
|
|
|
|
|
|
|
SV * check_sv |
768
|
|
|
|
|
|
|
PREINIT: |
769
|
|
|
|
|
|
|
int check; |
770
|
|
|
|
|
|
|
SV *fallback_cb; |
771
|
|
|
|
|
|
|
bool modify; |
772
|
|
|
|
|
|
|
encode_t *enc; |
773
|
|
|
|
|
|
|
STRLEN offset; |
774
|
0
|
|
|
|
|
|
int code = 0; |
775
|
|
|
|
|
|
|
U8 *s; |
776
|
|
|
|
|
|
|
STRLEN slen; |
777
|
|
|
|
|
|
|
SV *tmp; |
778
|
|
|
|
|
|
|
INIT: |
779
|
0
|
0
|
|
|
|
|
SvGETMAGIC(src); |
|
|
0
|
|
|
|
|
|
780
|
0
|
0
|
|
|
|
|
SvGETMAGIC(check_sv); |
|
|
0
|
|
|
|
|
|
781
|
0
|
0
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); |
|
|
0
|
|
|
|
|
|
782
|
0
|
0
|
|
|
|
|
fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; |
783
|
0
|
0
|
|
|
|
|
modify = (check && !(check & ENCODE_LEAVE_SRC)); |
|
|
0
|
|
|
|
|
|
784
|
0
|
0
|
|
|
|
|
enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
785
|
0
|
0
|
|
|
|
|
offset = (STRLEN)SvIV(off); |
786
|
|
|
|
|
|
|
CODE: |
787
|
0
|
0
|
|
|
|
|
if (!SvOK(src)) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
788
|
0
|
|
|
|
|
|
XSRETURN_NO; |
789
|
0
|
0
|
|
|
|
|
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
790
|
0
|
0
|
|
|
|
|
if (SvUTF8(src)) |
791
|
0
|
|
|
|
|
|
utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); |
792
|
0
|
|
|
|
|
|
tmp = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, |
793
|
|
|
|
|
|
|
&offset, term, &code, fallback_cb); |
794
|
0
|
|
|
|
|
|
sv_catsv(dst, tmp); |
795
|
0
|
|
|
|
|
|
SvREFCNT_dec(tmp); |
796
|
0
|
|
|
|
|
|
SvIV_set(off, (IV)offset); |
797
|
0
|
|
|
|
|
|
RETVAL = (code == ENCODE_FOUND_TERM); |
798
|
|
|
|
|
|
|
OUTPUT: |
799
|
|
|
|
|
|
|
RETVAL |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
SV * |
802
|
|
|
|
|
|
|
Method_decode(obj,src,check_sv = &PL_sv_no) |
803
|
|
|
|
|
|
|
SV * obj |
804
|
|
|
|
|
|
|
SV * src |
805
|
|
|
|
|
|
|
SV * check_sv |
806
|
|
|
|
|
|
|
PREINIT: |
807
|
|
|
|
|
|
|
int check; |
808
|
|
|
|
|
|
|
SV *fallback_cb; |
809
|
|
|
|
|
|
|
bool modify; |
810
|
|
|
|
|
|
|
encode_t *enc; |
811
|
|
|
|
|
|
|
U8 *s; |
812
|
|
|
|
|
|
|
STRLEN slen; |
813
|
|
|
|
|
|
|
INIT: |
814
|
23799
|
100
|
|
|
|
|
SvGETMAGIC(src); |
|
|
50
|
|
|
|
|
|
815
|
23799
|
50
|
|
|
|
|
SvGETMAGIC(check_sv); |
|
|
0
|
|
|
|
|
|
816
|
23799
|
100
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); |
|
|
100
|
|
|
|
|
|
817
|
23799
|
100
|
|
|
|
|
fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; |
818
|
23799
|
100
|
|
|
|
|
modify = (check && !(check & ENCODE_LEAVE_SRC)); |
|
|
100
|
|
|
|
|
|
819
|
23799
|
50
|
|
|
|
|
enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
820
|
|
|
|
|
|
|
CODE: |
821
|
23799
|
100
|
|
|
|
|
if (!SvOK(src)) |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
822
|
107
|
|
|
|
|
|
XSRETURN_UNDEF; |
823
|
23692
|
100
|
|
|
|
|
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
824
|
23692
|
100
|
|
|
|
|
if (SvUTF8(src)) |
825
|
13
|
|
|
|
|
|
utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); |
826
|
23692
|
|
|
|
|
|
RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, |
827
|
|
|
|
|
|
|
NULL, Nullsv, NULL, fallback_cb); |
828
|
23692
|
|
|
|
|
|
SvUTF8_on(RETVAL); |
829
|
|
|
|
|
|
|
OUTPUT: |
830
|
|
|
|
|
|
|
RETVAL |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
SV * |
833
|
|
|
|
|
|
|
Method_encode(obj,src,check_sv = &PL_sv_no) |
834
|
|
|
|
|
|
|
SV * obj |
835
|
|
|
|
|
|
|
SV * src |
836
|
|
|
|
|
|
|
SV * check_sv |
837
|
|
|
|
|
|
|
PREINIT: |
838
|
|
|
|
|
|
|
int check; |
839
|
|
|
|
|
|
|
SV *fallback_cb; |
840
|
|
|
|
|
|
|
bool modify; |
841
|
|
|
|
|
|
|
encode_t *enc; |
842
|
|
|
|
|
|
|
U8 *s; |
843
|
|
|
|
|
|
|
STRLEN slen; |
844
|
|
|
|
|
|
|
INIT: |
845
|
35279
|
100
|
|
|
|
|
SvGETMAGIC(src); |
|
|
50
|
|
|
|
|
|
846
|
35279
|
50
|
|
|
|
|
SvGETMAGIC(check_sv); |
|
|
0
|
|
|
|
|
|
847
|
35279
|
100
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); |
|
|
50
|
|
|
|
|
|
848
|
35279
|
100
|
|
|
|
|
fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; |
849
|
35279
|
100
|
|
|
|
|
modify = (check && !(check & ENCODE_LEAVE_SRC)); |
|
|
100
|
|
|
|
|
|
850
|
35279
|
50
|
|
|
|
|
enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
851
|
|
|
|
|
|
|
CODE: |
852
|
35279
|
100
|
|
|
|
|
if (!SvOK(src)) |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
853
|
107
|
|
|
|
|
|
XSRETURN_UNDEF; |
854
|
35172
|
100
|
|
|
|
|
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
855
|
35172
|
100
|
|
|
|
|
if (!SvUTF8(src)) |
856
|
20
|
|
|
|
|
|
utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); |
857
|
35172
|
|
|
|
|
|
RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, |
858
|
|
|
|
|
|
|
NULL, Nullsv, NULL, fallback_cb); |
859
|
|
|
|
|
|
|
OUTPUT: |
860
|
|
|
|
|
|
|
RETVAL |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
bool |
863
|
|
|
|
|
|
|
Method_needs_lines(obj) |
864
|
|
|
|
|
|
|
SV * obj |
865
|
|
|
|
|
|
|
CODE: |
866
|
|
|
|
|
|
|
PERL_UNUSED_VAR(obj); |
867
|
22
|
|
|
|
|
|
RETVAL = FALSE; |
868
|
|
|
|
|
|
|
OUTPUT: |
869
|
|
|
|
|
|
|
RETVAL |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
bool |
872
|
|
|
|
|
|
|
Method_perlio_ok(obj) |
873
|
|
|
|
|
|
|
SV * obj |
874
|
|
|
|
|
|
|
PREINIT: |
875
|
|
|
|
|
|
|
SV *sv; |
876
|
|
|
|
|
|
|
CODE: |
877
|
|
|
|
|
|
|
PERL_UNUSED_VAR(obj); |
878
|
8
|
|
|
|
|
|
sv = eval_pv("require PerlIO::encoding", 0); |
879
|
8
|
50
|
|
|
|
|
RETVAL = SvTRUE(sv); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
880
|
|
|
|
|
|
|
OUTPUT: |
881
|
|
|
|
|
|
|
RETVAL |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
SV * |
884
|
|
|
|
|
|
|
Method_mime_name(obj) |
885
|
|
|
|
|
|
|
SV * obj |
886
|
|
|
|
|
|
|
PREINIT: |
887
|
|
|
|
|
|
|
encode_t *enc; |
888
|
|
|
|
|
|
|
INIT: |
889
|
110
|
50
|
|
|
|
|
enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
890
|
|
|
|
|
|
|
CODE: |
891
|
110
|
|
|
|
|
|
ENTER; |
892
|
110
|
|
|
|
|
|
SAVETMPS; |
893
|
110
|
50
|
|
|
|
|
PUSHMARK(sp); |
894
|
110
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); |
895
|
110
|
|
|
|
|
|
PUTBACK; |
896
|
110
|
|
|
|
|
|
call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); |
897
|
110
|
|
|
|
|
|
SPAGAIN; |
898
|
110
|
|
|
|
|
|
RETVAL = newSVsv(POPs); |
899
|
110
|
|
|
|
|
|
PUTBACK; |
900
|
110
|
50
|
|
|
|
|
FREETMPS; |
901
|
110
|
|
|
|
|
|
LEAVE; |
902
|
|
|
|
|
|
|
OUTPUT: |
903
|
|
|
|
|
|
|
RETVAL |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
PROTOTYPES: ENABLE |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
I32 |
910
|
|
|
|
|
|
|
_bytes_to_utf8(sv, ...) |
911
|
|
|
|
|
|
|
SV * sv |
912
|
|
|
|
|
|
|
PREINIT: |
913
|
|
|
|
|
|
|
SV * encoding; |
914
|
|
|
|
|
|
|
INIT: |
915
|
0
|
0
|
|
|
|
|
encoding = items == 2 ? ST(1) : Nullsv; |
916
|
|
|
|
|
|
|
CODE: |
917
|
0
|
0
|
|
|
|
|
if (encoding) |
918
|
0
|
0
|
|
|
|
|
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
919
|
|
|
|
|
|
|
else { |
920
|
|
|
|
|
|
|
STRLEN len; |
921
|
0
|
0
|
|
|
|
|
U8* s = (U8*)SvPV(sv, len); |
922
|
|
|
|
|
|
|
U8* converted; |
923
|
|
|
|
|
|
|
|
924
|
0
|
|
|
|
|
|
converted = bytes_to_utf8(s, &len); /* This allocs */ |
925
|
0
|
|
|
|
|
|
sv_setpvn(sv, (char *)converted, len); |
926
|
0
|
|
|
|
|
|
SvUTF8_on(sv); /* XXX Should we? */ |
927
|
0
|
|
|
|
|
|
Safefree(converted); /* ... so free it */ |
928
|
0
|
|
|
|
|
|
RETVAL = len; |
929
|
|
|
|
|
|
|
} |
930
|
|
|
|
|
|
|
OUTPUT: |
931
|
|
|
|
|
|
|
RETVAL |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
I32 |
934
|
|
|
|
|
|
|
_utf8_to_bytes(sv, ...) |
935
|
|
|
|
|
|
|
SV * sv |
936
|
|
|
|
|
|
|
PREINIT: |
937
|
|
|
|
|
|
|
SV * to; |
938
|
|
|
|
|
|
|
SV * check; |
939
|
|
|
|
|
|
|
INIT: |
940
|
0
|
0
|
|
|
|
|
to = items > 1 ? ST(1) : Nullsv; |
941
|
0
|
0
|
|
|
|
|
check = items > 2 ? ST(2) : Nullsv; |
942
|
|
|
|
|
|
|
CODE: |
943
|
0
|
0
|
|
|
|
|
if (to) { |
944
|
0
|
0
|
|
|
|
|
RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
945
|
|
|
|
|
|
|
} else { |
946
|
|
|
|
|
|
|
STRLEN len; |
947
|
0
|
0
|
|
|
|
|
U8 *s = (U8*)SvPV(sv, len); |
948
|
|
|
|
|
|
|
|
949
|
0
|
|
|
|
|
|
RETVAL = 0; |
950
|
0
|
0
|
|
|
|
|
if (SvTRUE(check)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
951
|
|
|
|
|
|
|
/* Must do things the slow way */ |
952
|
|
|
|
|
|
|
U8 *dest; |
953
|
|
|
|
|
|
|
/* We need a copy to pass to check() */ |
954
|
0
|
|
|
|
|
|
U8 *src = s; |
955
|
0
|
|
|
|
|
|
U8 *send = s + len; |
956
|
|
|
|
|
|
|
U8 *d0; |
957
|
|
|
|
|
|
|
|
958
|
0
|
|
|
|
|
|
New(83, dest, len, U8); /* I think */ |
959
|
0
|
|
|
|
|
|
d0 = dest; |
960
|
|
|
|
|
|
|
|
961
|
0
|
0
|
|
|
|
|
while (s < send) { |
962
|
0
|
0
|
|
|
|
|
if (*s < 0x80){ |
963
|
0
|
|
|
|
|
|
*dest++ = *s++; |
964
|
|
|
|
|
|
|
} else { |
965
|
|
|
|
|
|
|
STRLEN ulen; |
966
|
0
|
|
|
|
|
|
UV uv = *s++; |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
/* Have to do it all ourselves because of error routine, |
969
|
|
|
|
|
|
|
aargh. */ |
970
|
0
|
0
|
|
|
|
|
if (!(uv & 0x40)){ goto failure; } |
971
|
0
|
0
|
|
|
|
|
if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
972
|
0
|
0
|
|
|
|
|
else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
973
|
0
|
0
|
|
|
|
|
else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
974
|
0
|
0
|
|
|
|
|
else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
975
|
0
|
0
|
|
|
|
|
else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
976
|
0
|
0
|
|
|
|
|
else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
977
|
0
|
|
|
|
|
|
else { ulen = 13; uv = 0; } |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
/* Note change to utf8.c variable naming, for variety */ |
980
|
0
|
0
|
|
|
|
|
while (ulen--) { |
981
|
0
|
0
|
|
|
|
|
if ((*s & 0xc0) != 0x80){ |
982
|
0
|
|
|
|
|
|
goto failure; |
983
|
|
|
|
|
|
|
} else { |
984
|
0
|
|
|
|
|
|
uv = (uv << 6) | (*s++ & 0x3f); |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
} |
987
|
0
|
0
|
|
|
|
|
if (uv > 256) { |
988
|
|
|
|
|
|
|
failure: |
989
|
0
|
|
|
|
|
|
call_failure(check, s, dest, src); |
990
|
|
|
|
|
|
|
/* Now what happens? */ |
991
|
|
|
|
|
|
|
} |
992
|
0
|
|
|
|
|
|
*dest++ = (U8)uv; |
993
|
|
|
|
|
|
|
} |
994
|
|
|
|
|
|
|
} |
995
|
0
|
|
|
|
|
|
RETVAL = dest - d0; |
996
|
0
|
|
|
|
|
|
sv_usepvn(sv, (char *)dest, RETVAL); |
997
|
0
|
|
|
|
|
|
SvUTF8_off(sv); |
998
|
|
|
|
|
|
|
} else { |
999
|
0
|
0
|
|
|
|
|
RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
1000
|
|
|
|
|
|
|
} |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
OUTPUT: |
1003
|
|
|
|
|
|
|
RETVAL |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
bool |
1006
|
|
|
|
|
|
|
is_utf8(sv, check = 0) |
1007
|
|
|
|
|
|
|
SV * sv |
1008
|
|
|
|
|
|
|
int check |
1009
|
|
|
|
|
|
|
PREINIT: |
1010
|
|
|
|
|
|
|
char *str; |
1011
|
|
|
|
|
|
|
STRLEN len; |
1012
|
|
|
|
|
|
|
CODE: |
1013
|
654
|
100
|
|
|
|
|
SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ |
|
|
50
|
|
|
|
|
|
1014
|
654
|
100
|
|
|
|
|
str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1015
|
654
|
|
|
|
|
|
RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
1016
|
654
|
100
|
|
|
|
|
if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1017
|
0
|
|
|
|
|
|
RETVAL = FALSE; |
1018
|
|
|
|
|
|
|
OUTPUT: |
1019
|
|
|
|
|
|
|
RETVAL |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
SV * |
1022
|
|
|
|
|
|
|
_utf8_on(sv) |
1023
|
|
|
|
|
|
|
SV * sv |
1024
|
|
|
|
|
|
|
CODE: |
1025
|
8
|
100
|
|
|
|
|
SvGETMAGIC(sv); |
|
|
50
|
|
|
|
|
|
1026
|
8
|
100
|
|
|
|
|
if (!SvTAINTED(sv) && SvPOKp(sv)) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1027
|
6
|
100
|
|
|
|
|
if (SvTHINKFIRST(sv)) sv_force_normal(sv); |
1028
|
6
|
100
|
|
|
|
|
RETVAL = boolSV(SvUTF8(sv)); |
1029
|
6
|
|
|
|
|
|
SvUTF8_on(sv); |
1030
|
6
|
100
|
|
|
|
|
SvSETMAGIC(sv); |
1031
|
|
|
|
|
|
|
} else { |
1032
|
2
|
|
|
|
|
|
RETVAL = &PL_sv_undef; |
1033
|
|
|
|
|
|
|
} |
1034
|
|
|
|
|
|
|
OUTPUT: |
1035
|
|
|
|
|
|
|
RETVAL |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
SV * |
1038
|
|
|
|
|
|
|
_utf8_off(sv) |
1039
|
|
|
|
|
|
|
SV * sv |
1040
|
|
|
|
|
|
|
CODE: |
1041
|
6
|
100
|
|
|
|
|
SvGETMAGIC(sv); |
|
|
50
|
|
|
|
|
|
1042
|
6
|
100
|
|
|
|
|
if (!SvTAINTED(sv) && SvPOKp(sv)) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1043
|
4
|
100
|
|
|
|
|
if (SvTHINKFIRST(sv)) sv_force_normal(sv); |
1044
|
4
|
100
|
|
|
|
|
RETVAL = boolSV(SvUTF8(sv)); |
1045
|
4
|
|
|
|
|
|
SvUTF8_off(sv); |
1046
|
4
|
100
|
|
|
|
|
SvSETMAGIC(sv); |
1047
|
|
|
|
|
|
|
} else { |
1048
|
2
|
|
|
|
|
|
RETVAL = &PL_sv_undef; |
1049
|
|
|
|
|
|
|
} |
1050
|
|
|
|
|
|
|
OUTPUT: |
1051
|
|
|
|
|
|
|
RETVAL |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
void |
1054
|
|
|
|
|
|
|
onBOOT() |
1055
|
|
|
|
|
|
|
CODE: |
1056
|
|
|
|
|
|
|
{ |
1057
|
|
|
|
|
|
|
#include "def_t.exh" |
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
BOOT: |
1061
|
|
|
|
|
|
|
{ |
1062
|
38
|
|
|
|
|
|
HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD); |
1063
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "DIE_ON_ERR", newSViv(ENCODE_DIE_ON_ERR)); |
1064
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "WARN_ON_ERR", newSViv(ENCODE_WARN_ON_ERR)); |
1065
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "RETURN_ON_ERR", newSViv(ENCODE_RETURN_ON_ERR)); |
1066
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "LEAVE_SRC", newSViv(ENCODE_LEAVE_SRC)); |
1067
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "PERLQQ", newSViv(ENCODE_PERLQQ)); |
1068
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "HTMLCREF", newSViv(ENCODE_HTMLCREF)); |
1069
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "XMLCREF", newSViv(ENCODE_XMLCREF)); |
1070
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "STOP_AT_PARTIAL", newSViv(ENCODE_STOP_AT_PARTIAL)); |
1071
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_DEFAULT", newSViv(ENCODE_FB_DEFAULT)); |
1072
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_CROAK", newSViv(ENCODE_FB_CROAK)); |
1073
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_QUIET", newSViv(ENCODE_FB_QUIET)); |
1074
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_WARN", newSViv(ENCODE_FB_WARN)); |
1075
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_PERLQQ", newSViv(ENCODE_FB_PERLQQ)); |
1076
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF)); |
1077
|
38
|
|
|
|
|
|
newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF)); |
1078
|
|
|
|
|
|
|
} |