| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
2
|
|
|
|
|
|
|
#include "perl.h" |
|
3
|
|
|
|
|
|
|
#include "XSUB.h" |
|
4
|
|
|
|
|
|
|
#include "perliol.h" |
|
5
|
|
|
|
|
|
|
#include "ppport.h" |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
#define UTF8_MAX_BYTES 4 |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
static const U8 xs_utf8_sequence_len[0x100] = { |
|
10
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x0F */ |
|
11
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x10-0x1F */ |
|
12
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x20-0x2F */ |
|
13
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x30-0x3F */ |
|
14
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x40-0x4F */ |
|
15
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x50-0x5F */ |
|
16
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x60-0x6F */ |
|
17
|
|
|
|
|
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x70-0x7F */ |
|
18
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8F */ |
|
19
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9F */ |
|
20
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xAF */ |
|
21
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xB0-0xBF */ |
|
22
|
|
|
|
|
|
|
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xC0-0xCF */ |
|
23
|
|
|
|
|
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xD0-0xDF */ |
|
24
|
|
|
|
|
|
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* 0xE0-0xEF */ |
|
25
|
|
|
|
|
|
|
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, /* 0xF0-0xFF */ |
|
26
|
|
|
|
|
|
|
}; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
typedef enum { STRICT_UTF8=0, ALLOW_SURROGATES=1, ALLOW_NONCHARACTERS=2, ALLOW_NONSHORTEST=4 } utf8_flags; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
|
32
|
3634
|
|
|
|
|
|
static STRLEN skip_sequence(const U8 *cur, const STRLEN len) { |
|
33
|
3634
|
|
|
|
|
|
STRLEN i, n = xs_utf8_sequence_len[*cur]; |
|
34
|
|
|
|
|
|
|
|
|
35
|
3634
|
100
|
|
|
|
|
if (n < 1 || len < 2) |
|
|
|
100
|
|
|
|
|
|
|
36
|
1035
|
|
|
|
|
|
return 1; |
|
37
|
|
|
|
|
|
|
|
|
38
|
2599
|
|
|
|
|
|
switch (cur[0]) { |
|
39
|
2
|
50
|
|
|
|
|
case 0xE0: if ((cur[1] & 0xE0) != 0xA0) return 1; break; |
|
40
|
2050
|
100
|
|
|
|
|
case 0xED: if ((cur[1] & 0xE0) != 0x80) return 1; break; |
|
41
|
33
|
100
|
|
|
|
|
case 0xF4: if ((cur[1] & 0xF0) != 0x80) return 1; break; |
|
42
|
100
|
100
|
|
|
|
|
case 0xF0: if ((cur[1] & 0xF0) == 0x80) return 1; /* FALLTROUGH */ |
|
43
|
510
|
50
|
|
|
|
|
default: if ((cur[1] & 0xC0) != 0x80) return 1; break; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
544
|
50
|
|
|
|
|
if (n > len) |
|
47
|
544
|
|
|
|
|
|
n = len; |
|
48
|
1056
|
100
|
|
|
|
|
for (i = 2; i < n; i++) |
|
49
|
512
|
50
|
|
|
|
|
if ((cur[i] & 0xC0) != 0x80) |
|
50
|
0
|
|
|
|
|
|
break; |
|
51
|
544
|
|
|
|
|
|
return i; |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
3360
|
|
|
|
|
|
static void report_illformed(pTHX_ const U8 *cur, STRLEN len, bool eof) { |
|
55
|
|
|
|
|
|
|
static const char *hex = "0123456789ABCDEF"; |
|
56
|
|
|
|
|
|
|
const char *fmt; |
|
57
|
|
|
|
|
|
|
char seq[UTF8_MAX_BYTES * 3]; |
|
58
|
3360
|
|
|
|
|
|
char *d = seq; |
|
59
|
|
|
|
|
|
|
|
|
60
|
3360
|
100
|
|
|
|
|
if (eof) |
|
61
|
272
|
|
|
|
|
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s> at end of file"; |
|
62
|
|
|
|
|
|
|
else |
|
63
|
3088
|
|
|
|
|
|
fmt = "Can't decode ill-formed UTF-8 octet sequence <%s>"; |
|
64
|
|
|
|
|
|
|
|
|
65
|
7247
|
100
|
|
|
|
|
while (len-- > 0) { |
|
66
|
3887
|
|
|
|
|
|
const U8 c = *cur++; |
|
67
|
3887
|
|
|
|
|
|
*d++ = hex[c >> 4]; |
|
68
|
3887
|
|
|
|
|
|
*d++ = hex[c & 15]; |
|
69
|
3887
|
100
|
|
|
|
|
if (len) |
|
70
|
527
|
|
|
|
|
|
*d++ = ' '; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
3360
|
|
|
|
|
|
*d = 0; |
|
73
|
3360
|
|
|
|
|
|
Perl_croak(aTHX_ fmt, seq); |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
66
|
|
|
|
|
|
static void report_noncharacter(pTHX_ UV usv) { |
|
77
|
|
|
|
|
|
|
static const char *fmt = "Can't interchange noncharacter code point U+%"UVXf; |
|
78
|
66
|
|
|
|
|
|
Perl_croak(aTHX_ fmt, usv); |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
6090
|
|
|
|
|
|
static STRLEN validate(pTHX_ const U8 *buf, const U8 *end, const int flags, PerlIO* handle) { |
|
82
|
6090
|
|
|
|
|
|
const bool eof = PerlIO_eof(handle); |
|
83
|
6090
|
|
|
|
|
|
const U8 *cur = buf; |
|
84
|
6090
|
|
|
|
|
|
const U8 *end4 = end - UTF8_MAX_BYTES; |
|
85
|
6090
|
|
|
|
|
|
STRLEN skip = 0; |
|
86
|
|
|
|
|
|
|
U32 v; |
|
87
|
|
|
|
|
|
|
|
|
88
|
17203
|
100
|
|
|
|
|
while (cur < end4) { |
|
89
|
12761
|
100
|
|
|
|
|
while (cur < end4 && *cur < 0x80) |
|
|
|
100
|
|
|
|
|
|
|
90
|
3013
|
|
|
|
|
|
cur++; |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
check: |
|
93
|
14267
|
|
|
|
|
|
switch (xs_utf8_sequence_len[*cur]) { |
|
94
|
|
|
|
|
|
|
case 0: |
|
95
|
1033
|
|
|
|
|
|
goto illformed; |
|
96
|
|
|
|
|
|
|
case 1: |
|
97
|
11
|
|
|
|
|
|
cur += 1; |
|
98
|
11
|
|
|
|
|
|
break; |
|
99
|
|
|
|
|
|
|
case 2: |
|
100
|
|
|
|
|
|
|
/* 110xxxxx 10xxxxxx */ |
|
101
|
313
|
50
|
|
|
|
|
if ((cur[1] & 0xC0) != 0x80) |
|
102
|
0
|
|
|
|
|
|
goto illformed; |
|
103
|
313
|
|
|
|
|
|
cur += 2; |
|
104
|
313
|
|
|
|
|
|
break; |
|
105
|
|
|
|
|
|
|
case 3: |
|
106
|
25170
|
|
|
|
|
|
v = ((U32)cur[0] << 16) |
|
107
|
12585
|
|
|
|
|
|
| ((U32)cur[1] << 8) |
|
108
|
12585
|
|
|
|
|
|
| ((U32)cur[2]); |
|
109
|
|
|
|
|
|
|
/* 1110xxxx 10xxxxxx 10xxxxxx */ |
|
110
|
12585
|
50
|
|
|
|
|
if ((v & 0x00F0C0C0) != 0x00E08080 || |
|
|
|
100
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
/* Non-shortest form */ |
|
112
|
|
|
|
|
|
|
v < 0x00E0A080) |
|
113
|
|
|
|
|
|
|
goto illformed; |
|
114
|
|
|
|
|
|
|
/* Surrogates U+D800..U+DFFF */ |
|
115
|
12583
|
100
|
|
|
|
|
if (!(flags & ALLOW_SURROGATES) && (v & 0x00EFA080) == 0x00EDA080) |
|
|
|
100
|
|
|
|
|
|
|
116
|
2048
|
|
|
|
|
|
goto illformed; |
|
117
|
|
|
|
|
|
|
/* Non-characters U+FDD0..U+FDEF, U+FFFE..U+FFFF */ |
|
118
|
10535
|
100
|
|
|
|
|
if (!(flags & ALLOW_NONCHARACTERS) && v >= 0x00EFB790 && (v <= 0x00EFB7AF || v >= 0x00EFBFBE)) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
goto noncharacter; |
|
120
|
10501
|
|
|
|
|
|
cur += 3; |
|
121
|
10501
|
|
|
|
|
|
break; |
|
122
|
|
|
|
|
|
|
case 4: |
|
123
|
650
|
|
|
|
|
|
v = ((U32)cur[0] << 24) |
|
124
|
325
|
|
|
|
|
|
| ((U32)cur[1] << 16) |
|
125
|
325
|
|
|
|
|
|
| ((U32)cur[2] << 8) |
|
126
|
325
|
|
|
|
|
|
| ((U32)cur[3]); |
|
127
|
|
|
|
|
|
|
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
|
128
|
325
|
50
|
|
|
|
|
if ((v & 0xF8C0C0C0) != 0xF0808080 || |
|
|
|
100
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
/* Non-shortest form */ |
|
130
|
321
|
100
|
|
|
|
|
v < 0xF0908080 || |
|
131
|
|
|
|
|
|
|
/* Greater than U+10FFFF */ |
|
132
|
|
|
|
|
|
|
v > 0xF48FBFBF) |
|
133
|
|
|
|
|
|
|
goto illformed; |
|
134
|
|
|
|
|
|
|
/* Non-characters U+nFFFE..U+nFFFF on plane 1-16 */ |
|
135
|
320
|
100
|
|
|
|
|
if (!(flags & ALLOW_NONCHARACTERS) && (v & 0x000FBFBE) == 0x000FBFBE) |
|
|
|
100
|
|
|
|
|
|
|
136
|
32
|
|
|
|
|
|
goto noncharacter; |
|
137
|
288
|
|
|
|
|
|
cur += 4; |
|
138
|
288
|
|
|
|
|
|
break; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
7455
|
100
|
|
|
|
|
if (cur < end) { |
|
143
|
5065
|
100
|
|
|
|
|
if (cur + xs_utf8_sequence_len[*cur] <= end) |
|
144
|
4519
|
|
|
|
|
|
goto check; |
|
145
|
546
|
|
|
|
|
|
skip = skip_sequence(cur, end - cur); |
|
146
|
546
|
100
|
|
|
|
|
if (eof || cur + skip < end) |
|
|
|
50
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
goto illformed; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
2664
|
|
|
|
|
|
return cur - buf; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
illformed: |
|
152
|
3360
|
100
|
|
|
|
|
if (!skip) |
|
153
|
3088
|
|
|
|
|
|
skip = skip_sequence(cur, end - cur); |
|
154
|
3360
|
|
|
|
|
|
PerlIOBase(handle)->flags |= PERLIO_F_ERROR; |
|
155
|
3360
|
|
|
|
|
|
report_illformed(aTHX_ cur, skip, eof); |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
noncharacter: |
|
158
|
66
|
100
|
|
|
|
|
if (v < 0xF0808080) |
|
159
|
34
|
|
|
|
|
|
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x0F0000) >> 4; |
|
160
|
|
|
|
|
|
|
else |
|
161
|
32
|
|
|
|
|
|
v = (v & 0x3F) | (v & 0x3F00) >> 2 | (v & 0x3F0000) >> 4 | (v & 0x07000000) >> 6; |
|
162
|
66
|
|
|
|
|
|
PerlIOBase(handle)->flags |= PERLIO_F_ERROR; |
|
163
|
66
|
|
|
|
|
|
report_noncharacter(aTHX_ v); |
|
164
|
0
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
typedef struct { |
|
167
|
|
|
|
|
|
|
PerlIOBuf buf; |
|
168
|
|
|
|
|
|
|
STDCHAR leftovers[UTF8_MAX_BYTES]; |
|
169
|
|
|
|
|
|
|
size_t leftover_length; |
|
170
|
|
|
|
|
|
|
utf8_flags flags; |
|
171
|
|
|
|
|
|
|
} PerlIOUnicode; |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
static struct { |
|
174
|
|
|
|
|
|
|
const char* name; |
|
175
|
|
|
|
|
|
|
size_t length; |
|
176
|
|
|
|
|
|
|
utf8_flags value; |
|
177
|
|
|
|
|
|
|
} map[] = { |
|
178
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_surrogates"), ALLOW_SURROGATES }, |
|
179
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_noncharacters"), ALLOW_NONCHARACTERS }, |
|
180
|
|
|
|
|
|
|
{ STR_WITH_LEN("allow_nonshortest"), ALLOW_NONSHORTEST }, |
|
181
|
|
|
|
|
|
|
{ STR_WITH_LEN("strict"), 0 }, |
|
182
|
|
|
|
|
|
|
{ STR_WITH_LEN("loose"), ALLOW_SURROGATES | ALLOW_NONCHARACTERS | ALLOW_NONSHORTEST }, |
|
183
|
|
|
|
|
|
|
}; |
|
184
|
|
|
|
|
|
|
|
|
185
|
2114
|
|
|
|
|
|
static utf8_flags lookup_parameter(pTHX_ const char* ptr, size_t len) { |
|
186
|
|
|
|
|
|
|
unsigned i; |
|
187
|
2180
|
50
|
|
|
|
|
for (i = 0; i < sizeof map / sizeof *map; ++i) { |
|
188
|
2180
|
100
|
|
|
|
|
if (map[i].length == len && memcmp(ptr, map[i].name, len) == 0) |
|
|
|
50
|
|
|
|
|
|
|
189
|
2114
|
|
|
|
|
|
return map[i].value; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
0
|
|
|
|
|
|
Perl_croak(aTHX_ "Unknown argument to :utf8_strict: %*s", (int)len, ptr); |
|
192
|
|
|
|
|
|
|
} |
|
193
|
5817
|
|
|
|
|
|
static utf8_flags parse_parameters(pTHX_ SV* param) { |
|
194
|
|
|
|
|
|
|
STRLEN len; |
|
195
|
|
|
|
|
|
|
const char *begin, *delim; |
|
196
|
5817
|
50
|
|
|
|
|
if (!param || !SvOK(param)) |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
197
|
3703
|
|
|
|
|
|
return 0; |
|
198
|
|
|
|
|
|
|
|
|
199
|
2114
|
50
|
|
|
|
|
begin = SvPV(param, len); |
|
200
|
2114
|
|
|
|
|
|
delim = strchr(begin, ','); |
|
201
|
2114
|
50
|
|
|
|
|
if(delim) { |
|
202
|
0
|
|
|
|
|
|
utf8_flags ret = 0; |
|
203
|
0
|
|
|
|
|
|
const char* end = begin + len; |
|
204
|
|
|
|
|
|
|
do { |
|
205
|
0
|
|
|
|
|
|
ret |= lookup_parameter(aTHX_ begin, delim - begin); |
|
206
|
0
|
|
|
|
|
|
begin = delim + 1; |
|
207
|
0
|
|
|
|
|
|
delim = strchr(begin, ','); |
|
208
|
0
|
0
|
|
|
|
|
} while (delim); |
|
209
|
0
|
0
|
|
|
|
|
if (begin < end) |
|
210
|
0
|
|
|
|
|
|
ret |= lookup_parameter(aTHX_ begin, end - begin); |
|
211
|
0
|
|
|
|
|
|
return ret; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
else { |
|
214
|
5817
|
|
|
|
|
|
return lookup_parameter(aTHX_ begin, len); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
#define line_buffered(flags) ((flags & (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) == (PERLIO_F_LINEBUF | PERLIO_F_CANWRITE)) |
|
219
|
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
void PerlIOBase_flush_linebuf(pTHX) { |
|
221
|
|
|
|
|
|
|
#ifdef dVAR |
|
222
|
|
|
|
|
|
|
dVAR; |
|
223
|
|
|
|
|
|
|
#endif |
|
224
|
0
|
|
|
|
|
|
PerlIOl **table = &PL_perlio; |
|
225
|
|
|
|
|
|
|
PerlIOl *f; |
|
226
|
0
|
0
|
|
|
|
|
while ((f = *table)) { |
|
227
|
|
|
|
|
|
|
int i; |
|
228
|
0
|
|
|
|
|
|
table = (PerlIOl **) (f++); |
|
229
|
0
|
0
|
|
|
|
|
for (i = 1; i < 64; i++) { |
|
230
|
0
|
0
|
|
|
|
|
if (f->next && line_buffered(PerlIOBase(&(f->next))->flags)) |
|
|
|
0
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
PerlIO_flush(&(f->next)); |
|
232
|
0
|
|
|
|
|
|
f++; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
} |
|
235
|
0
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
5817
|
|
|
|
|
|
static IV PerlIOUnicode_pushed(pTHX_ PerlIO* f, const char* mode, SV* arg, PerlIO_funcs* tab) { |
|
238
|
5817
|
|
|
|
|
|
utf8_flags flags = parse_parameters(aTHX_ arg); |
|
239
|
5817
|
50
|
|
|
|
|
if (PerlIOBuf_pushed(aTHX_ f, mode, arg, tab) == 0) { |
|
240
|
5817
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_UTF8; |
|
241
|
5817
|
|
|
|
|
|
PerlIOSelf(f, PerlIOUnicode)->flags = flags; |
|
242
|
5817
|
|
|
|
|
|
return 0; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
0
|
|
|
|
|
|
return -1; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
8478
|
|
|
|
|
|
static IV PerlIOUnicode_fill(pTHX_ PerlIO* f) { |
|
248
|
8478
|
|
|
|
|
|
PerlIOUnicode * const u = PerlIOSelf(f, PerlIOUnicode); |
|
249
|
8478
|
|
|
|
|
|
PerlIOBuf * const b = &u->buf; |
|
250
|
8478
|
|
|
|
|
|
PerlIO *n = PerlIONext(f); |
|
251
|
|
|
|
|
|
|
SSize_t avail; |
|
252
|
8478
|
|
|
|
|
|
Size_t read_bytes = 0; |
|
253
|
|
|
|
|
|
|
STDCHAR *end; |
|
254
|
|
|
|
|
|
|
SSize_t fit; |
|
255
|
|
|
|
|
|
|
|
|
256
|
8478
|
50
|
|
|
|
|
if (PerlIO_flush(f) != 0) |
|
257
|
0
|
|
|
|
|
|
return -1; |
|
258
|
8478
|
50
|
|
|
|
|
if (PerlIOBase(f)->flags & PERLIO_F_TTY) |
|
259
|
0
|
|
|
|
|
|
PerlIOBase_flush_linebuf(aTHX); |
|
260
|
|
|
|
|
|
|
|
|
261
|
8478
|
50
|
|
|
|
|
if (!b->buf) |
|
262
|
0
|
|
|
|
|
|
PerlIO_get_base(f); |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
assert(b->buf); |
|
265
|
|
|
|
|
|
|
|
|
266
|
8478
|
100
|
|
|
|
|
if (u->leftover_length) { |
|
267
|
274
|
|
|
|
|
|
Copy(u->leftovers, b->buf, u->leftover_length, STDCHAR); |
|
268
|
274
|
|
|
|
|
|
b->end = b->buf + u->leftover_length; |
|
269
|
274
|
|
|
|
|
|
read_bytes = u->leftover_length; |
|
270
|
274
|
|
|
|
|
|
u->leftover_length = 0; |
|
271
|
|
|
|
|
|
|
} |
|
272
|
|
|
|
|
|
|
else { |
|
273
|
8204
|
|
|
|
|
|
b->ptr = b->end = b->buf; |
|
274
|
|
|
|
|
|
|
} |
|
275
|
8478
|
|
|
|
|
|
fit = (SSize_t)b->bufsiz - (b->end - b->buf); |
|
276
|
|
|
|
|
|
|
|
|
277
|
8478
|
50
|
|
|
|
|
if (!PerlIOValid(n)) { |
|
|
|
50
|
|
|
|
|
|
|
278
|
0
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_EOF; |
|
279
|
0
|
|
|
|
|
|
return -1; |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
|
|
282
|
8478
|
50
|
|
|
|
|
if (PerlIO_fast_gets(n)) { |
|
283
|
|
|
|
|
|
|
/* |
|
284
|
|
|
|
|
|
|
* Layer below is also buffered. We do _NOT_ want to call its |
|
285
|
|
|
|
|
|
|
* ->Read() because that will loop till it gets what we asked for |
|
286
|
|
|
|
|
|
|
* which may hang on a pipe etc. Instead take anything it has to |
|
287
|
|
|
|
|
|
|
* hand, or ask it to fill _once_. |
|
288
|
|
|
|
|
|
|
*/ |
|
289
|
8478
|
|
|
|
|
|
avail = PerlIO_get_cnt(n); |
|
290
|
8478
|
50
|
|
|
|
|
if (avail <= 0) { |
|
291
|
8478
|
|
|
|
|
|
avail = PerlIO_fill(n); |
|
292
|
8478
|
100
|
|
|
|
|
if (avail == 0) |
|
293
|
5818
|
|
|
|
|
|
avail = PerlIO_get_cnt(n); |
|
294
|
|
|
|
|
|
|
else { |
|
295
|
2660
|
50
|
|
|
|
|
if (!PerlIO_error(n) && PerlIO_eof(n)) |
|
|
|
50
|
|
|
|
|
|
|
296
|
2660
|
|
|
|
|
|
avail = 0; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
} |
|
299
|
8478
|
100
|
|
|
|
|
if (avail > 0) { |
|
300
|
5818
|
|
|
|
|
|
STDCHAR *ptr = PerlIO_get_ptr(n); |
|
301
|
5818
|
|
|
|
|
|
const SSize_t cnt = avail; |
|
302
|
5818
|
100
|
|
|
|
|
if (avail > fit) |
|
303
|
1
|
|
|
|
|
|
avail = fit; |
|
304
|
5818
|
|
|
|
|
|
Copy(ptr, b->end, avail, STDCHAR); |
|
305
|
5818
|
|
|
|
|
|
PerlIO_set_ptrcnt(n, ptr + avail, cnt - avail); |
|
306
|
8478
|
|
|
|
|
|
read_bytes += avail; |
|
307
|
|
|
|
|
|
|
} |
|
308
|
|
|
|
|
|
|
} |
|
309
|
|
|
|
|
|
|
else { |
|
310
|
0
|
|
|
|
|
|
avail = PerlIO_read(n, b->end, fit); |
|
311
|
0
|
0
|
|
|
|
|
if (avail > 0) |
|
312
|
0
|
|
|
|
|
|
read_bytes += avail; |
|
313
|
|
|
|
|
|
|
} |
|
314
|
8478
|
100
|
|
|
|
|
if (avail <= 0) { |
|
315
|
2660
|
50
|
|
|
|
|
if (avail < 0 || (read_bytes == 0 && PerlIO_eof(n))) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
316
|
2388
|
50
|
|
|
|
|
PerlIOBase(f)->flags |= (avail == 0) ? PERLIO_F_EOF : PERLIO_F_ERROR; |
|
317
|
2388
|
|
|
|
|
|
return -1; |
|
318
|
|
|
|
|
|
|
} |
|
319
|
|
|
|
|
|
|
} |
|
320
|
6090
|
|
|
|
|
|
end = b->buf + read_bytes; |
|
321
|
6090
|
|
|
|
|
|
b->end = b->buf + validate(aTHX_ (const U8 *)b->buf, (const U8 *)end, u->flags, n); |
|
322
|
2664
|
100
|
|
|
|
|
if (b->end < end) { |
|
323
|
274
|
|
|
|
|
|
size_t len = b->buf + read_bytes - b->end; |
|
324
|
274
|
|
|
|
|
|
Copy(b->end, u->leftovers, len, char); |
|
325
|
274
|
|
|
|
|
|
u->leftover_length = len; |
|
326
|
|
|
|
|
|
|
} |
|
327
|
2664
|
|
|
|
|
|
PerlIOBase(f)->flags |= PERLIO_F_RDBUF; |
|
328
|
|
|
|
|
|
|
|
|
329
|
2664
|
|
|
|
|
|
return 0; |
|
330
|
|
|
|
|
|
|
} |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
PERLIO_FUNCS_DECL(PerlIO_utf8_strict) = { |
|
333
|
|
|
|
|
|
|
sizeof(PerlIO_funcs), |
|
334
|
|
|
|
|
|
|
"utf8_strict", |
|
335
|
|
|
|
|
|
|
sizeof(PerlIOUnicode), |
|
336
|
|
|
|
|
|
|
PERLIO_K_BUFFERED|PERLIO_K_UTF8, |
|
337
|
|
|
|
|
|
|
PerlIOUnicode_pushed, |
|
338
|
|
|
|
|
|
|
PerlIOBuf_popped, |
|
339
|
|
|
|
|
|
|
PerlIOBuf_open, |
|
340
|
|
|
|
|
|
|
PerlIOBase_binmode, |
|
341
|
|
|
|
|
|
|
NULL, |
|
342
|
|
|
|
|
|
|
PerlIOBase_fileno, |
|
343
|
|
|
|
|
|
|
PerlIOBuf_dup, |
|
344
|
|
|
|
|
|
|
PerlIOBuf_read, |
|
345
|
|
|
|
|
|
|
PerlIOBase_unread, |
|
346
|
|
|
|
|
|
|
PerlIOBuf_write, |
|
347
|
|
|
|
|
|
|
PerlIOBuf_seek, |
|
348
|
|
|
|
|
|
|
PerlIOBuf_tell, |
|
349
|
|
|
|
|
|
|
PerlIOBuf_close, |
|
350
|
|
|
|
|
|
|
PerlIOBuf_flush, |
|
351
|
|
|
|
|
|
|
PerlIOUnicode_fill, |
|
352
|
|
|
|
|
|
|
PerlIOBase_eof, |
|
353
|
|
|
|
|
|
|
PerlIOBase_error, |
|
354
|
|
|
|
|
|
|
PerlIOBase_clearerr, |
|
355
|
|
|
|
|
|
|
PerlIOBase_setlinebuf, |
|
356
|
|
|
|
|
|
|
PerlIOBuf_get_base, |
|
357
|
|
|
|
|
|
|
PerlIOBuf_bufsiz, |
|
358
|
|
|
|
|
|
|
PerlIOBuf_get_ptr, |
|
359
|
|
|
|
|
|
|
PerlIOBuf_get_cnt, |
|
360
|
|
|
|
|
|
|
PerlIOBuf_set_ptrcnt, |
|
361
|
|
|
|
|
|
|
}; |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
MODULE = PerlIO::utf8_strict |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
BOOT: |
|
368
|
8
|
|
|
|
|
|
PerlIO_define_layer(aTHX_ (PerlIO_funcs*)&PerlIO_utf8_strict); |
|
369
|
|
|
|
|
|
|
|