| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
2
|
|
|
|
|
|
|
#include "perl.h" |
|
3
|
|
|
|
|
|
|
#include "XSUB.h" |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
/*
 * Pin the version string of this XS module. Any XS_VERSION that was
 * already defined (for instance by the build system) is discarded first.
 */
#if defined(XS_VERSION)
#  undef XS_VERSION
#endif
#define XS_VERSION "2.500"
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
/* Punycode parameter values (see RFC 3492, section 5). */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 128

/*
 * True when x, reduced to a single byte, is a UTF-8 invariant.
 * The argument is parenthesised so that the cast applies to the whole
 * expression passed in, not just to its first operand.
 */
#define isBASE(x) UTF8_IS_INVARIANT((unsigned char)(x))

/* Separator written between the copied basic code points and the encoded part. */
#define DELIM '-'

/*
 * Clamp t into the range TMIN..TMAX.
 * Note: t is evaluated more than once, so pass an expression without side effects.
 */
#define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t))
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/*
 * Fallback for perls that do not provide utf8_to_uvchr_buf(): forward to
 * utf8_to_uvchr(), which takes no end-of-buffer argument (in_e is ignored).
 * The expansion deliberately carries no trailing semicolon so the macro
 * can be used as an ordinary expression.
 */
#ifndef utf8_to_uvchr_buf
#define utf8_to_uvchr_buf(in_p,in_e,u8) utf8_to_uvchr((in_p),(u8))
#endif
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
/*
 * Digit value -> output character: values 0..25 map to 'a'..'z',
 * values 26..35 map to '0'..'9'. The table is only ever read.
 */
static const char enc_digit[BASE] = {
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
};
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
/*
 * Input byte (0x00..0x7F) -> digit value, or -1 when the byte is not a
 * digit. Upper- and lower-case letters both map to 0..25, '0'..'9' map
 * to 26..35. The table is only ever read.
 */
static const IV dec_digit[0x80] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40..4F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 60..6F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */
};
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
static int adapt(int delta, int numpoints, int first) { |
|
45
|
|
|
|
|
|
|
int k; |
|
46
|
|
|
|
|
|
|
|
|
47
|
22587
|
100
|
|
|
|
|
delta /= first ? DAMP : 2; |
|
|
|
100
|
|
|
|
|
|
|
48
|
22587
|
|
|
|
|
|
delta += delta/numpoints; |
|
49
|
|
|
|
|
|
|
|
|
50
|
41035
|
100
|
|
|
|
|
for(k=0; delta > ((BASE-TMIN) * TMAX)/2; k += BASE) |
|
|
|
100
|
|
|
|
|
|
|
51
|
18448
|
|
|
|
|
|
delta /= BASE-TMIN; |
|
52
|
|
|
|
|
|
|
|
|
53
|
22587
|
|
|
|
|
|
return k + (((BASE-TMIN+1) * delta) / (delta+SKEW)); |
|
54
|
|
|
|
|
|
|
}; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
static void |
|
57
|
124540
|
|
|
|
|
|
grow_string(SV *const sv, char **start, char **current, char **end, STRLEN add) |
|
58
|
|
|
|
|
|
|
{ |
|
59
|
|
|
|
|
|
|
STRLEN len; |
|
60
|
|
|
|
|
|
|
|
|
61
|
124540
|
100
|
|
|
|
|
if(*current + add <= *end) |
|
62
|
|
|
|
|
|
|
return; |
|
63
|
|
|
|
|
|
|
|
|
64
|
22
|
|
|
|
|
|
len = (*current - *start); |
|
65
|
22
|
50
|
|
|
|
|
*start = SvGROW(sv, (len + add + 15) & ~15); |
|
|
|
50
|
|
|
|
|
|
|
66
|
22
|
|
|
|
|
|
*current = *start + len; |
|
67
|
22
|
|
|
|
|
|
*end = *start + SvLEN(sv); |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
SV* |
|
73
|
|
|
|
|
|
|
encode_punycode(input) |
|
74
|
|
|
|
|
|
|
SV * input |
|
75
|
|
|
|
|
|
|
PREINIT: |
|
76
|
|
|
|
|
|
|
UV c, m, n = INITIAL_N; |
|
77
|
|
|
|
|
|
|
int k, q, t; |
|
78
|
|
|
|
|
|
|
int bias = INITIAL_BIAS; |
|
79
|
|
|
|
|
|
|
int delta = 0, skip_delta; |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
const char *in_s, *in_p, *in_e, *skip_p; |
|
82
|
|
|
|
|
|
|
char *re_s, *re_p, *re_e; |
|
83
|
|
|
|
|
|
|
int first = 1; |
|
84
|
|
|
|
|
|
|
STRLEN length_guess, len, h, u8; |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
CODE: |
|
87
|
1735
|
100
|
|
|
|
|
in_s = in_p = SvPVutf8(input, len); |
|
88
|
1735
|
|
|
|
|
|
in_e = in_s + len; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
length_guess = len; |
|
91
|
1735
|
100
|
|
|
|
|
if(length_guess < 64) length_guess = 64; /* optimise for maximum length of domain names */ |
|
92
|
1735
|
|
|
|
|
|
length_guess += 2; /* plus DELIM + '\0' */ |
|
93
|
|
|
|
|
|
|
|
|
94
|
1735
|
|
|
|
|
|
RETVAL = NEWSV('P',length_guess); |
|
95
|
1735
|
|
|
|
|
|
SvPOK_only(RETVAL); |
|
96
|
1735
|
50
|
|
|
|
|
re_s = re_p = SvPV_nolen(RETVAL); |
|
97
|
1735
|
|
|
|
|
|
re_e = re_s + SvLEN(RETVAL); |
|
98
|
|
|
|
|
|
|
h = 0; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
/* copy basic code points */ |
|
101
|
18065
|
100
|
|
|
|
|
while(in_p < in_e) { |
|
102
|
16330
|
100
|
|
|
|
|
if( isBASE(*in_p) ) { |
|
103
|
7282
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
|
104
|
7282
|
|
|
|
|
|
*re_p++ = *in_p; |
|
105
|
7282
|
|
|
|
|
|
h++; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
16330
|
|
|
|
|
|
in_p++; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
/* add DELIM if needed */ |
|
111
|
1735
|
100
|
|
|
|
|
if(h) { |
|
112
|
455
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
|
113
|
1735
|
|
|
|
|
|
*re_p++ = DELIM; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
for(;;) { |
|
117
|
|
|
|
|
|
|
/* find smallest code point not yet handled */ |
|
118
|
|
|
|
|
|
|
m = UV_MAX; |
|
119
|
|
|
|
|
|
|
q = skip_delta = 0; |
|
120
|
|
|
|
|
|
|
|
|
121
|
36383
|
100
|
|
|
|
|
for(in_p = skip_p = in_s; in_p < in_e;) { |
|
122
|
31390
|
50
|
|
|
|
|
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8); |
|
123
|
|
|
|
|
|
|
c = NATIVE_TO_UNI(c); |
|
124
|
|
|
|
|
|
|
|
|
125
|
31390
|
100
|
|
|
|
|
if(c >= n && c < m) { |
|
126
|
|
|
|
|
|
|
m = c; |
|
127
|
|
|
|
|
|
|
skip_p = in_p; |
|
128
|
|
|
|
|
|
|
skip_delta = q; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
31390
|
100
|
|
|
|
|
if(c < n) |
|
131
|
24390
|
|
|
|
|
|
++q; |
|
132
|
31390
|
|
|
|
|
|
in_p += u8; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
4993
|
100
|
|
|
|
|
if(m == UV_MAX) |
|
135
|
|
|
|
|
|
|
break; |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
/* increase delta to the state corresponding to |
|
138
|
|
|
|
|
|
|
the m code point at the beginning of the string */ |
|
139
|
3258
|
|
|
|
|
|
delta += (m-n) * (h+1); |
|
140
|
|
|
|
|
|
|
n = m; |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
/* now find the chars to be encoded in this round */ |
|
143
|
|
|
|
|
|
|
|
|
144
|
3258
|
|
|
|
|
|
delta += skip_delta; |
|
145
|
16691
|
100
|
|
|
|
|
for(in_p = skip_p; in_p < in_e;) { |
|
146
|
13433
|
50
|
|
|
|
|
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8); |
|
147
|
|
|
|
|
|
|
c = NATIVE_TO_UNI(c); |
|
148
|
|
|
|
|
|
|
|
|
149
|
13433
|
100
|
|
|
|
|
if(c < n) { |
|
150
|
7904
|
|
|
|
|
|
++delta; |
|
151
|
5529
|
100
|
|
|
|
|
} else if( c == n ) { |
|
152
|
|
|
|
|
|
|
q = delta; |
|
153
|
|
|
|
|
|
|
|
|
154
|
6793
|
|
|
|
|
|
for(k = BASE;; k += BASE) { |
|
155
|
10156
|
100
|
|
|
|
|
t = TMIN_MAX(k - bias); |
|
156
|
10156
|
100
|
|
|
|
|
if(q < t) break; |
|
157
|
6793
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
|
158
|
6793
|
|
|
|
|
|
*re_p++ = enc_digit[t + ((q-t) % (BASE-t))]; |
|
159
|
6793
|
|
|
|
|
|
q = (q-t) / (BASE-t); |
|
160
|
6793
|
|
|
|
|
|
} |
|
161
|
3363
|
50
|
|
|
|
|
if(q > BASE) croak("input exceeds punycode limit"); |
|
162
|
3363
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
|
163
|
3363
|
|
|
|
|
|
*re_p++ = enc_digit[q]; |
|
164
|
3363
|
|
|
|
|
|
bias = adapt(delta, h+1, first); |
|
165
|
|
|
|
|
|
|
delta = first = 0; |
|
166
|
3363
|
|
|
|
|
|
++h; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
13433
|
|
|
|
|
|
in_p += u8; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
3258
|
|
|
|
|
|
++delta; |
|
171
|
3258
|
|
|
|
|
|
++n; |
|
172
|
3258
|
|
|
|
|
|
} |
|
173
|
1735
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
|
174
|
1735
|
|
|
|
|
|
*re_p = 0; |
|
175
|
1735
|
|
|
|
|
|
SvCUR_set(RETVAL, re_p - re_s); |
|
176
|
|
|
|
|
|
|
OUTPUT: |
|
177
|
|
|
|
|
|
|
RETVAL |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
SV* |
|
180
|
|
|
|
|
|
|
decode_punycode(input) |
|
181
|
|
|
|
|
|
|
SV * input |
|
182
|
|
|
|
|
|
|
PREINIT: |
|
183
|
|
|
|
|
|
|
UV c, n = INITIAL_N; |
|
184
|
|
|
|
|
|
|
IV dc; |
|
185
|
|
|
|
|
|
|
int i = 0, oldi, j, k, t, w; |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
int bias = INITIAL_BIAS; |
|
188
|
|
|
|
|
|
|
int delta = 0, skip_delta; |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
const char *in_s, *in_p, *in_e, *skip_p; |
|
191
|
|
|
|
|
|
|
char *re_s, *re_p, *re_e; |
|
192
|
|
|
|
|
|
|
int first = 1; |
|
193
|
|
|
|
|
|
|
STRLEN length_guess, len, h, u8; |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
CODE: |
|
196
|
8401
|
100
|
|
|
|
|
in_s = in_p = SvPV_nolen(input); |
|
197
|
8401
|
|
|
|
|
|
in_e = SvEND(input); |
|
198
|
|
|
|
|
|
|
|
|
199
|
8401
|
|
|
|
|
|
length_guess = SvCUR(input) * 2; |
|
200
|
8401
|
50
|
|
|
|
|
if(length_guess < 256) length_guess = 256; |
|
201
|
|
|
|
|
|
|
|
|
202
|
8401
|
|
|
|
|
|
RETVAL = NEWSV('D',length_guess); |
|
203
|
8401
|
|
|
|
|
|
SvPOK_only(RETVAL); |
|
204
|
8401
|
50
|
|
|
|
|
re_s = re_p = SvPV_nolen(RETVAL); |
|
205
|
8401
|
|
|
|
|
|
re_e = re_s + SvLEN(RETVAL); |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
skip_p = NULL; |
|
208
|
85694
|
100
|
|
|
|
|
for(in_p = in_s; in_p < in_e; in_p++) { |
|
209
|
77293
|
|
|
|
|
|
c = *in_p; /* we don't care whether it's UTF-8 */ |
|
210
|
77293
|
50
|
|
|
|
|
if(!isBASE(c)) croak("non-base character in input for decode_punycode"); |
|
211
|
77293
|
100
|
|
|
|
|
if(c == DELIM) skip_p = in_p; |
|
212
|
77293
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, 1); |
|
213
|
77293
|
|
|
|
|
|
*re_p++ = c; /* copy it */ |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
|
|
216
|
8401
|
100
|
|
|
|
|
if(skip_p) { |
|
217
|
2061
|
|
|
|
|
|
h = skip_p - in_s; /* base chars handled */ |
|
218
|
2061
|
|
|
|
|
|
re_p = re_s + h; /* points to end of base chars */ |
|
219
|
8401
|
|
|
|
|
|
skip_p++; /* skip over DELIM */ |
|
220
|
|
|
|
|
|
|
} else { |
|
221
|
|
|
|
|
|
|
h = 0; /* no base chars */ |
|
222
|
6340
|
|
|
|
|
|
re_p = re_s; |
|
223
|
|
|
|
|
|
|
skip_p = in_s; /* read everything */ |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
27625
|
100
|
|
|
|
|
for(in_p = skip_p; in_p < in_e; i++) { |
|
227
|
|
|
|
|
|
|
oldi = i; |
|
228
|
|
|
|
|
|
|
w = 1; |
|
229
|
|
|
|
|
|
|
|
|
230
|
51247
|
|
|
|
|
|
for(k = BASE;; k+= BASE) { |
|
231
|
70477
|
100
|
|
|
|
|
if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode"); |
|
232
|
70471
|
|
|
|
|
|
dc = dec_digit[*in_p++]; /* we already know it's in 0..127 */ |
|
233
|
70471
|
50
|
|
|
|
|
if(dc < 0) croak("invalid digit in input for decode_punycode"); |
|
234
|
70471
|
|
|
|
|
|
c = (UV)dc; |
|
235
|
70471
|
|
|
|
|
|
i += c * w; |
|
236
|
70471
|
100
|
|
|
|
|
t = TMIN_MAX(k - bias); |
|
237
|
70471
|
100
|
|
|
|
|
if(c < t) break; |
|
238
|
51247
|
|
|
|
|
|
w *= BASE-t; |
|
239
|
51247
|
|
|
|
|
|
} |
|
240
|
19224
|
|
|
|
|
|
h++; |
|
241
|
19224
|
|
|
|
|
|
bias = adapt(i-oldi, h, first); |
|
242
|
|
|
|
|
|
|
first = 0; |
|
243
|
19224
|
|
|
|
|
|
n += i / h; /* code point n to insert */ |
|
244
|
19224
|
|
|
|
|
|
i = i % h; /* at position i */ |
|
245
|
|
|
|
|
|
|
|
|
246
|
19224
|
50
|
|
|
|
|
u8 = UNISKIP(n); /* how many bytes we need */ |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
j = i; |
|
249
|
36412
|
100
|
|
|
|
|
for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */ |
|
250
|
17188
|
|
|
|
|
|
skip_p+=UTF8SKIP(skip_p); |
|
251
|
|
|
|
|
|
|
|
|
252
|
19224
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, u8); |
|
253
|
19224
|
100
|
|
|
|
|
if(skip_p < re_p) /* move succeeding chars */ |
|
254
|
7777
|
|
|
|
|
|
Move(skip_p, skip_p + u8, re_p - skip_p, char); |
|
255
|
19224
|
|
|
|
|
|
re_p += u8; |
|
256
|
19224
|
|
|
|
|
|
uvuni_to_utf8_flags((U8*)skip_p, n, UNICODE_ALLOW_ANY); |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
|
|
259
|
8395
|
50
|
|
|
|
|
if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */ |
|
260
|
8395
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, 1); |
|
261
|
8395
|
|
|
|
|
|
*re_p = 0; |
|
262
|
8395
|
|
|
|
|
|
SvCUR_set(RETVAL, re_p - re_s); |
|
263
|
|
|
|
|
|
|
OUTPUT: |
|
264
|
|
|
|
|
|
|
RETVAL |