| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Unicode::String; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright 1997-1999, Gisle Aas. |
|
4
|
|
|
|
|
|
|
|
|
5
|
5
|
|
|
5
|
|
4786
|
use strict; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
124
|
|
|
6
|
5
|
|
|
5
|
|
15
|
use vars qw($VERSION @ISA @EXPORT_OK $UTF7_OPTIONAL_DIRECT_CHARS); |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
259
|
|
|
7
|
5
|
|
|
5
|
|
15
|
use Carp; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
680
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
|
10
|
|
|
|
|
|
|
require DynaLoader; |
|
11
|
|
|
|
|
|
|
@ISA = qw(Exporter DynaLoader); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
|
14
|
|
|
|
|
|
|
utf16 utf16le utf16be ucs2 |
|
15
|
|
|
|
|
|
|
utf8 |
|
16
|
|
|
|
|
|
|
utf7 |
|
17
|
|
|
|
|
|
|
ucs4 utf32 utf32be utf32le |
|
18
|
|
|
|
|
|
|
latin1 |
|
19
|
|
|
|
|
|
|
uchr uhex |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
byteswap2 byteswap4 |
|
22
|
|
|
|
|
|
|
); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$VERSION = '2.10'; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$UTF7_OPTIONAL_DIRECT_CHARS ||= 1; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
bootstrap Unicode::String $VERSION; |
|
29
|
|
|
|
|
|
|
|
|
30
|
5
|
|
|
|
|
39
|
use overload '""' => \&as_string, |
|
31
|
|
|
|
|
|
|
'bool' => \&as_bool, |
|
32
|
|
|
|
|
|
|
'0+' => \&as_num, |
|
33
|
|
|
|
|
|
|
'.=' => \&append, |
|
34
|
|
|
|
|
|
|
'.' => \&concat, |
|
35
|
|
|
|
|
|
|
'x' => \&repeat, |
|
36
|
|
|
|
|
|
|
'=' => \©, |
|
37
|
5
|
|
|
5
|
|
4462
|
'fallback' => 1; |
|
|
5
|
|
|
|
|
3570
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my %stringify = ( |
|
40
|
|
|
|
|
|
|
unicode => \&utf16, |
|
41
|
|
|
|
|
|
|
utf16 => \&utf16, |
|
42
|
|
|
|
|
|
|
utf16be => \&utf16, |
|
43
|
|
|
|
|
|
|
utf16le => \&utf16le, |
|
44
|
|
|
|
|
|
|
ucs2 => \&utf16, |
|
45
|
|
|
|
|
|
|
utf8 => \&utf8, |
|
46
|
|
|
|
|
|
|
utf7 => \&utf7, |
|
47
|
|
|
|
|
|
|
ucs4 => \&ucs4, |
|
48
|
|
|
|
|
|
|
utf32 => \&ucs4, |
|
49
|
|
|
|
|
|
|
utf32be => \&ucs4, |
|
50
|
|
|
|
|
|
|
utf32le => \&utf32le, |
|
51
|
|
|
|
|
|
|
latin1 => \&latin1, |
|
52
|
|
|
|
|
|
|
'hex' => \&hex, |
|
53
|
|
|
|
|
|
|
); |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $stringify_as = \&utf8; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# some aliases |
|
58
|
|
|
|
|
|
|
*ucs2 = \&utf16; |
|
59
|
|
|
|
|
|
|
*utf16be = \&utf16; |
|
60
|
|
|
|
|
|
|
*utf32 = \&ucs4; |
|
61
|
|
|
|
|
|
|
*utf32be = \&ucs4; |
|
62
|
|
|
|
|
|
|
*uhex = \&hex; |
|
63
|
|
|
|
|
|
|
*uchr = \&chr; |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub new |
|
66
|
|
|
|
|
|
|
{ |
|
67
|
|
|
|
|
|
|
#_dump_arg("new", @_); |
|
68
|
54
|
|
|
54
|
1
|
796
|
my $class = shift; |
|
69
|
54
|
|
|
|
|
38
|
my $str; |
|
70
|
54
|
|
|
|
|
58
|
my $self = bless \$str, $class; |
|
71
|
54
|
100
|
|
|
|
83
|
&$stringify_as($self, shift) if @_; |
|
72
|
54
|
|
|
|
|
59
|
$self; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub repeat |
|
77
|
|
|
|
|
|
|
{ |
|
78
|
1
|
|
|
1
|
1
|
6
|
my($self, $count) = @_; |
|
79
|
1
|
|
|
|
|
1
|
my $class = ref($self); |
|
80
|
1
|
|
|
|
|
3
|
my $str = $$self x $count; |
|
81
|
1
|
|
|
|
|
2
|
bless \$str, $class; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub _dump_arg |
|
86
|
|
|
|
|
|
|
{ |
|
87
|
0
|
|
|
0
|
|
0
|
my $func = shift; |
|
88
|
0
|
|
|
|
|
0
|
print "$func("; |
|
89
|
0
|
0
|
|
|
|
0
|
print join(",", map { if (defined $_) { |
|
|
0
|
|
|
|
|
0
|
|
|
90
|
0
|
|
|
|
|
0
|
my $x = overload::StrVal($_); |
|
91
|
0
|
|
|
|
|
0
|
$x =~ s/\n/\\n/g; |
|
92
|
0
|
0
|
|
|
|
0
|
$x = '""' unless length $x; |
|
93
|
0
|
|
|
|
|
0
|
$x; |
|
94
|
|
|
|
|
|
|
} else { |
|
95
|
0
|
|
|
|
|
0
|
"undef" |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
} @_); |
|
98
|
0
|
|
|
|
|
0
|
print ")\n"; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub concat |
|
103
|
|
|
|
|
|
|
{ |
|
104
|
|
|
|
|
|
|
#_dump_arg("concat", @_); |
|
105
|
4
|
|
|
4
|
1
|
21
|
my($self, $other, $reversed) = @_; |
|
106
|
4
|
|
|
|
|
5
|
my $class = ref($self); |
|
107
|
4
|
100
|
|
|
|
10
|
unless (UNIVERSAL::isa($other, 'Unicode::String')) { |
|
108
|
2
|
|
|
|
|
4
|
$other = Unicode::String->new($other); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
4
|
100
|
|
|
|
9
|
my $str = $reversed ? $$other . $$self : $$self . $$other; |
|
111
|
4
|
|
|
|
|
9
|
bless \$str, $class; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub append |
|
116
|
|
|
|
|
|
|
{ |
|
117
|
|
|
|
|
|
|
#_dump_arg("append", @_); |
|
118
|
20
|
|
|
20
|
1
|
56
|
my($self, $other) = @_; |
|
119
|
20
|
50
|
|
|
|
43
|
unless (UNIVERSAL::isa($other, 'Unicode::String')) { |
|
120
|
0
|
|
|
|
|
0
|
$other = Unicode::String->new($other); |
|
121
|
|
|
|
|
|
|
} |
|
122
|
20
|
|
|
|
|
66
|
$$self .= $$other; |
|
123
|
20
|
|
|
|
|
31
|
$self; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub copy |
|
128
|
|
|
|
|
|
|
{ |
|
129
|
1
|
|
|
1
|
1
|
14
|
my($self) = @_; |
|
130
|
1
|
|
|
|
|
2
|
my $class = ref($self); |
|
131
|
1
|
|
|
|
|
2
|
my $copy = $$self; |
|
132
|
1
|
|
|
|
|
2
|
bless \$copy, $class; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub as_string |
|
137
|
|
|
|
|
|
|
{ |
|
138
|
|
|
|
|
|
|
#_dump_arg("as_string", @_); |
|
139
|
3
|
|
|
3
|
1
|
16
|
&$stringify_as($_[0]); |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub as_bool |
|
144
|
|
|
|
|
|
|
{ |
|
145
|
|
|
|
|
|
|
# This is different from perl's normal behaviour by not letting |
|
146
|
|
|
|
|
|
|
# a U+0030 ("0") be false. |
|
147
|
3
|
|
|
3
|
1
|
12
|
my $self = shift; |
|
148
|
3
|
100
|
|
|
|
16
|
$$self ? 1 : ""; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub as_num |
|
153
|
|
|
|
|
|
|
{ |
|
154
|
|
|
|
|
|
|
# Should be able to use the numeric property from Unidata |
|
155
|
|
|
|
|
|
|
# in order to parse a large number of numbers. Currently we |
|
156
|
|
|
|
|
|
|
# only convert it to a plain string and let perl's normal |
|
157
|
|
|
|
|
|
|
# num-converter do the job. |
|
158
|
1
|
|
|
1
|
1
|
10
|
my $self = shift; |
|
159
|
1
|
|
|
|
|
2
|
my $str = $self->utf8; |
|
160
|
1
|
|
|
|
|
2
|
$str + 0; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub stringify_as |
|
165
|
|
|
|
|
|
|
{ |
|
166
|
3
|
|
|
3
|
1
|
21
|
my $class; |
|
167
|
3
|
50
|
|
|
|
6
|
if (@_ > 1) { |
|
168
|
0
|
|
|
|
|
0
|
$class = shift; |
|
169
|
0
|
0
|
|
|
|
0
|
$class = ref($class) if ref($class); |
|
170
|
|
|
|
|
|
|
} else { |
|
171
|
3
|
|
|
|
|
3
|
$class = "Unicode::String"; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
3
|
|
|
|
|
3
|
my $old = $stringify_as; |
|
174
|
3
|
50
|
|
|
|
5
|
if (@_) { |
|
175
|
3
|
|
|
|
|
2
|
my $as = shift; |
|
176
|
|
|
|
|
|
|
croak("Don't know how to stringify as '$as'") |
|
177
|
3
|
100
|
|
|
|
156
|
unless exists $stringify{$as}; |
|
178
|
2
|
|
|
|
|
4
|
$stringify_as = $stringify{$as}; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
2
|
|
|
|
|
2
|
$old; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub utf16 |
|
185
|
|
|
|
|
|
|
{ |
|
186
|
78
|
|
|
78
|
0
|
255
|
my $self = shift; |
|
187
|
78
|
100
|
|
|
|
105
|
unless (ref $self) { |
|
188
|
29
|
|
|
|
|
37
|
my $u = new Unicode::String; |
|
189
|
29
|
|
|
|
|
41
|
$u->utf16($self); |
|
190
|
29
|
|
|
|
|
91
|
return $u; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
49
|
|
|
|
|
45
|
my $old = $$self; |
|
193
|
49
|
100
|
|
|
|
63
|
if (@_) { |
|
194
|
31
|
|
|
|
|
31
|
$$self = shift; |
|
195
|
31
|
100
|
|
|
|
50
|
if ((length($$self) % 2) != 0) { |
|
196
|
1
|
50
|
|
|
|
17
|
warn "Uneven UTF16 data" if $^W; |
|
197
|
1
|
|
|
|
|
82
|
$$self .= "\0"; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
31
|
100
|
|
|
|
54
|
if ($$self =~ /^\xFF\xFE/) { |
|
200
|
|
|
|
|
|
|
# the string needs byte swapping |
|
201
|
2
|
|
|
|
|
5
|
$$self = byteswap2($$self); |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
} |
|
204
|
49
|
|
|
|
|
61
|
$old; |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub utf16le |
|
209
|
|
|
|
|
|
|
{ |
|
210
|
3
|
|
|
3
|
1
|
5
|
my $self = shift; |
|
211
|
3
|
100
|
|
|
|
7
|
unless (ref $self) { |
|
212
|
2
|
|
|
|
|
4
|
my $u = new Unicode::String; |
|
213
|
2
|
|
|
|
|
7
|
$u->utf16(byteswap2($self)); |
|
214
|
2
|
|
|
|
|
3
|
return $u; |
|
215
|
|
|
|
|
|
|
} |
|
216
|
1
|
|
|
|
|
3
|
my $old = byteswap2($$self); |
|
217
|
1
|
50
|
|
|
|
6
|
if (@_) { |
|
218
|
0
|
|
|
|
|
0
|
$self->utf16(byteswap2(shift)); |
|
219
|
|
|
|
|
|
|
} |
|
220
|
1
|
|
|
|
|
3
|
$old; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub utf32le |
|
225
|
|
|
|
|
|
|
{ |
|
226
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
227
|
2
|
100
|
|
|
|
5
|
unless (ref $self) { |
|
228
|
1
|
|
|
|
|
4
|
my $u = new Unicode::String; |
|
229
|
1
|
|
|
|
|
5
|
$u->ucs4(byteswap4($self)); |
|
230
|
1
|
|
|
|
|
2
|
return $u; |
|
231
|
|
|
|
|
|
|
} |
|
232
|
1
|
|
|
|
|
8
|
my $old = byteswap4($self->ucs4); |
|
233
|
1
|
50
|
|
|
|
5
|
if (@_) { |
|
234
|
0
|
|
|
|
|
0
|
$self->ucs4(byteswap4(shift)); |
|
235
|
|
|
|
|
|
|
} |
|
236
|
1
|
|
|
|
|
3
|
$old; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub utf7 # rfc1642 |
|
241
|
|
|
|
|
|
|
{ |
|
242
|
32
|
|
|
32
|
1
|
547
|
my $self = shift; |
|
243
|
32
|
100
|
|
|
|
46
|
unless (ref $self) { |
|
244
|
|
|
|
|
|
|
# act as ctor |
|
245
|
10
|
|
|
|
|
18
|
my $u = new Unicode::String; |
|
246
|
10
|
|
|
|
|
22
|
$u->utf7($self); |
|
247
|
10
|
|
|
|
|
18
|
return $u; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
22
|
|
|
|
|
16
|
my $old; |
|
250
|
22
|
100
|
|
|
|
51
|
if (defined wantarray) { |
|
251
|
|
|
|
|
|
|
# encode into $old |
|
252
|
12
|
|
|
|
|
8
|
$old = ""; |
|
253
|
12
|
|
|
|
|
22
|
pos($$self) = 0; |
|
254
|
12
|
|
|
|
|
14
|
my $len = length($$self); |
|
255
|
12
|
|
|
|
|
18
|
while (pos($$self) < $len) { |
|
256
|
42
|
100
|
100
|
|
|
308
|
if (($UTF7_OPTIONAL_DIRECT_CHARS && |
|
|
|
50
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
257
|
|
|
|
|
|
|
$$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gc) |
|
258
|
|
|
|
|
|
|
|| $$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gc) |
|
259
|
|
|
|
|
|
|
{ |
|
260
|
|
|
|
|
|
|
#print "Plain ", utf16($1)->latin1, "\n"; |
|
261
|
19
|
|
|
|
|
25
|
$old .= utf16($1)->latin1; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
elsif (($UTF7_OPTIONAL_DIRECT_CHARS && |
|
264
|
|
|
|
|
|
|
$$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gsc) |
|
265
|
|
|
|
|
|
|
|| $$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gsc) |
|
266
|
|
|
|
|
|
|
{ |
|
267
|
|
|
|
|
|
|
#print "Unplain ", utf16($1)->hex, "\n"; |
|
268
|
23
|
100
|
|
|
|
37
|
if ($1 eq "\0+") { |
|
269
|
2
|
|
|
|
|
4
|
$old .= "+-"; |
|
270
|
|
|
|
|
|
|
} else { |
|
271
|
21
|
|
|
|
|
535
|
require MIME::Base64; |
|
272
|
21
|
|
|
|
|
567
|
my $base64 = MIME::Base64::encode($1, ''); |
|
273
|
21
|
|
|
|
|
40
|
$base64 =~ s/=+$//; |
|
274
|
21
|
|
|
|
|
48
|
$old .= "+$base64-"; |
|
275
|
|
|
|
|
|
|
# XXX should we determine when the final "-" is |
|
276
|
|
|
|
|
|
|
# unnecessary? depends on next char not being part |
|
277
|
|
|
|
|
|
|
# of the base64 char set. |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
} else { |
|
280
|
0
|
|
|
|
|
0
|
die "This should not happen, pos=" . pos($$self) . |
|
281
|
|
|
|
|
|
|
": " . $self->hex . "\n"; |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
22
|
100
|
|
|
|
30
|
if (@_) { |
|
287
|
|
|
|
|
|
|
# decode |
|
288
|
11
|
|
|
|
|
10
|
my $len = length($_[0]); |
|
289
|
11
|
|
|
|
|
11
|
$$self = ""; |
|
290
|
11
|
|
|
|
|
14
|
pos($_[0]) = 0; |
|
291
|
11
|
|
|
|
|
19
|
while (pos($_[0]) < $len) { |
|
292
|
40
|
100
|
|
|
|
117
|
if ($_[0] =~ /\G([^+]+)/gc) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
293
|
18
|
|
|
|
|
60
|
$self->append(latin1($1)); |
|
294
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+-/gc) { |
|
295
|
2
|
|
|
|
|
4
|
$$self .= "\0+"; |
|
296
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+([A-Za-z0-9+\/]+)-?/gc) { |
|
297
|
20
|
|
|
|
|
22
|
my $base64 = $1; |
|
298
|
20
|
|
|
|
|
20
|
my $pad = length($base64) % 4; |
|
299
|
20
|
100
|
|
|
|
32
|
$base64 .= "=" x (4 - $pad) if $pad; |
|
300
|
20
|
|
|
|
|
50
|
require MIME::Base64; |
|
301
|
20
|
|
|
|
|
36
|
$$self .= MIME::Base64::decode($base64); |
|
302
|
20
|
50
|
|
|
|
45
|
if ((length($$self) % 2) != 0) { |
|
303
|
0
|
0
|
|
|
|
0
|
warn "Uneven UTF7 base64-data" if $^W; |
|
304
|
0
|
|
|
|
|
0
|
chop($$self); # correct it |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+/gc) { |
|
307
|
0
|
0
|
|
|
|
0
|
warn "Bad UTF7 data escape" if $^W; |
|
308
|
0
|
|
|
|
|
0
|
$$self .= "\0+"; |
|
309
|
|
|
|
|
|
|
} else { |
|
310
|
0
|
|
|
|
|
0
|
die "This should not happen " . pos($_[0]); |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
} |
|
314
|
22
|
|
|
|
|
37
|
$old; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub hex |
|
319
|
|
|
|
|
|
|
{ |
|
320
|
27
|
|
|
27
|
1
|
330
|
my $self = shift; |
|
321
|
27
|
50
|
|
|
|
51
|
unless (ref $self) { |
|
322
|
0
|
|
|
|
|
0
|
my $u = new Unicode::String; |
|
323
|
0
|
|
|
|
|
0
|
$u->hex($self); |
|
324
|
0
|
|
|
|
|
0
|
return $u; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
27
|
|
|
|
|
19
|
my $old; |
|
327
|
27
|
50
|
33
|
|
|
134
|
if (defined($$self) && defined wantarray) { |
|
328
|
27
|
|
|
|
|
53
|
$old = unpack("H*", $$self); |
|
329
|
27
|
|
|
|
|
188
|
$old =~ s/(....)/U+$1 /g; |
|
330
|
27
|
|
|
|
|
94
|
$old =~ s/\s+$//; |
|
331
|
|
|
|
|
|
|
} |
|
332
|
27
|
50
|
|
|
|
45
|
if (@_) { |
|
333
|
0
|
|
|
|
|
0
|
my $new = shift; |
|
334
|
0
|
|
|
|
|
0
|
$new =~ tr/0-9A-Fa-f//cd; # leave only hex chars |
|
335
|
0
|
0
|
|
|
|
0
|
croak("Hex string length must be multiple of four") |
|
336
|
|
|
|
|
|
|
unless (length($new) % 4) == 0; |
|
337
|
0
|
|
|
|
|
0
|
$$self = pack("H*", $new); |
|
338
|
|
|
|
|
|
|
} |
|
339
|
27
|
|
|
|
|
141
|
$old; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub length |
|
344
|
|
|
|
|
|
|
{ |
|
345
|
6
|
|
|
6
|
1
|
1699
|
my $self = shift; |
|
346
|
6
|
|
|
|
|
56
|
int(length($$self) / 2); |
|
347
|
|
|
|
|
|
|
} |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub byteswap |
|
350
|
|
|
|
|
|
|
{ |
|
351
|
1
|
|
|
1
|
1
|
62
|
my $self = shift; |
|
352
|
1
|
|
|
|
|
42
|
byteswap2($$self); |
|
353
|
1
|
|
|
|
|
1
|
$self; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub unpack |
|
357
|
|
|
|
|
|
|
{ |
|
358
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
359
|
0
|
|
|
|
|
0
|
unpack("n*", $$self) |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub pack |
|
364
|
|
|
|
|
|
|
{ |
|
365
|
6
|
|
|
6
|
1
|
36
|
my $self = shift; |
|
366
|
6
|
|
|
|
|
16
|
$$self = pack("n*", @_); |
|
367
|
6
|
|
|
|
|
6
|
$self; |
|
368
|
|
|
|
|
|
|
} |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub ord |
|
372
|
|
|
|
|
|
|
{ |
|
373
|
12
|
|
|
12
|
1
|
53
|
my $self = shift; |
|
374
|
12
|
50
|
|
|
|
16
|
return () unless defined $$self; |
|
375
|
|
|
|
|
|
|
|
|
376
|
12
|
|
|
|
|
11
|
my $array = wantarray; |
|
377
|
12
|
|
|
|
|
10
|
my @ret; |
|
378
|
|
|
|
|
|
|
my @chars; |
|
379
|
12
|
100
|
|
|
|
13
|
if ($array) { |
|
380
|
4
|
|
|
|
|
7
|
@chars = CORE::unpack("n*", $$self); |
|
381
|
|
|
|
|
|
|
} else { |
|
382
|
8
|
|
|
|
|
11
|
@chars = CORE::unpack("n2", $$self); |
|
383
|
|
|
|
|
|
|
} |
|
384
|
|
|
|
|
|
|
|
|
385
|
12
|
|
|
|
|
19
|
while (@chars) { |
|
386
|
20
|
|
|
|
|
15
|
my $first = shift(@chars); |
|
387
|
20
|
100
|
100
|
|
|
43
|
if ($first >= 0xD800 && $first <= 0xDFFF) { # surrogate |
|
388
|
6
|
|
|
|
|
5
|
my $second = shift(@chars); |
|
389
|
|
|
|
|
|
|
#print "F=$first S=$second\n"; |
|
390
|
6
|
100
|
100
|
|
|
28
|
if ($first >= 0xDC00 || $second < 0xDC00 || $second > 0xDFFF) { |
|
|
|
|
66
|
|
|
|
|
|
391
|
2
|
|
|
|
|
209
|
carp(sprintf("Bad surrogate pair (U+%04x U+%04x)", |
|
392
|
|
|
|
|
|
|
$first, $second)); |
|
393
|
2
|
|
|
|
|
57
|
unshift(@chars, $second); |
|
394
|
2
|
|
|
|
|
4
|
next; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
4
|
|
|
|
|
7
|
push(@ret, ($first-0xD800)*0x400 + ($second-0xDC00) + 0x10000); |
|
397
|
|
|
|
|
|
|
} else { |
|
398
|
14
|
|
|
|
|
11
|
push(@ret, $first); |
|
399
|
|
|
|
|
|
|
} |
|
400
|
18
|
100
|
|
|
|
27
|
last unless $array; |
|
401
|
|
|
|
|
|
|
} |
|
402
|
12
|
100
|
|
|
|
28
|
$array ? @ret : $ret[0]; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub name |
|
407
|
|
|
|
|
|
|
{ |
|
408
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
409
|
0
|
|
|
|
|
0
|
require Unicode::CharName; |
|
410
|
0
|
0
|
|
|
|
0
|
if (wantarray) { |
|
411
|
0
|
|
|
|
|
0
|
return map { Unicode::CharName::uname($_) } $self->ord; |
|
|
0
|
|
|
|
|
0
|
|
|
412
|
|
|
|
|
|
|
} else { |
|
413
|
0
|
|
|
|
|
0
|
return Unicode::CharName::uname(scalar($self->ord)); |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub chr |
|
419
|
|
|
|
|
|
|
{ |
|
420
|
8
|
|
|
8
|
1
|
81
|
my($self,$val) = @_; |
|
421
|
8
|
100
|
|
|
|
12
|
unless (ref $self) { |
|
422
|
|
|
|
|
|
|
# act as ctor |
|
423
|
4
|
|
|
|
|
8
|
my $u = new Unicode::String; |
|
424
|
4
|
|
|
|
|
8
|
return $u->uchr($self); |
|
425
|
|
|
|
|
|
|
} |
|
426
|
4
|
100
|
|
|
|
6
|
if ($val > 0xFFFF) { |
|
427
|
|
|
|
|
|
|
# must be represented by a surrogate pair |
|
428
|
2
|
50
|
|
|
|
4
|
return undef if $val > 0x10FFFF; # Unicode limit |
|
429
|
2
|
|
|
|
|
2
|
$val -= 0x10000; |
|
430
|
2
|
|
|
|
|
5
|
my $h = int($val / 0x400) + 0xD800; |
|
431
|
2
|
|
|
|
|
2
|
my $l = ($val % 0x400) + 0xDC00; |
|
432
|
2
|
|
|
|
|
4
|
$$self = CORE::pack("n2", $h, $l); |
|
433
|
|
|
|
|
|
|
} else { |
|
434
|
2
|
|
|
|
|
40
|
$$self = CORE::pack("n", $val); |
|
435
|
|
|
|
|
|
|
} |
|
436
|
4
|
|
|
|
|
13
|
$self; |
|
437
|
|
|
|
|
|
|
} |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub substr |
|
441
|
|
|
|
|
|
|
{ |
|
442
|
6
|
|
|
6
|
1
|
27
|
my($self, $offset, $length, $substitute) = @_; |
|
443
|
6
|
|
100
|
|
|
12
|
$offset ||= 0; |
|
444
|
6
|
|
|
|
|
4
|
$offset *= 2; |
|
445
|
6
|
|
|
|
|
7
|
my $substr; |
|
446
|
6
|
50
|
|
|
|
7
|
if (defined $substitute) { |
|
447
|
0
|
0
|
|
|
|
0
|
unless (UNIVERSAL::isa($substitute, 'Unicode::String')) { |
|
448
|
0
|
|
|
|
|
0
|
$substitute = Unicode::String->new($substitute); |
|
449
|
|
|
|
|
|
|
} |
|
450
|
0
|
0
|
|
|
|
0
|
if (defined $length) { |
|
451
|
0
|
|
|
|
|
0
|
$substr = substr($$self, $offset, $length*2) = $$substitute; |
|
452
|
|
|
|
|
|
|
} else { |
|
453
|
0
|
|
|
|
|
0
|
$substr = substr($$self, $offset) = $$substitute; |
|
454
|
|
|
|
|
|
|
} |
|
455
|
|
|
|
|
|
|
} else { |
|
456
|
6
|
100
|
|
|
|
7
|
if (defined $length) { |
|
457
|
4
|
|
|
|
|
6
|
$substr = substr($$self, $offset, $length*2); |
|
458
|
|
|
|
|
|
|
} else { |
|
459
|
2
|
|
|
|
|
4
|
$substr = substr($$self, $offset); |
|
460
|
|
|
|
|
|
|
} |
|
461
|
|
|
|
|
|
|
} |
|
462
|
6
|
|
|
|
|
14
|
bless \$substr, ref($self); |
|
463
|
|
|
|
|
|
|
} |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub index |
|
467
|
|
|
|
|
|
|
{ |
|
468
|
5
|
|
|
5
|
1
|
16
|
my($self, $other, $pos) = @_; |
|
469
|
5
|
|
100
|
|
|
11
|
$pos ||= 0; |
|
470
|
5
|
|
|
|
|
5
|
$pos *= 2; |
|
471
|
5
|
50
|
|
|
|
7
|
$other = Unicode::String->new($other) unless ref($other); |
|
472
|
5
|
|
100
|
|
|
21
|
$pos++ while ($pos = index($$self, $$other, $pos)) > 0 && ($pos%2) != 0; |
|
473
|
5
|
100
|
|
|
|
8
|
$pos /= 2 if $pos > 0; |
|
474
|
5
|
|
|
|
|
5
|
$pos; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
sub rindex |
|
479
|
|
|
|
|
|
|
{ |
|
480
|
0
|
|
|
0
|
0
|
|
my($self, $other, $pos) = @_; |
|
481
|
0
|
|
0
|
|
|
|
$pos ||= 0; |
|
482
|
0
|
|
|
|
|
|
die "NYI"; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub chop |
|
487
|
|
|
|
|
|
|
{ |
|
488
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
489
|
0
|
0
|
|
|
|
|
if (CORE::length $$self) { |
|
490
|
0
|
|
|
|
|
|
my $chop = chop($$self); |
|
491
|
0
|
|
|
|
|
|
$chop = chop($$self) . $chop; |
|
492
|
0
|
|
|
|
|
|
return bless \$chop, ref($self); |
|
493
|
|
|
|
|
|
|
} |
|
494
|
0
|
|
|
|
|
|
undef; |
|
495
|
|
|
|
|
|
|
} |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# XXX: Ideas to be implemented |
|
499
|
|
|
|
|
|
|
sub scan; |
|
500
|
|
|
|
|
|
|
sub reverse; |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub lc; |
|
503
|
|
|
|
|
|
|
sub lcfirst; |
|
504
|
|
|
|
|
|
|
sub uc; |
|
505
|
|
|
|
|
|
|
sub ucfirst; |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
sub split; |
|
508
|
|
|
|
|
|
|
sub sprintf; |
|
509
|
|
|
|
|
|
|
sub study; |
|
510
|
|
|
|
|
|
|
sub tr; |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
1; |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
__END__ |