| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#-*- perl -*- |
|
2
|
|
|
|
|
|
|
#-*- coding: us-ascii -*- |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Encode::ISO2022; |
|
5
|
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
26536
|
use 5.007003; |
|
|
2
|
|
|
|
|
7
|
|
|
|
2
|
|
|
|
|
75
|
|
|
7
|
2
|
|
|
2
|
|
10
|
use strict; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
74
|
|
|
8
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
7
|
|
|
|
2
|
|
|
|
|
68
|
|
|
9
|
2
|
|
|
2
|
|
10
|
use base qw(Encode::Encoding); |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
2068
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
2
|
|
|
2
|
|
25532
|
use Carp qw(carp croak); |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
168
|
|
|
13
|
2
|
|
|
2
|
|
11
|
use XSLoader; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
7396
|
|
|
14
|
|
|
|
|
|
|
XSLoader::load(__PACKAGE__, $VERSION); |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# sprintf templates used when reporting unmappable data.  Both use the
# "%*v" vector flag, so callers pass the join string as an extra argument
# ahead of the offending character(s).
my $err_encode_nomap = '"\x{%*v04X}" does not map to %s';
my $err_decode_nomap = '%s "\x%*v02X" does not map to Unicode';

# Encode's CHECK bit flags, fetched once at load time so that the
# conversion loops can test them without repeated function calls.
my (
    $DIE_ON_ERR, $FB_QUIET,      $HTMLCREF,    $LEAVE_SRC,
    $PERLQQ,     $RETURN_ON_ERR, $WARN_ON_ERR, $XMLCREF,
  )
  = (
    Encode::DIE_ON_ERR(),  Encode::FB_QUIET(),
    Encode::HTMLCREF(),    Encode::LEAVE_SRC(),
    Encode::PERLQQ(),      Encode::RETURN_ON_ERR(),
    Encode::WARN_ON_ERR(), Encode::XMLCREF(),
  );
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Constructor |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Define(PACKAGE, Name => ..., CCS => [...], [LineInit => ...],
#        [SubChar => ...], [Alias => ...])
#
# Builds an encoding object blessed into PACKAGE and registers it in
# %Encode::Encoding under Name.
#
#   Name     - required; name the encoding is registered as.
#   CCS      - required; array ref of hash refs describing coded character
#              sets.  Each entry's "encoding" is either an encoding object
#              (any reference) or a name resolved with Encode::find_encoding.
#   LineInit - stored as given; consulted by decode/encode after line breaks.
#   SubChar  - substitution character, defaults to '?'.
#   Alias    - if true, passed to Encode::define_alias for this name.
#
# Croaks when Name or CCS is missing or an encoding cannot be resolved.
# Returns the newly registered object.
sub Define {
    my ($pkg, %opts) = @_;

    my $Name = $opts{Name};
    croak 'No name defined' unless $Name;

    my @CCS = @{$opts{CCS} || []};
    croak 'No CCS defined' unless @CCS;

    # Resolve every CCS entry to a copy holding a real encoding object.
    my @ccs = map {
        my $spec = $_;
        my $encoding =
            ref $spec->{encoding} ? $spec->{encoding}
          : $spec->{encoding}     ? Encode::find_encoding($spec->{encoding})
          :                         undef;
        croak sprintf 'Unknown encoding "%s"', ($spec->{encoding} || '')
            unless $encoding;
        +{ %$spec, encoding => $encoding };
    } @CCS;

    my $self = bless {
        CCS      => [@ccs],
        LineInit => $opts{LineInit},
        Name     => $Name,
        SubChar  => ($opts{SubChar} || '?'),
    }, $pkg;

    Encode::define_alias($opts{Alias} => "\"$Name\"") if $opts{Alias};
    return $Encode::Encoding{$Name} = $self;
}
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# decode method |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# decode(OCTETS, [CHECK])
#
# Converts OCTETS to a Unicode string, interpreting designation, locking
# shift and single shift functions along the way.
#
#   OCTETS - byte string.  Unless CHECK has LEAVE_SRC set, the caller's
#            variable is overwritten with the part that was not processed.
#   CHECK  - Encode CHECK bit mask, or a code ref called with the ordinal
#            of each offending byte (treated as PERLQQ | LEAVE_SRC).
#            Undefined is treated as 0.
#
# Croaks on unmappable data when DIE_ON_ERR is set; warns when WARN_ON_ERR
# is set; stops early when RETURN_ON_ERR is set.  Returns the decoded text.
sub decode {
    my ($self, $str, $chk) = @_;

    # A missing CHECK means "no flags"; avoids warnings from the bit tests.
    $chk = 0 unless defined $chk;

    my $chk_sub;
    if (ref $chk eq 'CODE') {
        $chk_sub = $chk;
        $chk     = $PERLQQ | $LEAVE_SRC;
    }

    my $utf8 = '';

    $self->init_state(1);

    pos($str) = 0;

    # Unprocessed tail of the current match.  It must live outside the loop:
    # the rewind after the loop depends on what is left in it.
    my $chunk = '';
  CHUNKS:
    while (
        $str =~ m{
            \G
            (
                ( # designation (FIXME)
                    \e\x24?[\x28-\x2B\x2D-\x2F][\x20-\x2F]*[\x40-\x7E] |
                    \e\x24[\x40-\x42] |
                ) |
                ( # locking shift
                    \x0E|\x0F|\e[\x6E\x6F\x7C\x7D\x7E]
                ) |
            )
            (
                ( # single shift 2
                    \x8E|\e\x4E
                ) |
                ( # single shift 3
                    \x8F|\e\x4F
                ) |
            )
            (
                [^\x0E\x0F\e\x8E\x8F]*
            )
        }gcx
      ) {
        my $start = $-[0];    # offset where this match began
        my ($g_seq, $ls, $ss) = ($2, $3, $4);
        $chunk = $7;

        # process designation and invocation.
        my $errSeq;
        if ($g_seq) {
            $errSeq = $g_seq unless defined $self->designate_dec($g_seq);
        } elsif ($ls) {
            $errSeq = $ls unless defined $self->invoke_dec($ls);
        }
        if ($errSeq) {
            if ($chk & $DIE_ON_ERR) {
                croak sprintf $err_decode_nomap, $self->name, '\x', $errSeq;
            }
            if ($chk & $WARN_ON_ERR) {
                carp sprintf $err_decode_nomap, $self->name, '\x', $errSeq;
            }
            if ($chk & $RETURN_ON_ERR) {
                # Leave the whole match (sequence, any single shift and
                # the data after it) unprocessed.
                pos($str) = $start;
                $chunk = '';
                last CHUNKS;
            }

            if ($chk_sub) {
                $utf8 .= join '',
                    map { $chk_sub->(ord $_) } split(//, $errSeq . $chunk);
            } elsif ($chk & $PERLQQ) {
                $utf8 .= sprintf '\x%*v02X', '\x', $errSeq . $chunk;
            } else {
                $utf8 .= "\x{FFFD}" x length($chunk);
            }

            # The chunk has been accounted for; nothing to rewind later.
            $chunk = '';
            next CHUNKS;
        }

        # process encoded elements
        my $retried = 0;    # true while re-trying after forcing CL
        while (length $chunk) {
            # _decode() clears $ss, so remember it for a possible rewind.
            my $pending_ss = $ss;

            my ($conv, $bytes) = $self->_decode($chunk, $ss);
            if (defined $conv) {
                $utf8 .= $conv;
                $retried = 0;

                if ($conv =~ /[\r\n]/ and $self->{LineInit}) {
                    $self->init_state(1);
                }
                next;
            }

            my $errChar = substr($chunk, 0, $bytes || 1);

            if ($chk & $DIE_ON_ERR) {
                croak sprintf $err_decode_nomap, $self->name, '\x', $errChar;
            }
            if ($chk & $WARN_ON_ERR) {
                carp sprintf $err_decode_nomap, $self->name, '\x', $errChar;
            }
            if ($chk & $RETURN_ON_ERR) {
                # Keep a not yet consumed single shift with the remainder.
                $chunk = $pending_ss . $chunk
                    if defined $pending_ss and length $pending_ss;
                last CHUNKS;
            }

            # Maybe erroneous designation: Force invoking CL and retry.
            # Retry only once per position: a second failure would repeat
            # forever, because neither the chunk nor the state changes.
            if (not $retried and $errChar =~ /^[\x00-\x1F]/) {
                my @cl = grep { $_->{cl} } @{$self->{CCS}};
                if (@cl) {
                    $self->designate($cl[0]);
                    $retried = 1;
                    next;
                }
            }
            $retried = 0;

            substr($chunk, 0, length $errChar) = '';

            if ($chk_sub) {
                $utf8 .= join '',
                    map { $chk_sub->(ord $_) } split(//, $errChar);
            } elsif ($chk & $PERLQQ) {
                $utf8 .= sprintf '\x%*v02X', '\x', $errChar;
            } else {
                $utf8 .= "\x{FFFD}";
            }
        }
    } # CHUNKS

    # Give back whatever was matched but not converted.
    pos($str) -= length($chunk);
    $_[1] = substr($str, pos $str) unless $chk & $LEAVE_SRC;

    return $utf8;
}
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# _decode(CHUNK, SS)
#
# Tries to convert the beginning of CHUNK with each eligible coded
# character set in turn.  When SS is true only designated sets whose "ss"
# equals SS are tried; otherwise the sets that are currently invoked, or
# that carry none of the g/g_init/ls/ss keys.
#
# On success the caller's CHUNK is replaced by the unconverted rest and the
# converted text is returned.  On failure (undef, LENGTH) is returned, where
# LENGTH is the shortest in-range prefix seen among the sets that define a
# range (undef if none).  The caller's SS is set to undef in both cases.
sub _decode {
    my ($self, $chunk, $ss) = @_;

    my @candidates;
    if ($ss) {
        @candidates = grep {
            $_->{_designated_to} and $_->{ss} and $_->{ss} eq $ss
        } @{$self->{CCS}};
    } else {
        @candidates = grep {
            $_->{_invoked_to}
                or not ($_->{g} or $_->{g_init} or $_->{ls} or $_->{ss})
        } @{$self->{CCS}};
    }

    my $errLen;
    foreach my $ccs (@candidates) {
        my $bytes = $ccs->{bytes} || 1;
        my $range = $ccs->{range} || ($ccs->{gr} ? '\xA0-\xFF' : undef);
        my $residue = '';

        if ($range) {
            # Skip this set if the first byte is outside its range;
            # otherwise set aside everything from the first such byte on.
            next if $chunk =~ /^[^$range]/;
            $residue = $1 if $chunk =~ s/([^$range].*)$//s;
        }

        # A single shift applies to one character only.
        if ($ss and $bytes <= length $chunk) {
            $residue = substr($chunk, $bytes) . $residue;
            $chunk   = substr($chunk, 0, $bytes);
        }

        my $conv;
        if ($ccs->{gr}) {
            # Swap the 0x20-0x7F and 0xA0-0xFF halves around the call.
            $chunk =~ tr/\x20-\x7F\xA0-\xFF/\xA0-\xFF\x20-\x7F/;
            $conv = $ccs->{encoding}->decode($chunk, $FB_QUIET);
            $chunk =~ tr/\x20-\x7F\xA0-\xFF/\xA0-\xFF\x20-\x7F/;
        } else {
            $conv = $ccs->{encoding}->decode($chunk, $FB_QUIET);
        }

        # Remember the shortest in-range run left at the front.
        if ($range and $chunk =~ /^([$range]{1,$bytes})/) {
            my $len = length $1;
            $errLen = $len if not defined $errLen or $len < $errLen;
        }

        $chunk .= $residue;

        if ($conv =~ /./os) { # length() on utf8 string is slow
            $_[1] = $chunk;
            $_[2] = undef;
            return $conv;
        }
    }

    $_[2] = undef;
    return (undef, $errLen);
}
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# designate_dec(G_SEQ)
#
# Looks up the first coded character set whose "g_seq" equals G_SEQ and
# designates it.  Returns the result of designate(), or undef when no set
# uses that sequence.
sub designate_dec {
    my ($self, $g_seq) = @_;

    foreach my $candidate (@{$self->{CCS}}) {
        next unless $candidate->{g_seq} and $candidate->{g_seq} eq $g_seq;
        return $self->designate($candidate);
    }

    return undef;
}
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# invoke_dec(LS)
#
# Looks up the first coded character set that is currently designated and
# whose "ls" equals LS, and invokes it.  Returns the result of invoke(), or
# undef when there is no such set.
sub invoke_dec {
    my ($self, $ls) = @_;

    foreach my $candidate (@{$self->{CCS}}) {
        next unless $candidate->{_designated_to};
        next unless $candidate->{ls} and $candidate->{ls} eq $ls;
        return $self->invoke($candidate);
    }

    return undef;
}
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# encode method |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# encode(STRING, [CHECK])
#
# Converts the Unicode STRING to octets, emitting designation and
# invocation functions as needed.
#
#   STRING - text to convert.  Unless CHECK has LEAVE_SRC set, the caller's
#            variable is overwritten with the part that was not processed.
#   CHECK  - Encode CHECK bit mask, or a code ref called with the ordinal
#            of each unmappable character (treated as PERLQQ | LEAVE_SRC).
#            Undefined is treated as 0.
#
# Unmappable characters are replaced, depending on CHECK, by the code ref's
# result, a \x{HHHH} escape, an &#xHHHH; or &#NNN; character reference, or
# the object's SubChar.  Croaks when DIE_ON_ERR is set, warns when
# WARN_ON_ERR is set, stops early when RETURN_ON_ERR is set.  Returns the
# encoded octets; if anything was produced, the sequences returned by
# init_state() are appended.
sub encode {
    my ($self, $utf8, $chk) = @_;

    # A missing CHECK means "no flags"; avoids warnings from the bit tests.
    $chk = 0 unless defined $chk;

    my $chk_sub;
    if (ref $chk eq 'CODE') {
        $chk_sub = $chk;
        $chk     = $PERLQQ | $LEAVE_SRC;
    }

    my $str = '';

    $self->init_state(1);

    while ($utf8 =~ /./os) { # length() on utf8 string is slow.
        my $conv = $self->_encode($utf8);
        if (defined $conv) {
            $str .= $conv;

            if ($conv =~ /[\r\n]/ and $self->{LineInit}) {
                $self->init_state(1);
            }
            next;
        }

        my $errChar = substr($utf8, 0, 1);
        if ($chk & $DIE_ON_ERR) {
            croak sprintf $err_encode_nomap, '}\x{', $errChar, $self->name;
        }
        if ($chk & $WARN_ON_ERR) {
            carp sprintf $err_encode_nomap, '}\x{', $errChar, $self->name;
        }
        if ($chk & $RETURN_ON_ERR) {
            last;
        }

        substr($utf8, 0, 1) = '';

        my $subChar;
        if ($chk_sub) {
            $subChar = $chk_sub->(ord $errChar);
            $subChar = Encode::decode_utf8($subChar)
                unless Encode::is_utf8($subChar);
        } elsif ($chk & $PERLQQ) {
            $subChar = sprintf '\x{%04X}', ord $errChar;
        } elsif ($chk & $XMLCREF) {
            # Hexadecimal numeric character reference.
            $subChar = sprintf '&#x%X;', ord $errChar;
        } elsif ($chk & $HTMLCREF) {
            # Decimal numeric character reference.
            $subChar = sprintf '&#%d;', ord $errChar;
        } else {
            $subChar = $self->{SubChar} || '?';
        }

        # The substitute is dropped silently if it cannot be encoded either.
        $conv = $self->_encode($subChar);
        $str .= $conv if defined $conv;
    }
    $_[1] = $utf8 unless $chk & $LEAVE_SRC;

    $str .= $self->init_state() if length $str;

    return $str;
}
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# _encode(STRING)
#
# Tries each coded character set (skipping those flagged "dec_only") until
# one converts the beginning of STRING.  On success the caller's STRING is
# replaced by the unconverted rest, and the designation sequence followed
# by the invoked octets is returned.  Returns undef if no set converts
# anything.
sub _encode {
    my ($self, $utf8) = @_;

    foreach my $ccs (@{$self->{CCS}}) {
        next if $ccs->{dec_only};

        my $encoder = $ccs->{encoding};
        my $conv;

        # CCS with single-shift should encode runs as short as possible.
        # By now we support mapping from Unicode sequence up to 2 characters.
        if (defined $ccs->{ss}) { # empty value is allowed
            my $bytes = $ccs->{bytes} || 1;
            my $mc = substr($utf8, 0, 2);
            $conv = $encoder->encode($mc, $FB_QUIET);

            if ($bytes < length $conv) {
                # Two characters gave more than one element: retry with
                # only the first character.
                $mc = substr($utf8, 0, 1);
                $conv = $encoder->encode($mc, $FB_QUIET);
                substr($utf8, 0, 1) = '' if length $conv;
            } elsif (length $conv == $bytes) {
                # Drop the two characters, then put back what the encoder
                # left unconverted in $mc.
                substr($utf8, 0, 2) = '';
                $utf8 = $mc . $utf8;
            } else {
                undef $conv;
            }
        } else {
            $conv = $encoder->encode($utf8, $FB_QUIET);
        }

        next unless defined $conv and length $conv;

        $_[1] = $utf8;
        return $self->designate($ccs) . $self->invoke($ccs, $conv);
    }

    return undef;
}
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# init_state([RESET])
#
# With a true RESET, first forgets all designation/invocation marks on the
# coded character sets and the object's "_state".  Then designates every
# set that has "g_init" and returns the concatenated results of those
# designate() calls.
sub init_state {
    my ($self, $reset) = @_;

    if ($reset) {
        delete @{$_}{qw(_designated_to _invoked_to)}
            foreach @{$self->{CCS}};
        delete $self->{_state};
    }

    return join '',
        map { $self->designate($_) }
        grep { $_->{g_init} } @{$self->{CCS}};
}
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# designate(CCS)
#
# Marks CCS, together with every set sharing its designation, as
# designated to the element named by its "g" (or "g_init") value.  Sets
# with neither "ls" nor "ss" are additionally marked as invoked to 'gr' or
# 'gl' according to their "gr" flag.  Marks of the sets previously
# recorded for the same element are removed, and the object's "_state" is
# updated.
#
# Croaks if CCS has neither "g" nor "g_init".  Returns the set's "g_seq"
# (or '' if it has none), or '' when every affected set was already
# designated to that element.
sub designate {
    my ($self, $target) = @_;

    my $g = $target->{g} || $target->{g_init};
    unless ($g) {
        croak sprintf 'Cannot designate %s', $target->{encoding}->name;
    }
    my $g_seq = $target->{g_seq};

    my @group;
    if ($g_seq) { # explicit designation
        @group = grep {
            $_->{g_seq} and $_->{g_seq} eq $g_seq
        } @{$self->{CCS}};
    } else { # static designation
        @group = grep {
            not $_->{g_seq} and
            ($_->{g} and $_->{g} eq $g or $_->{g_init} and $_->{g_init} eq $g)
        } @{$self->{CCS}};
    }

    # Already designated: do nothing
    my $pending = grep {
        not ($_->{_designated_to} and $_->{_designated_to} eq $g)
    } @group;
    return '' unless $pending;

    # modify designation
    foreach my $old (@{$self->{_state}->{$g} || []}) {
        delete $old->{_designated_to};
        delete $old->{_invoked_to};
    }
    my %invoked = (gr => [], gl => []);
    foreach my $member (@group) {
        $member->{_designated_to} = $g;
        next if $member->{ls} or $member->{ss};

        my $side = $member->{gr} ? 'gr' : 'gl';
        $member->{_invoked_to} = $side;
        push @{$invoked{$side}}, $member;
    }

    # modify invocation
    foreach my $side (qw/gr gl/) {
        next unless @{$invoked{$side} || []};

        foreach my $old (@{$self->{_state}->{$side} || []}) {
            delete $old->{_invoked_to};
        }
        $self->{_state}->{$side} = $invoked{$side};
    }

    $self->{_state}->{$g} = [@group];
    return $g_seq || '';
}
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# invoke(CCS, [STR])
#
# Prepares STR (default '') for output through CCS and returns it:
#   - if CCS has "gr", bytes 0x20-0x7F are first moved to 0xA0-0xFF;
#   - if CCS has "ss", that value is inserted before every group of
#     "bytes" (default 1) bytes;
#   - else if CCS has "ls", the sets sharing its designation, "ls" and
#     side are marked as invoked and "ls" is prepended, unless they were
#     all invoked already;
#   - otherwise STR is returned as is.
sub invoke {
    my ($self, $ccs, $str) = @_;
    $str = '' unless defined $str;

    my $side = $ccs->{gr} ? 'gr' : 'gl';
    $str =~ tr/\x20-\x7F/\xA0-\xFF/ if $side eq 'gr';

    if ($ccs->{ss}) {
        my $out = '';
        while (length $str) {
            $out .= $ccs->{ss} . substr($str, 0, ($ccs->{bytes} || 1), '');
        }
        return $out;
    }

    return $str unless $ccs->{ls};

    my $ls = $ccs->{ls};
    my $g_seq = $ccs->{g_seq};
    my $g = $ccs->{g} || $ccs->{g_init};

    my @group;
    if ($g_seq) {
        @group = grep {
            $_->{g_seq} and $_->{g_seq} eq $g_seq and
            $_->{ls} and $_->{ls} eq $ls and
            ($_->{gr} ? 'gr' : 'gl') eq $side
        } @{$self->{CCS}};
    } else {
        @group = grep {
            not $_->{g_seq} and ($_->{g} || $_->{g_init}) eq $g and
            $_->{ls} and $_->{ls} eq $ls and
            ($_->{gr} ? 'gr' : 'gl') eq $side
        } @{$self->{CCS}};
    }

    # Already invoked: add nothing
    my $pending = grep {
        not ($_->{_invoked_to} and $_->{_invoked_to} eq $side)
    } @group;
    return $str unless $pending;

    foreach my $old (@{$self->{_state}->{$side} || []}) {
        delete $old->{_invoked_to};
    }
    $_->{_invoked_to} = $side foreach @group;

    $self->{_state}->{$side} = [@group];
    return $ccs->{ls} . $str;
}
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# renew method |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
# renew()
#
# Returns a copy of the object, blessed into the same class, whose
# contents were duplicated element by element with _renew().  The copy's
# "renewed" counter is incremented.
sub renew {
    my $self = shift;

    my %copy = map { _renew($_) } %$self;
    my $clone = bless \%copy, ref($self);
    $clone->{renewed}++;

    return $clone;
}
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# _renew(ITEM)
#
# Helper for renew(): returns a copy of ITEM.
#   - unblessed hash and array references are copied recursively;
#   - objects that provide a renew() method are replaced by its result;
#   - anything else (plain scalars, other references, objects without
#     renew()) is returned unchanged.
sub _renew {
    my $item = shift;

    my $type = ref $item;
    return $item unless $type;

    return +{ map { _renew($_) } %$item } if $type eq 'HASH';
    return [ map { _renew($_) } @$item ] if $type eq 'ARRAY';

    # Only objects can be asked for methods: calling ->can on an unblessed
    # reference (code ref, scalar ref, ...) would die.
    require Scalar::Util;
    return $item->renew
        if Scalar::Util::blessed($item) and $item->can('renew');

    return $item;
}
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Miscellaneous |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# mime_name()
#
# Returns the object's Name in upper case, or undef when the name begins
# with "x" or "X".
sub mime_name {
    my ($self) = @_;

    my $name = $self->{Name};
    return $name =~ /^x/i ? undef : uc($name);
}
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
1; |
|
557
|
|
|
|
|
|
|
__END__ |