| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# $Id: encoding.pm,v 2.20 2017/06/10 17:23:50 dankogai Exp $ |
|
2
|
|
|
|
|
|
|
package encoding; |
|
3
|
|
|
|
|
|
|
our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g; |
|
4
|
|
|
|
|
|
|
|
|
5
|
8
|
|
|
8
|
|
38170
|
use Encode; |
|
|
8
|
|
|
|
|
32
|
|
|
|
8
|
|
|
|
|
683
|
|
|
6
|
8
|
|
|
8
|
|
53
|
use strict; |
|
|
8
|
|
|
|
|
15
|
|
|
|
8
|
|
|
|
|
158
|
|
|
7
|
8
|
|
|
8
|
|
38
|
use warnings; |
|
|
8
|
|
|
|
|
18
|
|
|
|
8
|
|
|
|
|
180
|
|
|
8
|
8
|
|
|
8
|
|
54
|
use Config; |
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
804
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use constant { |
|
11
|
|
|
|
|
|
|
DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, |
|
12
|
8
|
|
33
|
|
|
26
|
HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, |
|
|
8
|
|
|
|
|
3333
|
|
|
|
8
|
|
|
|
|
5730
|
|
|
13
|
|
|
|
|
|
|
PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped |
|
14
|
8
|
|
|
8
|
|
57
|
}; |
|
|
8
|
|
|
|
|
16
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub _exception { |
|
17
|
8
|
|
|
8
|
|
16
|
my $name = shift; |
|
18
|
8
|
50
|
|
|
|
43
|
$] > 5.008 and return 0; # 5.8.1 or higher then no |
|
19
|
0
|
|
|
|
|
0
|
my %utfs = map { $_ => 1 } |
|
|
0
|
|
|
|
|
0
|
|
|
20
|
|
|
|
|
|
|
qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE |
|
21
|
|
|
|
|
|
|
UTF-32 UTF-32BE UTF-32LE); |
|
22
|
0
|
0
|
|
|
|
0
|
$utfs{$name} or return 0; # UTFs or no |
|
23
|
0
|
|
|
|
|
0
|
require Config; |
|
24
|
0
|
|
|
|
|
0
|
Config->import(); |
|
25
|
0
|
|
|
|
|
0
|
our %Config; |
|
26
|
0
|
0
|
|
|
|
0
|
return $Config{perl_patchlevel} ? 0 : 1 # maintperl then no |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
0
|
|
0
|
0
|
0
|
0
|
sub in_locale { $^H & ( $locale::hint_bits || 0 ) } |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub _get_locale_encoding { |
|
32
|
2
|
|
|
2
|
|
12
|
my $locale_encoding; |
|
33
|
|
|
|
|
|
|
|
|
34
|
2
|
50
|
|
|
|
10
|
if ($^O eq 'MSWin32') { |
|
35
|
0
|
|
|
|
|
0
|
my @tries = ( |
|
36
|
|
|
|
|
|
|
# First try to get the OutputCP. This will work only if we |
|
37
|
|
|
|
|
|
|
# are attached to a console |
|
38
|
|
|
|
|
|
|
'Win32.pm' => 'Win32::GetConsoleOutputCP', |
|
39
|
|
|
|
|
|
|
'Win32/Console.pm' => 'Win32::Console::OutputCP', |
|
40
|
|
|
|
|
|
|
# If above failed, this means that we are a GUI app |
|
41
|
|
|
|
|
|
|
# Let's assume that the ANSI codepage is what matters |
|
42
|
|
|
|
|
|
|
'Win32.pm' => 'Win32::GetACP', |
|
43
|
|
|
|
|
|
|
); |
|
44
|
0
|
|
|
|
|
0
|
while (@tries) { |
|
45
|
0
|
|
|
|
|
0
|
my $cp = eval { |
|
46
|
0
|
|
|
|
|
0
|
require $tries[0]; |
|
47
|
8
|
|
|
8
|
|
60
|
no strict 'refs'; |
|
|
8
|
|
|
|
|
18
|
|
|
|
8
|
|
|
|
|
3780
|
|
|
48
|
0
|
|
|
|
|
0
|
&{$tries[1]}() |
|
|
0
|
|
|
|
|
0
|
|
|
49
|
|
|
|
|
|
|
}; |
|
50
|
0
|
0
|
|
|
|
0
|
if ($cp) { |
|
51
|
0
|
0
|
|
|
|
0
|
if ($cp == 65001) { # Code page for UTF-8 |
|
52
|
0
|
|
|
|
|
0
|
$locale_encoding = 'UTF-8'; |
|
53
|
|
|
|
|
|
|
} else { |
|
54
|
0
|
|
|
|
|
0
|
$locale_encoding = 'cp' . $cp; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
0
|
|
|
|
|
0
|
return $locale_encoding; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
0
|
|
|
|
|
0
|
splice(@tries, 0, 2) |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# I18N::Langinfo isn't available everywhere |
|
63
|
2
|
|
|
|
|
5
|
$locale_encoding = eval { |
|
64
|
2
|
|
|
|
|
832
|
require I18N::Langinfo; |
|
65
|
2
|
|
|
|
|
919
|
find_encoding( |
|
66
|
|
|
|
|
|
|
I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() ) |
|
67
|
|
|
|
|
|
|
)->name |
|
68
|
|
|
|
|
|
|
}; |
|
69
|
2
|
50
|
|
|
|
11
|
return $locale_encoding if defined $locale_encoding; |
|
70
|
|
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
0
|
eval { |
|
72
|
0
|
|
|
|
|
0
|
require POSIX; |
|
73
|
|
|
|
|
|
|
# Get the current locale |
|
74
|
|
|
|
|
|
|
# Remember that MSVCRT impl is quite different from Unixes |
|
75
|
0
|
|
|
|
|
0
|
my $locale = POSIX::setlocale(POSIX::LC_CTYPE()); |
|
76
|
0
|
0
|
|
|
|
0
|
if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) { |
|
77
|
0
|
|
|
|
|
0
|
my $country_language; |
|
78
|
0
|
|
|
|
|
0
|
( $country_language, $locale_encoding ) = ( $1, $2 ); |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Could do more heuristics based on the country and language |
|
81
|
|
|
|
|
|
|
# since we have Locale::Country and Locale::Language available. |
|
82
|
|
|
|
|
|
|
# TODO: get a database of Language -> Encoding mappings |
|
83
|
|
|
|
|
|
|
# (the Estonian database at http://www.eki.ee/letter/ |
|
84
|
|
|
|
|
|
|
# would be excellent!) --jhi |
|
85
|
0
|
0
|
|
|
|
0
|
if (lc($locale_encoding) eq 'euc') { |
|
86
|
0
|
0
|
|
|
|
0
|
if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
0
|
$locale_encoding = 'euc-jp'; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
elsif ( $country_language =~ /^ko_KR|korean?$/i ) { |
|
90
|
0
|
|
|
|
|
0
|
$locale_encoding = 'euc-kr'; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) { |
|
93
|
0
|
|
|
|
|
0
|
$locale_encoding = 'euc-cn'; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) { |
|
96
|
0
|
|
|
|
|
0
|
$locale_encoding = 'euc-tw'; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
else { |
|
99
|
0
|
|
|
|
|
0
|
require Carp; |
|
100
|
0
|
|
|
|
|
0
|
Carp::croak( |
|
101
|
|
|
|
|
|
|
"encoding: Locale encoding '$locale_encoding' too ambiguous" |
|
102
|
|
|
|
|
|
|
); |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
}; |
|
107
|
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
0
|
return $locale_encoding; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub import { |
|
112
|
|
|
|
|
|
|
|
|
113
|
9
|
|
|
9
|
|
197
|
if ( ord("A") == 193 ) { |
|
114
|
|
|
|
|
|
|
require Carp; |
|
115
|
|
|
|
|
|
|
Carp::croak("encoding: pragma does not support EBCDIC platforms"); |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
my $deprecate = |
|
119
|
|
|
|
|
|
|
($] >= 5.017 and !$Config{usecperl}) |
|
120
|
9
|
50
|
33
|
|
|
581
|
? "Use of the encoding pragma is deprecated" : 0; |
|
121
|
|
|
|
|
|
|
|
|
122
|
9
|
|
|
|
|
33
|
my $class = shift; |
|
123
|
9
|
|
|
|
|
17
|
my $name = shift; |
|
124
|
9
|
50
|
|
|
|
31
|
if (!$name){ |
|
125
|
0
|
|
|
|
|
0
|
require Carp; |
|
126
|
0
|
|
|
|
|
0
|
Carp::croak("encoding: no encoding specified."); |
|
127
|
|
|
|
|
|
|
} |
|
128
|
9
|
50
|
|
|
|
33
|
if ( $name eq ':_get_locale_encoding' ) { # used by lib/open.pm |
|
129
|
0
|
|
|
|
|
0
|
my $caller = caller(); |
|
130
|
|
|
|
|
|
|
{ |
|
131
|
8
|
|
|
8
|
|
60
|
no strict 'refs'; |
|
|
8
|
|
|
|
|
21
|
|
|
|
8
|
|
|
|
|
692
|
|
|
|
0
|
|
|
|
|
0
|
|
|
132
|
0
|
|
|
|
|
0
|
*{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding; |
|
|
0
|
|
|
|
|
0
|
|
|
133
|
|
|
|
|
|
|
} |
|
134
|
0
|
|
|
|
|
0
|
return; |
|
135
|
|
|
|
|
|
|
} |
|
136
|
9
|
50
|
|
|
|
25
|
$name = _get_locale_encoding() if $name eq ':locale'; |
|
137
|
8
|
50
|
33
|
8
|
|
2980
|
BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; } |
|
138
|
9
|
|
|
|
|
23
|
my %arg = @_; |
|
139
|
9
|
50
|
|
|
|
27
|
$name = $ENV{PERL_ENCODING} unless defined $name; |
|
140
|
9
|
|
|
|
|
31
|
my $enc = find_encoding($name); |
|
141
|
9
|
50
|
|
|
|
38
|
unless ( defined $enc ) { |
|
142
|
0
|
|
|
|
|
0
|
require Carp; |
|
143
|
0
|
|
|
|
|
0
|
Carp::croak("encoding: Unknown encoding '$name'"); |
|
144
|
|
|
|
|
|
|
} |
|
145
|
9
|
|
|
|
|
86
|
$name = $enc->name; # canonize |
|
146
|
9
|
100
|
|
|
|
33
|
unless ( $arg{Filter} ) { |
|
147
|
8
|
50
|
33
|
|
|
40
|
if ($] >= 5.025003 and !$Config{usecperl}) { |
|
148
|
0
|
|
|
|
|
0
|
require Carp; |
|
149
|
0
|
|
|
|
|
0
|
Carp::croak("The encoding pragma is no longer supported. Check cperl"); |
|
150
|
|
|
|
|
|
|
} |
|
151
|
8
|
50
|
|
|
|
683
|
warnings::warnif("deprecated",$deprecate) if $deprecate; |
|
152
|
|
|
|
|
|
|
|
|
153
|
8
|
|
|
|
|
20
|
DEBUG and warn "_exception($name) = ", _exception($name); |
|
154
|
8
|
50
|
|
|
|
29
|
if (! _exception($name)) { |
|
155
|
8
|
|
|
|
|
17
|
if (!PERL_5_21_7) { |
|
156
|
|
|
|
|
|
|
${^ENCODING} = $enc; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
else { |
|
159
|
|
|
|
|
|
|
# Starting with 5.21.7, this pragma uses a shadow variable |
|
160
|
|
|
|
|
|
|
# designed explicitly for it, ${^E_NCODING}, to enforce |
|
161
|
|
|
|
|
|
|
# lexical scope; instead of ${^ENCODING}. |
|
162
|
8
|
|
|
|
|
39
|
$^H{'encoding'} = 1; |
|
163
|
8
|
|
|
|
|
30
|
${^E_NCODING} = $enc; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
8
|
|
|
|
|
18
|
if (! HAS_PERLIO ) { |
|
167
|
|
|
|
|
|
|
return 1; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
else { |
|
171
|
1
|
50
|
|
|
|
80
|
warnings::warnif("deprecated",$deprecate) if $deprecate; |
|
172
|
|
|
|
|
|
|
|
|
173
|
1
|
50
|
|
|
|
5
|
defined( ${^ENCODING} ) and undef ${^ENCODING}; |
|
174
|
1
|
|
|
|
|
3
|
undef ${^E_NCODING} if PERL_5_21_7; |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# implicitly 'use utf8' |
|
177
|
1
|
|
|
|
|
6
|
require utf8; # to fetch $utf8::hint_bits; |
|
178
|
1
|
|
|
|
|
3
|
$^H |= $utf8::hint_bits; |
|
179
|
1
|
50
|
50
|
|
|
2
|
eval { |
|
180
|
1
|
|
|
|
|
564
|
require Filter::Util::Call; |
|
181
|
1
|
|
|
|
|
729
|
Filter::Util::Call->import; |
|
182
|
|
|
|
|
|
|
filter_add( |
|
183
|
|
|
|
|
|
|
sub { |
|
184
|
11
|
|
|
11
|
|
60
|
my $status = filter_read(); |
|
185
|
11
|
50
|
|
|
|
24
|
if ( $status > 0 ) { |
|
186
|
11
|
|
|
|
|
38
|
$_ = $enc->decode( $_, 1 ); |
|
187
|
11
|
|
|
|
|
18
|
DEBUG and warn $_; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
11
|
|
|
|
|
126
|
$status; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
1
|
|
|
|
|
6
|
); |
|
192
|
1
|
|
|
|
|
20
|
1; |
|
193
|
|
|
|
|
|
|
} and DEBUG and warn "Filter installed"; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
9
|
50
|
33
|
|
|
71
|
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1; |
|
196
|
9
|
|
|
|
|
23
|
for my $h (qw(STDIN STDOUT)) { |
|
197
|
18
|
50
|
|
|
|
42
|
if ( $arg{$h} ) { |
|
198
|
0
|
0
|
|
|
|
0
|
unless ( defined find_encoding( $arg{$h} ) ) { |
|
199
|
0
|
|
|
|
|
0
|
require Carp; |
|
200
|
0
|
|
|
|
|
0
|
Carp::croak( |
|
201
|
|
|
|
|
|
|
"encoding: Unknown encoding for $h, '$arg{$h}'"); |
|
202
|
|
|
|
|
|
|
} |
|
203
|
0
|
|
|
|
|
0
|
eval { binmode( $h, ":raw :encoding($arg{$h})" ) }; |
|
|
0
|
|
|
|
|
0
|
|
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
else { |
|
206
|
18
|
50
|
|
|
|
50
|
unless ( exists $arg{$h} ) { |
|
207
|
18
|
|
|
|
|
36
|
eval { |
|
208
|
8
|
|
|
8
|
|
52
|
no warnings 'uninitialized'; |
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
1041
|
|
|
209
|
18
|
|
|
|
|
195
|
binmode( $h, ":raw :encoding($name)" ); |
|
210
|
|
|
|
|
|
|
}; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
} |
|
213
|
18
|
50
|
|
|
|
73
|
if ($@) { |
|
214
|
0
|
|
|
|
|
0
|
require Carp; |
|
215
|
0
|
|
|
|
|
0
|
Carp::croak($@); |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
} |
|
218
|
9
|
|
|
|
|
1212
|
return 1; # I doubt if we need it, though |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub unimport { |
|
222
|
8
|
|
|
8
|
|
44
|
no warnings; |
|
|
8
|
|
|
|
|
16
|
|
|
|
8
|
|
|
|
|
804
|
|
|
223
|
3
|
|
|
3
|
|
150
|
undef ${^ENCODING}; |
|
224
|
3
|
|
|
|
|
8
|
undef ${^E_NCODING} if PERL_5_21_7; |
|
225
|
3
|
|
|
|
|
4
|
if (HAS_PERLIO) { |
|
226
|
3
|
|
|
|
|
12
|
binmode( STDIN, ":raw" ); |
|
227
|
3
|
|
|
|
|
6
|
binmode( STDOUT, ":raw" ); |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
else { |
|
230
|
|
|
|
|
|
|
binmode(STDIN); |
|
231
|
|
|
|
|
|
|
binmode(STDOUT); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
3
|
100
|
|
|
|
77
|
if ( $INC{"Filter/Util/Call.pm"} ) { |
|
234
|
1
|
|
|
|
|
2
|
eval { filter_del() }; |
|
|
1
|
|
|
|
|
943
|
|
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
1; |
|
239
|
|
|
|
|
|
|
__END__ |