line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
use strict; |
3
|
52
|
|
|
52
|
|
301
|
use warnings; |
|
52
|
|
|
|
|
97
|
|
|
52
|
|
|
|
|
1381
|
|
4
|
52
|
|
|
52
|
|
218
|
our $VERSION = "7.64"; |
|
52
|
|
|
|
|
95
|
|
|
52
|
|
|
|
|
2482
|
|
5
|
|
|
|
|
|
|
$VERSION =~ tr/_//d; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use base 'Exporter'; |
8
|
52
|
|
|
52
|
|
268
|
our @EXPORT_OK = qw( |
|
52
|
|
|
|
|
110
|
|
|
52
|
|
|
|
|
7645
|
|
9
|
|
|
|
|
|
|
decode_argv env |
10
|
|
|
|
|
|
|
$ENCODING_LOCALE $ENCODING_LOCALE_FS |
11
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT |
12
|
|
|
|
|
|
|
); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use Encode (); |
15
|
52
|
|
|
52
|
|
24571
|
use Encode::Alias (); |
|
52
|
|
|
|
|
651585
|
|
|
52
|
|
|
|
|
1497
|
|
16
|
52
|
|
|
52
|
|
379
|
|
|
52
|
|
|
|
|
100
|
|
|
52
|
|
|
|
|
36959
|
|
17
|
|
|
|
|
|
|
our $ENCODING_LOCALE; |
18
|
|
|
|
|
|
|
our $ENCODING_LOCALE_FS; |
19
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_IN; |
20
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_OUT; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
if ($^O eq "MSWin32") { |
24
|
|
|
|
|
|
|
unless ($ENCODING_LOCALE) { |
25
|
|
|
|
|
|
|
# Try to obtain what the Windows ANSI code page is |
26
|
104
|
50
|
|
104
|
|
512
|
eval { |
27
|
0
|
0
|
|
|
|
0
|
unless (defined &GetConsoleCP) { |
28
|
|
|
|
|
|
|
require Win32; |
29
|
0
|
|
|
|
|
0
|
# manually "import" it since Win32->import refuses |
30
|
0
|
0
|
|
|
|
0
|
*GetConsoleCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; |
31
|
0
|
|
|
|
|
0
|
} |
32
|
|
|
|
|
|
|
unless (defined &GetConsoleCP) { |
33
|
0
|
0
|
|
0
|
|
0
|
require Win32::API; |
|
0
|
|
|
|
|
0
|
|
34
|
|
|
|
|
|
|
Win32::API->Import('kernel32', 'int GetConsoleCP()'); |
35
|
0
|
0
|
|
|
|
0
|
} |
36
|
0
|
|
|
|
|
0
|
if (defined &GetConsoleCP) { |
37
|
0
|
|
|
|
|
0
|
my $cp = GetConsoleCP(); |
38
|
|
|
|
|
|
|
$ENCODING_LOCALE = "cp$cp" if $cp; |
39
|
0
|
0
|
|
|
|
0
|
} |
40
|
0
|
|
|
|
|
0
|
}; |
41
|
0
|
0
|
|
|
|
0
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
unless ($ENCODING_CONSOLE_IN) { |
44
|
|
|
|
|
|
|
# only test one since set together |
45
|
|
|
|
|
|
|
unless (defined &GetInputCP) { |
46
|
0
|
0
|
|
|
|
0
|
eval { |
47
|
|
|
|
|
|
|
require Win32; |
48
|
0
|
0
|
|
|
|
0
|
eval { |
49
|
0
|
|
|
|
|
0
|
local $SIG{__WARN__} = sub {} if ( "$]" < 5.014 ); # suppress deprecation warning for inherited AUTOLOAD of Win32::GetConsoleCP() |
50
|
0
|
|
|
|
|
0
|
Win32::GetConsoleCP(); |
51
|
0
|
|
|
|
|
0
|
}; |
52
|
0
|
0
|
|
0
|
|
0
|
# manually "import" it since Win32->import refuses |
53
|
0
|
|
|
|
|
0
|
*GetInputCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; |
54
|
|
|
|
|
|
|
*GetOutputCP = sub { &Win32::GetConsoleOutputCP } if defined &Win32::GetConsoleOutputCP; |
55
|
|
|
|
|
|
|
}; |
56
|
0
|
0
|
|
0
|
|
0
|
unless (defined &GetInputCP) { |
|
0
|
|
|
|
|
0
|
|
57
|
0
|
0
|
|
0
|
|
0
|
eval { |
|
0
|
|
|
|
|
0
|
|
58
|
|
|
|
|
|
|
# try Win32::Console module for codepage to use |
59
|
0
|
0
|
|
|
|
0
|
require Win32::Console; |
60
|
0
|
|
|
|
|
0
|
*GetInputCP = sub { &Win32::Console::InputCP } |
61
|
|
|
|
|
|
|
if defined &Win32::Console::InputCP; |
62
|
0
|
|
|
|
|
0
|
*GetOutputCP = sub { &Win32::Console::OutputCP } |
63
|
0
|
|
|
0
|
|
0
|
if defined &Win32::Console::OutputCP; |
64
|
0
|
0
|
|
|
|
0
|
}; |
65
|
0
|
|
|
0
|
|
0
|
} |
66
|
0
|
0
|
|
|
|
0
|
unless (defined &GetInputCP) { |
67
|
|
|
|
|
|
|
# final fallback |
68
|
|
|
|
|
|
|
*GetInputCP = *GetOutputCP = sub { |
69
|
0
|
0
|
|
|
|
0
|
# another fallback that could work is: |
70
|
|
|
|
|
|
|
# reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP |
71
|
|
|
|
|
|
|
((qx(chcp) || '') =~ /^Active code page: (\d+)/) |
72
|
|
|
|
|
|
|
? $1 : (); |
73
|
|
|
|
|
|
|
}; |
74
|
0
|
0
|
0
|
0
|
|
0
|
} |
75
|
|
|
|
|
|
|
} |
76
|
0
|
|
|
|
|
0
|
my $cp = GetInputCP(); |
77
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN = "cp$cp" if $cp; |
78
|
|
|
|
|
|
|
$cp = GetOutputCP(); |
79
|
0
|
|
|
|
|
0
|
$ENCODING_CONSOLE_OUT = "cp$cp" if $cp; |
80
|
0
|
0
|
|
|
|
0
|
} |
81
|
0
|
|
|
|
|
0
|
} |
82
|
0
|
0
|
|
|
|
0
|
|
83
|
|
|
|
|
|
|
unless ($ENCODING_LOCALE) { |
84
|
|
|
|
|
|
|
eval { |
85
|
|
|
|
|
|
|
require I18N::Langinfo; |
86
|
104
|
100
|
|
|
|
265
|
$ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); |
87
|
52
|
|
|
|
|
90
|
|
88
|
52
|
|
|
|
|
20881
|
# Workaround of Encode < v2.25. The "646" encoding alias was |
89
|
52
|
|
|
|
|
27945
|
# introduced in Encode-2.25, but we don't want to require that version |
90
|
|
|
|
|
|
|
# quite yet. Should avoid the CPAN testers failure reported from |
91
|
|
|
|
|
|
|
# openbsd-4.7/perl-5.10.0 combo. |
92
|
|
|
|
|
|
|
$ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646"; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# https://rt.cpan.org/Ticket/Display.html?id=66373 |
95
|
52
|
50
|
|
|
|
199
|
$ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8"; |
96
|
|
|
|
|
|
|
}; |
97
|
|
|
|
|
|
|
$ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN; |
98
|
52
|
50
|
33
|
|
|
228
|
} |
99
|
|
|
|
|
|
|
|
100
|
52
|
|
33
|
|
|
145
|
# Workaround of Encode < v2.71 for "cp65000" and "cp65001" |
101
|
|
|
|
|
|
|
# The "cp65000" and "cp65001" aliases were added in [Encode v2.71](https://github.com/dankogai/p5-encode/commit/7874bd95aa10967a3b5dbae333d16bcd703ac6c6) |
102
|
|
|
|
|
|
|
# via commit <https://github.com/dankogai/p5-encode/commit/84b9c1101d5251d37e226f80d1c6781718779047>. |
103
|
|
|
|
|
|
|
# This will avoid test failures for Win32 machines using the UTF-7 or UTF-8 code pages. |
104
|
|
|
|
|
|
|
$ENCODING_LOCALE = 'UTF-7' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65000"; |
105
|
|
|
|
|
|
|
$ENCODING_LOCALE = 'utf-8-strict' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65001"; |
106
|
|
|
|
|
|
|
|
107
|
104
|
50
|
33
|
|
|
534
|
if ($^O eq "darwin") { |
108
|
104
|
50
|
33
|
|
|
430
|
$ENCODING_LOCALE_FS ||= "UTF-8"; |
109
|
|
|
|
|
|
|
} |
110
|
104
|
50
|
|
|
|
237
|
|
111
|
0
|
|
0
|
|
|
0
|
# final fallback |
112
|
|
|
|
|
|
|
$ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8"; |
113
|
|
|
|
|
|
|
$ENCODING_LOCALE_FS ||= $ENCODING_LOCALE; |
114
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE; |
115
|
104
|
0
|
33
|
|
|
220
|
$ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN; |
116
|
104
|
|
33
|
|
|
389
|
|
117
|
104
|
|
66
|
|
|
312
|
unless (Encode::find_encoding($ENCODING_LOCALE)) { |
118
|
104
|
|
66
|
|
|
304
|
my $foundit; |
119
|
|
|
|
|
|
|
if (lc($ENCODING_LOCALE) eq "gb18030") { |
120
|
104
|
50
|
|
|
|
323
|
eval { |
121
|
0
|
|
|
|
|
0
|
require Encode::HanExtra; |
122
|
0
|
0
|
|
|
|
0
|
}; |
123
|
0
|
|
|
|
|
0
|
if ($@) { |
124
|
0
|
|
|
|
|
0
|
die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped"; |
125
|
|
|
|
|
|
|
} |
126
|
0
|
0
|
|
|
|
0
|
$foundit++ if Encode::find_encoding($ENCODING_LOCALE); |
127
|
0
|
|
|
|
|
0
|
} |
128
|
|
|
|
|
|
|
die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped" |
129
|
0
|
0
|
|
|
|
0
|
unless $foundit; |
130
|
|
|
|
|
|
|
|
131
|
0
|
0
|
|
|
|
0
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
_init(); |
137
|
|
|
|
|
|
|
Encode::Alias::define_alias(sub { |
138
|
|
|
|
|
|
|
no strict 'refs'; |
139
|
|
|
|
|
|
|
no warnings 'once'; |
140
|
|
|
|
|
|
|
return ${"ENCODING_" . uc(shift)}; |
141
|
52
|
|
|
52
|
|
390
|
}, "locale"); |
|
52
|
|
|
|
|
105
|
|
|
52
|
|
|
|
|
1630
|
|
142
|
52
|
|
|
52
|
|
296
|
|
|
52
|
|
|
|
|
128
|
|
|
52
|
|
|
|
|
3359
|
|
143
|
|
|
|
|
|
|
no strict 'refs'; |
144
|
|
|
|
|
|
|
for my $a (sort keys %Encode::Alias::Alias) { |
145
|
|
|
|
|
|
|
if (defined ${"ENCODING_" . uc($a)}) { |
146
|
|
|
|
|
|
|
delete $Encode::Alias::Alias{$a}; |
147
|
52
|
|
|
52
|
|
302
|
warn "Flushed alias cache for $a" if DEBUG; |
|
52
|
|
|
|
|
94
|
|
|
52
|
|
|
|
|
14195
|
|
148
|
52
|
|
|
52
|
|
299
|
} |
149
|
208
|
100
|
|
|
|
295
|
} |
|
208
|
|
|
|
|
618
|
|
150
|
52
|
|
|
|
|
152
|
} |
151
|
52
|
|
|
|
|
144
|
|
152
|
|
|
|
|
|
|
$ENCODING_LOCALE = shift; |
153
|
|
|
|
|
|
|
$ENCODING_LOCALE_FS = shift; |
154
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN = $ENCODING_LOCALE; |
155
|
|
|
|
|
|
|
$ENCODING_CONSOLE_OUT = $ENCODING_LOCALE; |
156
|
|
|
|
|
|
|
_init(); |
157
|
52
|
|
|
52
|
1
|
109
|
_flush_aliases(); |
158
|
52
|
|
|
|
|
106
|
} |
159
|
52
|
|
|
|
|
93
|
|
160
|
52
|
|
|
|
|
103
|
die if defined wantarray; |
161
|
52
|
|
|
|
|
1125
|
for (@ARGV) { |
162
|
52
|
|
|
|
|
5256
|
$_ = Encode::decode(locale => $_, @_); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
0
|
0
|
|
0
|
1
|
|
my $k = Encode::encode(locale => shift); |
167
|
0
|
|
|
|
|
|
my $old = $ENV{$k}; |
168
|
0
|
|
|
|
|
|
if (@_) { |
169
|
|
|
|
|
|
|
my $v = shift; |
170
|
|
|
|
|
|
|
if (defined $v) { |
171
|
|
|
|
|
|
|
$ENV{$k} = Encode::encode(locale => $v); |
172
|
|
|
|
|
|
|
} |
173
|
0
|
|
|
0
|
1
|
|
else { |
174
|
0
|
|
|
|
|
|
delete $ENV{$k}; |
175
|
0
|
0
|
|
|
|
|
} |
176
|
0
|
|
|
|
|
|
} |
177
|
0
|
0
|
|
|
|
|
return Encode::decode(locale => $old) if defined wantarray; |
178
|
0
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
1; |
181
|
0
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 NAME |
184
|
0
|
0
|
|
|
|
|
|
185
|
|
|
|
|
|
|
ExtUtils::MakeMaker::Locale - bundled Encode::Locale |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 SYNOPSIS |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
use Encode::Locale; |
190
|
|
|
|
|
|
|
use Encode; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
$string = decode(locale => $bytes); |
193
|
|
|
|
|
|
|
$bytes = encode(locale => $string); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if (-t) { |
196
|
|
|
|
|
|
|
binmode(STDIN, ":encoding(console_in)"); |
197
|
|
|
|
|
|
|
binmode(STDOUT, ":encoding(console_out)"); |
198
|
|
|
|
|
|
|
binmode(STDERR, ":encoding(console_out)"); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Processing file names passed in as arguments |
202
|
|
|
|
|
|
|
my $uni_filename = decode(locale => $ARGV[0]); |
203
|
|
|
|
|
|
|
open(my $fh, "<", encode(locale_fs => $uni_filename)) |
204
|
|
|
|
|
|
|
|| die "Can't open '$uni_filename': $!"; |
205
|
|
|
|
|
|
|
binmode($fh, ":encoding(locale)"); |
206
|
|
|
|
|
|
|
... |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head1 DESCRIPTION |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
In many applications it's wise to let Perl use Unicode for the strings it |
211
|
|
|
|
|
|
|
processes. Most of the interfaces Perl has to the outside world are still byte |
212
|
|
|
|
|
|
|
based. Programs therefore need to decode byte strings that enter the program |
213
|
|
|
|
|
|
|
from the outside and encode them again on the way out. |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
The POSIX locale system is used to specify both the language conventions |
216
|
|
|
|
|
|
|
requested by the user and the preferred character set to consume and |
217
|
|
|
|
|
|
|
output. The C<Encode::Locale> module looks up the charset and encoding (called |
218
|
|
|
|
|
|
|
a CODESET in the locale jargon) and arranges for the L<Encode> module to know |
219
|
|
|
|
|
|
|
this encoding under the name "locale". It means bytes obtained from the |
220
|
|
|
|
|
|
|
environment can be converted to Unicode strings by calling C<< |
221
|
|
|
|
|
|
|
Encode::encode(locale => $bytes) >> and converted back again with C<< |
222
|
|
|
|
|
|
|
Encode::decode(locale => $string) >>. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Where file systems interfaces pass file names in and out of the program we also |
225
|
|
|
|
|
|
|
need care. The trend is for operating systems to use a fixed file encoding |
226
|
|
|
|
|
|
|
that don't actually depend on the locale; and this module determines the most |
227
|
|
|
|
|
|
|
appropriate encoding for file names. The L<Encode> module will know this |
228
|
|
|
|
|
|
|
encoding under the name "locale_fs". For traditional Unix systems this will |
229
|
|
|
|
|
|
|
be an alias to the same encoding as "locale". |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
For programs running in a terminal window (called a "Console" on some systems) |
232
|
|
|
|
|
|
|
the "locale" encoding is usually a good choice for what to expect as input and |
233
|
|
|
|
|
|
|
output. Some systems allows us to query the encoding set for the terminal and |
234
|
|
|
|
|
|
|
C<Encode::Locale> will do that if available and make these encodings known |
235
|
|
|
|
|
|
|
under the C<Encode> aliases "console_in" and "console_out". For systems where |
236
|
|
|
|
|
|
|
we can't determine the terminal encoding these will be aliased as the same |
237
|
|
|
|
|
|
|
encoding as "locale". The advice is to use "console_in" for input known to |
238
|
|
|
|
|
|
|
come from the terminal and "console_out" for output to the terminal. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
In addition to arranging for various Encode aliases the following functions and |
241
|
|
|
|
|
|
|
variables are provided: |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=over |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=item decode_argv( ) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=item decode_argv( Encode::FB_CROAK ) |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
This will decode the command line arguments to perl (the C<@ARGV> array) in-place. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
The function will by default replace characters that can't be decoded by |
252
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Any argument provided is passed as CHECK to underlying Encode::decode() call. |
255
|
|
|
|
|
|
|
Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the |
256
|
|
|
|
|
|
|
command line arguments can be decoded. See L<Encode/"Handling Malformed Data"> |
257
|
|
|
|
|
|
|
for details on other options for CHECK. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=item env( $uni_key ) |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=item env( $uni_key => $uni_value ) |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Interface to get/set environment variables. Returns the current value as a |
264
|
|
|
|
|
|
|
Unicode string. The $uni_key and $uni_value arguments are expected to be |
265
|
|
|
|
|
|
|
Unicode strings as well. Passing C<undef> as $uni_value deletes the |
266
|
|
|
|
|
|
|
environment variable named $uni_key. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
The returned value will have the characters that can't be decoded replaced by |
269
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
There is no interface to request alternative CHECK behavior as for |
272
|
|
|
|
|
|
|
decode_argv(). If you need that you need to call encode/decode yourself. |
273
|
|
|
|
|
|
|
For example: |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK); |
276
|
|
|
|
|
|
|
my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK); |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item reinit( ) |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=item reinit( $encoding ) |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Reinitialize the encodings from the locale. You want to call this function if |
283
|
|
|
|
|
|
|
you changed anything in the environment that might influence the locale. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
This function will croak if the determined encoding isn't recognized by |
286
|
|
|
|
|
|
|
the Encode module. |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
With argument force $ENCODING_... variables to set to the given value. |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=item $ENCODING_LOCALE |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
The encoding name determined to be suitable for the current locale. |
293
|
|
|
|
|
|
|
L<Encode> know this encoding as "locale". |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=item $ENCODING_LOCALE_FS |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
The encoding name determined to be suitable for file system interfaces |
298
|
|
|
|
|
|
|
involving file names. |
299
|
|
|
|
|
|
|
L<Encode> know this encoding as "locale_fs". |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_IN |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_OUT |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The encodings to be used for reading and writing output to the a console. |
306
|
|
|
|
|
|
|
L<Encode> know these encodings as "console_in" and "console_out". |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=back |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head1 NOTES |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
This table summarizes the mapping of the encodings set up |
313
|
|
|
|
|
|
|
by the C<Encode::Locale> module: |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Encode | | | |
316
|
|
|
|
|
|
|
Alias | Windows | Mac OS X | POSIX |
317
|
|
|
|
|
|
|
------------+---------+--------------+------------ |
318
|
|
|
|
|
|
|
locale | ANSI | nl_langinfo | nl_langinfo |
319
|
|
|
|
|
|
|
locale_fs | ANSI | UTF-8 | nl_langinfo |
320
|
|
|
|
|
|
|
console_in | OEM | nl_langinfo | nl_langinfo |
321
|
|
|
|
|
|
|
console_out | OEM | nl_langinfo | nl_langinfo |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head2 Windows |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16 |
326
|
|
|
|
|
|
|
strings) and a byte based API based a character set called ANSI. The |
327
|
|
|
|
|
|
|
regular Perl interfaces to the OS currently only uses the ANSI APIs. |
328
|
|
|
|
|
|
|
Unfortunately ANSI is not a single character set. |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
The encoding that corresponds to ANSI varies between different editions of |
331
|
|
|
|
|
|
|
Windows. For many western editions of Windows ANSI corresponds to CP-1252 |
332
|
|
|
|
|
|
|
which is a character set similar to ISO-8859-1. Conceptually the ANSI |
333
|
|
|
|
|
|
|
character set is a similar concept to the POSIX locale CODESET so this module |
334
|
|
|
|
|
|
|
figures out what the ANSI code page is and make this available as |
335
|
|
|
|
|
|
|
$ENCODING_LOCALE and the "locale" Encoding alias. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Windows systems also operate with another byte based character set. |
338
|
|
|
|
|
|
|
It's called the OEM code page. This is the encoding that the Console |
339
|
|
|
|
|
|
|
takes as input and output. It's common for the OEM code page to |
340
|
|
|
|
|
|
|
differ from the ANSI code page. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head2 Mac OS X |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
On Mac OS X the file system encoding is always UTF-8 while the locale |
345
|
|
|
|
|
|
|
can otherwise be set up as normal for POSIX systems. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
File names on Mac OS X will at the OS-level be converted to |
348
|
|
|
|
|
|
|
NFD-form. A file created by passing a NFC-filename will come |
349
|
|
|
|
|
|
|
in NFD-form from readdir(). See L<Unicode::Normalize> for details |
350
|
|
|
|
|
|
|
of NFD/NFC. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Actually, Apple does not follow the Unicode NFD standard since not all |
353
|
|
|
|
|
|
|
character ranges are decomposed. The claim is that this avoids problems with |
354
|
|
|
|
|
|
|
round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for |
355
|
|
|
|
|
|
|
details. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 POSIX (Linux and other Unixes) |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
File systems might vary in what encoding is to be used for |
360
|
|
|
|
|
|
|
filenames. Since this module has no way to actually figure out |
361
|
|
|
|
|
|
|
what the is correct it goes with the best guess which is to |
362
|
|
|
|
|
|
|
assume filenames are encoding according to the current locale. |
363
|
|
|
|
|
|
|
Users are advised to always specify UTF-8 as the locale charset. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head1 SEE ALSO |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
L<I18N::Langinfo>, L<Encode>, L<Term::Encoding> |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 AUTHOR |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Copyright 2010 Gisle Aas <gisle@aas.no>. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
374
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=cut |