File Coverage

lib/ExtUtils/MakeMaker/Locale.pm
Criterion Covered Total %
statement 51 108 47.2
branch 11 72 15.2
condition 10 28 35.7
subroutine 11 20 55.0
pod 3 3 100.0
total 86 231 37.2


line stmt bran cond sub pod time code
1              
2             use strict;
3 52     52   301 use warnings;
  52         97  
  52         1381  
4 52     52   218 our $VERSION = "7.64";
  52         95  
  52         2482  
5             $VERSION =~ tr/_//d;
6              
7             use base 'Exporter';
8 52     52   268 our @EXPORT_OK = qw(
  52         110  
  52         7645  
9             decode_argv env
10             $ENCODING_LOCALE $ENCODING_LOCALE_FS
11             $ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT
12             );
13              
14             use Encode ();
15 52     52   24571 use Encode::Alias ();
  52         651585  
  52         1497  
16 52     52   379  
  52         100  
  52         36959  
17             our $ENCODING_LOCALE;
18             our $ENCODING_LOCALE_FS;
19             our $ENCODING_CONSOLE_IN;
20             our $ENCODING_CONSOLE_OUT;
21              
22              
23             if ($^O eq "MSWin32") {
24             unless ($ENCODING_LOCALE) {
25             # Try to obtain what the Windows ANSI code page is
26 104 50   104   512 eval {
27 0 0       0 unless (defined &GetConsoleCP) {
28             require Win32;
29 0         0 # manually "import" it since Win32->import refuses
30 0 0       0 *GetConsoleCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP;
31 0         0 }
32             unless (defined &GetConsoleCP) {
33 0 0   0   0 require Win32::API;
  0         0  
34             Win32::API->Import('kernel32', 'int GetConsoleCP()');
35 0 0       0 }
36 0         0 if (defined &GetConsoleCP) {
37 0         0 my $cp = GetConsoleCP();
38             $ENCODING_LOCALE = "cp$cp" if $cp;
39 0 0       0 }
40 0         0 };
41 0 0       0 }
42              
43             unless ($ENCODING_CONSOLE_IN) {
44             # only test one since set together
45             unless (defined &GetInputCP) {
46 0 0       0 eval {
47             require Win32;
48 0 0       0 eval {
49 0         0 local $SIG{__WARN__} = sub {} if ( "$]" < 5.014 ); # suppress deprecation warning for inherited AUTOLOAD of Win32::GetConsoleCP()
50 0         0 Win32::GetConsoleCP();
51 0         0 };
52 0 0   0   0 # manually "import" it since Win32->import refuses
53 0         0 *GetInputCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP;
54             *GetOutputCP = sub { &Win32::GetConsoleOutputCP } if defined &Win32::GetConsoleOutputCP;
55             };
56 0 0   0   0 unless (defined &GetInputCP) {
  0         0  
57 0 0   0   0 eval {
  0         0  
58             # try Win32::Console module for codepage to use
59 0 0       0 require Win32::Console;
60 0         0 *GetInputCP = sub { &Win32::Console::InputCP }
61             if defined &Win32::Console::InputCP;
62 0         0 *GetOutputCP = sub { &Win32::Console::OutputCP }
63 0     0   0 if defined &Win32::Console::OutputCP;
64 0 0       0 };
65 0     0   0 }
66 0 0       0 unless (defined &GetInputCP) {
67             # final fallback
68             *GetInputCP = *GetOutputCP = sub {
69 0 0       0 # another fallback that could work is:
70             # reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP
71             ((qx(chcp) || '') =~ /^Active code page: (\d+)/)
72             ? $1 : ();
73             };
74 0 0 0 0   0 }
75             }
76 0         0 my $cp = GetInputCP();
77             $ENCODING_CONSOLE_IN = "cp$cp" if $cp;
78             $cp = GetOutputCP();
79 0         0 $ENCODING_CONSOLE_OUT = "cp$cp" if $cp;
80 0 0       0 }
81 0         0 }
82 0 0       0  
83             unless ($ENCODING_LOCALE) {
84             eval {
85             require I18N::Langinfo;
86 104 100       265 $ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
87 52         90  
88 52         20881 # Workaround of Encode < v2.25. The "646" encoding alias was
89 52         27945 # introduced in Encode-2.25, but we don't want to require that version
90             # quite yet. Should avoid the CPAN testers failure reported from
91             # openbsd-4.7/perl-5.10.0 combo.
92             $ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646";
93              
94             # https://rt.cpan.org/Ticket/Display.html?id=66373
95 52 50       199 $ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8";
96             };
97             $ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN;
98 52 50 33     228 }
99              
100 52   33     145 # Workaround of Encode < v2.71 for "cp65000" and "cp65001"
101             # The "cp65000" and "cp65001" aliases were added in [Encode v2.71](https://github.com/dankogai/p5-encode/commit/7874bd95aa10967a3b5dbae333d16bcd703ac6c6)
102             # via commit <https://github.com/dankogai/p5-encode/commit/84b9c1101d5251d37e226f80d1c6781718779047>.
103             # This will avoid test failures for Win32 machines using the UTF-7 or UTF-8 code pages.
104             $ENCODING_LOCALE = 'UTF-7' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65000";
105             $ENCODING_LOCALE = 'utf-8-strict' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65001";
106              
107 104 50 33     534 if ($^O eq "darwin") {
108 104 50 33     430 $ENCODING_LOCALE_FS ||= "UTF-8";
109             }
110 104 50       237  
111 0   0     0 # final fallback
112             $ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8";
113             $ENCODING_LOCALE_FS ||= $ENCODING_LOCALE;
114             $ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE;
115 104 0 33     220 $ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN;
116 104   33     389  
117 104   66     312 unless (Encode::find_encoding($ENCODING_LOCALE)) {
118 104   66     304 my $foundit;
119             if (lc($ENCODING_LOCALE) eq "gb18030") {
120 104 50       323 eval {
121 0         0 require Encode::HanExtra;
122 0 0       0 };
123 0         0 if ($@) {
124 0         0 die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped";
125             }
126 0 0       0 $foundit++ if Encode::find_encoding($ENCODING_LOCALE);
127 0         0 }
128             die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped"
129 0 0       0 unless $foundit;
130              
131 0 0       0 }
132              
133             # use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT;
134             }
135              
136             _init();
137             Encode::Alias::define_alias(sub {
138             no strict 'refs';
139             no warnings 'once';
140             return ${"ENCODING_" . uc(shift)};
141 52     52   390 }, "locale");
  52         105  
  52         1630  
142 52     52   296  
  52         128  
  52         3359  
143             no strict 'refs';
144             for my $a (sort keys %Encode::Alias::Alias) {
145             if (defined ${"ENCODING_" . uc($a)}) {
146             delete $Encode::Alias::Alias{$a};
147 52     52   302 warn "Flushed alias cache for $a" if DEBUG;
  52         94  
  52         14195  
148 52     52   299 }
149 208 100       295 }
  208         618  
150 52         152 }
151 52         144  
152             $ENCODING_LOCALE = shift;
153             $ENCODING_LOCALE_FS = shift;
154             $ENCODING_CONSOLE_IN = $ENCODING_LOCALE;
155             $ENCODING_CONSOLE_OUT = $ENCODING_LOCALE;
156             _init();
157 52     52 1 109 _flush_aliases();
158 52         106 }
159 52         93  
160 52         103 die if defined wantarray;
161 52         1125 for (@ARGV) {
162 52         5256 $_ = Encode::decode(locale => $_, @_);
163             }
164             }
165              
166 0 0   0 1   my $k = Encode::encode(locale => shift);
167 0           my $old = $ENV{$k};
168 0           if (@_) {
169             my $v = shift;
170             if (defined $v) {
171             $ENV{$k} = Encode::encode(locale => $v);
172             }
173 0     0 1   else {
174 0           delete $ENV{$k};
175 0 0         }
176 0           }
177 0 0         return Encode::decode(locale => $old) if defined wantarray;
178 0           }
179              
180             1;
181 0            
182              
183             =head1 NAME
184 0 0          
185             ExtUtils::MakeMaker::Locale - bundled Encode::Locale
186              
187             =head1 SYNOPSIS
188              
189             use Encode::Locale;
190             use Encode;
191              
192             $string = decode(locale => $bytes);
193             $bytes = encode(locale => $string);
194              
195             if (-t) {
196             binmode(STDIN, ":encoding(console_in)");
197             binmode(STDOUT, ":encoding(console_out)");
198             binmode(STDERR, ":encoding(console_out)");
199             }
200              
201             # Processing file names passed in as arguments
202             my $uni_filename = decode(locale => $ARGV[0]);
203             open(my $fh, "<", encode(locale_fs => $uni_filename))
204             || die "Can't open '$uni_filename': $!";
205             binmode($fh, ":encoding(locale)");
206             ...
207              
208             =head1 DESCRIPTION
209              
210             In many applications it's wise to let Perl use Unicode for the strings it
211             processes. Most of the interfaces Perl has to the outside world are still byte
212             based. Programs therefore need to decode byte strings that enter the program
213             from the outside and encode them again on the way out.
214              
215             The POSIX locale system is used to specify both the language conventions
216             requested by the user and the preferred character set to consume and
217             output. The C<Encode::Locale> module looks up the charset and encoding (called
218             a CODESET in the locale jargon) and arranges for the L<Encode> module to know
219             this encoding under the name "locale". It means bytes obtained from the
220             environment can be converted to Unicode strings by calling C<<
221             Encode::encode(locale => $bytes) >> and converted back again with C<<
222             Encode::decode(locale => $string) >>.
223              
224             Where file systems interfaces pass file names in and out of the program we also
225             need care. The trend is for operating systems to use a fixed file encoding
226             that don't actually depend on the locale; and this module determines the most
227             appropriate encoding for file names. The L<Encode> module will know this
228             encoding under the name "locale_fs". For traditional Unix systems this will
229             be an alias to the same encoding as "locale".
230              
231             For programs running in a terminal window (called a "Console" on some systems)
232             the "locale" encoding is usually a good choice for what to expect as input and
233             output. Some systems allows us to query the encoding set for the terminal and
234             C<Encode::Locale> will do that if available and make these encodings known
235             under the C<Encode> aliases "console_in" and "console_out". For systems where
236             we can't determine the terminal encoding these will be aliased as the same
237             encoding as "locale". The advice is to use "console_in" for input known to
238             come from the terminal and "console_out" for output to the terminal.
239              
240             In addition to arranging for various Encode aliases the following functions and
241             variables are provided:
242              
243             =over
244              
245             =item decode_argv( )
246              
247             =item decode_argv( Encode::FB_CROAK )
248              
249             This will decode the command line arguments to perl (the C<@ARGV> array) in-place.
250              
251             The function will by default replace characters that can't be decoded by
252             "\x{FFFD}", the Unicode replacement character.
253              
254             Any argument provided is passed as CHECK to underlying Encode::decode() call.
255             Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the
256             command line arguments can be decoded. See L<Encode/"Handling Malformed Data">
257             for details on other options for CHECK.
258              
259             =item env( $uni_key )
260              
261             =item env( $uni_key => $uni_value )
262              
263             Interface to get/set environment variables. Returns the current value as a
264             Unicode string. The $uni_key and $uni_value arguments are expected to be
265             Unicode strings as well. Passing C<undef> as $uni_value deletes the
266             environment variable named $uni_key.
267              
268             The returned value will have the characters that can't be decoded replaced by
269             "\x{FFFD}", the Unicode replacement character.
270              
271             There is no interface to request alternative CHECK behavior as for
272             decode_argv(). If you need that you need to call encode/decode yourself.
273             For example:
274              
275             my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK);
276             my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK);
277              
278             =item reinit( )
279              
280             =item reinit( $encoding )
281              
282             Reinitialize the encodings from the locale. You want to call this function if
283             you changed anything in the environment that might influence the locale.
284              
285             This function will croak if the determined encoding isn't recognized by
286             the Encode module.
287              
288             With argument force $ENCODING_... variables to set to the given value.
289              
290             =item $ENCODING_LOCALE
291              
292             The encoding name determined to be suitable for the current locale.
293             L<Encode> know this encoding as "locale".
294              
295             =item $ENCODING_LOCALE_FS
296              
297             The encoding name determined to be suitable for file system interfaces
298             involving file names.
299             L<Encode> know this encoding as "locale_fs".
300              
301             =item $ENCODING_CONSOLE_IN
302              
303             =item $ENCODING_CONSOLE_OUT
304              
305             The encodings to be used for reading and writing output to the a console.
306             L<Encode> know these encodings as "console_in" and "console_out".
307              
308             =back
309              
310             =head1 NOTES
311              
312             This table summarizes the mapping of the encodings set up
313             by the C<Encode::Locale> module:
314              
315             Encode | | |
316             Alias | Windows | Mac OS X | POSIX
317             ------------+---------+--------------+------------
318             locale | ANSI | nl_langinfo | nl_langinfo
319             locale_fs | ANSI | UTF-8 | nl_langinfo
320             console_in | OEM | nl_langinfo | nl_langinfo
321             console_out | OEM | nl_langinfo | nl_langinfo
322              
323             =head2 Windows
324              
325             Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16
326             strings) and a byte based API based a character set called ANSI. The
327             regular Perl interfaces to the OS currently only uses the ANSI APIs.
328             Unfortunately ANSI is not a single character set.
329              
330             The encoding that corresponds to ANSI varies between different editions of
331             Windows. For many western editions of Windows ANSI corresponds to CP-1252
332             which is a character set similar to ISO-8859-1. Conceptually the ANSI
333             character set is a similar concept to the POSIX locale CODESET so this module
334             figures out what the ANSI code page is and make this available as
335             $ENCODING_LOCALE and the "locale" Encoding alias.
336              
337             Windows systems also operate with another byte based character set.
338             It's called the OEM code page. This is the encoding that the Console
339             takes as input and output. It's common for the OEM code page to
340             differ from the ANSI code page.
341              
342             =head2 Mac OS X
343              
344             On Mac OS X the file system encoding is always UTF-8 while the locale
345             can otherwise be set up as normal for POSIX systems.
346              
347             File names on Mac OS X will at the OS-level be converted to
348             NFD-form. A file created by passing a NFC-filename will come
349             in NFD-form from readdir(). See L<Unicode::Normalize> for details
350             of NFD/NFC.
351              
352             Actually, Apple does not follow the Unicode NFD standard since not all
353             character ranges are decomposed. The claim is that this avoids problems with
354             round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for
355             details.
356              
357             =head2 POSIX (Linux and other Unixes)
358              
359             File systems might vary in what encoding is to be used for
360             filenames. Since this module has no way to actually figure out
361             what the is correct it goes with the best guess which is to
362             assume filenames are encoding according to the current locale.
363             Users are advised to always specify UTF-8 as the locale charset.
364              
365             =head1 SEE ALSO
366              
367             L<I18N::Langinfo>, L<Encode>, L<Term::Encoding>
368              
369             =head1 AUTHOR
370              
371             Copyright 2010 Gisle Aas <gisle@aas.no>.
372              
373             This library is free software; you can redistribute it and/or
374             modify it under the same terms as Perl itself.
375              
376             =cut