File Coverage

blib/lib/I18N/Charset.pm

Criterion	Covered	Total	%
statement	171	210	81.4
branch	78	100	78.0
condition	16	24	66.6
subroutine	31	35	88.5
pod	16	16	100.0
total	312	385	81.0

line	stmt	bran	cond	sub	pod	time	code
1
2							# $rcs = ' $Id: Charset.pm,v 1.414 2015-02-02 19:49:14 Martin Exp $ ' ;
3
4							package I18N::Charset;
5
6	11			11		537687	use strict;
	11					95
	11					364
7	11			11		71	use warnings;
	11					20
	11					432
8
9							require 5.005;
10
11	11			11		73	use base 'Exporter';
	11					19
	11					1415
12	11			11		437	use Carp;
	11					39
	11					1791
13
14							=head1 NAME
15
16							I18N::Charset - IANA Character Set Registry names and Unicode::MapUTF8
17							(et al.) conversion scheme names
18
19							=head1 SYNOPSIS
20
21							use I18N::Charset;
22
23							$sCharset = iana_charset_name('WinCyrillic');
24							# $sCharset is now 'windows-1251'
25							$sCharset = umap_charset_name('Adobe DingBats');
26							# $sCharset is now 'ADOBE-DINGBATS' which can be passed to Unicode::Map->new()
27							$sCharset = map8_charset_name('windows-1251');
28							# $sCharset is now 'cp1251' which can be passed to Unicode::Map8->new()
29							$sCharset = umu8_charset_name('x-sjis');
30							# $sCharset is now 'sjis' which can be passed to Unicode::MapUTF8->new()
31							$sCharset = libi_charset_name('x-sjis');
32							# $sCharset is now 'MS_KANJI' which can be passed to `iconv -f $sCharset ...`
33							$sCharset = enco_charset_name('Shift-JIS');
34							# $sCharset is now 'shiftjis' which can be passed to Encode::from_to()
35
36							I18N::Charset::add_iana_alias('my-japanese' => 'iso-2022-jp');
37							I18N::Charset::add_map8_alias('my-arabic' => 'arabic7');
38							I18N::Charset::add_umap_alias('my-hebrew' => 'ISO-8859-8');
39							I18N::Charset::add_libi_alias('my-sjis' => 'x-sjis');
40							I18N::Charset::add_enco_alias('my-japanese' => 'shiftjis');
41
42							=head1 DESCRIPTION
43
44							The C<I18N::Charset> module provides access to the IANA Character Set
45							Registry names for identifying character encoding schemes. It also
46							provides a mapping to the character set names used by the
47							Unicode::Map and Unicode::Map8 modules.
48
49							So, for example, if you get an HTML document with a META CHARSET="..."
50							tag, you can fairly quickly determine what Unicode::MapXXX module can
51							be used to convert it to Unicode.
52
53							If you don't have the module Unicode::Map installed, the umap_
54							functions will always return undef.
55							If you don't have the module Unicode::Map8 installed, the map8_
56							functions will always return undef.
57							If you don't have the module Unicode::MapUTF8 installed, the umu8_
58							functions will always return undef.
59							If you don't have the iconv library installed, the libi_
60							functions will always return undef.
61							If you don't have the Encode module installed, the enco_
62							functions will always return undef.
63
64							=cut
65
66							#-----------------------------------------------------------------------
67							# Public Global Variables
68							#-----------------------------------------------------------------------
69
70							our
71							$VERSION = 1.418;
72
73							our @EXPORT = qw( iana_charset_name
74							map8_charset_name
75							umap_charset_name
76							umu8_charset_name
77							mib_charset_name
78							mime_charset_name
79							libi_charset_name
80							enco_charset_name
81							mib_to_charset_name charset_name_to_mib
82							);
83							our @EXPORT_OK = qw( add_iana_alias add_map8_alias add_umap_alias add_libi_alias add_enco_alias );
84
85							#-----------------------------------------------------------------------
86							# Private Global Variables
87							#-----------------------------------------------------------------------
88
89							# %hsMIBofShortname is a hash of stripped names to mib.
90							my %hsMIBofShortname;
91							# %hsLongnameOfMIB is a hash of mib to long name.
92							my %hsLongnameOfMIB;
93							# %hsMIBofLongname is a hash of long name to mib.
94							my %hsMIBofLongname;
95							# %hsMIMEofMIB is a hash of mib to preferred MIME names.
96							my %hsMIMEofMIB;
97							# %MIBtoMAP8 is a hash of mib to Unicode::Map8 names. (Only valid for
98							# those U::Map8 names that we can find in the IANA registry)
99							my %MIBtoMAP8;
100							# %MIBtoUMAP is a hash of mib to Unicode::Map names. If a U::Map
101							# encoding does not have an official IANA entry, we create a dummy mib
102							# for it.
103							my %MIBtoUMAP;
104							# %MIBtoUMU8 is a hash of mib to Unicode::MapUTF8 names. If a
105							# U::MapUTF8 encoding does not have an official IANA entry, we create
106							# a dummy mib for it.
107							my %MIBtoUMU8;
108							# %MIBtoLIBI is a hash of mib to libiconv names. (Only valid for
109							# those libiconv names that we can find in the IANA registry)
110							my %MIBtoLIBI;
111							# %MIBtoENCO is a hash of mib to Encode names. (Only valid for
112							# those Encode names that we can find in the IANA registry)
113							my %MIBtoENCO;
114
115	11			11		85	use constant DEBUG => 0;
	11					28
	11					1142
116	11			11		74	use constant DEBUG_ENCO => 0;
	11					22
	11					565
117	11			11		65	use constant DEBUG_LIBI => 0;
	11					21
	11					60735
118
119							=head1 CONVERSION ROUTINES
120
121							There are four main conversion routines: C<iana_charset_name()>,
122							C<map8_charset_name()>, C<umap_charset_name()>, and
123							C<umu8_charset_name()>.
124
125							=over 4
126
127							=item iana_charset_name()
128
129							This function takes a string containing the name of a character set
130							and returns a string which contains the official IANA name of the
131							character set identified. If no valid character set name can be
132							identified, then C<undef> will be returned. The case and punctuation
133							within the string are not important.
134
135							$sCharset = iana_charset_name('WinCyrillic');
136
137							=cut
138
139							my $sDummy = 'dummymib';
140							my $sFakeMIB = $sDummy .'001';
141
142							sub _is_dummy
143							{
144	9698			9698		13300	my $s = shift;
145	9698					32003	return ($s =~ m!\A$sDummy!);
146							} # _is_dummy
147
148							sub iana_charset_name
149							{
150	9722			9722	1	15173	my $code = shift;
151	9722	100				16015	return undef unless defined $code;
152	9720	100				15995	return undef unless $code ne '';
153							# $iDebug = ($code =~ m!sjis!);
154							# print STDERR " + iana_charset_name($code)..." if $iDebug;
155	9718					13913	my $mib = _short_to_mib($code);
156	9718	100				17747	return undef unless defined $mib;
157							# print STDERR " + mib is ($mib)..." if $iDebug;
158							# Make sure this is really a IANA mib:
159	9443	100				14100	return undef if _is_dummy($mib);
160							# print STDERR " + is really iana..." if $iDebug;
161	8647					24737	return $hsLongnameOfMIB{$mib};
162							} # iana_charset_name
163
164
165							sub _try_list
166							{
167	11742			11742		15423	my $code = shift;
168	11742					17217	my @asTry = ($code, _strip($code));
169	11742	100				23798	push @asTry, _strip($code) if $code =~ s!\A(x-)+!!; # try without leading x-
170	11742					24880	return @asTry;
171							} # _try_list
172
173							sub _short_to_mib
174							{
175	11742			11742		14868	my $code = shift;
176	11742					27480	local $^W = 0;
177							# print STDERR " + _short_to_mib($code)..." if DEBUG;
178	11742					15519	my $answer = undef;
179							TRY_SHORT:
180	11742					16657	foreach my $sTry (_try_list($code))
181							{
182	19981		100			50293	my $iMIB = $hsMIBofShortname{$sTry} \|\| 'undef';
183							# print STDERR "try($sTry)...$iMIB..." if DEBUG;
184	19981	100				34570	if ($iMIB ne 'undef')
185							{
186	10991					13844	$answer = $iMIB;
187	10991					15979	last TRY_SHORT;
188							} # if
189							} # foreach
190							# print STDERR "answer is $answer\n" if DEBUG;
191	11742					27093	return $answer;
192							} # _short_to_mib
193
194
195							sub _short_to_long
196							{
197	0			0		0	local $^W = 0;
198	0					0	my $s = shift;
199							# print STDERR " + _short_to_long($s)..." if DEBUG;
200	0					0	return $hsLongnameOfMIB{_short_to_mib($s)};
201							} # _short_to_long
202
203
204							=item mime_charset_name()
205
206							This function takes a string containing the name of a character set
207							and returns a string which contains the preferred MIME name of the
208							character set identified. If no valid character set name can be
209							identified, then C<undef> will be returned. The case and punctuation
210							within the string are not important.
211
212							$sCharset = mime_charset_name('Extended_UNIX_Code_Packed_Format_for_Japanese');
213
214							=cut
215
216							sub mime_charset_name
217							{
218							# This function contributed by Masafumi "Max" Nakane. Thank you!
219	13			13	1	111	my $code = shift;
220	13	100				41	return undef unless defined $code;
221	11	100				27	return undef unless $code ne '';
222							# print STDERR " + mime_charset_name($code)..." if DEBUG;
223	10					23	my $mib = _short_to_mib($code);
224	10	100				32	return undef unless defined $mib;
225							# print STDERR " + mib is ($mib)..." if DEBUG;
226							# Make sure this is really an IANA mib:
227	7	100				12	return undef if _is_dummy($mib);
228							# print STDERR " + is really iana..." if DEBUG;
229	6					33	return $hsMIMEofMIB{$mib};
230							} # mime_charset_name
231
232
233							=item enco_charset_name()
234
235							This function takes a string containing the name of a character set
236							and returns a string which contains a name of the character set
237							suitable to be passed to the Encode module. If no valid character set
238							name can be identified, or if Encode is not installed, then C<undef>
239							will be returned. The case and punctuation within the string are not
240							important.
241
242							$sCharset = enco_charset_name('Extended_UNIX_Code_Packed_Format_for_Japanese');
243
244							=cut
245
246							my $iEncoLoaded = 0;
247
248							sub _maybe_load_enco # PRIVATE
249							{
250	64	100		64		120	return if $iEncoLoaded;
251							# Get a list of aliases from Encode:
252	2	50				166	if (eval q{require Encode})
253							{
254	2					6	my @as;
255	2					14	@as = Encode->encodings(':all');
256							# push @as, Encode->encodings('EBCDIC');
257	2					2150	my $iFake = 0;
258	2					9	my $iReal = 0;
259							ENCODING:
260	2					7	foreach my $s (@as)
261							{
262							# First, see if this already has an IANA mapping:
263	248					333	my $mib;
264	248					360	my $sIana = iana_charset_name($s);
265	248	100				428	if (!defined $sIana)
266							{
267							# Create a dummy mib:
268	104					1229	$mib = $sFakeMIB++;
269	104					137	$iFake++;
270							} # if
271							else
272							{
273	144					220	$mib = charset_name_to_mib($sIana);
274	144					207	$iReal++;
275							}
276							# At this point we have a mib for this Encode entry.
277	248					430	$MIBtoENCO{$mib} = $s;
278	248					304	DEBUG_ENCO && print STDERR " + mib for enco ==$s== is $mib\n";
279	248					394	$hsMIBofShortname{_strip($s)} = $mib;
280	248	100				391	DEBUG_ENCO && print STDERR " + assign enco =$s==>$mib\n" if _is_dummy($mib);
281							} # foreach ENCODING
282	2					4	if (DEBUG_ENCO)
283							{
284							print STDERR " + Summary of Encode encodings:\n";
285							printf STDERR (" + %d encodings found.\n", scalar(@as));
286							print STDERR " + $iFake fake mibs created.\n";
287							print STDERR " + $iReal real mibs re-used.\n";
288							} # if
289	2					7	$iEncoLoaded = 1;
290	2					10	add_enco_alias('Windows-31J', 'cp932');
291							} # if
292							else
293							{
294	0					0	print STDERR " --- Encode is not installed\n";
295							}
296							} # _maybe_load_enco
297
298							sub _mib_to_enco # PRIVATE
299							{
300	29			29		63	_maybe_load_enco();
301	29					58	return $MIBtoENCO{shift()};
302							} # _mib_to_enco
303
304							sub enco_charset_name
305							{
306	38			38	1	216	my $code = shift;
307	38	100				90	return undef unless defined $code;
308	36	100				87	return undef unless $code ne '';
309	35					83	_maybe_load_enco();
310	35					46	my $iDebug = 0; # ($code =~ m!johab!i);
311	35	50	50			124	print STDERR " + enco_charset_name($code)..." if ($iDebug \|\| DEBUG_ENCO);
312	35					61	my $mib = _short_to_mib($code);
313	35	100				84	return undef unless defined $mib;
314	29	50	50			87	print STDERR " + mib is ($mib)..." if ($iDebug \|\| DEBUG_ENCO);
315	29					46	my $ret = _mib_to_enco($mib);
316	29	50	50			94	print STDERR " + enco is ($ret)..." if ($iDebug \|\| DEBUG_ENCO);
317	29					108	return $ret;
318							} # enco_charset_name
319
320
321							=item libi_charset_name()
322
323							This function takes a string containing the name of a character set
324							and returns a string which contains a name of the character set
325							suitable to be passed to iconv. If no valid character set name can be
326							identified, then C<undef> will be returned. The case and punctuation
327							within the string are not important.
328
329							$sCharset = libi_charset_name('Extended_UNIX_Code_Packed_Format_for_Korean');
330
331							=cut
332
333							my $iLibiLoaded = 0;
334
335							sub _maybe_load_libi # PRIVATE
336							{
337	0	0		0		0	return if $iLibiLoaded;
338							# Get a list of aliases from iconv:
339	0	0				0	return unless eval 'require App::Info::Lib::Iconv';
340	0					0	my $oAILI = new App::Info::Lib::Iconv;
341	0	0				0	if (ref $oAILI)
342							{
343	0					0	my $iLibiVersion = $oAILI->version;
344	0					0	DEBUG_LIBI && warn " DDD libiconv version is $iLibiVersion\n";
345	0	0	0			0	if ($oAILI->installed && (1.08 <= $iLibiVersion))
346							{
347	0					0	my $sCmd = $oAILI->bin_dir . '/iconv -l';
348	0					0	DEBUG_LIBI && warn " DDD iconv cmdline is $sCmd\n";
349	0					0	my @asIconv = split(/\n/, `$sCmd`);
350							ICONV_LINE:
351	0					0	foreach my $sLine (@asIconv)
352							{
353	0					0	my @asWord = split(/\s+/, $sLine);
354							# First, go through and find one of these that has an IANA mapping:
355	0					0	my $mib;
356	0					0	my $sIana = undef;
357							FIND_IANA:
358	0					0	foreach my $sWord (@asWord)
359							{
360	0	0				0	last FIND_IANA if ($sIana = iana_charset_name($sWord));
361							} # foreach FIND_IANA
362	0	0				0	if (!defined $sIana)
363							{
364							# Create a dummy mib:
365	0					0	$mib = $sFakeMIB++;
366							} # if
367							else
368							{
369	0					0	$mib = charset_name_to_mib($sIana);
370							}
371							# At this point we have a mib for this iconv entry. Assign them all:
372							ADD_LIBI:
373	0					0	foreach my $sWord (reverse @asWord)
374							{
375	0					0	$MIBtoLIBI{$mib} = $sWord;
376	0					0	DEBUG_LIBI && warn " + mib for libi ==$sWord== is $mib\n";
377	0					0	$hsMIBofShortname{_strip($sWord)} = $mib;
378							} # foreach ADD_LIBI
379							} # foreach ICONV_LINE
380							} # if
381							} # if
382	0					0	$iLibiLoaded = 1;
383							} # _maybe_load_libi
384
385							sub _mib_to_libi # PRIVATE
386							{
387	0			0		0	_maybe_load_libi();
388	0					0	return $MIBtoLIBI{shift()};
389							} # _mib_to_libi
390
391							sub libi_charset_name
392							{
393	7			7	1	127	my $code = shift;
394	7	100				25	return undef unless defined $code;
395	5	100				16	return undef unless $code ne '';
396							# my $iDebug = 1; # ($code =~ m!johab!i);
397							# print STDERR " + libi_charset_name($code)..." if $iDebug;
398	4					9	my $mib = _short_to_mib($code);
399	4	50				18	return undef unless defined $mib;
400							# print STDERR " + mib is ($mib)..." if $iDebug;
401	0					0	my $ret = _mib_to_libi($mib);
402							# print STDERR " + libi is ($ret)..." if $iDebug;
403	0					0	return $ret;
404							} # libi_charset_name
405
406
407							=item mib_to_charset_name
408
409							This function takes a string containing the MIBenum of a character set
410							and returns a string which contains a name for the character set.
411							If the given MIBenum does not correspond to any character set,
412							then C<undef> will be returned.
413
414							$sCharset = mib_to_charset_name('3');
415
416							=cut
417
418							sub mib_to_charset_name
419							{
420	11			11	1	23	my $code = shift;
421	11	100				35	return undef unless defined $code;
422	9	100				25	return undef unless $code ne '';
423	8					29	local $^W = 0;
424	8					54	return $hsLongnameOfMIB{$code};
425							} # mib_to_charset_name
426
427
428							=item mib_charset_name
429
430							This is a synonym for mib_to_charset_name
431
432							=cut
433
434							sub mib_charset_name
435							{
436	9			9	1	105	mib_to_charset_name(@_);
437							} # mib_charset_name
438
439
440							=item charset_name_to_mib
441
442							This function takes a string containing the name of a character set in
443							almost any format and returns a MIBenum for the character set. For
444							IANA-registered character sets, this is the IANA-registered MIB. For
445							non-IANA character sets, this is an unambiguous unique string whose
446							only use is to pass to other functions in this module. If no valid
447							character set name can be identified, then C<undef> will be returned.
448
449							$iMIB = charset_name_to_mib('US-ASCII');
450
451							=cut
452
453							sub charset_name_to_mib
454							{
455	10123			10123	1	13214	my $s = shift;
456	10123	100				16737	return undef unless defined($s);
457							return $hsMIBofLongname{$s} \|\| $hsMIBofLongname{
458	10121		100			25602	iana_charset_name($s) \|\|
459							umap_charset_name($s) \|\|
460							map8_charset_name($s) \|\|
461							umu8_charset_name($s) \|\|
462							''
463							};
464							} # charset_name_to_mib
465
466
467							=item map8_charset_name()
468
469							This function takes a string containing the name of a character set
470							(in almost any format) and returns a string which contains a name for
471							the character set that can be passed to Unicode::Map8::new().
472							Note: the returned string will be capitalized just like
473							the name of the .bin file in the Unicode::Map8::MAPS_DIR directory.
474							If no valid character set name can be identified,
475							then C<undef> will be returned.
476							The case and punctuation within the argument string are not important.
477
478							$sCharset = map8_charset_name('windows-1251');
479
480							=cut
481
482							sub map8_charset_name
483							{
484	468			468	1	883	my $code = shift;
485	468	100				910	return undef unless defined $code;
486	466	100				867	return undef unless $code ne '';
487							# $iDebug = 0 && ($code =~ m!037!);
488							# print STDERR " + map8_charset_name($code)..." if $iDebug;
489	464					675	$code = _strip($code);
490							# print STDERR "$code..." if $iDebug;
491	464		100			734	my $iMIB = _short_to_mib($code) \|\| 'undef';
492							# print STDERR "$iMIB..." if $iDebug;
493	464	100				866	if ($iMIB ne 'undef')
494							{
495							# print STDERR "$MIBtoMAP8{$iMIB}\n" if $iDebug;
496	445					1279	return $MIBtoMAP8{$iMIB};
497							} # if
498							# print STDERR "undef\n" if $iDebug;
499	19					115	return undef;
500							} # map8_charset_name
501
502
503							=item umap_charset_name()
504
505							This function takes a string containing the name of a character set
506							(in almost any format) and returns a string which contains a name for
507							the character set that can be passed to Unicode::Map::new(). If no
508							valid character set name can be identified, then C<undef> will be
509							returned. The case and punctuation within the argument string are not
510							important.
511
512							$sCharset = umap_charset_name('hebrew');
513
514							=cut
515
516							sub umap_charset_name
517							{
518	802			802	1	1237	my $code = shift;
519	802	100				1426	return undef unless defined $code;
520	800	100				1360	return undef unless $code ne '';
521							# $iDebug = ($code =~ m!apple!i);
522							# print STDERR "\n + MIBtoUMAP{dummymib029} == $MIBtoUMAP{$sDummy .'029'}\n\n" if $iDebug;
523							# print STDERR " + umap_charset_name($code)..." if $iDebug;
524	798		100			1214	my $iMIB = _short_to_mib(_strip($code)) \|\| 'undef';
525							# print STDERR "$iMIB..." if $iDebug;
526	798	100				1535	if ($iMIB ne 'undef')
527							{
528							# print STDERR "$MIBtoUMAP{$iMIB}\n" if $iDebug;
529	778					2365	return $MIBtoUMAP{$iMIB};
530							} # if
531							# print STDERR "undef\n" if $iDebug;
532	20					79	return undef;
533							} # umap_charset_name
534
535
536							our @asMap8Debug;
537
538							=item umu8_charset_name()
539
540							This function takes a string containing the name of a character set
541							(in almost any format) and returns a string which contains a name for
542							the character set that can be passed to Unicode::MapUTF8::new(). If no
543							valid character set name can be identified, then C<undef> will be
544							returned. The case and punctuation within the argument string are not
545							important.
546
547							$sCharset = umu8_charset_name('windows-1251');
548
549							=cut
550
551							sub umu8_charset_name
552							{
553	26			26	1	2520	my $code = shift;
554	26	100				69	return undef unless defined $code;
555	24	100				65	return undef unless $code ne '';
556							# $iDebug = ($code =~ m!u!);
557							# print STDERR " + umu8_charset_name($code)..." if $iDebug;
558	22		100			43	my $iMIB = _short_to_mib($code) \|\| 'undef';
559							# print STDERR "$iMIB..." if $iDebug;
560	22	100				49	if ($iMIB ne 'undef')
561							{
562							# print STDERR "$MIBtoUMU8{$iMIB}\n" if $iDebug;
563	16					82	return $MIBtoUMU8{$iMIB};
564							} # if
565							# print STDERR "undef\n" if $iDebug;
566	6					38	return undef;
567							} # umu8_charset_name
568
569							=back
570
571							=head1 QUERY ROUTINES
572
573							There is one function which can be used to obtain a list of all
574							IANA-registered character set names.
575
576							=over 4
577
578							=item C<all_iana_charset_names()>
579
580							Returns a list of all registered IANA character set names.
581							The names are not in any particular order.
582
583							=back
584
585							=cut
586
587							sub all_iana_charset_names
588							{
589	1			1	1	156	return values %hsLongnameOfMIB;
590							} # all_iana_charset_names
591
592							#-----------------------------------------------------------------------
593
594							=head1 CHARACTER SET NAME ALIASING
595
596							This module supports several semi-private routines for specifying
597							character set name aliases.
598
599							=over 4
600
601							=item add_iana_alias()
602
603							This function takes two strings: a new alias, and a target IANA
604							Character Set Name (or another alias). It defines the new alias to
605							refer to that character set name (or to the character set name to
606							which the second alias refers).
607
608							Returns the target character set name of the successfully installed alias.
609							Returns 'undef' if the target character set name is not registered.
610							Returns 'undef' if the target character set name of the second alias
611							is not registered.
612
613							I18N::Charset::add_iana_alias('my-alias1' => 'Shift_JIS');
614
615							With this code, "my-alias1" becomes an alias for the existing IANA
616							character set name 'Shift_JIS'.
617
618							I18N::Charset::add_iana_alias('my-alias2' => 'sjis');
619
620							With this code, "my-alias2" becomes an alias for the IANA character set
621							name referred to by the existing alias 'sjis' (which happens to be 'Shift_JIS').
622
623							=cut
624
625							sub add_iana_alias
626							{
627	3			3	1	10	my ($sAlias, $sReal) = @_;
628							# print STDERR " + add_iana_alias($sAlias, $sReal)\n";
629	3					8	my $sName = iana_charset_name($sReal);
630	3	100				10	if (not defined($sName))
631							{
632	1					227	carp qq{attempt to alias "$sAlias" to unknown IANA charset "$sReal"};
633	1					17	return undef;
634							} # if
635	2					5	my $mib = _short_to_mib(_strip($sName));
636							# print STDERR " --> $sName --> $mib\n";
637	2					8	$hsMIBofShortname{_strip($sAlias)} = $mib;
638	2					9	return $sName;
639							} # add_iana_alias
640
641							#-----------------------------------------------------------------------
642
643							=item add_map8_alias()
644
645							This function takes two strings: a new alias, and a target
646							Unicode::Map8 Character Set Name (or an existing alias to a Map8 name).
647							It defines the new alias to refer to that mapping name (or to the
648							mapping name to which the second alias refers).
649
650							If the first argument is a registered IANA character set name, then
651							all aliases of that IANA character set name will end up pointing to
652							the target Map8 mapping name.
653
654							Returns the target mapping name of the successfully installed alias.
655							Returns 'undef' if the target mapping name is not registered.
656							Returns 'undef' if the target mapping name of the second alias
657							is not registered.
658
659							I18N::Charset::add_map8_alias('normal' => 'ANSI_X3.4-1968');
660
661							With the above statement, "normal" becomes an alias for the existing
662							Unicode::Map8 mapping name 'ANSI_X3.4-1968'.
663
664							I18N::Charset::add_map8_alias('normal' => 'US-ASCII');
665
666							With the above statement, "normal" becomes an alias for the existing
667							Unicode::Map8 mapping name 'ANSI_X3.4-1968' (which is what "US-ASCII"
668							is an alias for).
669
670							I18N::Charset::add_map8_alias('IBM297' => 'EBCDIC-CA-FR');
671
672							With the above statement, "IBM297" becomes an alias for the existing
673							Unicode::Map8 mapping name 'EBCDIC-CA-FR'. As a side effect, all the
674							aliases for 'IBM297' (i.e. 'cp297' and 'ebcdic-cp-fr') also become
675							aliases for 'EBCDIC-CA-FR'.
676
677							=cut
678
679							sub add_map8_alias
680							{
681	114			114	1	218	my ($sAlias, $sReal) = @_;
682	114					197	my $sName = map8_charset_name($sReal);
683	114					198	my $sShort = _strip($sAlias);
684	114					195	my $sShortName = _strip($sName);
685	114	100				414	if (not defined($sName))
686							{
687	1					221	carp qq{attempt to alias "$sAlias" to unknown Map8 charset "$sReal"};
688	1					12	return undef;
689							} # if
690	113	50				222	if (exists $hsMIBofShortname{$sShortName})
691							{
692	113					331	$hsMIBofShortname{$sShort} = $hsMIBofShortname{$sShortName};
693							} # if
694	113					196	return $sName;
695							} # add_map8_alias
696
697							#-----------------------------------------------------------------------
698
699							=item add_umap_alias()
700
701							This function works identically to add_map8_alias() above, but
702							operates on Unicode::Map encoding tables.
703
704							=cut
705
706							sub add_umap_alias
707							{
708	3			3	1	10	my ($sAlias, $sReal) = @_;
709	3					7	my $sName = umap_charset_name($sReal);
710	3					7	my $sShort = _strip($sAlias);
711	3					7	my $sShortName = _strip($sName);
712	3	100				10	if (not defined($sName))
713							{
714	1					217	carp qq{attempt to alias "$sAlias" to unknown U::Map charset "$sReal"};
715	1					12	return undef;
716							} # if
717	2	50				7	if (exists $hsMIBofShortname{$sShortName})
718							{
719	2					7	$hsMIBofShortname{$sShort} = $hsMIBofShortname{$sShortName};
720							} # if
721	2					9	return $sName;
722							} # add_umap_alias
723
724							#-----------------------------------------------------------------------
725
726							=item add_libi_alias()
727
728							This function takes two strings: a new alias, and a target iconv
729							Character Set Name (or existing iconv alias). It defines the new
730							alias to refer to that character set name (or to the character set
731							name to which the existing alias refers).
732
733							Returns the target conversion scheme name of the successfully installed alias.
734							Returns 'undef' if there is no such target conversion scheme or alias.
735
736							Examples:
737
738							I18N::Charset::add_libi_alias('my-chinese1' => 'CN-GB');
739
740							With this code, "my-chinese1" becomes an alias for the existing iconv
741							conversion scheme 'CN-GB'.
742
743							I18N::Charset::add_libi_alias('my-chinese2' => 'EUC-CN');
744
745							With this code, "my-chinese2" becomes an alias for the iconv
746							conversion scheme referred to by the existing alias 'EUC-CN' (which
747							happens to be 'CN-GB').
748
749							=cut
750
751							sub add_libi_alias
752							{
753	1			1	1	4	my ($sAlias, $sReal) = @_;
754							# print STDERR " + add_libi_alias($sAlias,$sReal)...";
755	1					3	my $sName = libi_charset_name($sReal);
756	1	50				3	if (not defined($sName))
757							{
758	1					191	carp qq{attempt to alias "$sAlias" to unknown iconv charset "$sReal"};
759	1					12	return undef;
760							} # if
761	0					0	my $mib = _short_to_mib(_strip($sName));
762							# print STDERR "sName=$sName...mib=$mib\n";
763	0					0	$hsMIBofShortname{_strip($sAlias)} = $mib;
764	0					0	return $sName;
765							} # add_libi_alias
766
767							#-----------------------------------------------------------------------
768
769							=item add_enco_alias()
770
771							This function takes two strings: a new alias, and a target Encode
772							encoding Name (or existing Encode alias). It defines the new alias
773							referring to that encoding name (or to the encoding to which the
774							existing alias refers).
775
776							Returns the target encoding name of the successfully installed alias.
777							Returns 'undef' if there is no such encoding or alias.
778
779							Examples:
780
781							I18N::Charset::add_enco_alias('my-japanese1' => 'jis0201-raw');
782
783							With this code, "my-japanese1" becomes an alias for the existing
784							encoding 'jis0201-raw'.
785
786							I18N::Charset::add_enco_alias('my-japanese2' => 'my-japanese1');
787
788							With this code, "my-japanese2" becomes an alias for the encoding
789							referred to by the existing alias 'my-japanese1' (which happens to be
790							'jis0201-raw' after the previous call).
791
792							=cut
793
794							sub add_enco_alias
795							{
796	10			10	1	757	my ($sAlias, $sReal) = @_;
797	10					30	my $iDebug = 0;
798	10	50	50			45	print STDERR " + add_enco_alias($sAlias,$sReal)..." if ($iDebug \|\| DEBUG_ENCO);
799	10					26	my $sName = enco_charset_name($sReal);
800	10	100				24	if (not defined($sName))
801							{
802	3					397	carp qq{attempt to alias "$sAlias" to unknown Encode charset "$sReal"};
803	3					35	return undef;
804							} # if
805	7					13	my $mib = _short_to_mib(_strip($sName));
806	7	50	50			39	print STDERR "sName=$sName...mib=$mib\n" if ($iDebug \|\| DEBUG_ENCO);
807	7					17	$hsMIBofShortname{_strip($sAlias)} = $mib;
808	7					40	return $sName;
809							} # add_enco_alias
810
811							#-----------------------------------------------------------------------
812
813							=back
814
815							=head1 KNOWN BUGS AND LIMITATIONS
816
817							=over 4
818
819							=item *
820
821							There could probably be many more aliases added (for convenience) to
822							all the IANA names.
823							If you have some specific recommendations, please email the author!
824
825							=item *
826
827							The only character set names which have a corresponding mapping in the
828							Unicode::Map8 module are the character sets that Unicode::Map8 can
829							convert.
830
831							Similarly, the only character set names which have a corresponding
832							mapping in the Unicode::Map module are the character sets that
833							Unicode::Map can convert.
834
835							=item *
836
837							In the current implementation, all tables are read in and initialized
838							when the module is loaded, and then held in memory until the program
839							exits. A "lazy" implementation (or a less-portable tied hash) might
840							lead to a shorter startup time. Suggestions, patches, comments are
841							always welcome!
842
843							=back
844
845							=head1 SEE ALSO
846
847							=over 4
848
849							=item Unicode::Map
850
851							Convert strings from various multi-byte character encodings to and from Unicode.
852
853							=item Unicode::Map8
854
855							Convert strings from various 8-bit character encodings to and from Unicode.
856
857							=item Jcode
858
859							Convert strings among various Japanese character encodings and Unicode.
860
861							=item Unicode::MapUTF8
862
863							A wrapper around all three of these character set conversion distributions.
864
865							=back
866
867							=head1 AUTHOR
868
869							Martin 'Kingpin' Thurn, C, L.
870
871							=head1 LICENSE
872
873							This module is free software; you can redistribute it and/or
874							modify it under the same terms as Perl itself.
875
876							=cut
877
878							#-----------------------------------------------------------------------
879
880							sub _strip
881							{
882	33574			33574		51079	my $s = lc(shift);
883	33574					46886	$s =~ tr/[0-9a-zA-Z]//dc;
884	33574					94102	return $s;
885							} # _strip
886
887							# initialization code - stuff the DATA into some data structure
888
889							# The only reason this is a while loop is so that I can bail out
890							# (e.g. for debugging) without using goto ;-)
891							INITIALIZATION:
892							{
893							my ($sName, $iMIB, $sAlias, $mimename);
894							my $iDebug = 0;
895							# I used to use the __DATA__ mechanism to initialize the data, but
896							# that is not compatible with perlapp. NOTE that storing the IANA
897							# charset data as a file separate from this module code will not
898							# work with perlapp either!
899							my $s = _init_data();
900							my $iRecord = 0;
901							RECORD:
902							while ($s =~ m//gs)
903							{
904							my $sRecord = $1;
905							$iRecord++;
906							if ($sRecord !~ m/(.+?)<\/name>/)
907							{
908							warn " WWW found record with no name.\n";
909							next RECORD;
910							} # if
911							my $sName = $1;
912							if ($sRecord !~ m/(\d+)<\/value>/)
913							{
914							warn " WWW found record '$sName' with no value.\n";
915							next RECORD;
916							} # if
917							my $iMIB = $1;
918							if ($sRecord =~ m/(.+?)<\/preferred_alias>/)
919							{
920							$hsMIMEofMIB{$iMIB} = $1;
921							$hsMIBofShortname{_strip($1)} = $iMIB;
922							}
923							else
924							{
925							# warn " WWW found record '$sName' with no preferred alias.\n";
926							} # if
927							my $sMime = $1;
928							$hsLongnameOfMIB{$iMIB} = $sName;
929							$hsMIBofLongname{$sName} = $iMIB;
930							# warn " DDD '$sName' ==> $iMIB\n";
931							$hsMIBofShortname{_strip($sName)} = $iMIB;
932							ALIAS:
933							while ($sRecord =~ m/(.+?)<\/alias>/g)
934							{
935							my $sAlias = $1;
936							$hsMIBofShortname{_strip($sAlias)} = $iMIB;
937							} # while ALIAS
938							} # while RECORD
939							# Now that we have all the standard definitions, process the special
940							# === directives:
941							my @asEqualLines = split(/\n/, _init_data_extra());
942							chomp @asEqualLines;
943							EQUAL_LINE:
944							foreach my $sLine (@asEqualLines)
945							{
946							next if ($sLine =~ m!\A#!);
947							# print STDERR " + equal-sign line $sLine...\n";
948							my @as = split(/\ ===\ /, $sLine);
949							my $sName = shift @as \|\| q{};
950							next unless $sName ne '';
951							my $iMIB = $hsMIBofShortname{_strip($sName)} \|\| 0;
952							if (! $iMIB)
953							{
954							print STDERR " EEE can not find IANA entry for equal-sign directive $sName\n";
955							next EQUAL_LINE;
956							} # unless
957							EQUAL_ITEM:
958							foreach my $s (@as)
959							{
960							my $sStrip = _strip($s);
961							# print STDERR " + $sStrip --> $iMIB\n";
962							$hsMIBofShortname{$sStrip} = $iMIB;
963							} # foreach EQUAL_ITEM
964							} # foreach EQUAL_LINE
965
966							# last; # for debugging
967
968							if (eval "require Unicode::Map8")
969							{
970							# $iDebug = 1;
971							my $sDir = $Unicode::Map8::MAPS_DIR;
972							my $sAliasesFname = "$sDir/aliases";
973							# Ah, how to get all the Unicode::Map8 supported charsets... It
974							# sure ain't easy! The aliases file in the MAPS_DIR has a nice
975							# set of aliases, but since some charsets have no aliases, they're
976							# not listed in the aliases file! Ergo, we have to read the
977							# aliases file and all the file names in the MAPS_DIR!
978							push @asMap8Debug, " DDD found Unicode::Map8 installed, will build map8 tables based on $sAliasesFname and files in that directory...\n";
979							# First, read all the files in the MAPS_DIR folder and register in our local data structures:
980							if (opendir(DIR, $sDir))
981							{
982							my @asFname = grep(!/^\.\.?$/, readdir(DIR));
983							foreach my $sLong (@asFname)
984							{
985							next unless -f "$Unicode::Map8::MAPS_DIR/$sLong";
986							$sLong =~ s/\.(?:bin\|txt)$//;
987							# Try to find the official IANA name for this encoding:
988							push @asMap8Debug, " DDD looking for $sLong in iana table...\n";
989							my $sFound = '';
990							if (defined (my $sTemp = iana_charset_name($sLong)))
991							{
992							$sFound = $sTemp;
993							} # if
994							if ($sFound eq '')
995							{
996							# $iDebug = 1;
997							$iMIB = $sFakeMIB++;
998							push @asMap8Debug, " DDD had to use a dummy mib ($iMIB) for U::Map8==$sLong==\n";
999							$hsMIBofLongname{$sLong} = $iMIB;
1000							} # unless
1001							else
1002							{
1003							$iMIB = $hsMIBofLongname{$sFound};
1004							push @asMap8Debug, " DDD found IANA name $sFound ($iMIB) for Map8 entry $sLong\n";
1005							}
1006							# Make this IANA mib map to this Map8 name:
1007							push @asMap8Debug, " DDD map $iMIB to $sLong in MIBtoMAP8...\n";
1008							$MIBtoMAP8{$iMIB} = $sLong;
1009							my $s = _strip($sLong);
1010							push @asMap8Debug, " DDD map $s to $iMIB in hsMIBofShortname...\n";
1011							$hsMIBofShortname{$s} = $iMIB;
1012							} # foreach
1013							} # if
1014							# Now, go through the Unicode::Map8 aliases hash and process the aliases:
1015							my $avoid_warning = keys %Unicode::Map8::ALIASES;
1016							while (my ($alias, $charset) = each %Unicode::Map8::ALIASES)
1017							{
1018							my $iMIB = charset_name_to_mib($charset); # qqq
1019							my $s = _strip($alias);
1020							push @asMap8Debug, " DDD map $s to $iMIB in hsMIBofShortname...\n";
1021							$hsMIBofShortname{$s} = $iMIB;
1022							} # while
1023							# If there are special cases for Unicode::Map8, add them here:
1024							add_map8_alias('ISO_8859-13:1998', 'ISO_8859-13');
1025							add_map8_alias('L 7', 'ISO_8859-13');
1026							add_map8_alias('Latin 7', 'ISO_8859-13');
1027							add_map8_alias('ISO_8859-15:1998', 'ISO_8859-15');
1028							add_map8_alias('L 0', 'ISO_8859-15');
1029							add_map8_alias('Latin 0', 'ISO_8859-15');
1030							add_map8_alias('L 9', 'ISO_8859-15');
1031							add_map8_alias('Latin 9', 'ISO_8859-15');
1032							add_map8_alias('ISO-8859-1-Windows-3.1-Latin-1', 'cp1252');
1033							add_map8_alias('csWindows31Latin1', 'cp1252');
1034							# Above aliases were described in RT#18802
1035							push @asMap8Debug, "done.\n";
1036							print STDERR @asMap8Debug if $iDebug;
1037							} # if Unicode::Map8 installed
1038
1039							# last; # for debugging
1040
1041							# $iDebug = 1;
1042							if (eval "require Unicode::Map")
1043							{
1044							print STDERR " + found Unicode::Map installed, will build tables..." if $iDebug;
1045							my $MAP_Path = $INC{'Unicode/Map.pm'};
1046							$MAP_Path =~ s/\.pm//;
1047							my $sMapFile = "$MAP_Path/REGISTRY";
1048							if (open MAPS, $sMapFile)
1049							{
1050							local $/ = undef;
1051							my @asMAPS = split(/\n\s*\n/, );
1052							UMAP_ENTRY:
1053							foreach my $sEntry (@asMAPS)
1054							{
1055							$iDebug = 0;
1056							# print STDERR " + working on Umap entry >>>>>$sEntry<<<<<...\n";
1057							my ($sName, $iMIB) = ('', '');
1058							# Get the value of the name field, and skip entries with no name:
1059							next UMAP_ENTRY unless $sEntry =~ m!^name:\s+(\S+)!mi;
1060							$sName = $1;
1061							# $iDebug = ($sName =~ m!apple!);
1062							print STDERR " + UMAP sName is $sName\n" if $iDebug;
1063							my @asAlias = split /\n/, $sEntry;
1064							@asAlias = map { /alias:\s+(.*)/; $1 } (grep /alias/, @asAlias);
1065							# See if this entry already has the MIB identified:
1066							if ($sEntry =~ m!^#mib:\s+(\d+)!mi)
1067							{
1068							$iMIB = $1;
1069							} # if
1070							else
1071							{
1072							# This entry does not have the MIB listed. See if the name
1073							# of any of the aliases are known to our iana tables:
1074							UMAP_ALIAS:
1075							foreach my $sAlias ($sName, @asAlias)
1076							{
1077							print STDERR " + try alias $sAlias\n" if $iDebug;
1078							my $iMIBtry = _short_to_mib(_strip($sAlias));
1079							if ($iMIBtry)
1080							{
1081							print STDERR " + matched\n" if $iDebug;
1082							$iMIB = $iMIBtry;
1083							last UMAP_ALIAS;
1084							} # if
1085							} # foreach
1086							# If nothing matched, create a dummy mib:
1087							if ($iMIB eq '')
1088							{
1089							$iMIB = $sFakeMIB++;
1090							print STDERR " + had to use a dummy mib ($iMIB) for U::Map==$sName==\n" if $iDebug;
1091							} # if
1092							} # else
1093							# $iDebug = ($iMIB =~ m!225[23]!);
1094							# $iDebug = ($iMIB eq '17');
1095							print STDERR " + UMAP mib is $iMIB\n" if $iDebug;
1096							$MIBtoUMAP{$iMIB} = $sName;
1097							$hsMIBofLongname{$sName} \|\|= $iMIB;
1098							$hsMIBofShortname{_strip($sName)} \|\|= $iMIB;
1099							foreach my $sAlias (@asAlias)
1100							{
1101							print STDERR " + UMAP alias $sAlias\n" if $iDebug;
1102							$hsMIBofShortname{_strip($sAlias)} = $iMIB;
1103							} # foreach $sAlias
1104							} # foreach UMAP_ENTRY
1105							close MAPS;
1106							# print STDERR "\n + MIBtoUMAP{dummymib029} == $MIBtoUMAP{$sDummy .'029'}\n\n";
1107							} # if open
1108							else
1109							{
1110							carp " --- couldn't open $sMapFile for read" if $iDebug;
1111							}
1112							# If there are special cases for Unicode::Map, add them here:
1113							# add_umap_alias("new-name", "existing-name");
1114							print STDERR "done.\n" if $iDebug;
1115							} # if Unicode::Map installed
1116
1117							# Make sure to do U::MapUTF8 last, because it (in turn) depends on
1118							# the others.
1119							# $iDebug = 1;
1120							if (1.0 <= (eval q{ require Unicode::MapUTF8; $Unicode::MapUTF8::VERSION } \|\| 0))
1121							{
1122							print STDERR " + found Unicode::MapUTF8 $Unicode::MapUTF8::VERSION installed, will build tables...\n" if $iDebug;
1123							my @as;
1124							# Wrap this in an eval to avoid compiler warning(?):
1125							eval { @as = Unicode::MapUTF8::utf8_supported_charset() };
1126							UMU8_NAME:
1127							foreach my $sName (@as)
1128							{
1129							# $iDebug = ($sName =~ m!jis!i);
1130							print STDERR " + working on UmapUTF8 entry >>>>>$sName<<<<<...\n" if $iDebug;
1131							my $s = iana_charset_name($sName) \|\| '';
1132							if ($s ne '')
1133							{
1134							# print STDERR " + iana name is >>>>>$s<<<<<...\n" if $iDebug;
1135							$MIBtoUMU8{charset_name_to_mib($s)} = $sName;
1136							next UMU8_NAME;
1137							} # if already maps to IANA
1138							# print STDERR " + UmapUTF8 entry ===$sName=== has no iana entry\n" if $iDebug;
1139							$s = umap_charset_name($sName) \|\| '';
1140							if ($s ne '')
1141							{
1142							print STDERR " + U::Map name is >>>>>$s<<<<<...\n" if $iDebug;
1143							$MIBtoUMU8{charset_name_to_mib($s)} = $sName;
1144							next UMU8_NAME;
1145							} # if maps to U::Map
1146							# print STDERR " + UmapUTF8 entry ==$sName== has no U::Map entry\n" if $iDebug;
1147							$s = map8_charset_name($sName) \|\| '';
1148							if ($s ne '')
1149							{
1150							print STDERR " + U::Map8 name is >>>>>$s<<<<<...\n" if $iDebug;
1151							$MIBtoUMU8{charset_name_to_mib($s)} = $sName;
1152							next UMU8_NAME;
1153							} # if maps to U::Map8
1154							print STDERR " + UmapUTF8 entry ==$sName== has no entries at all\n" if $iDebug;
1155							} # foreach
1156							# If there are special cases for Unicode::MapUTF8, add them here:
1157							# add_umap_alias("new-name", "existing-name");
1158							print STDERR "done.\n" if $iDebug;
1159							} # if Unicode::MapUTF8 installed
1160
1161							# Initialization is all finished:
1162							last;
1163							# Below here is debugging code:
1164
1165							print STDERR " + the following IANA names do not have entries in the Map8 table:\n";
1166							my %hiTried = ();
1167							foreach my $sIANA (sort values %hsLongnameOfMIB)
1168							{
1169							next if $hiTried{$sIANA};
1170							print "$sIANA\n" unless defined map8_charset_name($sIANA);
1171							$hiTried{$sIANA}++;
1172							} # foreach
1173
1174							# last; # for debugging
1175
1176							# debugging: selective dump:
1177							print STDERR " + after init, iana_charset_name returns:\n";
1178							foreach my $key (qw(cp1251 windows-1251 WinCyrillic sjis x-sjis Shift_JIS ASCII US-ASCII us-ascii iso-2022-jp iso-8859-1 Unicode-2-0-utf-8 EUC-KR big5 x-x-big5))
1179							{
1180							print STDERR " + $key => ", iana_charset_name($key) \|\| 'undef', "\n";
1181							} # foreach
1182
1183							# exit 88;
1184
1185							print STDERR " + after init, map8_charset_name() returns:\n";
1186							foreach my $key (qw(cp1251 windows-1251 WinCyrillic sjis x-sjis Shift_JIS ASCII US-ASCII us-ascii iso-2022-jp iso-8859-1 Unicode-2-0-utf-8 EUC-KR big5 x-x-big5))
1187							{
1188							print STDERR " + $key => ", map8_charset_name($key) \|\| 'undef', "\n";
1189							} # foreach
1190
1191							last;
1192
1193							# debugging: huge dump:
1194							# _dump_hash('hsLongnameOfMIB', \%hsLongnameOfMIB);
1195							# _dump_hash('hsMIBofLongname', \%hsMIBofLongname);
1196							# _dump_hash('hsMIBofShortname', \%hsMIBofShortname);
1197							foreach (keys %hsMIBofShortname)
1198							{
1199							print STDERR " + _short_to_long($_) == ", _short_to_long($_) \|\| 'undef', "\n";
1200							} # foreach
1201
1202							} # end of INITIALIZATION block
1203
1204							sub _dump_hash
1205							{
1206	0			0		0	my ($sName, $rh) = @_;
1207	0					0	print STDERR " + after initialization, $sName is:\n";
1208	0					0	foreach my $key (keys %$rh)
1209							{
1210	0					0	print STDERR " + $key => $$rh{$key}\n";
1211							} # foreach
1212							} # _dump_hash
1213
1214							sub _init_data_extra
1215							{
1216							# This little piece of data is a hand-made list of IANA names and
1217							# aliases, in the form AAA === BBB === CCC, where AAA is the
1218							# canonical IANA name and BBB and CCC are aliases. Note that
1219							# capitalization and punctuation of aliases are meaningless (but
1220							# whitespace is not allowed).
1221	11			11		410	return <<'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX';
1222
1223							Shift_JIS === sjis
1224							windows-1250 === winlatin2 === cp1250
1225							windows-1251 === wincyrillic === cp1251
1226							windows-1252 === winlatin1 === cp1252
1227							windows-1253 === wingreek === cp1253
1228							windows-1254 === winturkish === cp1254
1229							windows-1255 === winhebrew === cp1255
1230							windows-1256 === winarabic === cp1256
1231							windows-1257 === winbaltic === cp1257
1232							windows-1258 === winvietnamese === cp1258
1233							Adobe-Standard-Encoding === adobe-standard
1234							Adobe-Symbol-Encoding === adobe-symbol
1235							EBCDIC-ES === ebcdic-cp-es
1236							EBCDIC-FR === ebcdic-cp-fr
1237							EBCDIC-IT === ebcdic-cp-it
1238							EBCDIC-UK === ebcdic-cp-gb
1239							EBCDIC-FI-SE === ebcdic-cp-fi
1240							UTF-7 === Unicode-2-0-utf-7
1241							UTF-8 === Unicode-2-0-utf-8
1242							Extended_UNIX_Code_Packed_Format_for_Japanese === euc === euc-jp
1243							# These are for Unicode::MapUTF8:
1244							ISO-10646-UCS-2 === ucs2
1245							ISO-10646-UCS-4 === ucs4
1246							# These are for iconv:
1247							ISO-2022-JP === ISO-2022-JP-1
1248							# These are for Encode:
1249							IBM1047 === cp1047
1250							GB2312 === gb2312-raw
1251							HZ-GB-2312 === hz
1252							JIS_X0201 === jis0201-raw
1253							JIS_C6226-1983 === jis0208-raw
1254							JIS_X0212-1990 === jis0212-raw
1255							KS_C_5601-1987 === ksc5601-raw
1256							CP037 === CP37
1257							cp863 === DOSCanadaF
1258							cp860 === DOSPortuguese
1259							cp869 === DOSGreek2
1260							koi8-r === cp878
1261							# These encodings are handled by Encode, but I don't know what they are:
1262							# ??? === AdobeZdingbats
1263							# ??? === MacArabic
1264							# ??? === MacCentralEurRoman
1265							# ??? === MacChineseSimp
1266							# ??? === MacChineseTrad
1267							# ??? === MacCroatian
1268							# ??? === MacCyrillic
1269							# ??? === MacDingbats
1270							# ??? === MacFarsi
1271							# ??? === MacGreek
1272							# ??? === MacHebrew
1273							# ??? === MacIcelandic
1274							# ??? === MacJapanese
1275							# ??? === MacKorean
1276							# ??? === MacRomanian
1277							# ??? === MacRumanian
1278							# ??? === MacSami
1279							# ??? === MacThai
1280							# ??? === MacTurkish
1281							# ??? === MacUkrainian
1282							# ??? === MacVietnamese
1283							# ??? === cp1006
1284							# ??? === dingbats
1285							# ??? === nextstep
1286							# ??? === posix-bc
1287							# The following aliases are listed in RT#18802:
1288							ISO-8859-10 === 8859-10 === ISO_8859-10:1993
1289							# TCVN-5712 x-viet-tcvn viet-tcvn VN-1 TCVN-5712:1993
1290							TIS-620 === TIS_620-2553 === TIS_620-2553:1990
1291							# VPS x-viet-vps viet-vps
1292							# The above aliases are listed in RT#18802
1293							XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1294							} # _init_data_extra
1295
1296
1297							sub _init_data
1298							{
1299							# This big piece of data is the original document from
1300							# http://www.iana.org/assignments/character-sets.xml
1301	11			11		25	return <<'EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE';
1302
1303
1304
1305
1306							2018-11-21
1307							Character Sets
1308							Character Sets
1309
1310							Expert Review
1311							Primary Expert Ned Freed and Secondary Expert Martin Dürst
1312							These are the official names for character sets that may be used in
1313							the Internet and may be referred to in Internet documentation. These
1314							names are expressed in ANSI_X3.4-1968 which is commonly called
1315							US-ASCII or simply ASCII. The character set most commonly use in the
1316							Internet and used especially in protocol standards is US-ASCII, this
1317							is strongly encouraged. The use of the name US-ASCII is also
1318							encouraged.
1319
1320							The character set names may be up to 40 characters taken from the
1321							printable characters of US-ASCII. However, no distinction is made
1322							between use of upper and lower case letters.
1323
1324							The MIBenum value is a unique value for use in MIBs to identify coded
1325							character sets.
1326
1327							The value space for MIBenum values has been divided into three
1328							regions. The first region (3-999) consists of coded character sets
1329							that have been standardized by some standard setting organization.
1330							This region is intended for standards that do not have subset
1331							implementations. The second region (1000-1999) is for the Unicode and
1332							ISO/IEC 10646 coded character sets together with a specification of a
1333							(set of) sub-repertoires that may occur. The third region (>1999) is
1334							intended for vendor specific coded character sets.
1335
1336							Assigned MIB enum Numbers
1337							-------------------------
1338							0-2 Reserved
1339							3-999 Set By Standards Organizations
1340							1000-1999 Unicode / 10646
1341							2000-2999 Vendor
1342
1343							The aliases that start with "cs" have been added for use with the
1344							IANA-CHARSET-MIB as originally defined in [RFC3808], and as currently
1345							maintained by IANA at [IANA registry ianacharset-mib].
1346							Note that the ianacharset-mib needs to be kept in sync with this
1347							registry. These aliases that start with "cs" contain the standard
1348							numbers along with suggestive names in order to facilitate applications
1349							that want to display the names in user interfaces. The "cs" stands
1350							for character set and is provided for applications that need a lower
1351							case first letter but want to use mixed case thereafter that cannot
1352							contain any special characters, such as underbar ("_") and dash ("-").
1353
1354							If the character set is from an ISO standard, its cs alias is the ISO
1355							standard number or name. If the character set is not from an ISO
1356							standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
1357							Registration Authority), the ISO Registry number is specified as
1358							ISOnnn followed by letters suggestive of the name or standards number
1359							of the code set. When a national or international standard is
1360							revised, the year of revision is added to the cs alias of the new
1361							character set entry in the IANA Registry in order to distinguish the
1362							revised character set from the original character set.
1363
1364
1365							US-ASCII
1366
1367							3
1368							ANSI X3.4-1986
1369							iso-ir-6
1370							ANSI_X3.4-1968
1371							ANSI_X3.4-1986
1372							ISO_646.irv:1991
1373							ISO646-US
1374							US-ASCII
1375							us
1376							IBM367
1377							cp367
1378							csASCII
1379							US-ASCII
1380
1381
1382							ISO_8859-1:1987
1383
1384
1385							4
1386
1387							ISO-IR: International Register of Escape Sequences
1388							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1389
1390							iso-ir-100
1391							ISO_8859-1
1392							ISO-8859-1
1393							latin1
1394							l1
1395							IBM819
1396							CP819
1397							csISOLatin1
1398							ISO-8859-1
1399
1400
1401							ISO_8859-2:1987
1402
1403
1404							5
1405
1406							ISO-IR: International Register of Escape Sequences
1407							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1408
1409							iso-ir-101
1410							ISO_8859-2
1411							ISO-8859-2
1412							latin2
1413							l2
1414							csISOLatin2
1415							ISO-8859-2
1416
1417
1418							ISO_8859-3:1988
1419
1420
1421							6
1422
1423							ISO-IR: International Register of Escape Sequences
1424							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1425
1426							iso-ir-109
1427							ISO_8859-3
1428							ISO-8859-3
1429							latin3
1430							l3
1431							csISOLatin3
1432							ISO-8859-3
1433
1434
1435							ISO_8859-4:1988
1436
1437
1438							7
1439
1440							ISO-IR: International Register of Escape Sequences
1441							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1442
1443							iso-ir-110
1444							ISO_8859-4
1445							ISO-8859-4
1446							latin4
1447							l4
1448							csISOLatin4
1449							ISO-8859-4
1450
1451
1452							ISO_8859-5:1988
1453
1454
1455							8
1456
1457							ISO-IR: International Register of Escape Sequences
1458							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1459
1460							iso-ir-144
1461							ISO_8859-5
1462							ISO-8859-5
1463							cyrillic
1464							csISOLatinCyrillic
1465							ISO-8859-5
1466
1467
1468							ISO_8859-6:1987
1469
1470
1471							9
1472
1473							ISO-IR: International Register of Escape Sequences
1474							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1475
1476							iso-ir-127
1477							ISO_8859-6
1478							ISO-8859-6
1479							ECMA-114
1480							ASMO-708
1481							arabic
1482							csISOLatinArabic
1483							ISO-8859-6
1484
1485
1486							ISO_8859-7:1987
1487
1488
1489
1490							10
1491
1492							ISO-IR: International Register of Escape Sequences
1493							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1494
1495							iso-ir-126
1496							ISO_8859-7
1497							ISO-8859-7
1498							ELOT_928
1499							ECMA-118
1500							greek
1501							greek8
1502							csISOLatinGreek
1503							ISO-8859-7
1504
1505
1506							ISO_8859-8:1988
1507
1508
1509							11
1510
1511							ISO-IR: International Register of Escape Sequences
1512							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1513
1514							iso-ir-138
1515							ISO_8859-8
1516							ISO-8859-8
1517							hebrew
1518							csISOLatinHebrew
1519							ISO-8859-8
1520
1521
1522							ISO_8859-9:1989
1523
1524
1525							12
1526
1527							ISO-IR: International Register of Escape Sequences
1528							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1529
1530							iso-ir-148
1531							ISO_8859-9
1532							ISO-8859-9
1533							latin5
1534							l5
1535							csISOLatin5
1536							ISO-8859-9
1537
1538
1539							ISO-8859-10
1540
1541
1542							13
1543
1544							ISO-IR: International Register of Escape Sequences
1545							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1546
1547							iso-ir-157
1548							l6
1549							ISO_8859-10:1992
1550							csISOLatin6
1551							latin6
1552							ISO-8859-10
1553
1554
1555							ISO_6937-2-add
1556
1557
1558							14
1559
1560							ISO-IR: International Register of Escape Sequences and ISO 6937-2:1983
1561							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1562
1563							iso-ir-142
1564							csISOTextComm
1565
1566
1567							JIS_X0201
1568
1569
1570							15
1571							JIS X 0201-1976. One byte only, this is equivalent to
1572							JIS/Roman (similar to ASCII) plus eight-bit half-width
1573							Katakana
1574							X0201
1575							csHalfWidthKatakana
1576
1577
1578							JIS_Encoding
1579							16
1580							JIS X 0202-1991. Uses ISO 2022 escape sequences to
1581							shift code sets as documented in JIS X 0202-1991.
1582							csJISEncoding
1583
1584
1585							Shift_JIS
1586							17
1587							This charset is an extension of csHalfWidthKatakana by
1588							adding graphic characters in JIS X 0208. The CCS's are
1589							JIS X0201:1997 and JIS X0208:1997. The
1590							complete definition is shown in Appendix 1 of JIS
1591							X0208:1997.
1592							This charset can be used for the top-level media type "text".
1593							MS_Kanji
1594							csShiftJIS
1595							Shift_JIS
1596
1597
1598							Extended_UNIX_Code_Packed_Format_for_Japanese
1599							18
1600							Standardized by OSF, UNIX International, and UNIX Systems
1601							Laboratories Pacific. Uses ISO 2022 rules to select
1602							code set 0: US-ASCII (a single 7-bit byte set)
1603							code set 1: JIS X0208-1990 (a double 8-bit byte set)
1604							restricted to A0-FF in both bytes
1605							code set 2: Half Width Katakana (a single 7-bit byte set)
1606							requiring SS2 as the character prefix
1607							code set 3: JIS X0212-1990 (a double 7-bit byte set)
1608							restricted to A0-FF in both bytes
1609							requiring SS3 as the character prefix
1610							csEUCPkdFmtJapanese
1611							EUC-JP
1612							EUC-JP
1613
1614
1615							Extended_UNIX_Code_Fixed_Width_for_Japanese
1616							19
1617							Used in Japan. Each character is 2 octets.
1618							code set 0: US-ASCII (a single 7-bit byte set)
1619							1st byte = 00
1620							2nd byte = 20-7E
1621							code set 1: JIS X0208-1990 (a double 7-bit byte set)
1622							restricted to A0-FF in both bytes
1623							code set 2: Half Width Katakana (a single 7-bit byte set)
1624							1st byte = 00
1625							2nd byte = A0-FF
1626							code set 3: JIS X0212-1990 (a double 7-bit byte set)
1627							restricted to A0-FF in
1628							the first byte
1629							and 21-7E in the second byte
1630							csEUCFixWidJapanese
1631
1632
1633							BS_4730
1634
1635
1636							20
1637
1638							ISO-IR: International Register of Escape Sequences
1639							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1640
1641							iso-ir-4
1642							ISO646-GB
1643							gb
1644							uk
1645							csISO4UnitedKingdom
1646
1647
1648							SEN_850200_C
1649
1650
1651							21
1652
1653							ISO-IR: International Register of Escape Sequences
1654							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1655
1656							iso-ir-11
1657							ISO646-SE2
1658							se2
1659							csISO11SwedishForNames
1660
1661
1662							IT
1663
1664
1665							22
1666
1667							ISO-IR: International Register of Escape Sequences
1668							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1669
1670							iso-ir-15
1671							ISO646-IT
1672							csISO15Italian
1673
1674
1675							ES
1676
1677
1678							23
1679
1680							ISO-IR: International Register of Escape Sequences
1681							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1682
1683							iso-ir-17
1684							ISO646-ES
1685							csISO17Spanish
1686
1687
1688							DIN_66003
1689
1690
1691							24
1692
1693							ISO-IR: International Register of Escape Sequences
1694							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1695
1696							iso-ir-21
1697							de
1698							ISO646-DE
1699							csISO21German
1700
1701
1702							NS_4551-1
1703
1704
1705							25
1706
1707							ISO-IR: International Register of Escape Sequences
1708							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1709
1710							iso-ir-60
1711							ISO646-NO
1712							no
1713							csISO60DanishNorwegian
1714							csISO60Norwegian1
1715
1716
1717							NF_Z_62-010
1718
1719
1720							26
1721
1722							ISO-IR: International Register of Escape Sequences
1723							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1724
1725							iso-ir-69
1726							ISO646-FR
1727							fr
1728							csISO69French
1729
1730
1731							ISO-10646-UTF-1
1732							27
1733							Universal Transfer Format (1), this is the multibyte
1734							encoding, that subsets ASCII-7. It does not have byte
1735							ordering issues.
1736							csISO10646UTF1
1737
1738
1739							ISO_646.basic:1983
1740
1741
1742							28
1743
1744							ISO-IR: International Register of Escape Sequences
1745							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1746
1747							ref
1748							csISO646basic1983
1749
1750
1751							INVARIANT
1752
1753
1754							29
1755							csINVARIANT
1756
1757
1758							ISO_646.irv:1983
1759
1760
1761							30
1762
1763							ISO-IR: International Register of Escape Sequences
1764							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1765
1766							iso-ir-2
1767							irv
1768							csISO2IntlRefVersion
1769
1770
1771							NATS-SEFI
1772
1773
1774							31
1775
1776							ISO-IR: International Register of Escape Sequences
1777							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1778
1779							iso-ir-8-1
1780							csNATSSEFI
1781
1782
1783							NATS-SEFI-ADD
1784
1785
1786							32
1787
1788							ISO-IR: International Register of Escape Sequences
1789							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1790
1791							iso-ir-8-2
1792							csNATSSEFIADD
1793
1794
1795							NATS-DANO
1796
1797
1798							33
1799
1800							ISO-IR: International Register of Escape Sequences
1801							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1802
1803							iso-ir-9-1
1804							csNATSDANO
1805
1806
1807							NATS-DANO-ADD
1808
1809
1810							34
1811
1812							ISO-IR: International Register of Escape Sequences
1813							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1814
1815							iso-ir-9-2
1816							csNATSDANOADD
1817
1818
1819							SEN_850200_B
1820
1821
1822							35
1823
1824							ISO-IR: International Register of Escape Sequences
1825							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1826
1827							iso-ir-10
1828							FI
1829							ISO646-FI
1830							ISO646-SE
1831							se
1832							csISO10Swedish
1833
1834
1835							KS_C_5601-1987
1836
1837
1838							36
1839
1840							ISO-IR: International Register of Escape Sequences
1841							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1842
1843							iso-ir-149
1844							KS_C_5601-1989
1845							KSC_5601
1846							korean
1847							csKSC56011987
1848
1849
1850							ISO-2022-KR
1851
1852
1853							37
1854							(see also KS_C_5601-1987)
1855							csISO2022KR
1856							ISO-2022-KR
1857
1858
1859							EUC-KR
1860
1861
1862							38
1863							(see also KS_C_5861-1992)
1864							csEUCKR
1865							EUC-KR
1866
1867
1868							ISO-2022-JP
1869
1870
1871							39
1872							(see also )
1873							csISO2022JP
1874							ISO-2022-JP
1875
1876
1877							ISO-2022-JP-2
1878
1879
1880							40
1881
1882
1883
1884							csISO2022JP2
1885							ISO-2022-JP-2
1886
1887
1888							JIS_C6220-1969-jp
1889
1890
1891							41
1892
1893							ISO-IR: International Register of Escape Sequences
1894							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1895
1896							JIS_C6220-1969
1897							iso-ir-13
1898							katakana
1899							x0201-7
1900							csISO13JISC6220jp
1901
1902
1903							JIS_C6220-1969-ro
1904
1905
1906							42
1907
1908							ISO-IR: International Register of Escape Sequences
1909							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1910
1911							iso-ir-14
1912							jp
1913							ISO646-JP
1914							csISO14JISC6220ro
1915
1916
1917							PT
1918
1919
1920							43
1921
1922							ISO-IR: International Register of Escape Sequences
1923							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1924
1925							iso-ir-16
1926							ISO646-PT
1927							csISO16Portuguese
1928
1929
1930							greek7-old
1931
1932
1933							44
1934
1935							ISO-IR: International Register of Escape Sequences
1936							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1937
1938							iso-ir-18
1939							csISO18Greek7Old
1940
1941
1942							latin-greek
1943
1944
1945							45
1946
1947							ISO-IR: International Register of Escape Sequences
1948							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1949
1950							iso-ir-19
1951							csISO19LatinGreek
1952
1953
1954							NF_Z_62-010_(1973)
1955
1956
1957							46
1958
1959							ISO-IR: International Register of Escape Sequences
1960							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1961
1962							iso-ir-25
1963							ISO646-FR1
1964							csISO25French
1965
1966
1967							Latin-greek-1
1968
1969
1970							47
1971
1972							ISO-IR: International Register of Escape Sequences
1973							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1974
1975							iso-ir-27
1976							csISO27LatinGreek1
1977
1978
1979							ISO_5427
1980
1981
1982							48
1983
1984							ISO-IR: International Register of Escape Sequences
1985							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1986
1987							iso-ir-37
1988							csISO5427Cyrillic
1989
1990
1991							JIS_C6226-1978
1992
1993
1994							49
1995
1996							ISO-IR: International Register of Escape Sequences
1997							Note: The current registration authority is IPSJ/ITSCJ, Japan.
1998
1999							iso-ir-42
2000							csISO42JISC62261978
2001
2002
2003							BS_viewdata
2004
2005
2006							50
2007
2008							ISO-IR: International Register of Escape Sequences
2009							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2010
2011							iso-ir-47
2012							csISO47BSViewdata
2013
2014
2015							INIS
2016
2017
2018							51
2019
2020							ISO-IR: International Register of Escape Sequences
2021							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2022
2023							iso-ir-49
2024							csISO49INIS
2025
2026
2027							INIS-8
2028
2029
2030							52
2031
2032							ISO-IR: International Register of Escape Sequences
2033							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2034
2035							iso-ir-50
2036							csISO50INIS8
2037
2038
2039							INIS-cyrillic
2040
2041
2042							53
2043
2044							ISO-IR: International Register of Escape Sequences
2045							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2046
2047							iso-ir-51
2048							csISO51INISCyrillic
2049
2050
2051							ISO_5427:1981
2052
2053
2054							54
2055
2056							ISO-IR: International Register of Escape Sequences
2057							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2058
2059							iso-ir-54
2060							ISO5427Cyrillic1981
2061							csISO54271981
2062
2063
2064							ISO_5428:1980
2065
2066
2067							55
2068
2069							ISO-IR: International Register of Escape Sequences
2070							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2071
2072							iso-ir-55
2073							csISO5428Greek
2074
2075
2076							GB_1988-80
2077
2078
2079							56
2080
2081							ISO-IR: International Register of Escape Sequences
2082							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2083
2084							iso-ir-57
2085							cn
2086							ISO646-CN
2087							csISO57GB1988
2088
2089
2090							GB_2312-80
2091
2092
2093							57
2094
2095							ISO-IR: International Register of Escape Sequences
2096							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2097
2098							iso-ir-58
2099							chinese
2100							csISO58GB231280
2101
2102
2103							NS_4551-2
2104
2105
2106							58
2107
2108							ISO-IR: International Register of Escape Sequences
2109							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2110
2111							ISO646-NO2
2112							iso-ir-61
2113							no2
2114							csISO61Norwegian2
2115
2116
2117							videotex-suppl
2118
2119
2120							59
2121
2122							ISO-IR: International Register of Escape Sequences
2123							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2124
2125							iso-ir-70
2126							csISO70VideotexSupp1
2127
2128
2129							PT2
2130
2131
2132							60
2133
2134							ISO-IR: International Register of Escape Sequences
2135							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2136
2137							iso-ir-84
2138							ISO646-PT2
2139							csISO84Portuguese2
2140
2141
2142							ES2
2143
2144
2145							61
2146
2147							ISO-IR: International Register of Escape Sequences
2148							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2149
2150							iso-ir-85
2151							ISO646-ES2
2152							csISO85Spanish2
2153
2154
2155							MSZ_7795.3
2156
2157
2158							62
2159
2160							ISO-IR: International Register of Escape Sequences
2161							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2162
2163							iso-ir-86
2164							ISO646-HU
2165							hu
2166							csISO86Hungarian
2167
2168
2169							JIS_C6226-1983
2170
2171
2172							63
2173
2174							ISO-IR: International Register of Escape Sequences
2175							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2176
2177							iso-ir-87
2178							x0208
2179							JIS_X0208-1983
2180							csISO87JISX0208
2181
2182
2183							greek7
2184
2185
2186							64
2187
2188							ISO-IR: International Register of Escape Sequences
2189							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2190
2191							iso-ir-88
2192							csISO88Greek7
2193
2194
2195							ASMO_449
2196
2197
2198							65
2199
2200							ISO-IR: International Register of Escape Sequences
2201							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2202
2203							ISO_9036
2204							arabic7
2205							iso-ir-89
2206							csISO89ASMO449
2207
2208
2209							iso-ir-90
2210
2211
2212							66
2213
2214							ISO-IR: International Register of Escape Sequences
2215							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2216
2217							csISO90
2218
2219
2220							JIS_C6229-1984-a
2221
2222
2223							67
2224
2225							ISO-IR: International Register of Escape Sequences
2226							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2227
2228							iso-ir-91
2229							jp-ocr-a
2230							csISO91JISC62291984a
2231
2232
2233							JIS_C6229-1984-b
2234
2235
2236							68
2237
2238							ISO-IR: International Register of Escape Sequences
2239							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2240
2241							iso-ir-92
2242							ISO646-JP-OCR-B
2243							jp-ocr-b
2244							csISO92JISC62991984b
2245
2246
2247							JIS_C6229-1984-b-add
2248
2249
2250							69
2251
2252							ISO-IR: International Register of Escape Sequences
2253							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2254
2255							iso-ir-93
2256							jp-ocr-b-add
2257							csISO93JIS62291984badd
2258
2259
2260							JIS_C6229-1984-hand
2261
2262
2263							70
2264
2265							ISO-IR: International Register of Escape Sequences
2266							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2267
2268							iso-ir-94
2269							jp-ocr-hand
2270							csISO94JIS62291984hand
2271
2272
2273							JIS_C6229-1984-hand-add
2274
2275
2276							71
2277
2278							ISO-IR: International Register of Escape Sequences
2279							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2280
2281							iso-ir-95
2282							jp-ocr-hand-add
2283							csISO95JIS62291984handadd
2284
2285
2286							JIS_C6229-1984-kana
2287
2288
2289							72
2290
2291							ISO-IR: International Register of Escape Sequences
2292							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2293
2294							iso-ir-96
2295							csISO96JISC62291984kana
2296
2297
2298							ISO_2033-1983
2299
2300
2301							73
2302
2303							ISO-IR: International Register of Escape Sequences
2304							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2305
2306							iso-ir-98
2307							e13b
2308							csISO2033
2309
2310
2311							ANSI_X3.110-1983
2312
2313
2314							74
2315
2316							ISO-IR: International Register of Escape Sequences
2317							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2318
2319							iso-ir-99
2320							CSA_T500-1983
2321							NAPLPS
2322							csISO99NAPLPS
2323
2324
2325							T.61-7bit
2326
2327
2328							75
2329
2330							ISO-IR: International Register of Escape Sequences
2331							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2332
2333							iso-ir-102
2334							csISO102T617bit
2335
2336
2337							T.61-8bit
2338
2339
2340							76
2341
2342							ISO-IR: International Register of Escape Sequences
2343							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2344
2345							T.61
2346							iso-ir-103
2347							csISO103T618bit
2348
2349
2350							ECMA-cyrillic
2351							77
2352							ISO registry
2353							(formerly ECMA
2354							registry)
2355							iso-ir-111
2356							KOI8-E
2357							csISO111ECMACyrillic
2358
2359
2360							CSA_Z243.4-1985-1
2361
2362
2363							78
2364
2365							ISO-IR: International Register of Escape Sequences
2366							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2367
2368							iso-ir-121
2369							ISO646-CA
2370							csa7-1
2371							csa71
2372							ca
2373							csISO121Canadian1
2374
2375
2376							CSA_Z243.4-1985-2
2377
2378
2379							79
2380
2381							ISO-IR: International Register of Escape Sequences
2382							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2383
2384							iso-ir-122
2385							ISO646-CA2
2386							csa7-2
2387							csa72
2388							csISO122Canadian2
2389
2390
2391							CSA_Z243.4-1985-gr
2392
2393
2394							80
2395
2396							ISO-IR: International Register of Escape Sequences
2397							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2398
2399							iso-ir-123
2400							csISO123CSAZ24341985gr
2401
2402
2403							ISO_8859-6-E
2404
2405
2406							81
2407
2408
2409
2410							csISO88596E
2411							ISO-8859-6-E
2412							ISO-8859-6-E
2413
2414
2415							ISO_8859-6-I
2416
2417
2418							82
2419
2420
2421
2422							csISO88596I
2423							ISO-8859-6-I
2424							ISO-8859-6-I
2425
2426
2427							T.101-G2
2428
2429
2430							83
2431
2432							ISO-IR: International Register of Escape Sequences
2433							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2434
2435							iso-ir-128
2436							csISO128T101G2
2437
2438
2439							ISO_8859-8-E
2440
2441
2442							84
2443
2444
2445
2446							csISO88598E
2447							ISO-8859-8-E
2448							ISO-8859-8-E
2449
2450
2451							ISO_8859-8-I
2452
2453
2454							85
2455
2456
2457
2458							csISO88598I
2459							ISO-8859-8-I
2460							ISO-8859-8-I
2461
2462
2463							CSN_369103
2464
2465
2466							86
2467
2468							ISO-IR: International Register of Escape Sequences
2469							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2470
2471							iso-ir-139
2472							csISO139CSN369103
2473
2474
2475							JUS_I.B1.002
2476
2477
2478							87
2479
2480							ISO-IR: International Register of Escape Sequences
2481							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2482
2483							iso-ir-141
2484							ISO646-YU
2485							js
2486							yu
2487							csISO141JUSIB1002
2488
2489
2490							IEC_P27-1
2491
2492
2493							88
2494
2495							ISO-IR: International Register of Escape Sequences
2496							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2497
2498							iso-ir-143
2499							csISO143IECP271
2500
2501
2502							JUS_I.B1.003-serb
2503
2504
2505							89
2506
2507							ISO-IR: International Register of Escape Sequences
2508							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2509
2510							iso-ir-146
2511							serbian
2512							csISO146Serbian
2513
2514
2515							JUS_I.B1.003-mac
2516
2517
2518							90
2519
2520							ISO-IR: International Register of Escape Sequences
2521							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2522
2523							macedonian
2524							iso-ir-147
2525							csISO147Macedonian
2526
2527
2528							greek-ccitt
2529
2530
2531							91
2532
2533							ISO-IR: International Register of Escape Sequences
2534							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2535
2536							iso-ir-150
2537							csISO150
2538							csISO150GreekCCITT
2539
2540
2541							NC_NC00-10:81
2542
2543
2544							92
2545
2546							ISO-IR: International Register of Escape Sequences
2547							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2548
2549							cuba
2550							iso-ir-151
2551							ISO646-CU
2552							csISO151Cuba
2553
2554
2555							ISO_6937-2-25
2556
2557
2558							93
2559
2560							ISO-IR: International Register of Escape Sequences
2561							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2562
2563							iso-ir-152
2564							csISO6937Add
2565
2566
2567							GOST_19768-74
2568
2569
2570							94
2571
2572							ISO-IR: International Register of Escape Sequences
2573							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2574
2575							ST_SEV_358-88
2576							iso-ir-153
2577							csISO153GOST1976874
2578
2579
2580							ISO_8859-supp
2581
2582
2583							95
2584
2585							ISO-IR: International Register of Escape Sequences
2586							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2587
2588							iso-ir-154
2589							latin1-2-5
2590							csISO8859Supp
2591
2592
2593							ISO_10367-box
2594
2595
2596							96
2597
2598							ISO-IR: International Register of Escape Sequences
2599							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2600
2601							iso-ir-155
2602							csISO10367Box
2603
2604
2605							latin-lap
2606
2607
2608							97
2609
2610							ISO-IR: International Register of Escape Sequences
2611							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2612
2613							lap
2614							iso-ir-158
2615							csISO158Lap
2616
2617
2618							JIS_X0212-1990
2619
2620
2621							98
2622
2623							ISO-IR: International Register of Escape Sequences
2624							Note: The current registration authority is IPSJ/ITSCJ, Japan.
2625
2626							x0212
2627							iso-ir-159
2628							csISO159JISX02121990
2629
2630
2631							DS_2089
2632
2633
2634							99
2635							Danish Standard, DS 2089, February 1974
2636							DS2089
2637							ISO646-DK
2638							dk
2639							csISO646Danish
2640
2641
2642							us-dk
2643
2644
2645							100
2646							csUSDK
2647
2648
2649							dk-us
2650
2651
2652							101
2653							csDKUS
2654
2655
2656							KSC5636
2657
2658
2659							102
2660							ISO646-KR
2661							csKSC5636
2662
2663
2664							UNICODE-1-1-UTF-7
2665
2666							103
2667
2668
2669
2670							csUnicode11UTF7
2671
2672
2673							ISO-2022-CN
2674
2675							104
2676
2677
2678
2679							csISO2022CN
2680
2681
2682							ISO-2022-CN-EXT
2683
2684							105
2685
2686
2687
2688							csISO2022CNEXT
2689
2690
2691							UTF-8
2692
2693							106
2694
2695
2696
2697							csUTF8
2698
2699
2700							ISO-8859-13
2701							109
2702							ISO See
2703							csISO885913
2704
2705
2706							ISO-8859-14
2707							110
2708							ISO See
2709							iso-ir-199
2710							ISO_8859-14:1998
2711							ISO_8859-14
2712							latin8
2713							iso-celtic
2714							l8
2715							csISO885914
2716
2717
2718							ISO-8859-15
2719							111
2720							ISO
2721							Please see:
2722							ISO_8859-15
2723							Latin-9
2724							csISO885915
2725
2726
2727							ISO-8859-16
2728							112
2729							ISO
2730							iso-ir-226
2731							ISO_8859-16:2001
2732							ISO_8859-16
2733							latin10
2734							l10
2735							csISO885916
2736
2737
2738							GBK
2739							113
2740							Chinese IT Standardization Technical Committee
2741							Please see:
2742							CP936
2743							MS936
2744							windows-936
2745							csGBK
2746
2747
2748							GB18030
2749							114
2750							Chinese IT Standardization Technical Committee
2751							Please see:
2752							csGB18030
2753
2754
2755							OSD_EBCDIC_DF04_15
2756							115
2757							Fujitsu-Siemens standard mainframe EBCDIC encoding
2758							Please see:
2759							csOSDEBCDICDF0415
2760
2761
2762							OSD_EBCDIC_DF03_IRV
2763							116
2764							Fujitsu-Siemens standard mainframe EBCDIC encoding
2765							Please see:
2766							csOSDEBCDICDF03IRV
2767
2768
2769							OSD_EBCDIC_DF04_1
2770							117
2771							Fujitsu-Siemens standard mainframe EBCDIC encoding
2772							Please see:
2773							csOSDEBCDICDF041
2774
2775
2776							ISO-11548-1
2777							118
2778							See
2779							ISO_11548-1
2780							ISO_TR_11548-1
2781							csISO115481
2782
2783
2784							KZ-1048
2785							119
2786							See
2787							STRK1048-2002
2788							RK1048
2789							csKZ1048
2790
2791
2792							ISO-10646-UCS-2
2793							1000
2794							the 2-octet Basic Multilingual Plane, aka Unicode
2795							this needs to specify network byte order: the standard
2796							does not specify (it is a 16-bit integer space)
2797							csUnicode
2798
2799
2800							ISO-10646-UCS-4
2801							1001
2802							the full code space. (same comment about byte order,
2803							these are 31-bit numbers.
2804							csUCS4
2805
2806
2807							ISO-10646-UCS-Basic
2808							1002
2809							ASCII subset of Unicode. Basic Latin = collection 1
2810							See ISO 10646, Appendix A
2811							csUnicodeASCII
2812
2813
2814							ISO-10646-Unicode-Latin1
2815							1003
2816							ISO Latin-1 subset of Unicode. Basic Latin and Latin-1
2817							Supplement = collections 1 and 2. See ISO 10646,
2818							Appendix A. See .
2819							csUnicodeLatin1
2820							ISO-10646
2821
2822
2823							ISO-10646-J-1
2824							1004
2825							ISO 10646 Japanese, see .
2826							csUnicodeJapanese
2827
2828
2829							ISO-Unicode-IBM-1261
2830							1005
2831							IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
2832							csUnicodeIBM1261
2833
2834
2835							ISO-Unicode-IBM-1268
2836							1006
2837							IBM Latin-4 Extended Presentation Set, GCSGID: 1268
2838							csUnicodeIBM1268
2839
2840
2841							ISO-Unicode-IBM-1276
2842							1007
2843							IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
2844							csUnicodeIBM1276
2845
2846
2847							ISO-Unicode-IBM-1264
2848							1008
2849							IBM Arabic Presentation Set, GCSGID: 1264
2850							csUnicodeIBM1264
2851
2852
2853							ISO-Unicode-IBM-1265
2854							1009
2855							IBM Hebrew Presentation Set, GCSGID: 1265
2856							csUnicodeIBM1265
2857
2858
2859							UNICODE-1-1
2860
2861							1010
2862
2863
2864
2865							csUnicode11
2866
2867
2868							SCSU
2869							1011
2870							SCSU See
2871							csSCSU
2872
2873
2874							UTF-7
2875
2876							1012
2877
2878
2879
2880							csUTF7
2881
2882
2883							UTF-16BE
2884
2885							1013
2886
2887
2888
2889							csUTF16BE
2890
2891
2892							UTF-16LE
2893
2894							1014
2895
2896
2897
2898							csUTF16LE
2899
2900
2901							UTF-16
2902
2903							1015
2904
2905
2906
2907							csUTF16
2908
2909
2910							CESU-8
2911
2912							1016
2913
2914
2915
2916							csCESU8
2917							csCESU-8
2918
2919
2920							UTF-32
2921
2922							1017
2923
2924
2925
2926							csUTF32
2927
2928
2929							UTF-32BE
2930
2931							1018
2932
2933
2934
2935							csUTF32BE
2936
2937
2938							UTF-32LE
2939
2940							1019
2941
2942
2943
2944							csUTF32LE
2945
2946
2947							BOCU-1
2948
2949							1020
2950
2951
2952
2953							csBOCU1
2954							csBOCU-1
2955
2956
2957							ISO-8859-1-Windows-3.0-Latin-1
2958							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
2959							(P/N 5021-0329) pp B-13, 1996.
2960							2000
2961							Extended ISO 8859-1 Latin-1 for Windows 3.0.
2962							PCL Symbol Set id: 9U
2963							csWindows30Latin1
2964
2965
2966							ISO-8859-1-Windows-3.1-Latin-1
2967							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
2968							(P/N 5021-0329) pp B-13, 1996.
2969							2001
2970							Extended ISO 8859-1 Latin-1 for Windows 3.1.
2971							PCL Symbol Set id: 19U
2972							csWindows31Latin1
2973
2974
2975							ISO-8859-2-Windows-Latin-2
2976							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
2977							(P/N 5021-0329) pp B-13, 1996.
2978							2002
2979							Extended ISO 8859-2. Latin-2 for Windows 3.1.
2980							PCL Symbol Set id: 9E
2981							csWindows31Latin2
2982
2983
2984							ISO-8859-9-Windows-Latin-5
2985							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
2986							(P/N 5021-0329) pp B-13, 1996.
2987							2003
2988							Extended ISO 8859-9. Latin-5 for Windows 3.1
2989							PCL Symbol Set id: 5T
2990							csWindows31Latin5
2991
2992
2993							hp-roman8
2994							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
2995							(P/N 5021-0329) pp B-13, 1996.
2996
2997
2998							2004
2999							LaserJet IIP Printer User's Manual,
3000							HP part no 33471-90901, Hewlet-Packard, June 1989.
3001							roman8
3002							r8
3003							csHPRoman8
3004
3005
3006							Adobe-Standard-Encoding
3007							Adobe Systems Incorporated, PostScript Language Reference
3008							Manual, second edition, Addison-Wesley Publishing Company,
3009							Inc., 1990.
3010							2005
3011							PostScript Language Reference Manual
3012							PCL Symbol Set id: 10J
3013							csAdobeStandardEncoding
3014
3015
3016							Ventura-US
3017							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3018							(P/N 5021-0329) pp B-13, 1996.
3019							2006
3020							Ventura US. ASCII plus characters typically used in
3021							publishing, like pilcrow, copyright, registered, trade mark,
3022							section, dagger, and double dagger in the range A0 (hex)
3023							to FF (hex).
3024							PCL Symbol Set id: 14J
3025							csVenturaUS
3026
3027
3028							Ventura-International
3029							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3030							(P/N 5021-0329) pp B-13, 1996.
3031							2007
3032							Ventura International. ASCII plus coded characters similar
3033							to Roman8.
3034							PCL Symbol Set id: 13J
3035							csVenturaInternational
3036
3037
3038							DEC-MCS
3039
3040
3041							2008
3042							VAX/VMS User's Manual,
3043							Order Number: AI-Y517A-TE, April 1986.
3044							dec
3045							csDECMCS
3046
3047
3048							IBM850
3049
3050
3051							2009
3052							IBM NLS RM Vol2 SE09-8002-01, March 1990
3053							cp850
3054							850
3055							csPC850Multilingual
3056
3057
3058							PC8-Danish-Norwegian
3059							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3060							(P/N 5021-0329) pp B-13, 1996.
3061							2012
3062							PC Danish Norwegian
3063							8-bit PC set for Danish Norwegian
3064							PCL Symbol Set id: 11U
3065							csPC8DanishNorwegian
3066
3067
3068							IBM862
3069
3070
3071							2013
3072							IBM NLS RM Vol2 SE09-8002-01, March 1990
3073							cp862
3074							862
3075							csPC862LatinHebrew
3076
3077
3078							PC8-Turkish
3079							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3080							(P/N 5021-0329) pp B-13, 1996.
3081							2014
3082							PC Latin Turkish. PCL Symbol Set id: 9T
3083							csPC8Turkish
3084
3085
3086							IBM-Symbols
3087							IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
3088							for Core Interchange Digitized Type", Publication number
3089							S544-3708-01
3090							2015
3091							Presentation Set, CPGID: 259
3092							csIBMSymbols
3093
3094
3095							IBM-Thai
3096							IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
3097							for Core Interchange Digitized Type", Publication number
3098							S544-3708-01
3099							2016
3100							Presentation Set, CPGID: 838
3101							csIBMThai
3102
3103
3104							HP-Legal
3105							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3106							(P/N 5021-0329) pp B-13, 1996.
3107							2017
3108							PCL 5 Comparison Guide, Hewlett-Packard,
3109							HP part number 5961-0510, October 1992
3110							PCL Symbol Set id: 1U
3111							csHPLegal
3112
3113
3114							HP-Pi-font
3115							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3116							(P/N 5021-0329) pp B-13, 1996.
3117							2018
3118							PCL 5 Comparison Guide, Hewlett-Packard,
3119							HP part number 5961-0510, October 1992
3120							PCL Symbol Set id: 15U
3121							csHPPiFont
3122
3123
3124							HP-Math8
3125							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3126							(P/N 5021-0329) pp B-13, 1996.
3127							2019
3128							PCL 5 Comparison Guide, Hewlett-Packard,
3129							HP part number 5961-0510, October 1992
3130							PCL Symbol Set id: 8M
3131							csHPMath8
3132
3133
3134							Adobe-Symbol-Encoding
3135							Adobe Systems Incorporated, PostScript Language Reference
3136							Manual, second edition, Addison-Wesley Publishing Company,
3137							Inc., 1990.
3138							2020
3139							PostScript Language Reference Manual
3140							PCL Symbol Set id: 5M
3141							csHPPSMath
3142
3143
3144							HP-DeskTop
3145							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3146							(P/N 5021-0329) pp B-13, 1996.
3147							2021
3148							PCL 5 Comparison Guide, Hewlett-Packard,
3149							HP part number 5961-0510, October 1992
3150							PCL Symbol Set id: 7J
3151							csHPDesktop
3152
3153
3154							Ventura-Math
3155							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3156							(P/N 5021-0329) pp B-13, 1996.
3157							2022
3158							PCL 5 Comparison Guide, Hewlett-Packard,
3159							HP part number 5961-0510, October 1992
3160							PCL Symbol Set id: 6M
3161							csVenturaMath
3162
3163
3164							Microsoft-Publishing
3165							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3166							(P/N 5021-0329) pp B-13, 1996.
3167							2023
3168							PCL 5 Comparison Guide, Hewlett-Packard,
3169							HP part number 5961-0510, October 1992
3170							PCL Symbol Set id: 6J
3171							csMicrosoftPublishing
3172
3173
3174							Windows-31J
3175							2024
3176							Windows Japanese. A further extension of Shift_JIS
3177							to include NEC special characters (Row 13), NEC
3178							selection of IBM extensions (Rows 89 to 92), and IBM
3179							extensions (Rows 115 to 119). The CCS's are
3180							JIS X0201:1997, JIS X0208:1997, and these extensions.
3181							This charset can be used for the top-level media type "text",
3182							but it is of limited or specialized use (see ).
3183							PCL Symbol Set id: 19K
3184							csWindows31J
3185
3186
3187							GB2312
3188							2025
3189							Chinese for People's Republic of China (PRC) mixed one byte,
3190							two byte set:
3191							20-7E = one byte ASCII
3192							A1-FE = two byte PRC Kanji
3193							See GB 2312-80
3194							PCL Symbol Set Id: 18C
3195							csGB2312
3196							GB2312
3197
3198
3199							Big5
3200							2026
3201							Chinese for Taiwan Multi-byte set.
3202							PCL Symbol Set Id: 18T
3203							csBig5
3204							Big5
3205
3206
3207							macintosh
3208
3209
3210							2027
3211							The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
3212							mac
3213							csMacintosh
3214
3215
3216							IBM037
3217
3218
3219							2028
3220							IBM NLS RM Vol2 SE09-8002-01, March 1990
3221							cp037
3222							ebcdic-cp-us
3223							ebcdic-cp-ca
3224							ebcdic-cp-wt
3225							ebcdic-cp-nl
3226							csIBM037
3227
3228
3229							IBM038
3230
3231
3232							2029
3233							IBM 3174 Character Set Ref, GA27-3831-02, March 1990
3234							EBCDIC-INT
3235							cp038
3236							csIBM038
3237
3238
3239							IBM273
3240
3241
3242							2030
3243							IBM NLS RM Vol2 SE09-8002-01, March 1990
3244							CP273
3245							csIBM273
3246
3247
3248							IBM274
3249
3250
3251							2031
3252							IBM 3174 Character Set Ref, GA27-3831-02, March 1990
3253							EBCDIC-BE
3254							CP274
3255							csIBM274
3256
3257
3258							IBM275
3259
3260
3261							2032
3262							IBM NLS RM Vol2 SE09-8002-01, March 1990
3263							EBCDIC-BR
3264							cp275
3265							csIBM275
3266
3267
3268							IBM277
3269
3270
3271							2033
3272							IBM NLS RM Vol2 SE09-8002-01, March 1990
3273							EBCDIC-CP-DK
3274							EBCDIC-CP-NO
3275							csIBM277
3276
3277
3278							IBM278
3279
3280
3281							2034
3282							IBM NLS RM Vol2 SE09-8002-01, March 1990
3283							CP278
3284							ebcdic-cp-fi
3285							ebcdic-cp-se
3286							csIBM278
3287
3288
3289							IBM280
3290
3291
3292							2035
3293							IBM NLS RM Vol2 SE09-8002-01, March 1990
3294							CP280
3295							ebcdic-cp-it
3296							csIBM280
3297
3298
3299							IBM281
3300
3301
3302							2036
3303							IBM 3174 Character Set Ref, GA27-3831-02, March 1990
3304							EBCDIC-JP-E
3305							cp281
3306							csIBM281
3307
3308
3309							IBM284
3310
3311
3312							2037
3313							IBM NLS RM Vol2 SE09-8002-01, March 1990
3314							CP284
3315							ebcdic-cp-es
3316							csIBM284
3317
3318
3319							IBM285
3320
3321
3322							2038
3323							IBM NLS RM Vol2 SE09-8002-01, March 1990
3324							CP285
3325							ebcdic-cp-gb
3326							csIBM285
3327
3328
3329							IBM290
3330
3331
3332							2039
3333							IBM 3174 Character Set Ref, GA27-3831-02, March 1990
3334							cp290
3335							EBCDIC-JP-kana
3336							csIBM290
3337
3338
3339							IBM297
3340
3341
3342							2040
3343							IBM NLS RM Vol2 SE09-8002-01, March 1990
3344							cp297
3345							ebcdic-cp-fr
3346							csIBM297
3347
3348
3349							IBM420
3350
3351
3352							2041
3353							IBM NLS RM Vol2 SE09-8002-01, March 1990,
3354							IBM NLS RM p 11-11
3355							cp420
3356							ebcdic-cp-ar1
3357							csIBM420
3358
3359
3360							IBM423
3361
3362
3363							2042
3364							IBM NLS RM Vol2 SE09-8002-01, March 1990
3365							cp423
3366							ebcdic-cp-gr
3367							csIBM423
3368
3369
3370							IBM424
3371
3372
3373							2043
3374							IBM NLS RM Vol2 SE09-8002-01, March 1990
3375							cp424
3376							ebcdic-cp-he
3377							csIBM424
3378
3379
3380							IBM437
3381
3382
3383							2011
3384							IBM NLS RM Vol2 SE09-8002-01, March 1990
3385							cp437
3386							437
3387							csPC8CodePage437
3388
3389
3390							IBM500
3391
3392
3393							2044
3394							IBM NLS RM Vol2 SE09-8002-01, March 1990
3395							CP500
3396							ebcdic-cp-be
3397							ebcdic-cp-ch
3398							csIBM500
3399
3400
3401							IBM851
3402
3403
3404							2045
3405							IBM NLS RM Vol2 SE09-8002-01, March 1990
3406							cp851
3407							851
3408							csIBM851
3409
3410
3411							IBM852
3412
3413
3414							2010
3415							IBM NLS RM Vol2 SE09-8002-01, March 1990
3416							cp852
3417							852
3418							csPCp852
3419
3420
3421							IBM855
3422
3423
3424							2046
3425							IBM NLS RM Vol2 SE09-8002-01, March 1990
3426							cp855
3427							855
3428							csIBM855
3429
3430
3431							IBM857
3432
3433
3434							2047
3435							IBM NLS RM Vol2 SE09-8002-01, March 1990
3436							cp857
3437							857
3438							csIBM857
3439
3440
3441							IBM860
3442
3443
3444							2048
3445							IBM NLS RM Vol2 SE09-8002-01, March 1990
3446							cp860
3447							860
3448							csIBM860
3449
3450
3451							IBM861
3452
3453
3454							2049
3455							IBM NLS RM Vol2 SE09-8002-01, March 1990
3456							cp861
3457							861
3458							cp-is
3459							csIBM861
3460
3461
3462							IBM863
3463
3464
3465							2050
3466							IBM Keyboard layouts and code pages, PN 07G4586 June 1991
3467							cp863
3468							863
3469							csIBM863
3470
3471
3472							IBM864
3473
3474
3475							2051
3476							IBM Keyboard layouts and code pages, PN 07G4586 June 1991
3477							cp864
3478							csIBM864
3479
3480
3481							IBM865
3482
3483
3484							2052
3485							IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
3486							cp865
3487							865
3488							csIBM865
3489
3490
3491							IBM868
3492
3493
3494							2053
3495							IBM NLS RM Vol2 SE09-8002-01, March 1990
3496							CP868
3497							cp-ar
3498							csIBM868
3499
3500
3501							IBM869
3502
3503
3504							2054
3505							IBM Keyboard layouts and code pages, PN 07G4586 June 1991
3506							cp869
3507							869
3508							cp-gr
3509							csIBM869
3510
3511
3512							IBM870
3513
3514
3515							2055
3516							IBM NLS RM Vol2 SE09-8002-01, March 1990
3517							CP870
3518							ebcdic-cp-roece
3519							ebcdic-cp-yu
3520							csIBM870
3521
3522
3523							IBM871
3524
3525
3526							2056
3527							IBM NLS RM Vol2 SE09-8002-01, March 1990
3528							CP871
3529							ebcdic-cp-is
3530							csIBM871
3531
3532
3533							IBM880
3534
3535
3536							2057
3537							IBM NLS RM Vol2 SE09-8002-01, March 1990
3538							cp880
3539							EBCDIC-Cyrillic
3540							csIBM880
3541
3542
3543							IBM891
3544
3545
3546							2058
3547							IBM NLS RM Vol2 SE09-8002-01, March 1990
3548							cp891
3549							csIBM891
3550
3551
3552							IBM903
3553
3554
3555							2059
3556							IBM NLS RM Vol2 SE09-8002-01, March 1990
3557							cp903
3558							csIBM903
3559
3560
3561							IBM904
3562
3563
3564							2060
3565							IBM NLS RM Vol2 SE09-8002-01, March 1990
3566							cp904
3567							904
3568							csIBBM904
3569
3570
3571							IBM905
3572
3573
3574							2061
3575							IBM 3174 Character Set Ref, GA27-3831-02, March 1990
3576							CP905
3577							ebcdic-cp-tr
3578							csIBM905
3579
3580
3581							IBM918
3582
3583
3584							2062
3585							IBM NLS RM Vol2 SE09-8002-01, March 1990
3586							CP918
3587							ebcdic-cp-ar2
3588							csIBM918
3589
3590
3591							IBM1026
3592
3593
3594							2063
3595							IBM NLS RM Vol2 SE09-8002-01, March 1990
3596							CP1026
3597							csIBM1026
3598
3599
3600							EBCDIC-AT-DE
3601
3602
3603							2064
3604							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3605							csIBMEBCDICATDE
3606
3607
3608							EBCDIC-AT-DE-A
3609
3610
3611							2065
3612							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3613							csEBCDICATDEA
3614
3615
3616							EBCDIC-CA-FR
3617
3618
3619							2066
3620							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3621							csEBCDICCAFR
3622
3623
3624							EBCDIC-DK-NO
3625
3626
3627							2067
3628							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3629							csEBCDICDKNO
3630
3631
3632							EBCDIC-DK-NO-A
3633
3634
3635							2068
3636							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3637							csEBCDICDKNOA
3638
3639
3640							EBCDIC-FI-SE
3641
3642
3643							2069
3644							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3645							csEBCDICFISE
3646
3647
3648							EBCDIC-FI-SE-A
3649
3650
3651							2070
3652							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3653							csEBCDICFISEA
3654
3655
3656							EBCDIC-FR
3657
3658
3659							2071
3660							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3661							csEBCDICFR
3662
3663
3664							EBCDIC-IT
3665
3666
3667							2072
3668							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3669							csEBCDICIT
3670
3671
3672							EBCDIC-PT
3673
3674
3675							2073
3676							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3677							csEBCDICPT
3678
3679
3680							EBCDIC-ES
3681
3682
3683							2074
3684							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3685							csEBCDICES
3686
3687
3688							EBCDIC-ES-A
3689
3690
3691							2075
3692							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3693							csEBCDICESA
3694
3695
3696							EBCDIC-ES-S
3697
3698
3699							2076
3700							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3701							csEBCDICESS
3702
3703
3704							EBCDIC-UK
3705
3706
3707							2077
3708							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3709							csEBCDICUK
3710
3711
3712							EBCDIC-US
3713
3714
3715							2078
3716							IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
3717							csEBCDICUS
3718
3719
3720							UNKNOWN-8BIT
3721
3722							2079
3723							csUnknown8BiT
3724
3725
3726							MNEMONIC
3727
3728
3729							2080
3730							, also known as "mnemonic+ascii+38"
3731							csMnemonic
3732
3733
3734							MNEM
3735
3736
3737							2081
3738							, also known as "mnemonic+ascii+8200"
3739							csMnem
3740
3741
3742							VISCII
3743
3744							2082
3745
3746
3747
3748							csVISCII
3749
3750
3751							VIQR
3752
3753							2083
3754
3755
3756
3757							csVIQR
3758
3759
3760							KOI8-R
3761
3762							2084
3763							, based on GOST-19768-74, ISO-6937/8,
3764							INIS-Cyrillic, ISO-5427.
3765							csKOI8R
3766							KOI8-R
3767
3768
3769							HZ-GB-2312
3770							2085
3771							,
3772
3773
3774							IBM866
3775
3776							2086
3777							IBM NLDG Volume 2 (SE09-8002-03) August 1994
3778							cp866
3779							866
3780							csIBM866
3781
3782
3783							IBM775
3784							Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
3785							(P/N 5021-0329) pp B-13, 1996.
3786							2087
3787							HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
3788							cp775
3789							csPC775Baltic
3790
3791
3792							KOI8-U
3793
3794							2088
3795
3796
3797
3798							csKOI8U
3799
3800
3801							IBM00858
3802							2089
3803							IBM See
3804							CCSID00858
3805							CP00858
3806							PC-Multilingual-850+euro
3807							csIBM00858
3808
3809
3810							IBM00924
3811							2090
3812							IBM See
3813							CCSID00924
3814							CP00924
3815							ebcdic-Latin9--euro
3816							csIBM00924
3817
3818
3819							IBM01140
3820							2091
3821							IBM See
3822							CCSID01140
3823							CP01140
3824							ebcdic-us-37+euro
3825							csIBM01140
3826
3827
3828							IBM01141
3829							2092
3830							IBM See
3831							CCSID01141
3832							CP01141
3833							ebcdic-de-273+euro
3834							csIBM01141
3835
3836
3837							IBM01142
3838							2093
3839							IBM See
3840							CCSID01142
3841							CP01142
3842							ebcdic-dk-277+euro
3843							ebcdic-no-277+euro
3844							csIBM01142
3845
3846
3847							IBM01143
3848							2094
3849							IBM See
3850							CCSID01143
3851							CP01143
3852							ebcdic-fi-278+euro
3853							ebcdic-se-278+euro
3854							csIBM01143
3855
3856
3857							IBM01144
3858							2095
3859							IBM See
3860							CCSID01144
3861							CP01144
3862							ebcdic-it-280+euro
3863							csIBM01144
3864
3865
3866							IBM01145
3867							2096
3868							IBM See
3869							CCSID01145
3870							CP01145
3871							ebcdic-es-284+euro
3872							csIBM01145
3873
3874
3875							IBM01146
3876							2097
3877							IBM See
3878							CCSID01146
3879							CP01146
3880							ebcdic-gb-285+euro
3881							csIBM01146
3882
3883
3884							IBM01147
3885							2098
3886							IBM See
3887							CCSID01147
3888							CP01147
3889							ebcdic-fr-297+euro
3890							csIBM01147
3891
3892
3893							IBM01148
3894							2099
3895							IBM See
3896							CCSID01148
3897							CP01148
3898							ebcdic-international-500+euro
3899							csIBM01148
3900
3901
3902							IBM01149
3903							2100
3904							IBM See
3905							CCSID01149
3906							CP01149
3907							ebcdic-is-871+euro
3908							csIBM01149
3909
3910
3911							Big5-HKSCS
3912
3913							2101
3914							See
3915							csBig5HKSCS
3916
3917
3918							IBM1047
3919
3920							2102
3921							IBM1047 (EBCDIC Latin 1/Open Systems)
3922
3923							IBM-1047
3924							csIBM1047
3925
3926
3927							PTCP154
3928
3929							2103
3930							See
3931							csPTCP154
3932							PT154
3933							CP154
3934							Cyrillic-Asian
3935
3936
3937							Amiga-1251
3938							2104
3939							See
3940							Ami1251
3941							Amiga1251
3942							Ami-1251
3943							csAmiga1251
3944							(Aliases are provided for historical reasons and should not be used) [Malyshev]
3945
3946
3947							KOI7-switched
3948							2105
3949							See
3950							csKOI7switched
3951
3952
3953							BRF
3954							2106
3955							See
3956							csBRF
3957
3958
3959							TSCII
3960							2107
3961							See
3962							csTSCII
3963
3964
3965							CP51932
3966							2108
3967							See
3968							csCP51932
3969
3970
3971							windows-874
3972							2109
3973							See
3974							cswindows874
3975
3976
3977							windows-1250
3978							2250
3979							Microsoft
3980							cswindows1250
3981
3982
3983							windows-1251
3984							2251
3985							Microsoft
3986							cswindows1251
3987
3988
3989							windows-1252
3990							2252
3991							Microsoft
3992							cswindows1252
3993
3994
3995							windows-1253
3996							2253
3997							Microsoft
3998							cswindows1253
3999
4000
4001							windows-1254
4002							2254
4003							Microsoft
4004							cswindows1254
4005
4006
4007							windows-1255
4008							2255
4009							Microsoft
4010							cswindows1255
4011
4012
4013							windows-1256
4014							2256
4015							Microsoft
4016							cswindows1256
4017
4018
4019							windows-1257
4020							2257
4021							Microsoft
4022							cswindows1257
4023
4024
4025							windows-1258
4026							2258
4027							Microsoft
4028							cswindows1258
4029
4030
4031							TIS-620
4032							2259
4033							Thai Industrial Standards Institute (TISI)
4034							csTIS620
4035							ISO-8859-11
4036
4037
4038							CP50220
4039							2260
4040							See
4041							csCP50220
4042
4043
4044
4045
4046							Alexander Uskov
4047							mailto:auskov&idc.kz
4048							2002-09
4049
4050
4051							Alexei Veremeev
4052							mailto:Alexey.Veremeev&oracle.com
4053							2006-12-07
4054
4055
4056							Chris Wendt
4057							mailto:christw&microsoft.com
4058							1999-12
4059
4060
4061							Hank Nussbacher
4062							mailto:hank&vm.tau.ac.il
4063
4064
4065							Internet Assigned Numbers Authority
4066							mailto:iana&iana.org
4067
4068
4069							Jun Murai
4070							mailto:jun&wide.ad.jp
4071
4072
4073							Katya Lazhintseva
4074							mailto:katyal&microsoft.com
4075							1996-05
4076
4077
4078							Keld Simonsen
4079							mailto:Keld&keldix.com
4080							2018-10-19
4081
4082
4083							Keld Simonsen
4084							mailto:Keld.Simonsen&rap.dk
4085							2000-08
4086
4087
4088							Kuppuswamy Kalyanasundaram
4089							mailto:kalyan.geo&yahoo.com
4090							2007-05-14
4091
4092
4093							Mark Davis
4094							mailto:mark&unicode.org
4095							2002-04
4096
4097
4098							Markus Scherer
4099							mailto:markus.scherer&jtcsv.com
4100							2002-09
4101
4102
4103							Masataka Ohta
4104							mailto:mohta&cc.titech.ac.jp
4105							1995-07
4106
4107
4108							Nicky Yick
4109							mailto:cliac&itsd.gcn.gov.hk
4110							2000-10
4111
4112
4113							Reuel Robrigado
4114							mailto:reuelr&ca.ibm.com
4115							2002-09
4116
4117
4118							Rick Pond
4119							mailto:rickpond&vnet.ibm.com
4120							1997-03
4121
4122
4123							Sairan M. Kikkarin
4124							mailto:sairan&sci.kz
4125							2006-12-07
4126
4127
4128							Samuel Thibault
4129							mailto:samuel.thibault&ens-lyon.org
4130							2006-12-07
4131
4132
4133							Shawn Steele
4134							mailto:Shawn.Steele&microsoft.com
4135							2010-11-04
4136
4137
4138							Tamer Mahdi
4139							mailto:tamer&ca.ibm.com
4140							2000-08
4141
4142
4143							Toby Phipps
4144							mailto:tphipps&peoplesoft.com
4145							2002-03
4146
4147
4148							Trin Tantsetthi
4149							mailto:trin&mozart.inet.co.th
4150							1998-09
4151
4152
4153							Vladas Tumasonis
4154							mailto:vladas.tumasonis&maf.vu.lt
4155							2000-08
4156
4157
4158							Woohyong Choi
4159							mailto:whchoi&cosmos.kaist.ac.kr
4160
4161
4162							Yui Naruse
4163							mailto:naruse&airemix.jp
4164							2011-09-23
4165
4166
4167
4168							EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
4169							} # _init_data
4170
4171							1;
4172
4173							__END__