File Coverage

blib/lib/MIME/Charset.pm

Criterion	Covered	Total	%
statement	214	291	73.5
branch	92	168	54.7
condition	43	99	43.4
subroutine	29	35	82.8
pod	20	20	100.0
total	398	613	64.9

line	stmt	bran	cond	sub	pod	time	code
1							#-- perl --
2
3							package MIME::Charset;
4	9			9		201910	use 5.005;
	9					92
5
6							=head1 NAME
7
8							MIME::Charset - Charset Information for MIME
9
10							=head1 SYNOPSIS
11
12							use MIME::Charset:
13
14							$charset = MIME::Charset->new("euc-jp");
15
16							Getting charset information:
17
18							$benc = $charset->body_encoding; # e.g. "Q"
19							$cset = $charset->as_string; # e.g. "US-ASCII"
20							$henc = $charset->header_encoding; # e.g. "S"
21							$cset = $charset->output_charset; # e.g. "ISO-2022-JP"
22
23							Translating text data:
24
25							($text, $charset, $encoding) =
26							$charset->header_encode(
27							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
28							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
29							Charset => 'euc-jp');
30							# ...returns e.g. (, "ISO-2022-JP", "B").
31
32							($text, $charset, $encoding) =
33							$charset->body_encode(
34							"Collectioneur path\xe9tiquement ".
35							"\xe9clectique de d\xe9chets",
36							Charset => 'latin1');
37							# ...returns e.g. (, "ISO-8859-1", "QUOTED-PRINTABLE").
38
39							$len = $charset->encoded_header_len(
40							"Perl\xe8\xa8\x80\xe8\xaa\x9e",
41							Charset => 'utf-8',
42							Encoding => "b");
43							# ...returns e.g. 28.
44
45							Manipulating module defaults:
46
47							MIME::Charset::alias("csEUCKR", "euc-kr");
48							MIME::Charset::default("iso-8859-1");
49							MIME::Charset::fallback("us-ascii");
50
51							Non-OO functions (may be deprecated in near future):
52
53							use MIME::Charset qw(:info);
54
55							$benc = body_encoding("iso-8859-2"); # "Q"
56							$cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
57							$henc = header_encoding("utf-8"); # "S"
58							$cset = output_charset("shift_jis"); # "ISO-2022-JP"
59
60							use MIME::Charset qw(:trans);
61
62							($text, $charset, $encoding) =
63							header_encode(
64							"\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
65							"\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
66							"euc-jp");
67							# ...returns (, "ISO-2022-JP", "B");
68
69							($text, $charset, $encoding) =
70							body_encode(
71							"Collectioneur path\xe9tiquement ".
72							"\xe9clectique de d\xe9chets",
73							"latin1");
74							# ...returns (, "ISO-8859-1", "QUOTED-PRINTABLE");
75
76							$len = encoded_header_len(
77							"Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
78
79							=head1 DESCRIPTION
80
81							MIME::Charset provides information about character sets used for
82							MIME messages on Internet.
83
84							=head2 Definitions
85
86							The B is ``character set'' used in MIME to refer to a
87							method of converting a sequence of octets into a sequence of characters.
88							It includes both concepts of ``coded character set'' (CCS) and
89							``character encoding scheme'' (CES) of ISO/IEC.
90
91							The B is that used in MIME to refer to a method of representing
92							a body part or a header body as sequence(s) of printable US-ASCII
93							characters.
94
95							=cut
96
97	9			9		44	use strict;
	9					11
	9					246
98	9			9		39	use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS $Config);
	9					14
	9					653
99	9			9		55	use Exporter;
	9					14
	9					854
100							@ISA = qw(Exporter);
101							@EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
102							body_encode encoded_header_len header_encode);
103							@EXPORT_OK = qw(alias default fallback recommended);
104							%EXPORT_TAGS = (
105							"info" => [qw(body_encoding header_encoding
106							canonical_charset output_charset)],
107							"trans" =>[ qw(body_encode encoded_header_len
108							header_encode)],
109							);
110	9			9		62	use Carp qw(croak);
	9					15
	9					656
111
112	9	50		9		56	use constant USE_ENCODE => ($] >= 5.007003)? 'Encode': '';
	9					15
	9					1430
113
114							my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
115							is_utf8 resolve_alias);
116							if (USE_ENCODE) {
117	9			9		4830	eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
	9					84376
	9					5381
118							if ($@) { # Perl 5.7.3 + Encode 0.40
119							eval "use ".USE_ENCODE." qw(is_utf8);";
120							require MIME::Charset::_Compat;
121							for my $sub (@ENCODE_SUBS) {
122	9			9		55	no strict "refs";
	9					11
	9					742
123							*{$sub} = \&{"MIME::Charset::_Compat::$sub"}
124							unless $sub eq 'is_utf8';
125							}
126							}
127							} else {
128							require MIME::Charset::_Compat;
129							for my $sub (@ENCODE_SUBS) {
130	9			9		53	no strict "refs";
	9					19
	9					33495
131							*{$sub} = \&{"MIME::Charset::_Compat::$sub"};
132							}
133							}
134
135							$VERSION = '1.013_01';
136
137							######## Private Attributes ########
138
139							my $DEFAULT_CHARSET = 'US-ASCII';
140							my $FALLBACK_CHARSET = 'UTF-8';
141
142							# This table was initially borrowed from Python email package.
143
144							my %CHARSETS = (# input header enc body enc output conv
145							'DIN_66003' => ['Q', undef, undef],
146							'ISO-8859-1' => ['Q', 'Q', undef],
147							'ISO-8859-2' => ['Q', 'Q', undef],
148							'ISO-8859-3' => ['Q', 'Q', undef],
149							'ISO-8859-4' => ['Q', 'Q', undef],
150							# ISO-8859-5 is Cyrillic, and not especially used
151							# ISO-8859-6 is Arabic, also not particularly used
152							# ISO-8859-7 is Greek, 'Q' will not make it readable
153							# ISO-8859-8 is Hebrew, 'Q' will not make it readable
154							'ISO-8859-9' => ['Q', 'Q', undef],
155							'ISO-8859-10' => ['Q', 'Q', undef],
156							# ISO-8859-11 is Thai, 'Q' will not make it readable
157							'ISO-8859-13' => ['Q', 'Q', undef],
158							'ISO-8859-14' => ['Q', 'Q', undef],
159							'ISO-8859-15' => ['Q', 'Q', undef],
160							'ISO-8859-16' => ['Q', 'Q', undef],
161							'WINDOWS-1252' => ['Q', 'Q', undef],
162							'VISCII' => ['Q', 'Q', undef],
163							'US-ASCII' => [undef, undef, undef],
164							'BIG5' => ['B', 'B', undef],
165							'GB2312' => ['B', 'B', undef],
166							'HZ-GB-2312' => ['B', undef, undef],
167							'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
168							'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
169							'ISO-2022-JP' => ['B', undef, undef],
170							'ISO-2022-JP-1' => ['B', undef, undef],
171							'ISO-2022-JP-2' => ['B', undef, undef],
172							'EUC-JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
173							'SHIFT_JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
174							'ISO-2022-JP-3' => ['B', undef, undef],
175							'EUC-JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
176							'SHIFT_JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
177							'ISO-2022-JP-2004' => ['B', undef, undef],
178							'KOI8-R' => ['B', 'B', undef],
179							'TIS-620' => ['B', 'B', undef], # cf. Mew
180							'UTF-16' => ['B', 'B', undef],
181							'UTF-16BE' => ['B', 'B', undef],
182							'UTF-16LE' => ['B', 'B', undef],
183							'UTF-32' => ['B', 'B', undef],
184							'UTF-32BE' => ['B', 'B', undef],
185							'UTF-32LE' => ['B', 'B', undef],
186							'UTF-7' => ['Q', undef, undef],
187							'UTF-8' => ['S', 'S', undef],
188							'GSM03.38' => [undef, undef, undef], # not for MIME
189							# We're making this one up to represent raw unencoded 8bit
190							'8BIT' => [undef, 'B', 'ISO-8859-1'],
191							);
192
193							# Fix some unexpected or unpreferred names returned by
194							# Encode::resolve_alias() or used by somebodies else.
195							my %CHARSET_ALIASES = (# unpreferred preferred
196							"ASCII" => "US-ASCII",
197							"BIG5-ETEN" => "BIG5",
198							"CP1250" => "WINDOWS-1250",
199							"CP1251" => "WINDOWS-1251",
200							"CP1252" => "WINDOWS-1252",
201							"CP1253" => "WINDOWS-1253",
202							"CP1254" => "WINDOWS-1254",
203							"CP1255" => "WINDOWS-1255",
204							"CP1256" => "WINDOWS-1256",
205							"CP1257" => "WINDOWS-1257",
206							"CP1258" => "WINDOWS-1258",
207							"CP874" => "WINDOWS-874",
208							"CP936" => "GBK",
209							"CP949" => "KS_C_5601-1987",
210							"DIN66003" => "DIN_66003",
211							"EUC-CN" => "GB2312",
212							"HZ" => "HZ-GB-2312", # RFC 1842
213							"KS_C_5601" => "KS_C_5601-1987",
214							"SHIFTJIS" => "SHIFT_JIS",
215							"SHIFTJISX0213" => "SHIFT_JISX0213",
216							"TIS620" => "TIS-620", # IANA MIBenum 2259
217							"UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
218							"UTF8" => "UTF-8",
219							"UTF-8-STRICT" => "UTF-8", # Perl internal use
220							"GSM0338" => "GSM03.38", # not for MIME
221							);
222
223							# Some vendors encode characters beyond standardized mappings using extended
224							# encoders. Some other standard encoders need additional encode modules.
225							my %ENCODERS = (
226							'EXTENDED' => {
227							'ISO-8859-1' => [['cp1252'], ], # Encode::Byte
228							'ISO-8859-2' => [['cp1250'], ], # Encode::Byte
229							'ISO-8859-5' => [['cp1251'], ], # Encode::Byte
230							'ISO-8859-6' => [
231							['cp1256'], # Encode::Byte
232							# ['cp1006'], # ditto, for Farsi
233							],
234							'ISO-8859-6-I'=>[['cp1256'], ], # ditto
235							'ISO-8859-7' => [['cp1253'], ], # Encode::Byte
236							'ISO-8859-8' => [['cp1255'], ], # Encode::Byte
237							'ISO-8859-8-I'=>[['cp1255'], ], # ditto
238							'ISO-8859-9' => [['cp1254'], ], # Encode::Byte
239							'ISO-8859-13'=> [['cp1257'], ], # Encode::Byte
240							'GB2312' => [
241							['gb18030', 'Encode::HanExtra'],
242							['cp936'], # Encode::CN
243							],
244							'EUC-JP' => [
245							['eucJP-ascii', 'Encode::EUCJPASCII'],
246							# ['cp51932', 'Encode::EUCJPMS'],
247							],
248							'ISO-2022-JP'=> [
249							['x-iso2022jp-ascii',
250							'Encode::EUCJPASCII'],
251							# ['iso-2022-jp-ms','Encode::ISO2022JPMS'],
252							# ['cp50220', 'Encode::EUCJPMS'],
253							# ['cp50221', 'Encode::EUCJPMS'],
254							['iso-2022-jp-1'], # Encode::JP (note*)
255							],
256							'SHIFT_JIS' => [
257							['cp932'], # Encode::JP
258							],
259							'EUC-JISX0213' => [['euc-jis-2004', 'Encode::JISX0213'], ],
260							'ISO-2022-JP-3' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
261							'SHIFT_JISX0213'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
262							'EUC-KR' => [['cp949'], ], # Encode::KR
263							'BIG5' => [
264							# ['big5plus', 'Encode::HanExtra'],
265							# ['big5-2003', 'Encode::HanExtra'],
266							['cp950'], # Encode::TW
267							# ['big5-1984', 'Encode::HanExtra'],
268							],
269							'TIS-620' => [['cp874'], ], # Encode::Byte
270							'UTF-8' => [['utf8'], ], # Special name on Perl
271							},
272							'STANDARD' => {
273							'DIN_66003' => [['din66003', 'Endode::DIN66003'], ],
274							'ISO-8859-6-E' => [['iso-8859-6'],],# Encode::Byte
275							'ISO-8859-6-I' => [['iso-8859-6'],],# ditto
276							'ISO-8859-8-E' => [['iso-8859-8'],],# Encode::Byte
277							'ISO-8859-8-I' => [['iso-8859-8'],],# ditto
278							'GB18030' => [['gb18030', 'Encode::HanExtra'], ],
279							'ISO-2022-JP-2' => [['iso-2022-jp-2','Encode::ISO2022JP2'], ],
280							'EUC-JISX0213' => [['euc-jisx0213', 'Encode::JISX0213'], ],
281							'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JISX0213'], ],
282							'EUC-JIS-2004' => [['euc-jis-2004', 'Encode::JISX0213'], ],
283							'ISO-2022-JP-2004' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
284							'SHIFT_JIS-2004'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
285							'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
286							'HZ-GB-2312' => [['hz'], ], # Encode::CN
287							'TIS-620' => [['tis620'], ], # (note*)
288							'UTF-16' => [['x-utf16auto', 'MIME::Charset::UTF'],],
289							'UTF-32' => [['x-utf32auto', 'MIME::Charset::UTF'],],
290							'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
291
292							# (note*) ISO-8859-11 was not registered by IANA.
293							# L treats it as canonical name of ``tis-?620''.
294							},
295							);
296
297							# ISO-2022-* escape sequences etc. to detect charset from unencoded data.
298							my @ESCAPE_SEQS = (
299							# ISO-2022-* sequences
300							# escape seq, possible charset
301							# Following sequences are commonly used.
302							["\033\$\@", "ISO-2022-JP"], # RFC 1468
303							["\033\$B", "ISO-2022-JP"], # ditto
304							["\033(J", "ISO-2022-JP"], # ditto
305							["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
306							["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
307							# Following sequences are less commonly used.
308							["\033.A", "ISO-2022-JP-2"], # RFC 1554
309							["\033.F", "ISO-2022-JP-2"], # ditto
310							["\033\$(C", "ISO-2022-JP-2"], # ditto
311							["\033\$(O", "ISO-2022-JP-3"], # JIS X 0213:2000
312							["\033\$(P", "ISO-2022-JP-2004"], # JIS X 0213:2000/2004
313							["\033\$(Q", "ISO-2022-JP-2004"], # JIS X 0213:2004
314							["\033\$)C", "ISO-2022-KR"], # RFC 1557
315							["\033\$)A", "ISO-2022-CN"], # RFC 1922
316							["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
317							["\033\$)G", "ISO-2022-CN"], # ditto
318							["\033\$*H", "ISO-2022-CN"], # ditto
319							# Other sequences will be used with appropriate charset
320							# parameters, or hardly used.
321
322							# note*: This RFC defines ISO-2022-JP-1, superset of
323							# ISO-2022-JP. But that charset name is rarely used.
324							# OTOH many of encoders for ISO-2022-JP recognize this
325							# sequence so that comatibility with EUC-JP will be
326							# guaranteed.
327
328							# Singlebyte 7-bit sequences
329							# escape seq, possible charset
330							["\033(K", "DIN_66003"], # ISO-IR-21
331							["\033)K", "DIN_66003"], # ditto
332							["\033e", "GSM03.38"], # ESTI GSM 03.38 (note*)
333							["\033\012", "GSM03.38"], # ditto
334							["\033<", "GSM03.38"], # ditto
335							["\033/", "GSM03.38"], # ditto
336							["\033>", "GSM03.38"], # ditto
337							["\033\024", "GSM03.38"], # ditto
338							["\033(", "GSM03.38"], # ditto
339							["\033\@", "GSM03.38"], # ditto
340							["\033)", "GSM03.38"], # ditto
341							["\033=", "GSM03.38"], # ditto
342
343							# note*: This is not used for MIME message.
344							);
345
346							######## Public Configuration Attributes ########
347
348							$Config = {
349							Detect7bit => 'YES',
350							Mapping => 'EXTENDED',
351							Replacement => 'DEFAULT',
352							};
353							local @INC = @INC;
354							pop @INC if $INC[-1] eq '.';
355							eval { require MIME::Charset::Defaults; };
356
357							######## Private Constants ########
358
359							my $NON7BITRE = qr{
360							[^\x01-\x7e]
361							}x;
362
363							my $NONASCIIRE = qr{
364							[^\x09\x0a\x0d\x20\x21-\x7e]
365							}x;
366
367							my $ISO2022RE = qr{
368							ISO-2022-.+
369							}ix;
370
371							my $ASCIITRANSRE = qr{
372							HZ-GB-2312 \| UTF-7
373							}ix;
374
375
376							######## Public Functions ########
377
378							=head2 Constructor
379
380							=over
381
382							=item $charset = MIME::Charset->new([CHARSET [, OPTS]])
383
384							Create charset object.
385
386							OPTS may accept following key-value pair.
387							B:
388							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
389							conversion will not be performed. So this option do not have any effects.
390
391							=over 4
392
393							=item Mapping => MAPTYPE
394
395							Whether to extend mappings actually used for charset names or not.
396							C<"EXTENDED"> uses extended mappings.
397							C<"STANDARD"> uses standardized strict mappings.
398							Default is C<"EXTENDED">.
399
400							=back
401
402							=cut
403
404							sub new {
405	75			75	1	1408	my $class = shift;
406	75					107	my $charset = shift;
407	75	100				164	return bless {}, $class unless $charset;
408	70	50				165	return bless {}, $class if 75 < length $charset; # w/a for CPAN RT #65796.
409	70					119	my %params = @_;
410	70		33			308	my $mapping = uc($params{'Mapping'} \|\| $Config->{Mapping});
411
412	70	100				305	if ($charset =~ /\bhz.?gb.?2312$/i) {
		100
		100
413							# workaround: "HZ-GB-2312" mistakenly treated as "EUC-CN" by Encode
414							# (2.12).
415	4					10	$charset = "HZ-GB-2312";
416							} elsif ($charset =~ /\btis-?620$/i) {
417							# workaround: "TIS620" treated as ISO-8859-11 by Encode.
418							# And "TIS-620" not known by some versions of Encode (cf.
419							# CPAN RT #20781).
420	1					4	$charset = "TIS-620";
421							} elsif ($charset =~ /\biso[-_]8859[-_]8[-_]i$/i) {
422							# workaround: "ISO-8859-8-I" is treated as an alias of "ISO-8859-8"
423							# by Encode (3.18): See the note in
424							# https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
425							# However we'll treat these as separate names for compatibility.
426	1					3	$charset = "ISO-8859-8-I";
427							} else {
428	64		66			183	$charset = resolve_alias($charset) \|\| $charset
429							}
430	70		66			120754	$charset = $CHARSET_ALIASES{uc($charset)} \|\| uc($charset);
431	70					125	my ($henc, $benc, $outcset);
432	70					160	my $spec = $CHARSETS{$charset};
433	70	100				139	if ($spec) {
434	60					183	($henc, $benc, $outcset) =
435							($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
436							} else {
437	10					20	($henc, $benc, $outcset) = ('S', 'B', undef);
438							}
439	70					110	my ($decoder, $encoder);
440	70					97	if (USE_ENCODE) {
441	70					153	$decoder = _find_encoder($charset, $mapping);
442	70					131	$encoder = _find_encoder($outcset, $mapping);
443							} else {
444							$decoder = $encoder = undef;
445							}
446
447	70		66			696	bless {
			100
448							InputCharset => $charset,
449							Decoder => $decoder,
450							HeaderEncoding => $henc,
451							BodyEncoding => $benc,
452							OutputCharset => ($outcset \|\| $charset),
453							Encoder => ($encoder \|\| $decoder),
454							}, $class;
455							}
456
457							my %encoder_cache = ();
458
459							sub _find_encoder($$) {
460	140		100	140		360	my $charset = uc(shift \|\| "");
461	140	100				265	return undef unless $charset;
462	78					117	my $mapping = uc(shift);
463	78					112	my ($spec, $name, $module, $encoder);
464
465	78					109	local($@);
466	78					228	$encoder = $encoder_cache{$charset, $mapping};
467	78	100				166	return $encoder if ref $encoder;
468
469	55					129	foreach my $m (('EXTENDED', 'STANDARD')) {
470	82	50	66			256	next if $m eq 'EXTENDED' and $mapping ne 'EXTENDED';
471	82					197	$spec = $ENCODERS{$m}->{$charset};
472	82	100				160	next unless $spec;
473	41					63	foreach my $s (@{$spec}) {
	41					87
474	47					70	($name, $module) = @{$s};
	47					107
475	47	100				122	if ($module) {
476	14	100				940	next unless eval "require $module;";
477							}
478	37					154	$encoder = Encode::find_encoding($name);
479	37	50				21748	last if ref $encoder;
480							}
481	41	100				111	last if ref $encoder;
482							}
483	55		100			214	$encoder \|\|= Encode::find_encoding($charset);
484	55	100				1419	$encoder_cache{$charset, $mapping} = $encoder if $encoder;
485	55					126	return $encoder;
486							}
487
488							=back
489
490							=head2 Getting Information of Charsets
491
492							=over
493
494							=item $charset->body_encoding
495
496							=item body_encoding CHARSET
497
498							Get recommended transfer-encoding of CHARSET for message body.
499
500							Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE),
501							C<"S"> (shorter one of either) or
502							C (might not be transfer-encoded; either 7BIT or 8BIT). This may
503							not be same as encoding for message header.
504
505							=cut
506
507							sub body_encoding($) {
508	4			4	1	17	my $self = shift;
509	4	50				28	return undef unless $self;
510	4	100				22	$self = __PACKAGE__->new($self) unless ref $self;
511	4					38	$self->{BodyEncoding};
512							}
513
514							=item $charset->as_string
515
516							=item canonical_charset CHARSET
517
518							Get canonical name for charset.
519
520							=cut
521
522							sub canonical_charset($) {
523	4			4	1	12	my $self = shift;
524	4	50				13	return undef unless $self;
525	4	100				13	$self = __PACKAGE__->new($self) unless ref $self;
526	4					18	$self->{InputCharset};
527							}
528
529							sub as_string($) {
530	29			29	1	114	my $self = shift;
531	29					118	$self->{InputCharset};
532							}
533
534							=item $charset->decoder
535
536							Get L<"Encode::Encoding"> object to decode strings to Unicode by charset.
537							If charset is not specified or not known by this module,
538							undef will be returned.
539
540							=cut
541
542							sub decoder($) {
543	12			12	1	34	my $self = shift;
544	12					47	$self->{Decoder};
545							}
546
547							=item $charset->dup
548
549							Get a copy of charset object.
550
551							=cut
552
553							sub dup($) {
554	2			2	1	5	my $self = shift;
555	2					6	my $obj = __PACKAGE__->new(undef);
556	2					4	%{$obj} = %{$self};
	2					9
	2					9
557	2					5	$obj;
558							}
559
560							=item $charset->encoder([CHARSET])
561
562							Get L<"Encode::Encoding"> object to encode Unicode string using compatible
563							charset recommended to be used for messages on Internet.
564
565							If optional CHARSET is specified, replace encoder (and output charset
566							name) of $charset object with those of CHARSET, therefore,
567							$charset object will be a converter between original charset and
568							new CHARSET.
569
570							=cut
571
572							sub encoder($$;) {
573	6			6	1	9	my $self = shift;
574	6					9	my $charset = shift;
575	6	100				16	if ($charset) {
576	2	50				13	$charset = __PACKAGE__->new($charset) unless ref $charset;
577	2					6	$self->{OutputCharset} = $charset->{InputCharset};
578	2					5	$self->{Encoder} = $charset->{Decoder};
579	2					4	$self->{BodyEncoding} = $charset->{BodyEncoding};
580	2					4	$self->{HeaderEncoding} = $charset->{HeaderEncoding};
581							}
582	6					45	$self->{Encoder};
583							}
584
585							=item $charset->header_encoding
586
587							=item header_encoding CHARSET
588
589							Get recommended encoding scheme of CHARSET for message header.
590
591							Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
592							or C (might not be encoded). This may not be same as encoding
593							for message body.
594
595							=cut
596
597							sub header_encoding($) {
598	4			4	1	10	my $self = shift;
599	4	50				9	return undef unless $self;
600	4	100				12	$self = __PACKAGE__->new($self) unless ref $self;
601	4					17	$self->{HeaderEncoding};
602							}
603
604							=item $charset->output_charset
605
606							=item output_charset CHARSET
607
608							Get a charset which is compatible with given CHARSET and is recommended
609							to be used for MIME messages on Internet (if it is known by this module).
610
611							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
612							this function will simply
613							return the result of L<"canonical_charset">.
614
615							=cut
616
617							sub output_charset($) {
618	4			4	1	11	my $self = shift;
619	4	50				10	return undef unless $self;
620	4	100				15	$self = __PACKAGE__->new($self) unless ref $self;
621	4					21	$self->{OutputCharset};
622							}
623
624							=back
625
626							=head2 Translating Text Data
627
628							=over
629
630							=item $charset->body_encode(STRING [, OPTS])
631
632							=item body_encode STRING, CHARSET [, OPTS]
633
634							Get converted (if needed) data of STRING and recommended transfer-encoding
635							of that data for message body. CHARSET is the charset by which STRING
636							is encoded.
637
638							OPTS may accept following key-value pairs.
639							B:
640							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
641							conversion will not be performed. So these options do not have any effects.
642
643							=over 4
644
645							=item Detect7bit => YESNO
646
647							Try auto-detecting 7-bit charset when CHARSET is not given.
648							Default is C<"YES">.
649
650							=item Replacement => REPLACEMENT
651
652							Specifies error handling scheme. See L<"Error Handling">.
653
654							=back
655
656							3-item list of (I, I,
657							I) will be returned.
658							I will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
659							C<"7BIT"> or C<"8BIT">. If I could not be determined
660							and I contains non-ASCII byte(s), I will
661							be C and I will be C<"BASE64">.
662							I will be C<"US-ASCII"> if and only if string does not
663							contain any non-ASCII bytes.
664
665							=cut
666
667							sub body_encode {
668	4			4	1	421	my $self = shift;
669	4					8	my $text;
670	4	100				12	if (ref $self) {
671	2					5	$text = shift;
672							} else {
673	2					4	$text = $self;
674	2					10	$self = __PACKAGE__->new(shift);
675							}
676	4					17	my ($encoded, $charset) = $self->_text_encode($text, @_);
677							return ($encoded, undef, 'BASE64')
678	4	50	33			25	unless $charset and $charset->{InputCharset};
679	4					9	my $cset = $charset->{OutputCharset};
680
681							# Determine transfer-encoding.
682	4					7	my $enc = $charset->{BodyEncoding};
683
684	4	50	33			20	if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
		0
		0
		0
685	4	50				100	if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
		50
686	0					0	$enc = '8BIT';
687							} elsif ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
688	4					10	$enc = '7BIT';
689							} else { # Pure ASCII.
690	0					0	$enc = '7BIT';
691	0					0	$cset = 'US-ASCII';
692							}
693							} elsif ($enc eq 'S') {
694	0					0	$enc = _resolve_S($encoded, 1);
695							} elsif ($enc eq 'B') {
696	0					0	$enc = 'BASE64';
697							} elsif ($enc eq 'Q') {
698	0					0	$enc = 'QUOTED-PRINTABLE';
699							} else {
700	0					0	$enc = 'BASE64';
701							}
702	4					42	return ($encoded, $cset, $enc);
703							}
704
705							=item $charset->decode(STRING [,CHECK])
706
707							Decode STRING to Unicode.
708
709							B:
710							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
711							this function will die.
712
713							=cut
714
715							sub decode($$$;) {
716	14			14	1	1671	my $self = shift;
717	14					18	my $s = shift;
718	14		100			45	my $check = shift \|\| 0;
719	14					62	$self->{Decoder}->decode($s, $check);
720							}
721
722							=item detect_7bit_charset STRING
723
724							Guess 7-bit charset that may encode a string STRING.
725							If STRING contains any 8-bit bytes, C will be returned.
726							Otherwise, Default Charset will be returned for unknown charset.
727
728							=cut
729
730							sub detect_7bit_charset($) {
731	4	50		4	1	16	return $DEFAULT_CHARSET unless &USE_ENCODE;
732	4					6	my $s = shift;
733	4	50				10	return $DEFAULT_CHARSET unless $s;
734
735							# Non-7bit string
736	4	50				15	return undef if $s =~ $NON7BITRE;
737
738							# Try to detect 7-bit escape sequences.
739	4					10	foreach (@ESCAPE_SEQS) {
740	8					18	my ($seq, $cset) = @$_;
741	8	100				24	if (index($s, $seq) >= 0) {
742	4					15	my $decoder = __PACKAGE__->new($cset);
743	4	50				19	next unless $decoder->{Decoder};
744	4					8	eval {
745	4					5	my $dummy = $s;
746	4					18	$decoder->decode($dummy, FB_CROAK());
747							};
748	4	50				322	if ($@) {
749	0					0	next;
750							}
751	4					27	return $decoder->{InputCharset};
752							}
753							}
754
755							# How about HZ, VIQR, UTF-7, ...?
756
757	0					0	return $DEFAULT_CHARSET;
758							}
759
760							sub _detect_7bit_charset {
761	0			0		0	detect_7bit_charset(@_);
762							}
763
764							=item $charset->encode(STRING [, CHECK])
765
766							Encode STRING (Unicode or non-Unicode) using compatible charset recommended
767							to be used for messages on Internet (if this module knows it).
768							Note that string will be decoded to Unicode then encoded even if compatible charset
769							was equal to original charset.
770
771							B:
772							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
773							this function will die.
774
775							=cut
776
777							sub encode($$$;) {
778	10			10	1	663	my $self = shift;
779	10					17	my $s = shift;
780	10		50			32	my $check = shift \|\| 0;
781
782	10	100	66			57	unless (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
783	4	50				51	$s = $self->{Decoder}->decode($s, ($check & 0x1)? FB_CROAK(): 0);
784							}
785	10					44	my $enc = $self->{Encoder}->encode($s, $check);
786	10	50				5211	Encode::_utf8_off($enc) if is_utf8($enc); # workaround for RT #35120
787	10					33	$enc;
788							}
789
790							=item $charset->encoded_header_len(STRING [, ENCODING])
791
792							=item encoded_header_len STRING, ENCODING, CHARSET
793
794							Get length of encoded STRING for message header
795							(without folding).
796
797							ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
798							one of either C<"B"> or C<"Q">).
799
800							=cut
801
802							sub encoded_header_len($$$;) {
803	14			14	1	33	my $self = shift;
804	14					19	my ($encoding, $s);
805	14	100				26	if (ref $self) {
806	8					9	$s = shift;
807	8		66			23	$encoding = uc(shift \|\| $self->{HeaderEncoding});
808							} else {
809	6					7	$s = $self;
810	6					10	$encoding = uc(shift);
811	6					7	$self = shift;
812	6	50				18	$self = __PACKAGE__->new($self) unless ref $self;
813							}
814
815							#FIXME:$encoding === undef
816
817	14					17	my $enclen;
818	14	100	66			44	if ($encoding eq 'Q') {
		50
819	4					9	$enclen = _enclen_Q($s);
820							} elsif ($encoding eq 'S' and _resolve_S($s) eq 'Q') {
821	0					0	$enclen = _enclen_Q($s);
822							} else { # "B"
823	10					26	$enclen = _enclen_B($s);
824							}
825
826	14					64	length($self->{OutputCharset})+$enclen+7;
827							}
828
829							sub _enclen_B($) {
830	10			10		39	int((length(shift) + 2) / 3) * 4;
831							}
832
833							sub _enclen_Q($;$) {
834	4			4		15	my $s = shift;
835	4					7	my $in_body = shift;
836	4					6	my @o;
837	4	50				11	if ($in_body) {
838	0					0	@o = ($s =~ m{([^-\t\r\n !*+/0-9A-Za-z])}go);
839							} else {
840	4					33	@o = ($s =~ m{([^- !*+/0-9A-Za-z])}gos);
841							}
842	4					13	length($s) + scalar(@o) * 2;
843							}
844
845							sub _resolve_S($;$) {
846	6			6		8	my $s = shift;
847	6					8	my $in_body = shift;
848	6					7	my $e;
849	6	50				10	if ($in_body) {
850	0					0	$e = scalar(() = $s =~ m{[^-\t\r\n !*+/0-9A-Za-z]}g);
851	0	0				0	return (length($s) + 8 < $e * 6) ? 'BASE64' : 'QUOTED-PRINTABLE';
852							} else {
853	6					33	$e = scalar(() = $s =~ m{[^- !*+/0-9A-Za-z]}g);
854	6	50				27	return (length($s) + 8 < $e * 6) ? 'B' : 'Q';
855							}
856							}
857
858							=item $charset->header_encode(STRING [, OPTS])
859
860							=item header_encode STRING, CHARSET [, OPTS]
861
862							Get converted (if needed) data of STRING and recommended encoding scheme of
863							that data for message headers. CHARSET is the charset by which STRING
864							is encoded.
865
866							OPTS may accept following key-value pairs.
867							B:
868							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
869							conversion will not be performed. So these options do not have any effects.
870
871							=over 4
872
873							=item Detect7bit => YESNO
874
875							Try auto-detecting 7-bit charset when CHARSET is not given.
876							Default is C<"YES">.
877
878							=item Replacement => REPLACEMENT
879
880							Specifies error handling scheme. See L<"Error Handling">.
881
882							=back
883
884							3-item list of (I, I,
885							I) will be returned. I will be
886							either C<"B">, C<"Q"> or C (might not be encoded).
887							If I could not be determined and I
888							contains non-ASCII byte(s), I will be C<"8BIT">
889							(this is I charset name but a special value to represent unencodable
890							data) and I will be C (should not be encoded).
891							I will be C<"US-ASCII"> if and only if string does not
892							contain any non-ASCII bytes.
893
894							=cut
895
896							sub header_encode {
897	8			8	1	1143	my $self = shift;
898	8					13	my $text;
899	8	100				19	if (ref $self) {
900	4					8	$text = shift;
901							} else {
902	4					7	$text = $self;
903	4					11	$self = __PACKAGE__->new(shift);
904							}
905	8					35	my ($encoded, $charset) = $self->_text_encode($text, @_);
906							return ($encoded, '8BIT', undef)
907	8	50	33			36	unless $charset and $charset->{InputCharset};
908	8					15	my $cset = $charset->{OutputCharset};
909
910							# Determine encoding scheme.
911	8					13	my $enc = $charset->{HeaderEncoding};
912
913	8	100	66			63	if (!$enc and $encoded !~ $NON7BITRE) {
		50
		50
914	2	50				75	unless ($cset =~ /^($ISO2022RE\|$ASCIITRANSRE)$/) { # 7BIT.
915	2					6	$cset = 'US-ASCII';
916							}
917							} elsif ($enc eq 'S') {
918	0					0	$enc = _resolve_S($encoded);
919							} elsif ($enc !~ /^[BQ]$/) {
920	0					0	$enc = 'B';
921							}
922	8					44	return ($encoded, $cset, $enc);
923							}
924
925							sub _text_encode {
926	12			12		18	my $charset = shift;
927	12					18	my $s = shift;
928	12					24	my %params = @_;
929	12		33			68	my $replacement = uc($params{'Replacement'} \|\| $Config->{Replacement});
930	12		33			44	my $detect7bit = uc($params{'Detect7bit'} \|\| $Config->{Detect7bit});
931							my $encoding = $params{'Encoding'} \|\|
932	12		33			54	(exists $params{'Encoding'}? undef: 'A'); # undocumented
933
934	12	50	33			50	if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
935	0					0	$detect7bit = 'NO';
936							}
937	12	100				34	unless ($charset->{InputCharset}) {
938	4	50				29	if ($s =~ $NON7BITRE) {
		50
939	0					0	return ($s, undef);
940							} elsif ($detect7bit ne "NO") {
941	4					14	$charset = __PACKAGE__->new(&detect_7bit_charset($s));
942							} else {
943	0					0	$charset = __PACKAGE__->new($DEFAULT_CHARSET,
944							Mapping => 'STANDARD');
945							}
946							}
947	12	50	33			45	if (!$encoding or $encoding ne 'A') { # no conversion
948	0					0	$charset = $charset->dup;
949	0					0	$charset->encoder($charset);
950	0					0	$charset->{HeaderEncoding} = $encoding;
951	0					0	$charset->{BodyEncoding} = $encoding;
952							}
953							my $check = ($replacement and $replacement =~ /^\d+$/)?
954							$replacement:
955							{
956							'CROAK' => FB_CROAK(),
957							'STRICT' => FB_CROAK(),
958							'FALLBACK' => FB_CROAK(), # special
959							'PERLQQ' => FB_PERLQQ(),
960							'HTMLCREF' => FB_HTMLCREF(),
961							'XMLCREF' => FB_XMLCREF(),
962	12	50	33			163	}->{$replacement \|\| ""} \|\| 0;
			50
963
964							# Encode data by output charset if required. If failed, fallback to
965							# fallback charset.
966	12					29	my $encoded;
967	12	100	33			105	if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
			50
			50
			66
968							($charset->{InputCharset} \|\| "") ne ($charset->{OutputCharset} \|\| "")) {
969	4	50				11	if ($check & 0x1) { # CROAK or FALLBACK
970	0					0	eval {
971	0					0	$encoded = $s;
972	0					0	$encoded = $charset->encode($encoded, FB_CROAK());
973							};
974	0	0				0	if ($@) {
975	0	0	0			0	if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
976	0					0	my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
977							Mapping => 'STANDARD');
978							# croak unknown charset
979							croak "unknown charset ``$FALLBACK_CHARSET''"
980	0	0				0	unless $cset->{Decoder};
981							# charset translation
982	0					0	$charset = $charset->dup;
983	0					0	$charset->encoder($cset);
984	0					0	$encoded = $s;
985	0					0	$encoded = $charset->encode($encoded, 0);
986							# replace input & output charsets with fallback charset
987	0					0	$cset->encoder($cset);
988	0					0	$charset = $cset;
989							} else {
990	0					0	$@ =~ s/ at .+$//;
991	0					0	croak $@;
992							}
993							}
994							} else {
995	4					7	$encoded = $s;
996	4					13	$encoded = $charset->encode($encoded, $check);
997							}
998							} else {
999	8					12	$encoded = $s;
1000							}
1001
1002	12	100				72	if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
1003							# check ``ASCII transformation'' charsets
1004	4	50				162	if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/) {
		0
1005	4					10	my $u = $encoded;
1006	4					6	if (USE_ENCODE) {
1007	4					16	$u = $charset->encoder->decode($encoded); # dec. by output
1008							} elsif ($encoded =~ /[+~]/) { # workaround for pre-Encode env.
1009							$u = "x$u";
1010							}
1011	4	100				214	if ($u eq $encoded) {
1012	2					10	$charset = $charset->dup;
1013	2					5	$charset->encoder($DEFAULT_CHARSET);
1014							}
1015							} elsif ($charset->{OutputCharset} ne "US-ASCII") {
1016	0					0	$charset = $charset->dup;
1017	0					0	$charset->encoder($DEFAULT_CHARSET);
1018							}
1019							}
1020
1021	12					51	return ($encoded, $charset);
1022							}
1023
1024							=item $charset->undecode(STRING [,CHECK])
1025
1026							Encode Unicode string STRING to byte string by input charset of $charset.
1027							This is equivalent to C<$charset-Edecoder-Eencode()>.
1028
1029							B:
1030							When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
1031							this function will die.
1032
1033							=cut
1034
1035							sub undecode($$$;) {
1036	0			0	1		my $self = shift;
1037	0						my $s = shift;
1038	0		0				my $check = shift \|\| 0;
1039	0						my $enc = $self->{Decoder}->encode($s, $check);
1040	0						Encode::_utf8_off($enc); # workaround for RT #35120
1041	0						$enc;
1042							}
1043
1044							=back
1045
1046							=head2 Manipulating Module Defaults
1047
1048							=over
1049
1050							=item alias ALIAS [, CHARSET]
1051
1052							Get/set charset alias for canonical names determined by
1053							L<"canonical_charset">.
1054
1055							If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
1056							CHARSET. Otherwise, alias won't be changed. In both cases,
1057							current charset name that ALIAS is assigned will be returned.
1058
1059							=cut
1060
1061							sub alias ($;$) {
1062	0			0	1		my $alias = uc(shift);
1063	0						my $charset = uc(shift);
1064
1065	0	0					return $CHARSET_ALIASES{$alias} unless $charset;
1066
1067	0						$CHARSET_ALIASES{$alias} = $charset;
1068	0						return $charset;
1069							}
1070
1071							=item default [CHARSET]
1072
1073							Get/set default charset.
1074
1075							B is used by this module when charset context is
1076							unknown. Modules using this module are recommended to use this
1077							charset when charset context is unknown or implicit default is
1078							expected. By default, it is C<"US-ASCII">.
1079
1080							If CHARSET is given and isn't false, it will be set to default charset.
1081							Otherwise, default charset won't be changed. In both cases,
1082							current default charset will be returned.
1083
1084							B: Default charset I be changed.
1085
1086							=cut
1087
1088							sub default(;$) {
1089	0			0	1		my $charset = &canonical_charset(shift);
1090
1091	0	0					if ($charset) {
1092	0	0					croak "Unknown charset '$charset'"
1093							unless resolve_alias($charset);
1094	0						$DEFAULT_CHARSET = $charset;
1095							}
1096	0						return $DEFAULT_CHARSET;
1097							}
1098
1099							=item fallback [CHARSET]
1100
1101							Get/set fallback charset.
1102
1103							B is used by this module when conversion by given
1104							charset is failed and C<"FALLBACK"> error handling scheme is specified.
1105							Modules using this module may use this charset as last resort of charset
1106							for conversion. By default, it is C<"UTF-8">.
1107
1108							If CHARSET is given and isn't false, it will be set to fallback charset.
1109							If CHARSET is C<"NONE">, fallback charset will be undefined.
1110							Otherwise, fallback charset won't be changed. In any cases,
1111							current fallback charset will be returned.
1112
1113							B: It I useful that C<"US-ASCII"> is specified as fallback charset,
1114							since result of conversion will be readable without charset information.
1115
1116							=cut
1117
1118							sub fallback(;$) {
1119	0			0	1		my $charset = &canonical_charset(shift);
1120
1121	0	0					if ($charset eq "NONE") {
		0
1122	0						$FALLBACK_CHARSET = undef;
1123							} elsif ($charset) {
1124	0	0					croak "Unknown charset '$charset'"
1125							unless resolve_alias($charset);
1126	0						$FALLBACK_CHARSET = $charset;
1127							}
1128	0						return $FALLBACK_CHARSET;
1129							}
1130
1131							=item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
1132
1133							Get/set charset profiles.
1134
1135							If optional arguments are given and any of them are not false, profiles
1136							for CHARSET will be set by those arguments. Otherwise, profiles
1137							won't be changed. In both cases, current profiles for CHARSET will be
1138							returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
1139
1140							HEADERENC is recommended encoding scheme for message header.
1141							It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1142							C (might not be encoded).
1143
1144							BODYENC is recommended transfer-encoding for message body. It may be
1145							one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1146							C (might not be transfer-encoded).
1147
1148							ENCCHARSET is a charset which is compatible with given CHARSET and
1149							is recommended to be used for MIME messages on Internet.
1150							If conversion is not needed (or this module doesn't know appropriate
1151							charset), ENCCHARSET is C.
1152
1153							B: This function in the future releases can accept more optional
1154							arguments (for example, properties to handle character widths, line folding
1155							behavior, ...). So format of returned value may probably be changed.
1156							Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
1157							particular profile.
1158
1159							=cut
1160
1161							sub recommended ($;$;$;$) {
1162	0			0	1		my $charset = &canonical_charset(shift);
1163	0		0				my $henc = uc(shift) \|\| undef;
1164	0		0				my $benc = uc(shift) \|\| undef;
1165	0						my $cset = &canonical_charset(shift);
1166
1167	0	0					croak "CHARSET is not specified" unless $charset;
1168	0	0	0				croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
1169	0	0	0				croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
1170
1171	0	0	0				if ($henc or $benc or $cset) {
			0
1172	0	0					$cset = undef if $charset eq $cset;
1173	0						my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
1174	0						$CHARSETS{$charset} = \@spec;
1175	0						return @spec;
1176							} else {
1177	0	0					$charset = __PACKAGE__->new($charset) unless ref $charset;
1178	0						return map { $charset->{$_} } qw(HeaderEncoding BodyEncoding
	0
1179							OutputCharset);
1180							}
1181							}
1182
1183							=back
1184
1185							=head2 Constants
1186
1187							=over
1188
1189							=item USE_ENCODE
1190
1191							Unicode/multibyte support flag.
1192							Non-empty string will be set when Unicode and multibyte support is enabled.
1193							Currently, this flag will be non-empty on Perl 5.7.3 or later and
1194							empty string on earlier versions of Perl.
1195
1196							=back
1197
1198							=head2 Error Handling
1199
1200							L<"body_encode"> and L<"header_encode"> accept following C
1201							options:
1202
1203							=over
1204
1205							=item C<"DEFAULT">
1206
1207							Put a substitution character in place of a malformed character.
1208							For UCM-based encodings, will be used.
1209
1210							=item C<"FALLBACK">
1211
1212							Try C<"DEFAULT"> scheme using I (see L<"fallback">).
1213							When fallback charset is undefined and conversion causes error,
1214							code will die on error with an error message.
1215
1216							=item C<"CROAK">
1217
1218							Code will die on error immediately with an error message.
1219							Therefore, you should trap the fatal error with eval{} unless you
1220							really want to let it die on error.
1221							Synonym is C<"STRICT">.
1222
1223							=item C<"PERLQQ">
1224
1225							=item C<"HTMLCREF">
1226
1227							=item C<"XMLCREF">
1228
1229							Use C, C or C
1230							scheme defined by L module.
1231
1232							=item numeric values
1233
1234							Numeric values are also allowed.
1235							For more details see L.
1236
1237							=back
1238
1239							If error handling scheme is not specified or unknown scheme is specified,
1240							C<"DEFAULT"> will be assumed.
1241
1242							=head2 Configuration File
1243
1244							Built-in defaults for option parameters can be overridden by configuration
1245							file: F.
1246							For more details read F.
1247
1248							=head1 VERSION
1249
1250							Consult $VERSION variable.
1251
1252							Development versions of this module may be found at
1253							L.
1254
1255							=head2 Incompatible Changes
1256
1257							=over 4
1258
1259							=item Release 1.001
1260
1261							=over 4
1262
1263							=item *
1264
1265							new() method returns an object when CHARSET argument is not specified.
1266
1267							=back
1268
1269							=item Release 1.005
1270
1271							=over 4
1272
1273							=item *
1274
1275							Restrict characters in encoded-word according to RFC 2047 section 5 (3).
1276							This also affects return value of encoded_header_len() method.
1277
1278							=back
1279
1280							=item Release 1.008.2
1281
1282							=over 4
1283
1284							=item *
1285
1286							body_encoding() method may also returns C<"S">.
1287
1288							=item *
1289
1290							Return value of body_encode() method for UTF-8 may include
1291							C<"QUOTED-PRINTABLE"> encoding item that in earlier versions was fixed to
1292							C<"BASE64">.
1293
1294							=back
1295
1296							=back
1297
1298							=head1 SEE ALSO
1299
1300							Multipurpose Internet Mail Extensions (MIME).
1301
1302							=head1 AUTHOR
1303
1304							Hatuka*nezumi - IKEDA Soji
1305
1306							=head1 COPYRIGHT
1307
1308							Copyright (C) 2006-2017 Hatuka*nezumi - IKEDA Soji.
1309							This program is free software; you can redistribute it and/or modify it
1310							under the same terms as Perl itself.
1311
1312							=cut
1313
1314							1;