File Coverage

blib/lib/MIME/Charset.pm
Criterion Covered Total %
statement 213 290 73.4
branch 90 166 54.2
condition 43 99 43.4
subroutine 29 35 82.8
pod 20 20 100.0
total 395 610 64.7


line stmt bran cond sub pod time code
1             #-*- perl -*-
2              
3             package MIME::Charset;
4 9     9   69055 use 5.005;
  9         26  
5              
6             =head1 NAME
7              
8             MIME::Charset - Charset Information for MIME
9              
10             =head1 SYNOPSIS
11              
12             use MIME::Charset:
13              
14             $charset = MIME::Charset->new("euc-jp");
15              
16             Getting charset information:
17              
18             $benc = $charset->body_encoding; # e.g. "Q"
19             $cset = $charset->as_string; # e.g. "US-ASCII"
20             $henc = $charset->header_encoding; # e.g. "S"
21             $cset = $charset->output_charset; # e.g. "ISO-2022-JP"
22              
23             Translating text data:
24              
25             ($text, $charset, $encoding) =
26             $charset->header_encode(
27             "\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
28             "\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
29             Charset => 'euc-jp');
30             # ...returns e.g. (, "ISO-2022-JP", "B").
31              
32             ($text, $charset, $encoding) =
33             $charset->body_encode(
34             "Collectioneur path\xe9tiquement ".
35             "\xe9clectique de d\xe9chets",
36             Charset => 'latin1');
37             # ...returns e.g. (, "ISO-8859-1", "QUOTED-PRINTABLE").
38              
39             $len = $charset->encoded_header_len(
40             "Perl\xe8\xa8\x80\xe8\xaa\x9e",
41             Charset => 'utf-8',
42             Encoding => "b");
43             # ...returns e.g. 28.
44              
45             Manipulating module defaults:
46              
47             MIME::Charset::alias("csEUCKR", "euc-kr");
48             MIME::Charset::default("iso-8859-1");
49             MIME::Charset::fallback("us-ascii");
50              
51             Non-OO functions (may be deprecated in near future):
52              
53             use MIME::Charset qw(:info);
54              
55             $benc = body_encoding("iso-8859-2"); # "Q"
56             $cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
57             $henc = header_encoding("utf-8"); # "S"
58             $cset = output_charset("shift_jis"); # "ISO-2022-JP"
59              
60             use MIME::Charset qw(:trans);
61              
62             ($text, $charset, $encoding) =
63             header_encode(
64             "\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
65             "\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
66             "euc-jp");
67             # ...returns (, "ISO-2022-JP", "B");
68              
69             ($text, $charset, $encoding) =
70             body_encode(
71             "Collectioneur path\xe9tiquement ".
72             "\xe9clectique de d\xe9chets",
73             "latin1");
74             # ...returns (, "ISO-8859-1", "QUOTED-PRINTABLE");
75              
76             $len = encoded_header_len(
77             "Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
78              
79             =head1 DESCRIPTION
80              
81             MIME::Charset provides information about character sets used for
82             MIME messages on Internet.
83              
84             =head2 Definitions
85              
86             The B is ``character set'' used in MIME to refer to a
87             method of converting a sequence of octets into a sequence of characters.
88             It includes both concepts of ``coded character set'' (CCS) and
89             ``character encoding scheme'' (CES) of ISO/IEC.
90              
91             The B is that used in MIME to refer to a method of representing
92             a body part or a header body as sequence(s) of printable US-ASCII
93             characters.
94              
95             =cut
96              
97 9     9   36 use strict;
  9         10  
  9         255  
98 9     9   29 use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS $Config);
  9         15  
  9         597  
99 9     9   30 use Exporter;
  9         12  
  9         735  
100             @ISA = qw(Exporter);
101             @EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
102             body_encode encoded_header_len header_encode);
103             @EXPORT_OK = qw(alias default fallback recommended);
104             %EXPORT_TAGS = (
105             "info" => [qw(body_encoding header_encoding
106             canonical_charset output_charset)],
107             "trans" =>[ qw(body_encode encoded_header_len
108             header_encode)],
109             );
110 9     9   34 use Carp qw(croak);
  9         9  
  9         489  
111              
112 9 50   9   34 use constant USE_ENCODE => ($] >= 5.007003)? 'Encode': '';
  9         9  
  9         1081  
113              
114             my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
115             is_utf8 resolve_alias);
116             if (USE_ENCODE) {
117 9     9   4908 eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
  9         75112  
  9         664  
118             if ($@) { # Perl 5.7.3 + Encode 0.40
119             eval "use ".USE_ENCODE." qw(is_utf8);";
120             require MIME::Charset::_Compat;
121             for my $sub (@ENCODE_SUBS) {
122 9     9   34 no strict "refs";
  9         13  
  9         591  
123             *{$sub} = \&{"MIME::Charset::_Compat::$sub"}
124             unless $sub eq 'is_utf8';
125             }
126             }
127             } else {
128             require MIME::Charset::_Compat;
129             for my $sub (@ENCODE_SUBS) {
130 9     9   28 no strict "refs";
  9         10  
  9         27624  
131             *{$sub} = \&{"MIME::Charset::_Compat::$sub"};
132             }
133             }
134              
135             $VERSION = '1.012.2';
136              
137             ######## Private Attributes ########
138              
139             my $DEFAULT_CHARSET = 'US-ASCII';
140             my $FALLBACK_CHARSET = 'UTF-8';
141              
142             # This table was initially borrowed from Python email package.
143              
144             my %CHARSETS = (# input header enc body enc output conv
145             'ISO-8859-1' => ['Q', 'Q', undef],
146             'ISO-8859-2' => ['Q', 'Q', undef],
147             'ISO-8859-3' => ['Q', 'Q', undef],
148             'ISO-8859-4' => ['Q', 'Q', undef],
149             # ISO-8859-5 is Cyrillic, and not especially used
150             # ISO-8859-6 is Arabic, also not particularly used
151             # ISO-8859-7 is Greek, 'Q' will not make it readable
152             # ISO-8859-8 is Hebrew, 'Q' will not make it readable
153             'ISO-8859-9' => ['Q', 'Q', undef],
154             'ISO-8859-10' => ['Q', 'Q', undef],
155             # ISO-8859-11 is Thai, 'Q' will not make it readable
156             'ISO-8859-13' => ['Q', 'Q', undef],
157             'ISO-8859-14' => ['Q', 'Q', undef],
158             'ISO-8859-15' => ['Q', 'Q', undef],
159             'ISO-8859-16' => ['Q', 'Q', undef],
160             'WINDOWS-1252' => ['Q', 'Q', undef],
161             'VISCII' => ['Q', 'Q', undef],
162             'US-ASCII' => [undef, undef, undef],
163             'BIG5' => ['B', 'B', undef],
164             'GB2312' => ['B', 'B', undef],
165             'HZ-GB-2312' => ['B', undef, undef],
166             'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
167             'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
168             'ISO-2022-JP' => ['B', undef, undef],
169             'ISO-2022-JP-1' => ['B', undef, undef],
170             'ISO-2022-JP-2' => ['B', undef, undef],
171             'EUC-JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
172             'SHIFT_JISX0213' => ['B', undef, 'ISO-2022-JP-3'],
173             'ISO-2022-JP-3' => ['B', undef, undef],
174             'EUC-JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
175             'SHIFT_JIS-2004' => ['B', undef, 'ISO-2022-JP-2004'],
176             'ISO-2022-JP-2004' => ['B', undef, undef],
177             'KOI8-R' => ['B', 'B', undef],
178             'TIS-620' => ['B', 'B', undef], # cf. Mew
179             'UTF-16' => ['B', 'B', undef],
180             'UTF-16BE' => ['B', 'B', undef],
181             'UTF-16LE' => ['B', 'B', undef],
182             'UTF-32' => ['B', 'B', undef],
183             'UTF-32BE' => ['B', 'B', undef],
184             'UTF-32LE' => ['B', 'B', undef],
185             'UTF-7' => ['Q', undef, undef],
186             'UTF-8' => ['S', 'S', undef],
187             'GSM03.38' => [undef, undef, undef], # not for MIME
188             # We're making this one up to represent raw unencoded 8bit
189             '8BIT' => [undef, 'B', 'ISO-8859-1'],
190             );
191              
192             # Fix some unexpected or unpreferred names returned by
193             # Encode::resolve_alias() or used by somebodies else.
194             my %CHARSET_ALIASES = (# unpreferred preferred
195             "ASCII" => "US-ASCII",
196             "BIG5-ETEN" => "BIG5",
197             "CP1250" => "WINDOWS-1250",
198             "CP1251" => "WINDOWS-1251",
199             "CP1252" => "WINDOWS-1252",
200             "CP1253" => "WINDOWS-1253",
201             "CP1254" => "WINDOWS-1254",
202             "CP1255" => "WINDOWS-1255",
203             "CP1256" => "WINDOWS-1256",
204             "CP1257" => "WINDOWS-1257",
205             "CP1258" => "WINDOWS-1258",
206             "CP874" => "WINDOWS-874",
207             "CP936" => "GBK",
208             "CP949" => "KS_C_5601-1987",
209             "EUC-CN" => "GB2312",
210             "HZ" => "HZ-GB-2312", # RFC 1842
211             "KS_C_5601" => "KS_C_5601-1987",
212             "SHIFTJIS" => "SHIFT_JIS",
213             "SHIFTJISX0213" => "SHIFT_JISX0213",
214             "TIS620" => "TIS-620", # IANA MIBenum 2259
215             "UNICODE-1-1-UTF-7" => "UTF-7", # RFC 1642 (obs.)
216             "UTF8" => "UTF-8",
217             "UTF-8-STRICT" => "UTF-8", # Perl internal use
218             "GSM0338" => "GSM03.38", # not for MIME
219             );
220              
221             # Some vendors encode characters beyond standardized mappings using extended
222             # encoders. Some other standard encoders need additional encode modules.
223             my %ENCODERS = (
224             'EXTENDED' => {
225             'ISO-8859-1' => [['cp1252'], ], # Encode::Byte
226             'ISO-8859-2' => [['cp1250'], ], # Encode::Byte
227             'ISO-8859-5' => [['cp1251'], ], # Encode::Byte
228             'ISO-8859-6' => [
229             ['cp1256'], # Encode::Byte
230             # ['cp1006'], # ditto, for Farsi
231             ],
232             'ISO-8859-6-I'=>[['cp1256'], ], # ditto
233             'ISO-8859-7' => [['cp1253'], ], # Encode::Byte
234             'ISO-8859-8' => [['cp1255'], ], # Encode::Byte
235             'ISO-8859-8-I'=>[['cp1255'], ], # ditto
236             'ISO-8859-9' => [['cp1254'], ], # Encode::Byte
237             'ISO-8859-13'=> [['cp1257'], ], # Encode::Byte
238             'GB2312' => [
239             ['gb18030', 'Encode::HanExtra'],
240             ['cp936'], # Encode::CN
241             ],
242             'EUC-JP' => [
243             ['eucJP-ascii', 'Encode::EUCJPASCII'],
244             # ['cp51932', 'Encode::EUCJPMS'],
245             ],
246             'ISO-2022-JP'=> [
247             ['x-iso2022jp-ascii',
248             'Encode::EUCJPASCII'],
249             # ['iso-2022-jp-ms','Encode::ISO2022JPMS'],
250             # ['cp50220', 'Encode::EUCJPMS'],
251             # ['cp50221', 'Encode::EUCJPMS'],
252             ['iso-2022-jp-1'], # Encode::JP (note*)
253             ],
254             'SHIFT_JIS' => [
255             ['cp932'], # Encode::JP
256             ],
257             'EUC-JISX0213' => [['euc-jis-2004', 'Encode::JISX0213'], ],
258             'ISO-2022-JP-3' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
259             'SHIFT_JISX0213'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
260             'EUC-KR' => [['cp949'], ], # Encode::KR
261             'BIG5' => [
262             # ['big5plus', 'Encode::HanExtra'],
263             # ['big5-2003', 'Encode::HanExtra'],
264             ['cp950'], # Encode::TW
265             # ['big5-1984', 'Encode::HanExtra'],
266             ],
267             'TIS-620' => [['cp874'], ], # Encode::Byte
268             'UTF-8' => [['utf8'], ], # Special name on Perl
269             },
270             'STANDARD' => {
271             'ISO-8859-6-E' => [['iso-8859-6'],],# Encode::Byte
272             'ISO-8859-6-I' => [['iso-8859-6'],],# ditto
273             'ISO-8859-8-E' => [['iso-8859-8'],],# Encode::Byte
274             'ISO-8859-8-I' => [['iso-8859-8'],],# ditto
275             'GB18030' => [['gb18030', 'Encode::HanExtra'], ],
276             'ISO-2022-JP-2' => [['iso-2022-jp-2','Encode::ISO2022JP2'], ],
277             'EUC-JISX0213' => [['euc-jisx0213', 'Encode::JISX0213'], ],
278             'ISO-2022-JP-3' => [['iso-2022-jp-3', 'Encode::JISX0213'], ],
279             'EUC-JIS-2004' => [['euc-jis-2004', 'Encode::JISX0213'], ],
280             'ISO-2022-JP-2004' => [['iso-2022-jp-2004', 'Encode::JISX0213'], ],
281             'SHIFT_JIS-2004'=> [['shift_jis-2004', 'Encode::ShiftJIS2004'], ],
282             'EUC-TW' => [['euc-tw', 'Encode::HanExtra'], ],
283             'HZ-GB-2312' => [['hz'], ], # Encode::CN
284             'TIS-620' => [['tis620'], ], # (note*)
285             'UTF-16' => [['x-utf16auto', 'MIME::Charset::UTF'],],
286             'UTF-32' => [['x-utf32auto', 'MIME::Charset::UTF'],],
287             'GSM03.38' => [['gsm0338'], ], # Encode::GSM0338
288              
289             # (note*) ISO-8859-11 was not registered by IANA.
290             # L treats it as canonical name of ``tis-?620''.
291             },
292             );
293              
294             # ISO-2022-* escape sequences etc. to detect charset from unencoded data.
295             my @ESCAPE_SEQS = (
296             # ISO-2022-* sequences
297             # escape seq, possible charset
298             # Following sequences are commonly used.
299             ["\033\$\@", "ISO-2022-JP"], # RFC 1468
300             ["\033\$B", "ISO-2022-JP"], # ditto
301             ["\033(J", "ISO-2022-JP"], # ditto
302             ["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
303             ["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
304             # Following sequences are less commonly used.
305             ["\033.A", "ISO-2022-JP-2"], # RFC 1554
306             ["\033.F", "ISO-2022-JP-2"], # ditto
307             ["\033\$(C", "ISO-2022-JP-2"], # ditto
308             ["\033\$(O", "ISO-2022-JP-3"], # JIS X 0213:2000
309             ["\033\$(P", "ISO-2022-JP-2004"], # JIS X 0213:2000/2004
310             ["\033\$(Q", "ISO-2022-JP-2004"], # JIS X 0213:2004
311             ["\033\$)C", "ISO-2022-KR"], # RFC 1557
312             ["\033\$)A", "ISO-2022-CN"], # RFC 1922
313             ["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
314             ["\033\$)G", "ISO-2022-CN"], # ditto
315             ["\033\$*H", "ISO-2022-CN"], # ditto
316             # Other sequences will be used with appropriate charset
317             # parameters, or hardly used.
318              
319             # note*: This RFC defines ISO-2022-JP-1, superset of
320             # ISO-2022-JP. But that charset name is rarely used.
321             # OTOH many of encoders for ISO-2022-JP recognize this
322             # sequence so that comatibility with EUC-JP will be
323             # guaranteed.
324              
325             # Singlebyte 7-bit sequences
326             # escape seq, possible charset
327             ["\033e", "GSM03.38"], # ESTI GSM 03.38 (note*)
328             ["\033\012", "GSM03.38"], # ditto
329             ["\033<", "GSM03.38"], # ditto
330             ["\033/", "GSM03.38"], # ditto
331             ["\033>", "GSM03.38"], # ditto
332             ["\033\024", "GSM03.38"], # ditto
333             ["\033(", "GSM03.38"], # ditto
334             ["\033\@", "GSM03.38"], # ditto
335             ["\033)", "GSM03.38"], # ditto
336             ["\033=", "GSM03.38"], # ditto
337              
338             # note*: This is not used for MIME message.
339             );
340              
341             ######## Public Configuration Attributes ########
342              
343             $Config = {
344             Detect7bit => 'YES',
345             Mapping => 'EXTENDED',
346             Replacement => 'DEFAULT',
347             };
348             local @INC = @INC;
349             pop @INC if $INC[-1] eq '.';
350             eval { require MIME::Charset::Defaults; };
351              
352             ######## Private Constants ########
353              
354             my $NON7BITRE = qr{
355             [^\x01-\x7e]
356             }x;
357              
358             my $NONASCIIRE = qr{
359             [^\x09\x0a\x0d\x20\x21-\x7e]
360             }x;
361              
362             my $ISO2022RE = qr{
363             ISO-2022-.+
364             }ix;
365              
366             my $ASCIITRANSRE = qr{
367             HZ-GB-2312 | UTF-7
368             }ix;
369              
370              
371             ######## Public Functions ########
372              
373             =head2 Constructor
374              
375             =over
376              
377             =item $charset = MIME::Charset->new([CHARSET [, OPTS]])
378              
379             Create charset object.
380              
381             OPTS may accept following key-value pair.
382             B:
383             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
384             conversion will not be performed. So this option do not have any effects.
385              
386             =over 4
387              
388             =item Mapping => MAPTYPE
389              
390             Whether to extend mappings actually used for charset names or not.
391             C<"EXTENDED"> uses extended mappings.
392             C<"STANDARD"> uses standardized strict mappings.
393             Default is C<"EXTENDED">.
394              
395             =back
396              
397             =cut
398              
399             sub new {
400 75     75 1 700 my $class = shift;
401 75         91 my $charset = shift;
402 75 100       150 return bless {}, $class unless $charset;
403 70 50       131 return bless {}, $class if 75 < length $charset; # w/a for CPAN RT #65796.
404 70         97 my %params = @_;
405 70   33     348 my $mapping = uc($params{'Mapping'} || $Config->{Mapping});
406              
407 70 100       216 if ($charset =~ /\bhz.?gb.?2312$/i) {
    100          
408             # workaround: "HZ-GB-2312" mistakenly treated as "EUC-CN" by Encode
409             # (2.12).
410 4         6 $charset = "HZ-GB-2312";
411             } elsif ($charset =~ /\btis-?620$/i) {
412             # workaround: "TIS620" treated as ISO-8859-11 by Encode.
413             # And "TIS-620" not known by some versions of Encode (cf.
414             # CPAN RT #20781).
415 1         2 $charset = "TIS-620";
416             } else {
417 65   66     142 $charset = resolve_alias($charset) || $charset
418             }
419 70   66     66531 $charset = $CHARSET_ALIASES{uc($charset)} || uc($charset);
420 70         68 my ($henc, $benc, $outcset);
421 70         106 my $spec = $CHARSETS{$charset};
422 70 100       98 if ($spec) {
423 60         123 ($henc, $benc, $outcset) =
424             ($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
425             } else {
426 10         14 ($henc, $benc, $outcset) = ('S', 'B', undef);
427             }
428 70         61 my ($decoder, $encoder);
429 70         54 if (USE_ENCODE) {
430 70         106 $decoder = _find_encoder($charset, $mapping);
431 70         93 $encoder = _find_encoder($outcset, $mapping);
432             } else {
433             $decoder = $encoder = undef;
434             }
435              
436 70   66     605 bless {
      100        
437             InputCharset => $charset,
438             Decoder => $decoder,
439             HeaderEncoding => $henc,
440             BodyEncoding => $benc,
441             OutputCharset => ($outcset || $charset),
442             Encoder => ($encoder || $decoder),
443             }, $class;
444             }
445              
446             my %encoder_cache = ();
447              
448             sub _find_encoder($$) {
449 140   100 140   312 my $charset = uc(shift || "");
450 140 100       214 return undef unless $charset;
451 78         77 my $mapping = uc(shift);
452 78         64 my ($spec, $name, $module, $encoder);
453              
454 78         61 local($@);
455 78         143 $encoder = $encoder_cache{$charset, $mapping};
456 78 100       133 return $encoder if ref $encoder;
457              
458 55         77 foreach my $m (('EXTENDED', 'STANDARD')) {
459 82 50 66     273 next if $m eq 'EXTENDED' and $mapping ne 'EXTENDED';
460 82         119 $spec = $ENCODERS{$m}->{$charset};
461 82 100       125 next unless $spec;
462 41         33 foreach my $s (@{$spec}) {
  41         70  
463 47         33 ($name, $module) = @{$s};
  47         74  
464 47 100       105 if ($module) {
465 14 100       770 next unless eval "require $module;";
466             }
467 37         121 $encoder = Encode::find_encoding($name);
468 37 50       8902 last if ref $encoder;
469             }
470 41 100       73 last if ref $encoder;
471             }
472 55   100     167 $encoder ||= Encode::find_encoding($charset);
473 55 100       1060 $encoder_cache{$charset, $mapping} = $encoder if $encoder;
474 55         88 return $encoder;
475             }
476              
477             =back
478              
479             =head2 Getting Information of Charsets
480              
481             =over
482              
483             =item $charset->body_encoding
484              
485             =item body_encoding CHARSET
486              
487             Get recommended transfer-encoding of CHARSET for message body.
488              
489             Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE),
490             C<"S"> (shorter one of either) or
491             C (might not be transfer-encoded; either 7BIT or 8BIT). This may
492             not be same as encoding for message header.
493              
494             =cut
495              
496             sub body_encoding($) {
497 4     4 1 13 my $self = shift;
498 4 50       13 return undef unless $self;
499 4 100       15 $self = __PACKAGE__->new($self) unless ref $self;
500 4         25 $self->{BodyEncoding};
501             }
502              
503             =item $charset->as_string
504              
505             =item canonical_charset CHARSET
506              
507             Get canonical name for charset.
508              
509             =cut
510              
511             sub canonical_charset($) {
512 4     4 1 11 my $self = shift;
513 4 50       9 return undef unless $self;
514 4 100       9 $self = __PACKAGE__->new($self) unless ref $self;
515 4         16 $self->{InputCharset};
516             }
517              
518             sub as_string($) {
519 29     29 1 82 my $self = shift;
520 29         77 $self->{InputCharset};
521             }
522              
523             =item $charset->decoder
524              
525             Get L<"Encode::Encoding"> object to decode strings to Unicode by charset.
526             If charset is not specified or not known by this module,
527             undef will be returned.
528              
529             =cut
530              
531             sub decoder($) {
532 12     12 1 26 my $self = shift;
533 12         29 $self->{Decoder};
534             }
535              
536             =item $charset->dup
537              
538             Get a copy of charset object.
539              
540             =cut
541              
542             sub dup($) {
543 2     2 1 3 my $self = shift;
544 2         7 my $obj = __PACKAGE__->new(undef);
545 2         3 %{$obj} = %{$self};
  2         8  
  2         7  
546 2         6 $obj;
547             }
548              
549             =item $charset->encoder([CHARSET])
550              
551             Get L<"Encode::Encoding"> object to encode Unicode string using compatible
552             charset recommended to be used for messages on Internet.
553              
554             If optional CHARSET is specified, replace encoder (and output charset
555             name) of $charset object with those of CHARSET, therefore,
556             $charset object will be a converter between original charset and
557             new CHARSET.
558              
559             =cut
560              
561             sub encoder($$;) {
562 6     6 1 7 my $self = shift;
563 6         7 my $charset = shift;
564 6 100       15 if ($charset) {
565 2 50       7 $charset = __PACKAGE__->new($charset) unless ref $charset;
566 2         4 $self->{OutputCharset} = $charset->{InputCharset};
567 2         3 $self->{Encoder} = $charset->{Decoder};
568 2         4 $self->{BodyEncoding} = $charset->{BodyEncoding};
569 2         3 $self->{HeaderEncoding} = $charset->{HeaderEncoding};
570             }
571 6         17 $self->{Encoder};
572             }
573              
574             =item $charset->header_encoding
575              
576             =item header_encoding CHARSET
577              
578             Get recommended encoding scheme of CHARSET for message header.
579              
580             Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
581             or C (might not be encoded). This may not be same as encoding
582             for message body.
583              
584             =cut
585              
586             sub header_encoding($) {
587 4     4 1 9 my $self = shift;
588 4 50       9 return undef unless $self;
589 4 100       9 $self = __PACKAGE__->new($self) unless ref $self;
590 4         16 $self->{HeaderEncoding};
591             }
592              
593             =item $charset->output_charset
594              
595             =item output_charset CHARSET
596              
597             Get a charset which is compatible with given CHARSET and is recommended
598             to be used for MIME messages on Internet (if it is known by this module).
599              
600             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
601             this function will simply
602             return the result of L<"canonical_charset">.
603              
604             =cut
605              
606             sub output_charset($) {
607 4     4 1 15 my $self = shift;
608 4 50       9 return undef unless $self;
609 4 100       12 $self = __PACKAGE__->new($self) unless ref $self;
610 4         21 $self->{OutputCharset};
611             }
612              
613             =back
614              
615             =head2 Translating Text Data
616              
617             =over
618              
619             =item $charset->body_encode(STRING [, OPTS])
620              
621             =item body_encode STRING, CHARSET [, OPTS]
622              
623             Get converted (if needed) data of STRING and recommended transfer-encoding
624             of that data for message body. CHARSET is the charset by which STRING
625             is encoded.
626              
627             OPTS may accept following key-value pairs.
628             B:
629             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
630             conversion will not be performed. So these options do not have any effects.
631              
632             =over 4
633              
634             =item Detect7bit => YESNO
635              
636             Try auto-detecting 7-bit charset when CHARSET is not given.
637             Default is C<"YES">.
638              
639             =item Replacement => REPLACEMENT
640              
641             Specifies error handling scheme. See L<"Error Handling">.
642              
643             =back
644              
645             3-item list of (I, I,
646             I) will be returned.
647             I will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
648             C<"7BIT"> or C<"8BIT">. If I could not be determined
649             and I contains non-ASCII byte(s), I will
650             be C and I will be C<"BASE64">.
651             I will be C<"US-ASCII"> if and only if string does not
652             contain any non-ASCII bytes.
653              
654             =cut
655              
656             sub body_encode {
657 4     4 1 263 my $self = shift;
658 4         4 my $text;
659 4 100       10 if (ref $self) {
660 2         3 $text = shift;
661             } else {
662 2         1 $text = $self;
663 2         10 $self = __PACKAGE__->new(shift);
664             }
665 4         17 my ($encoded, $charset) = $self->_text_encode($text, @_);
666             return ($encoded, undef, 'BASE64')
667 4 50 33     18 unless $charset and $charset->{InputCharset};
668 4         5 my $cset = $charset->{OutputCharset};
669              
670             # Determine transfer-encoding.
671 4         4 my $enc = $charset->{BodyEncoding};
672              
673 4 50 33     20 if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
    0          
    0          
    0          
674 4 50       93 if ($encoded =~ $NON7BITRE) { # String contains 8bit char(s).
    50          
675 0         0 $enc = '8BIT';
676             } elsif ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
677 4         6 $enc = '7BIT';
678             } else { # Pure ASCII.
679 0         0 $enc = '7BIT';
680 0         0 $cset = 'US-ASCII';
681             }
682             } elsif ($enc eq 'S') {
683 0         0 $enc = _resolve_S($encoded, 1);
684             } elsif ($enc eq 'B') {
685 0         0 $enc = 'BASE64';
686             } elsif ($enc eq 'Q') {
687 0         0 $enc = 'QUOTED-PRINTABLE';
688             } else {
689 0         0 $enc = 'BASE64';
690             }
691 4         24 return ($encoded, $cset, $enc);
692             }
693              
694             =item $charset->decode(STRING [,CHECK])
695              
696             Decode STRING to Unicode.
697              
698             B:
699             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
700             this function will die.
701              
702             =cut
703              
704             sub decode($$$;) {
705 14     14 1 831 my $self = shift;
706 14         16 my $s = shift;
707 14   100     46 my $check = shift || 0;
708 14         53 $self->{Decoder}->decode($s, $check);
709             }
710              
711             =item detect_7bit_charset STRING
712              
713             Guess 7-bit charset that may encode a string STRING.
714             If STRING contains any 8-bit bytes, C will be returned.
715             Otherwise, Default Charset will be returned for unknown charset.
716              
717             =cut
718              
719             sub detect_7bit_charset($) {
720 4 50   4 1 11 return $DEFAULT_CHARSET unless &USE_ENCODE;
721 4         5 my $s = shift;
722 4 50       8 return $DEFAULT_CHARSET unless $s;
723              
724             # Non-7bit string
725 4 50       11 return undef if $s =~ $NON7BITRE;
726              
727             # Try to detect 7-bit escape sequences.
728 4         9 foreach (@ESCAPE_SEQS) {
729 8         12 my ($seq, $cset) = @$_;
730 8 100       17 if (index($s, $seq) >= 0) {
731 4         9 my $decoder = __PACKAGE__->new($cset);
732 4 50       9 next unless $decoder->{Decoder};
733 4         6 eval {
734 4         3 my $dummy = $s;
735 4         14 $decoder->decode($dummy, FB_CROAK());
736             };
737 4 50       205 if ($@) {
738 0         0 next;
739             }
740 4         23 return $decoder->{InputCharset};
741             }
742             }
743              
744             # How about HZ, VIQR, UTF-7, ...?
745              
746 0         0 return $DEFAULT_CHARSET;
747             }
748              
749             sub _detect_7bit_charset {
750 0     0   0 detect_7bit_charset(@_);
751             }
752              
753             =item $charset->encode(STRING [, CHECK])
754              
755             Encode STRING (Unicode or non-Unicode) using compatible charset recommended
756             to be used for messages on Internet (if this module knows it).
757             Note that string will be decoded to Unicode then encoded even if compatible charset
758             was equal to original charset.
759              
760             B:
761             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
762             this function will die.
763              
764             =cut
765              
766             sub encode($$$;) {
767 10     10 1 313 my $self = shift;
768 10         19 my $s = shift;
769 10   50     30 my $check = shift || 0;
770              
771 10 100 66     44 unless (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
772 4 50       42 $s = $self->{Decoder}->decode($s, ($check & 0x1)? FB_CROAK(): 0);
773             }
774 10         34 my $enc = $self->{Encoder}->encode($s, $check);
775 10 50       3399 Encode::_utf8_off($enc) if is_utf8($enc); # workaround for RT #35120
776 10         28 $enc;
777             }
778              
779             =item $charset->encoded_header_len(STRING [, ENCODING])
780              
781             =item encoded_header_len STRING, ENCODING, CHARSET
782              
783             Get length of encoded STRING for message header
784             (without folding).
785              
786             ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
787             one of either C<"B"> or C<"Q">).
788              
789             =cut
790              
791             sub encoded_header_len($$$;) {
792 14     14 1 25 my $self = shift;
793 14         13 my ($encoding, $s);
794 14 100       18 if (ref $self) {
795 8         6 $s = shift;
796 8   66     19 $encoding = uc(shift || $self->{HeaderEncoding});
797             } else {
798 6         6 $s = $self;
799 6         8 $encoding = uc(shift);
800 6         24 $self = shift;
801 6 50       24 $self = __PACKAGE__->new($self) unless ref $self;
802             }
803              
804             #FIXME:$encoding === undef
805              
806 14         12 my $enclen;
807 14 100 66     46 if ($encoding eq 'Q') {
    50          
808 4         8 $enclen = _enclen_Q($s);
809             } elsif ($encoding eq 'S' and _resolve_S($s) eq 'Q') {
810 0         0 $enclen = _enclen_Q($s);
811             } else { # "B"
812 10         14 $enclen = _enclen_B($s);
813             }
814              
815 14         59 length($self->{OutputCharset})+$enclen+7;
816             }
817              
818             sub _enclen_B($) {
819 10     10   23 int((length(shift) + 2) / 3) * 4;
820             }
821              
822             sub _enclen_Q($;$) {
823 4     4   8 my $s = shift;
824 4         4 my $in_body = shift;
825 4         5 my @o;
826 4 50       6 if ($in_body) {
827 0         0 @o = ($s =~ m{([^-\t\r\n !*+/0-9A-Za-z])}go);
828             } else {
829 4         26 @o = ($s =~ m{([^- !*+/0-9A-Za-z])}gos);
830             }
831 4         10 length($s) + scalar(@o) * 2;
832             }
833              
834             sub _resolve_S($;$) {
835 6     6   7 my $s = shift;
836 6         5 my $in_body = shift;
837 6         6 my $e;
838 6 50       11 if ($in_body) {
839 0         0 $e = scalar(() = $s =~ m{[^-\t\r\n !*+/0-9A-Za-z]}g);
840 0 0       0 return (length($s) + 8 < $e * 6) ? 'BASE64' : 'QUOTED-PRINTABLE';
841             } else {
842 6         27 $e = scalar(() = $s =~ m{[^- !*+/0-9A-Za-z]}g);
843 6 50       27 return (length($s) + 8 < $e * 6) ? 'B' : 'Q';
844             }
845             }
846              
847             =item $charset->header_encode(STRING [, OPTS])
848              
849             =item header_encode STRING, CHARSET [, OPTS]
850              
851             Get converted (if needed) data of STRING and recommended encoding scheme of
852             that data for message headers. CHARSET is the charset by which STRING
853             is encoded.
854              
855             OPTS may accept following key-value pairs.
856             B:
857             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
858             conversion will not be performed. So these options do not have any effects.
859              
860             =over 4
861              
862             =item Detect7bit => YESNO
863              
864             Try auto-detecting 7-bit charset when CHARSET is not given.
865             Default is C<"YES">.
866              
867             =item Replacement => REPLACEMENT
868              
869             Specifies error handling scheme. See L<"Error Handling">.
870              
871             =back
872              
873             3-item list of (I, I,
874             I) will be returned. I will be
875             either C<"B">, C<"Q"> or C (might not be encoded).
876             If I could not be determined and I
877             contains non-ASCII byte(s), I will be C<"8BIT">
878             (this is I charset name but a special value to represent unencodable
879             data) and I will be C (should not be encoded).
880             I will be C<"US-ASCII"> if and only if string does not
881             contain any non-ASCII bytes.
882              
883             =cut
884              
885             sub header_encode {
886 8     8 1 573 my $self = shift;
887 8         6 my $text;
888 8 100       17 if (ref $self) {
889 4         5 $text = shift;
890             } else {
891 4         2 $text = $self;
892 4         7 $self = __PACKAGE__->new(shift);
893             }
894 8         38 my ($encoded, $charset) = $self->_text_encode($text, @_);
895             return ($encoded, '8BIT', undef)
896 8 50 33     27 unless $charset and $charset->{InputCharset};
897 8         10 my $cset = $charset->{OutputCharset};
898              
899             # Determine encoding scheme.
900 8         9 my $enc = $charset->{HeaderEncoding};
901              
902 8 100 66     54 if (!$enc and $encoded !~ $NON7BITRE) {
    50          
    50          
903 2 50       89 unless ($cset =~ /^($ISO2022RE|$ASCIITRANSRE)$/) { # 7BIT.
904 2         4 $cset = 'US-ASCII';
905             }
906             } elsif ($enc eq 'S') {
907 0         0 $enc = _resolve_S($encoded);
908             } elsif ($enc !~ /^[BQ]$/) {
909 0         0 $enc = 'B';
910             }
911 8         30 return ($encoded, $cset, $enc);
912             }
913              
914             sub _text_encode {
915 12     12   12 my $charset = shift;
916 12         9 my $s = shift;
917 12         15 my %params = @_;
918 12   33     40 my $replacement = uc($params{'Replacement'} || $Config->{Replacement});
919 12   33     29 my $detect7bit = uc($params{'Detect7bit'} || $Config->{Detect7bit});
920             my $encoding = $params{'Encoding'} ||
921 12   33     33 (exists $params{'Encoding'}? undef: 'A'); # undocumented
922              
923 12 50 33     46 if (!$encoding or $encoding ne 'A') { # no 7-bit auto-detection
924 0         0 $detect7bit = 'NO';
925             }
926 12 100       28 unless ($charset->{InputCharset}) {
927 4 50       25 if ($s =~ $NON7BITRE) {
    50          
928 0         0 return ($s, undef);
929             } elsif ($detect7bit ne "NO") {
930 4         10 $charset = __PACKAGE__->new(&detect_7bit_charset($s));
931             } else {
932 0         0 $charset = __PACKAGE__->new($DEFAULT_CHARSET,
933             Mapping => 'STANDARD');
934             }
935             }
936 12 50 33     41 if (!$encoding or $encoding ne 'A') { # no conversion
937 0         0 $charset = $charset->dup;
938 0         0 $charset->encoder($charset);
939 0         0 $charset->{HeaderEncoding} = $encoding;
940 0         0 $charset->{BodyEncoding} = $encoding;
941             }
942             my $check = ($replacement and $replacement =~ /^\d+$/)?
943             $replacement:
944             {
945             'CROAK' => FB_CROAK(),
946             'STRICT' => FB_CROAK(),
947             'FALLBACK' => FB_CROAK(), # special
948             'PERLQQ' => FB_PERLQQ(),
949             'HTMLCREF' => FB_HTMLCREF(),
950             'XMLCREF' => FB_XMLCREF(),
951 12 50 33     153 }->{$replacement || ""} || 0;
      50        
952              
953             # Encode data by output charset if required. If failed, fallback to
954             # fallback charset.
955 12         22 my $encoded;
956 12 100 33     98 if (is_utf8($s) or $s =~ /[^\x00-\xFF]/ or
      50        
      50        
      66        
957             ($charset->{InputCharset} || "") ne ($charset->{OutputCharset} || "")) {
958 4 50       7 if ($check & 0x1) { # CROAK or FALLBACK
959 0         0 eval {
960 0         0 $encoded = $s;
961 0         0 $encoded = $charset->encode($encoded, FB_CROAK());
962             };
963 0 0       0 if ($@) {
964 0 0 0     0 if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
965 0         0 my $cset = __PACKAGE__->new($FALLBACK_CHARSET,
966             Mapping => 'STANDARD');
967             # croak unknown charset
968             croak "unknown charset ``$FALLBACK_CHARSET''"
969 0 0       0 unless $cset->{Decoder};
970             # charset translation
971 0         0 $charset = $charset->dup;
972 0         0 $charset->encoder($cset);
973 0         0 $encoded = $s;
974 0         0 $encoded = $charset->encode($encoded, 0);
975             # replace input & output charsets with fallback charset
976 0         0 $cset->encoder($cset);
977 0         0 $charset = $cset;
978             } else {
979 0         0 $@ =~ s/ at .+$//;
980 0         0 croak $@;
981             }
982             }
983             } else {
984 4         4 $encoded = $s;
985 4         10 $encoded = $charset->encode($encoded, $check);
986             }
987             } else {
988 8         10 $encoded = $s;
989             }
990              
991 12 100       62 if ($encoded !~ /$NONASCIIRE/) { # maybe ASCII
992             # check ``ASCII transformation'' charsets
993 4 50       81 if ($charset->{OutputCharset} =~ /^($ASCIITRANSRE)$/) {
    0          
994 4         8 my $u = $encoded;
995 4         4 if (USE_ENCODE) {
996 4         11 $u = $charset->encoder->decode($encoded); # dec. by output
997             } elsif ($encoded =~ /[+~]/) { # workaround for pre-Encode env.
998             $u = "x$u";
999             }
1000 4 100       143 if ($u eq $encoded) {
1001 2         6 $charset = $charset->dup;
1002 2         4 $charset->encoder($DEFAULT_CHARSET);
1003             }
1004             } elsif ($charset->{OutputCharset} ne "US-ASCII") {
1005 0         0 $charset = $charset->dup;
1006 0         0 $charset->encoder($DEFAULT_CHARSET);
1007             }
1008             }
1009              
1010 12         28 return ($encoded, $charset);
1011             }
1012              
1013             =item $charset->undecode(STRING [,CHECK])
1014              
1015             Encode Unicode string STRING to byte string by input charset of $charset.
1016             This is equivalent to C<$charset-Edecoder-Eencode()>.
1017              
1018             B:
1019             When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
1020             this function will die.
1021              
1022             =cut
1023              
1024             sub undecode($$$;) {
1025 0     0 1   my $self = shift;
1026 0           my $s = shift;
1027 0   0       my $check = shift || 0;
1028 0           my $enc = $self->{Decoder}->encode($s, $check);
1029 0           Encode::_utf8_off($enc); # workaround for RT #35120
1030 0           $enc;
1031             }
1032              
1033             =back
1034              
1035             =head2 Manipulating Module Defaults
1036              
1037             =over
1038              
1039             =item alias ALIAS [, CHARSET]
1040              
1041             Get/set charset alias for canonical names determined by
1042             L<"canonical_charset">.
1043              
1044             If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
1045             CHARSET. Otherwise, alias won't be changed. In both cases,
1046             current charset name that ALIAS is assigned will be returned.
1047              
1048             =cut
1049              
1050             sub alias ($;$) {
1051 0     0 1   my $alias = uc(shift);
1052 0           my $charset = uc(shift);
1053              
1054 0 0         return $CHARSET_ALIASES{$alias} unless $charset;
1055              
1056 0           $CHARSET_ALIASES{$alias} = $charset;
1057 0           return $charset;
1058             }
1059              
1060             =item default [CHARSET]
1061              
1062             Get/set default charset.
1063              
1064             B is used by this module when charset context is
1065             unknown. Modules using this module are recommended to use this
1066             charset when charset context is unknown or implicit default is
1067             expected. By default, it is C<"US-ASCII">.
1068              
1069             If CHARSET is given and isn't false, it will be set to default charset.
1070             Otherwise, default charset won't be changed. In both cases,
1071             current default charset will be returned.
1072              
1073             B: Default charset I be changed.
1074              
1075             =cut
1076              
1077             sub default(;$) {
1078 0     0 1   my $charset = &canonical_charset(shift);
1079              
1080 0 0         if ($charset) {
1081 0 0         croak "Unknown charset '$charset'"
1082             unless resolve_alias($charset);
1083 0           $DEFAULT_CHARSET = $charset;
1084             }
1085 0           return $DEFAULT_CHARSET;
1086             }
1087              
1088             =item fallback [CHARSET]
1089              
1090             Get/set fallback charset.
1091              
1092             B is used by this module when conversion by given
1093             charset is failed and C<"FALLBACK"> error handling scheme is specified.
1094             Modules using this module may use this charset as last resort of charset
1095             for conversion. By default, it is C<"UTF-8">.
1096              
1097             If CHARSET is given and isn't false, it will be set to fallback charset.
1098             If CHARSET is C<"NONE">, fallback charset will be undefined.
1099             Otherwise, fallback charset won't be changed. In any cases,
1100             current fallback charset will be returned.
1101              
1102             B: It I useful that C<"US-ASCII"> is specified as fallback charset,
1103             since result of conversion will be readable without charset information.
1104              
1105             =cut
1106              
1107             sub fallback(;$) {
1108 0     0 1   my $charset = &canonical_charset(shift);
1109              
1110 0 0         if ($charset eq "NONE") {
    0          
1111 0           $FALLBACK_CHARSET = undef;
1112             } elsif ($charset) {
1113 0 0         croak "Unknown charset '$charset'"
1114             unless resolve_alias($charset);
1115 0           $FALLBACK_CHARSET = $charset;
1116             }
1117 0           return $FALLBACK_CHARSET;
1118             }
1119              
1120             =item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
1121              
1122             Get/set charset profiles.
1123              
1124             If optional arguments are given and any of them are not false, profiles
1125             for CHARSET will be set by those arguments. Otherwise, profiles
1126             won't be changed. In both cases, current profiles for CHARSET will be
1127             returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
1128              
1129             HEADERENC is recommended encoding scheme for message header.
1130             It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1131             C (might not be encoded).
1132              
1133             BODYENC is recommended transfer-encoding for message body. It may be
1134             one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
1135             C (might not be transfer-encoded).
1136              
1137             ENCCHARSET is a charset which is compatible with given CHARSET and
1138             is recommended to be used for MIME messages on Internet.
1139             If conversion is not needed (or this module doesn't know appropriate
1140             charset), ENCCHARSET is C.
1141              
1142             B: This function in the future releases can accept more optional
1143             arguments (for example, properties to handle character widths, line folding
1144             behavior, ...). So format of returned value may probably be changed.
1145             Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
1146             particular profile.
1147              
1148             =cut
1149              
1150             sub recommended ($;$;$;$) {
1151 0     0 1   my $charset = &canonical_charset(shift);
1152 0   0       my $henc = uc(shift) || undef;
1153 0   0       my $benc = uc(shift) || undef;
1154 0           my $cset = &canonical_charset(shift);
1155              
1156 0 0         croak "CHARSET is not specified" unless $charset;
1157 0 0 0       croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
1158 0 0 0       croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
1159              
1160 0 0 0       if ($henc or $benc or $cset) {
      0        
1161 0 0         $cset = undef if $charset eq $cset;
1162 0           my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
1163 0           $CHARSETS{$charset} = \@spec;
1164 0           return @spec;
1165             } else {
1166 0 0         $charset = __PACKAGE__->new($charset) unless ref $charset;
1167 0           return map { $charset->{$_} } qw(HeaderEncoding BodyEncoding
  0            
1168             OutputCharset);
1169             }
1170             }
1171              
1172             =back
1173              
1174             =head2 Constants
1175              
1176             =over
1177              
1178             =item USE_ENCODE
1179              
1180             Unicode/multibyte support flag.
1181             Non-empty string will be set when Unicode and multibyte support is enabled.
1182             Currently, this flag will be non-empty on Perl 5.7.3 or later and
1183             empty string on earlier versions of Perl.
1184              
1185             =back
1186              
1187             =head2 Error Handling
1188              
1189             L<"body_encode"> and L<"header_encode"> accept following C
1190             options:
1191              
1192             =over
1193              
1194             =item C<"DEFAULT">
1195              
1196             Put a substitution character in place of a malformed character.
1197             For UCM-based encodings, will be used.
1198              
1199             =item C<"FALLBACK">
1200              
1201             Try C<"DEFAULT"> scheme using I (see L<"fallback">).
1202             When fallback charset is undefined and conversion causes error,
1203             code will die on error with an error message.
1204              
1205             =item C<"CROAK">
1206              
1207             Code will die on error immediately with an error message.
1208             Therefore, you should trap the fatal error with eval{} unless you
1209             really want to let it die on error.
1210             Synonym is C<"STRICT">.
1211              
1212             =item C<"PERLQQ">
1213              
1214             =item C<"HTMLCREF">
1215              
1216             =item C<"XMLCREF">
1217              
1218             Use C, C or C
1219             scheme defined by L module.
1220              
1221             =item numeric values
1222              
1223             Numeric values are also allowed.
1224             For more details see L.
1225              
1226             =back
1227              
1228             If error handling scheme is not specified or unknown scheme is specified,
1229             C<"DEFAULT"> will be assumed.
1230              
1231             =head2 Configuration File
1232              
1233             Built-in defaults for option parameters can be overridden by configuration
1234             file: F.
1235             For more details read F.
1236              
1237             =head1 VERSION
1238              
1239             Consult $VERSION variable.
1240              
1241             Development versions of this module may be found at
1242             L.
1243              
1244             =head2 Incompatible Changes
1245              
1246             =over 4
1247              
1248             =item Release 1.001
1249              
1250             =over 4
1251              
1252             =item *
1253              
1254             new() method returns an object when CHARSET argument is not specified.
1255              
1256             =back
1257              
1258             =item Release 1.005
1259              
1260             =over 4
1261              
1262             =item *
1263              
1264             Restrict characters in encoded-word according to RFC 2047 section 5 (3).
1265             This also affects return value of encoded_header_len() method.
1266              
1267             =back
1268              
1269             =item Release 1.008.2
1270              
1271             =over 4
1272              
1273             =item *
1274              
1275             body_encoding() method may also returns C<"S">.
1276              
1277             =item *
1278              
1279             Return value of body_encode() method for UTF-8 may include
1280             C<"QUOTED-PRINTABLE"> encoding item that in earlier versions was fixed to
1281             C<"BASE64">.
1282              
1283             =back
1284              
1285             =back
1286              
1287             =head1 SEE ALSO
1288              
1289             Multipurpose Internet Mail Extensions (MIME).
1290              
1291             =head1 AUTHOR
1292              
1293             Hatuka*nezumi - IKEDA Soji
1294              
1295             =head1 COPYRIGHT
1296              
1297             Copyright (C) 2006-2017 Hatuka*nezumi - IKEDA Soji.
1298             This program is free software; you can redistribute it and/or modify it
1299             under the same terms as Perl itself.
1300              
1301             =cut
1302              
1303             1;