File Coverage

blib/lib/MIME/EncWords.pm
Criterion Covered Total %
statement 387 451 85.8
branch 168 238 70.5
condition 110 177 62.1
subroutine 22 22 100.0
pod 3 3 100.0
total 690 891 77.4


line stmt bran cond sub pod time code
1             #-*- perl -*-
2              
3             package MIME::EncWords;
4             require 5.005;
5              
6             =head1 NAME
7              
8             MIME::EncWords - deal with RFC 2047 encoded words (improved)
9              
10             =head1 SYNOPSIS
11              
12             I is aimed to be another implimentation
13             of L so that it will achieve more exact conformance with
14             RFC 2047 (formerly RFC 1522) specifications. Additionally, it contains
15             some improvements.
16             Following synopsis and descriptions are inherited from its inspirer,
17             then added descriptions on improvements (B<**>) or changes and
18             clarifications (B<*>).>
19              
20             Before reading further, you should see L to make sure that
21             you understand where this module fits into the grand scheme of things.
22             Go on, do it now. I'll wait.
23              
24             Ready? Ok...
25              
26             use MIME::EncWords qw(:all);
27              
28             ### Decode the string into another string, forgetting the charsets:
29             $decoded = decode_mimewords(
30             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
31             );
32              
33             ### Split string into array of decoded [DATA,CHARSET] pairs:
34             @decoded = decode_mimewords(
35             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
36             );
37              
38             ### Encode a single unsafe word:
39             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
40              
41             ### Encode a string, trying to find the unsafe words inside it:
42             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
43              
44             =head1 DESCRIPTION
45              
46             Fellow Americans, you probably won't know what the hell this module
47             is for. Europeans, Russians, et al, you probably do. C<:-)>.
48              
49             For example, here's a valid MIME header you might get:
50              
51             From: =?US-ASCII?Q?Keith_Moore?=
52             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
53             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
54             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
55             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
56             =?US-ASCII?Q?.._cool!?=
57              
58             The fields basically decode to (sorry, I can only approximate the
59             Latin characters with 7 bit sequences /o and 'e):
60              
61             From: Keith Moore
62             To: Keld J/orn Simonsen
63             CC: Andr'e Pirard
64             Subject: If you can read this you understand the example... cool!
65              
66             B: Fellow Americans, Europeans, you probably won't know
67             what the hell this module is for. East Asians, et al, you probably do.
68             C<(^_^)>.
69              
70             For example, here's a valid MIME header you might get:
71              
72             Subject: =?EUC-KR?B?sNTAuLinKGxhemluZXNzKSwgwvzB9ri7seIoaW1w?=
73             =?EUC-KR?B?YXRpZW5jZSksILGzuLgoaHVicmlzKQ==?=
74              
75             The fields basically decode to (sorry, I cannot approximate the
76             non-Latin multibyte characters with any 7 bit sequences):
77              
78             Subject: ???(laziness), ????(impatience), ??(hubris)
79              
80             =head1 PUBLIC INTERFACE
81              
82             =over 4
83              
84             =cut
85              
86             ### Pragmas:
87 5     5   29810 use strict;
  5         8  
  5         198  
88 5     5   20 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA $Config);
  5         7  
  5         377  
89              
90             ### Exporting:
91 5     5   24 use Exporter;
  5         19  
  5         378  
92              
93             %EXPORT_TAGS = (all => [qw(decode_mimewords
94             encode_mimeword
95             encode_mimewords)]);
96             Exporter::export_ok_tags(qw(all));
97              
98             ### Inheritance:
99             @ISA = qw(Exporter);
100              
101             ### Other modules:
102 5     5   38 use Carp qw(croak carp);
  5         7  
  5         275  
103 5     5   2933 use MIME::Base64;
  5         3173  
  5         278  
104 5     5   2920 use MIME::Charset qw(:trans);
  5         71916  
  5         1229  
105              
106             my @ENCODE_SUBS = qw(FB_CROAK is_utf8 resolve_alias);
107             if (MIME::Charset::USE_ENCODE) {
108 5     5   21 eval "use ".MIME::Charset::USE_ENCODE." \@ENCODE_SUBS;";
  5         6  
  5         232  
109             if ($@) { # Perl 5.7.3 + Encode 0.40
110             eval "use ".MIME::Charset::USE_ENCODE." qw(is_utf8);";
111             require MIME::Charset::_Compat;
112             for my $sub (@ENCODE_SUBS) {
113 5     5   33 no strict "refs";
  5         5  
  5         394  
114             *{$sub} = \&{"MIME::Charset::_Compat::$sub"}
115             unless $sub eq 'is_utf8';
116             }
117             }
118             } else {
119             require Unicode::String;
120             require MIME::Charset::_Compat;
121             for my $sub (@ENCODE_SUBS) {
122 5     5   22 no strict "refs";
  5         12  
  5         23792  
123             *{$sub} = \&{"MIME::Charset::_Compat::$sub"};
124             }
125             }
126              
127             #------------------------------
128             #
129             # Globals...
130             #
131             #------------------------------
132              
133             ### The package version, both in 1.23 style *and* usable by MakeMaker:
134             $VERSION = '1.014.3';
135              
136             ### Public Configuration Attributes
137             $Config = {
138             %{$MIME::Charset::Config}, # Detect7bit, Replacement, Mapping
139             Charset => 'ISO-8859-1',
140             Encoding => 'A',
141             Field => undef,
142             Folding => "\n",
143             MaxLineLen => 76,
144             Minimal => 'YES',
145             };
146             eval { require MIME::EncWords::Defaults; };
147              
148             ### Private Constants
149              
150             my $PRINTABLE = "\\x21-\\x7E";
151             #my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF";
152             my $NONPRINT = qr{[^$PRINTABLE]}; # Improvement: Unicode support.
153             my $UNSAFE = qr{[^\x01-\x20$PRINTABLE]};
154             my $WIDECHAR = qr{[^\x00-\xFF]};
155             my $ASCIITRANS = qr{^(?:HZ-GB-2312|UTF-7)$}i;
156             my $ASCIIINCOMPAT = qr{^UTF-(?:16|32)(?:BE|LE)?$}i;
157             my $DISPNAMESPECIAL = "\\x22(),:;<>\\x40\\x5C"; # RFC5322 name-addr specials.
158              
159             #------------------------------
160              
161             # _utf_to_unicode CSETOBJ, STR
162             # Private: Convert UTF-16*/32* to Unicode or UTF-8.
163             sub _utf_to_unicode {
164 90     90   70 my $csetobj = shift;
165 90         80 my $str = shift;
166              
167 90 100       226 return $str if is_utf8($str);
168              
169 48         81 return $csetobj->decode($str)
170             if MIME::Charset::USE_ENCODE();
171              
172 0         0 my $cset = $csetobj->as_string;
173 0         0 my $unistr = Unicode::String->new();
174 0 0 0     0 if ($cset eq 'UTF-16' or $cset eq 'UTF-16BE') {
    0 0        
    0          
    0          
175 0         0 $unistr->utf16($str);
176             } elsif ($cset eq 'UTF-16LE') {
177 0         0 $unistr->utf16le($str);
178             } elsif ($cset eq 'UTF-32' or $cset eq 'UTF-32BE') {
179 0         0 $unistr->utf32($str);
180             } elsif ($cset eq 'UTF-32LE') {
181 0         0 $unistr->utf32le($str);
182             } else {
183 0         0 croak "unknown transformation '$cset'";
184             }
185 0         0 return $unistr->utf8;
186             }
187              
188             #------------------------------
189              
190             # _decode_B STRING
191             # Private: used by _decode_header() to decode "B" encoding.
192             # Improvement by this module: sanity check on encoded sequence.
193             sub _decode_B {
194 57     57   100 my $str = shift;
195 57 50 33     349 unless ((length($str) % 4 == 0) and
196             $str =~ m|^[A-Za-z0-9+/]+={0,2}$|) {
197 0         0 return undef;
198             }
199 57         186 return decode_base64($str);
200             }
201              
202             # _decode_Q STRING
203             # Private: used by _decode_header() to decode "Q" encoding, which is
204             # almost, but not exactly, quoted-printable. :-P
205             # Improvement by this module: sanity check on encoded sequence (>=1.012.3).
206             sub _decode_Q {
207 49     49   50 my $str = shift;
208 49 50       134 if ($str =~ /=(?![0-9a-fA-F][0-9a-fA-F])/) { #XXX:" " and "\t" are allowed
209 0         0 return undef;
210             }
211 49         103 $str =~ s/_/\x20/g; # RFC 2047, Q rule 2
212 49         114 $str =~ s/=([0-9a-fA-F]{2})/pack("C", hex($1))/ge; # RFC 2047, Q rule 1
  182         373  
213 49         85 $str;
214             }
215              
216             # _encode_B STRING
217             # Private: used by encode_mimeword() to encode "B" encoding.
218             sub _encode_B {
219 73     73   63 my $str = shift;
220 73         223 encode_base64($str, '');
221             }
222              
223             # _encode_Q STRING
224             # Private: used by encode_mimeword() to encode "Q" encoding, which is
225             # almost, but not exactly, quoted-printable. :-P
226             # Improvement by this module: Spaces are escaped by ``_''.
227             sub _encode_Q {
228 71     71   62 my $str = shift;
229             # Restrict characters to those listed in RFC 2047 section 5 (3)
230 71         202 $str =~ s{[^-!*+/0-9A-Za-z]}{
231 489 100       1281 $& eq "\x20"? "_": sprintf("=%02X", ord($&))
232             }eog;
233 71         132 $str;
234             }
235              
236             #------------------------------
237              
238             =item decode_mimewords ENCODED, [OPTS...]
239              
240             I
241             Go through the string looking for RFC 2047-style "Q"
242             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
243              
244             B splits the ENCODED string into a list of decoded
245             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
246             data are returned in a 1-element array C<[DATA]>, giving an effective
247             CHARSET of C.
248              
249             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
250             foreach (decode_mimewords($enc)) {
251             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
252             }
253              
254             B<**>
255             However, adjacent encoded-words with same charset will be concatenated
256             to handle multibyte sequences safely.
257              
258             B<**>
259             Language information defined by RFC2231, section 5 will be additonal
260             third element, if any.
261              
262             B<*>
263             Whitespaces surrounding unencoded data will not be stripped so that
264             compatibility with L will be ensured.
265              
266             B joins the "data" elements of the above
267             list together, and returns that. I
268             and probably I what you want, but if you know that all charsets
269             in the ENCODED string are identical, it might be useful to you.
270             (Before you use this, please see L,
271             which is probably what you want.)
272             B<**>
273             See also "Charset" option below.
274              
275             In the event of a syntax error, $@ will be set to a description
276             of the error, but parsing will continue as best as possible (so as to
277             get I back when decoding headers).
278             $@ will be false if no error was detected.
279              
280             B<*>
281             Malformed encoded-words will be kept encoded.
282             In this case $@ will be set.
283              
284             Any arguments past the ENCODED string are taken to define a hash of options.
285             B<**>
286             When Unicode/multibyte support is disabled
287             (see L),
288             these options will not have any effects.
289              
290             =over 4
291              
292             =item Charset
293             B<**>
294              
295             Name of character set by which data elements in scalar context
296             will be converted.
297             The default is no conversion.
298             If this option is specified as special value C<"_UNICODE_">,
299             returned value will be Unicode string.
300              
301             B:
302             This feature is still information-lossy, I when C<"_UNICODE_"> is
303             specified.
304              
305             =item Detect7bit
306             B<**>
307              
308             Try to detect 7-bit charset on unencoded portions.
309             Default is C<"YES">.
310              
311             =cut
312              
313             #=item Field
314             #
315             #Name of the mail field this string came from. I
316              
317             =item Mapping
318             B<**>
319              
320             In scalar context, specify mappings actually used for charset names.
321             C<"EXTENDED"> uses extended mappings.
322             C<"STANDARD"> uses standardized strict mappings.
323             Default is C<"EXTENDED">.
324              
325             =back
326              
327             =cut
328              
329             sub decode_mimewords {
330 72     72 1 20120 my $encstr = shift;
331 72         176 my %params = @_;
332 72         358 my %Params = &_getparams(\%params,
333             NoDefault => [qw(Charset)], # default is no conv.
334             YesNo => [qw(Detect7bit)],
335             Others => [qw(Mapping)],
336             Obsoleted => [qw(Field)],
337             ToUpper => [qw(Charset Mapping)],
338             );
339 72         365 my $cset = MIME::Charset->new($Params{Charset},
340             Mapping => $Params{Mapping});
341             # unfolding: normalize linear-white-spaces and orphan newlines.
342 72 50       6557 $encstr =~ s/(?:[\r\n]+[\t ])*[\r\n]+([\t ]|\Z)/$1? " ": ""/eg;
  39         579  
343 72         152 $encstr =~ s/[\r\n]+/ /g;
344              
345 72         71 my @tokens;
346 72         75 $@ = ''; ### error-return
347              
348             ### Decode:
349 72         71 my ($word, $charset, $language, $encoding, $enc, $dec);
350 72         63 my $spc = '';
351 72         148 pos($encstr) = 0;
352 72         95 while (1) {
353 265 100       532 last if (pos($encstr) >= length($encstr));
354 193         159 my $pos = pos($encstr); ### save it
355              
356             ### Case 1: are we looking at "=?..?..?="?
357 193 100       654 if ($encstr =~ m{\G # from where we left off..
358             =\?([^?]*) # "=?" + charset +
359             \?([bq]) # "?" + encoding +
360             \?([^?]+) # "?" + data maybe with spcs +
361             \?= # "?="
362             ([\r\n\t ]*)
363             }xgi) {
364 106         323 ($word, $charset, $encoding, $enc) = ($&, $1, lc($2), $3);
365 106         147 my $tspc = $4;
366              
367             # RFC 2231 section 5 extension
368 106 100       199 if ($charset =~ s/^([^\*]*)\*(.*)/$1/) {
369 4   50     10 $language = $2 || undef;
370 4   50     5 $charset ||= undef;
371             } else {
372 102         92 $language = undef;
373             }
374              
375 106 100       142 if ($encoding eq 'q') {
376 49         111 $dec = _decode_Q($enc);
377             } else {
378 57         102 $dec = _decode_B($enc);
379             }
380 106 50       181 unless (defined $dec) {
381 0         0 $@ .= qq|Illegal sequence in "$word" (pos $pos)\n|;
382 0         0 push @tokens, [$spc.$word];
383 0         0 $spc = '';
384 0         0 next;
385             }
386              
387 106         92 { local $@;
  106         81  
388 106 100 50     736 if (scalar(@tokens) and
    100 100        
    50 100        
      66        
      33        
      66        
389             lc($charset || "") eq lc($tokens[-1]->[1] || "") and
390             resolve_alias($charset) and
391             (!${tokens[-1]}[2] and !$language or
392             lc(${tokens[-1]}[2]) eq lc($language))) { # Concat words if possible.
393 24         6864 $tokens[-1]->[0] .= $dec;
394             } elsif ($language) {
395 4         8 push @tokens, [$dec, $charset, $language];
396             } elsif ($charset) {
397 78         129 push @tokens, [$dec, $charset];
398             } else {
399 0         0 push @tokens, [$dec];
400             }
401 106         122 $spc = $tspc;
402             }
403 106         108 next;
404             }
405              
406             ### Case 2: are we looking at a bad "=?..." prefix?
407             ### We need this to detect problems for case 3, which stops at "=?":
408 87         115 pos($encstr) = $pos; # reset the pointer.
409 87 100       183 if ($encstr =~ m{\G=\?}xg) {
410 6         20 $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
411 6         13 push @tokens, [$spc.'=?'];
412 6         7 $spc = '';
413 6         7 next;
414             }
415              
416             ### Case 3: are we looking at ordinary text?
417 81         91 pos($encstr) = $pos; # reset the pointer.
418 81 50       572 if ($encstr =~ m{\G # from where we left off...
419             (.*? # shortest possible string,
420             \n*) # followed by 0 or more NLs,
421             (?=(\Z|=\?)) # terminated by "=?" or EOS
422             }xgs) {
423 81 50       173 length($1) or croak "MIME::EncWords: internal logic err: empty token\n";
424 81         196 push @tokens, [$spc.$1];
425 81         81 $spc = '';
426 81         87 next;
427             }
428              
429             ### Case 4: bug!
430 0         0 croak "MIME::EncWords: unexpected case:\n($encstr) pos $pos\n\t".
431             "Please alert developer.\n";
432             }
433 72 50       117 push @tokens, [$spc] if $spc;
434              
435             # Detect 7-bit charset
436 72 50       149 if ($Params{Detect7bit} ne "NO") {
437 72         51 local $@;
438 72         100 foreach my $t (@tokens) {
439 169 100 100     1367 unless ($t->[0] =~ $UNSAFE or $t->[1]) {
440 87         211 my $charset = MIME::Charset::_detect_7bit_charset($t->[0]);
441 87 50 33     4315 if ($charset and $charset ne &MIME::Charset::default()) {
442 0         0 $t->[1] = $charset;
443             }
444             }
445             }
446             }
447              
448 72 100       346 if (wantarray) {
449 24         123 @tokens;
450             } else {
451 111         361 join('', map {
452 48         62 &_convert($_->[0], $_->[1], $cset, $Params{Mapping})
453             } @tokens);
454             }
455             }
456              
457             #------------------------------
458              
459             # _convert RAW, FROMCHARSET, TOCHARSET, MAPPING
460             # Private: used by decode_mimewords() to convert string by other charset
461             # or to decode to Unicode.
462             # When source charset is unknown and Unicode string is requested, at first
463             # try well-formed UTF-8 then fallback to ISO-8859-1 so that almost all
464             # non-ASCII bytes will be preserved.
465             sub _convert($$$$) {
466 111     111   96 my $s = shift;
467 111         152 my $charset = shift;
468 111         78 my $cset = shift;
469 111         87 my $mapping = shift;
470 111 50       216 return $s unless &MIME::Charset::USE_ENCODE;
471 111 100       194 return $s unless $cset->as_string;
472 66 50 66     321 croak "unsupported charset ``".$cset->as_string."''"
473             unless $cset->decoder or $cset->as_string eq "_UNICODE_";
474              
475 66         351 local($@);
476 66         117 $charset = MIME::Charset->new($charset, Mapping => $mapping);
477 66 50 66     4441 if ($charset->as_string and $charset->as_string eq $cset->as_string) {
478 0         0 return $s;
479             }
480             # build charset object to transform string from $charset to $cset.
481 66         477 $charset->encoder($cset);
482              
483 66         543 my $converted = $s;
484 66 50 33     488 if (is_utf8($s) or $s =~ $WIDECHAR) {
    100          
    100          
485 0 0       0 if ($charset->output_charset ne "_UNICODE_") {
486 0         0 $converted = $charset->encode($s);
487             }
488             } elsif ($charset->output_charset eq "_UNICODE_") {
489 37 100       194 if (!$charset->decoder) {
490 18 50       90 if ($s =~ $UNSAFE) {
491 0         0 $@ = '';
492 0         0 eval {
493 0         0 $charset = MIME::Charset->new("UTF-8",
494             Mapping => 'STANDARD');
495 0         0 $converted = $charset->decode($converted, FB_CROAK());
496             };
497 0 0       0 if ($@) {
498 0         0 $converted = $s;
499 0         0 $charset = MIME::Charset->new("ISO-8859-1",
500             Mapping => 'STANDARD');
501 0         0 $converted = $charset->decode($converted, 0);
502             }
503             }
504             } else {
505 19         82 $converted = $charset->decode($s);
506             }
507             } elsif ($charset->decoder) {
508 18         196 $converted = $charset->encode($s);
509             }
510 66         1012 return $converted;
511             }
512              
513             #------------------------------
514              
515             =item encode_mimeword RAW, [ENCODING], [CHARSET]
516              
517             I
518             Encode a single RAW "word" that has unsafe characters.
519             The "word" will be encoded in its entirety.
520              
521             ### Encode "<>":
522             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
523              
524             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
525             B<**>
526             You may also specify it as ``special'' value: C<"S"> to choose shorter
527             one of either C<"Q"> or C<"B">.
528              
529             You may specify the CHARSET, which defaults to C.
530              
531             B<*>
532             Spaces will be escaped with ``_'' by C<"Q"> encoding.
533              
534             =cut
535              
536             sub encode_mimeword {
537 144     144 1 175 my $word = shift;
538 144   50     271 my $encoding = uc(shift || 'Q'); # not overridden.
539 144   50     223 my $charset = shift || 'ISO-8859-1'; # ditto.
540 144   50     394 my $language = uc(shift || ""); # ditto.
541              
542 144 50       186 if (ref $charset) {
543 144 50 33     970 if (is_utf8($word) or $word =~ /$WIDECHAR/) {
544 0         0 $word = $charset->undecode($word, 0);
545             }
546 144         313 $charset = $charset->as_string;
547             } else {
548 0         0 $charset = uc($charset);
549             }
550 144         543 my $encstr;
551 144 100       238 if ($encoding eq 'Q') {
    50          
552 71         108 $encstr = &_encode_Q($word);
553             } elsif ($encoding eq "S") {
554 0         0 my ($B, $Q) = (&_encode_B($word), &_encode_Q($word));
555 0 0       0 if (length($B) < length($Q)) {
556 0         0 $encoding = "B";
557 0         0 $encstr = $B;
558             } else {
559 0         0 $encoding = "Q";
560 0         0 $encstr = $Q;
561             }
562             } else { # "B"
563 73         84 $encoding = "B";
564 73         123 $encstr = &_encode_B($word);
565             }
566              
567 144 50       182 if ($language) {
568 0         0 return "=?$charset*$language?$encoding?$encstr?=";
569             } else {
570 144         414 return "=?$charset?$encoding?$encstr?=";
571             }
572             }
573              
574             #------------------------------
575              
576             =item encode_mimewords RAW, [OPTS]
577              
578             I
579             Given a RAW string, try to find and encode all "unsafe" sequences
580             of characters:
581              
582             ### Encode a string with some unsafe "words":
583             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
584              
585             Returns the encoded string.
586              
587             B<**>
588             RAW may be a Unicode string when Unicode/multibyte support is enabled
589             (see L).
590             Furthermore, RAW may be a reference to that returned
591             by L on array context. In latter case "Charset"
592             option (see below) will be overridden (see also a note below).
593              
594             B:
595             B<*>
596             When RAW is an arrayref,
597             adjacent encoded-words (i.e. elements having non-ASCII charset element)
598             are concatenated. Then they are split taking
599             care of character boundaries of multibyte sequences when Unicode/multibyte
600             support is enabled.
601             Portions for unencoded data should include surrounding whitespace(s), or
602             they will be merged into adjoining encoded-word(s).
603              
604             Any arguments past the RAW string are taken to define a hash of options:
605              
606             =over 4
607              
608             =item Charset
609              
610             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
611             a.k.a. "Latin-1".
612              
613             =item Detect7bit
614             B<**>
615              
616             When "Encoding" option (see below) is specified as C<"a"> and "Charset"
617             option is unknown, try to detect 7-bit charset on given RAW string.
618             Default is C<"YES">.
619             When Unicode/multibyte support is disabled,
620             this option will not have any effects
621             (see L).
622              
623             =item Encoding
624              
625             The encoding to use, C<"q"> or C<"b">.
626             B<**>
627             You may also specify ``special'' values: C<"a"> will automatically choose
628             recommended encoding to use (with charset conversion if alternative
629             charset is recommended: see L);
630             C<"s"> will choose shorter one of either C<"q"> or C<"b">.
631             B:
632             B<*>
633             As of release 1.005, The default was changed from C<"q">
634             (the default on MIME::Words) to C<"a">.
635              
636             =item Field
637              
638             Name of the mail field this string will be used in.
639             B<**>
640             Length of mail field name will be considered in the first line of
641             encoded header.
642              
643             =item Folding
644             B<**>
645              
646             A Sequence to fold encoded lines. The default is C<"\n">.
647             If empty string C<""> is specified, encoded-words exceeding line length
648             (see L below) will be split by SPACE.
649              
650             B:
651             B<*>
652             Though RFC 5322 (formerly RFC 2822) states that the lines in
653             Internet messages are delimited by CRLF (C<"\r\n">),
654             this module chose LF (C<"\n">) as a default to keep backward compatibility.
655             When you use the default, you might need converting newlines
656             before encoded headers are thrown into session.
657              
658             =item Mapping
659             B<**>
660              
661             Specify mappings actually used for charset names.
662             C<"EXTENDED"> uses extended mappings.
663             C<"STANDARD"> uses standardized strict mappings.
664             The default is C<"EXTENDED">.
665             When Unicode/multibyte support is disabled,
666             this option will not have any effects
667             (see L).
668              
669             =item MaxLineLen
670             B<**>
671              
672             Maximum line length excluding newline.
673             The default is 76.
674             Negative value means unlimited line length (as of release 1.012.3).
675              
676             =item Minimal
677             B<**>
678              
679             Takes care of natural word separators (i.e. whitespaces)
680             in the text to be encoded.
681             If C<"NO"> is specified, this module will encode whole text
682             (if encoding needed) not regarding whitespaces;
683             encoded-words exceeding line length will be split based only on their
684             lengths.
685             Default is C<"YES"> by which minimal portions of text are encoded.
686             If C<"DISPNAME"> is specified, portions including special characters
687             described in RFC5322 (formerly RFC2822, RFC822) address specification
688             (section 3.4) are also encoded.
689             This is useful for encoding display-name of address fields.
690              
691             B:
692             As of release 0.040, default has been changed to C<"YES"> to ensure
693             compatibility with MIME::Words.
694             On earlier releases, this option was fixed to be C<"NO">.
695              
696             B:
697             C<"DISPNAME"> option was introduced at release 1.012.
698              
699             =item Replacement
700             B<**>
701              
702             See L.
703              
704             =back
705              
706             =cut
707              
708             sub encode_mimewords {
709 90     90 1 34074 my $words = shift;
710 90         214 my %params = @_;
711 90         403 my %Params = &_getparams(\%params,
712             YesNo => [qw(Detect7bit)],
713             Others => [qw(Charset Encoding Field Folding
714             Mapping MaxLineLen Minimal
715             Replacement)],
716             ToUpper => [qw(Charset Encoding Mapping Minimal
717             Replacement)],
718             );
719 90 50       446 croak "unsupported encoding ``$Params{Encoding}''"
720             unless $Params{Encoding} =~ /^[ABQS]$/;
721             # newline and following WSP
722 90         80 my ($fwsbrk, $fwsspc);
723 90 50       314 if ($Params{Folding} =~ m/^([\r\n]*)([\t ]?)$/) {
724 90         222 $fwsbrk = $1;
725 90   50     297 $fwsspc = $2 || " ";
726             } else {
727 0         0 croak sprintf "illegal folding sequence ``\\x%*v02X''", '\\x',
728             $Params{Folding};
729             }
730             # charset objects
731 90         350 my $charsetobj = MIME::Charset->new($Params{Charset},
732             Mapping => $Params{Mapping});
733 90         25980 my $ascii = MIME::Charset->new("US-ASCII", Mapping => 'STANDARD');
734 90         5032 $ascii->encoder($ascii);
735             # lengths
736 90 100       879 my $firstlinelen = $Params{MaxLineLen} -
737             ($Params{Field}? length("$Params{Field}: "): 0);
738 90         104 my $maxrestlen = $Params{MaxLineLen} - length($fwsspc);
739             # minimal encoding flag
740 90 50       386 if (!$Params{Minimal}) {
    50          
741 0         0 $Params{Minimal} = 'NO';
742             } elsif ($Params{Minimal} !~ /^(NO|DISPNAME)$/) {
743 90         110 $Params{Minimal} = 'YES';
744             }
745             # unsafe ASCII sequences
746 90 50       855 my $UNSAFEASCII = ($maxrestlen <= 1)?
747             qr{(?: =\? )}ox:
748             qr{(?: =\? | [$PRINTABLE]{$Params{MaxLineLen}} )}x;
749 90 50       247 $UNSAFEASCII = qr{(?: [$DISPNAMESPECIAL] | $UNSAFEASCII )}x
750             if $Params{Minimal} eq 'DISPNAME';
751              
752 90 100       168 unless (ref($words) eq "ARRAY") {
753             # workaround for UTF-16* & UTF-32*: force UTF-8.
754 66 100       142 if ($charsetobj->as_string =~ /$ASCIIINCOMPAT/) {
755 24         215 $words = _utf_to_unicode($charsetobj, $words);
756 24         382 $charsetobj = MIME::Charset->new('UTF-8');
757             }
758              
759 66         1393 my @words = ();
760             # unfolding: normalize linear-white-spaces and orphan newlines.
761 66 50       946 $words =~ s/(?:[\r\n]+[\t ])*[\r\n]+([\t ]|\Z)/$1? " ": ""/eg;
  1         19  
762 66         153 $words =~ s/[\r\n]+/ /g;
763             # split if required
764 66 50       344 if ($Params{Minimal} =~ /YES|DISPNAME/) {
765 66         84 my ($spc, $unsafe_last) = ('', 0);
766 66         396 foreach my $w (split(/([\t ]+)/, $words)) {
767 640 50 66     1131 next unless scalar(@words) or length($w); # skip garbage
768 640 100       1280 if ($w =~ /[\t ]/) {
769 287         294 $spc = $w;
770 287         255 next;
771             }
772              
773             # workaround for ``ASCII transformation'' charsets
774 353         281 my $u = $w;
775 353 100       622 if ($charsetobj->as_string =~ /$ASCIITRANS/) {
776 6         47 if (MIME::Charset::USE_ENCODE) {
777 6 50 33     35 if (is_utf8($w) or $w =~ /$WIDECHAR/) {
778 0         0 $w = $charsetobj->undecode($u);
779             } else {
780 6         13 $u = $charsetobj->decode($w);
781             }
782             } elsif ($w =~ /[+~]/) { #FIXME: for pre-Encode environment
783             $u = "x$w";
784             }
785             }
786 353 100       2098 if (scalar(@words)) {
787 287 100 100     2395 if (($w =~ /$NONPRINT|$UNSAFEASCII/ or $u ne $w) xor
      100        
788             $unsafe_last) {
789 61 100       81 if ($unsafe_last) {
790 40         63 push @words, $spc.$w;
791             } else {
792 21         24 $words[-1] .= $spc;
793 21         29 push @words, $w;
794             }
795 61         76 $unsafe_last = not $unsafe_last;
796             } else {
797 226         363 $words[-1] .= $spc.$w;
798             }
799             } else {
800 66         120 push @words, $spc.$w;
801 66   66     614 $unsafe_last =
802             ($w =~ /$NONPRINT|$UNSAFEASCII/ or $u ne $w);
803             }
804 353         420 $spc = '';
805             }
806 66 50       169 if ($spc) {
807 0 0       0 if (scalar(@words)) {
808 0         0 $words[-1] .= $spc;
809             } else { # only WSPs
810 0         0 push @words, $spc;
811             }
812             }
813             } else {
814 0         0 @words = ($words);
815             }
816 66         90 $words = [map { [$_, $Params{Charset}] } @words];
  127         288  
817             }
818              
819             # Translate / concatenate words.
820 90         91 my @triplets;
821 90         129 foreach (@$words) {
822 211         300 my ($s, $cset) = @$_;
823 211 50       404 next unless length($s);
824 211   100     702 my $csetobj = MIME::Charset->new($cset || "",
825             Mapping => $Params{Mapping});
826              
827             # workaround for UTF-16*/UTF-32*: force UTF-8
828 211 100 100     13889 if ($csetobj->as_string and $csetobj->as_string =~ /$ASCIIINCOMPAT/) {
829 66         813 $s = _utf_to_unicode($csetobj, $s);
830 66         373 $csetobj = MIME::Charset->new('UTF-8');
831             }
832              
833             # determine charset and encoding
834             # try defaults only if 7-bit charset detection is not required
835 211         3968 my $enc;
836 211         242 my $obj = $csetobj;
837 211 100       329 unless ($obj->as_string) {
838 45 100 33     515 if ($Params{Encoding} ne "A" or $Params{Detect7bit} eq "NO" or
      66        
839             $s =~ /$UNSAFE/) {
840 6         13 $obj = $charsetobj;
841             }
842             }
843 211         955 ($s, $cset, $enc) =
844             $obj->header_encode($s,
845             Detect7bit => $Params{Detect7bit},
846             Replacement => $Params{Replacement},
847             Encoding => $Params{Encoding});
848             # Resolve 'S' encoding based on global length. See (*).
849 211 100 33     25452 $enc = 'S'
      66        
850             if defined $enc and
851             ($Params{Encoding} eq 'S' or
852             $Params{Encoding} eq 'A' and $obj->header_encoding eq 'S');
853              
854             # pure ASCII
855 211 100 66     2019 if ($cset eq "US-ASCII" and !$enc and $s =~ /$UNSAFEASCII/) {
      100        
856             # pure ASCII with unsafe sequences should be encoded
857 4   33     14 $cset = $csetobj->output_charset ||
858             $charsetobj->output_charset ||
859             $ascii->output_charset;
860 4         54 $csetobj = MIME::Charset->new($cset,
861             Mapping => $Params{Mapping});
862             # Preserve original Encoding option unless it was 'A'.
863 4 100 50     199 $enc = ($Params{Encoding} eq 'A') ?
864             ($csetobj->header_encoding || 'Q') :
865             $Params{Encoding};
866             } else {
867 207         542 $csetobj = MIME::Charset->new($cset,
868             Mapping => $Params{Mapping});
869             }
870              
871             # Now no charset translations are needed.
872 211         9283 $csetobj->encoder($csetobj);
873              
874             # Concatenate adjacent ``words'' so that multibyte sequences will
875             # be handled safely.
876             # Note: Encoded-word and unencoded text must not adjoin without
877             # separating whitespace(s).
878 211 100       1721 if (scalar(@triplets)) {
879 121         112 my ($last, $lastenc, $lastcsetobj) = @{$triplets[-1]};
  121         205  
880 121 100 50     221 if ($csetobj->decoder and
    100 66        
    100 100        
      100        
      66        
      66        
      100        
      100        
      100        
881             ($lastcsetobj->as_string || "") eq $csetobj->as_string and
882             ($lastenc || "") eq ($enc || "")) {
883 26         431 $triplets[-1]->[0] .= $s;
884 26         174 next;
885             } elsif (!$lastenc and $enc and $last !~ /[\r\n\t ]$/) {
886 7 50       226 if ($last =~ /^(.*)([\r\n\t ])([$PRINTABLE]+)$/s) {
    0          
887 7         27 $triplets[-1]->[0] = $1.$2;
888 7         22 $s = $3.$s;
889             } elsif ($lastcsetobj->as_string eq "US-ASCII") {
890 0         0 $triplets[-1]->[0] .= $s;
891 0         0 $triplets[-1]->[1] = $enc;
892 0         0 $triplets[-1]->[2] = $csetobj;
893 0         0 next;
894             }
895             } elsif ($lastenc and !$enc and $s !~ /^[\r\n\t ]/) {
896 16 50       403 if ($s =~ /^([$PRINTABLE]+)([\r\n\t ])(.*)$/s) {
    0          
897 16         47 $triplets[-1]->[0] .= $1;
898 16         43 $s = $2.$3;
899             } elsif ($csetobj->as_string eq "US-ASCII") {
900 0         0 $triplets[-1]->[0] .= $s;
901 0         0 next;
902             }
903             }
904             }
905 185         2175 push @triplets, [$s, $enc, $csetobj];
906             }
907              
908             # (*) Resolve 'S' encoding based on global length.
909 90 100       138 my @s_enc = grep { $_->[1] and $_->[1] eq 'S' } @triplets;
  185         621  
910 90 100       167 if (scalar @s_enc) {
911 42         35 my $enc;
912 42 100       38 my $b = scalar grep { $_->[1] and $_->[1] eq 'B' } @triplets;
  84         212  
913 42 100       34 my $q = scalar grep { $_->[1] and $_->[1] eq 'Q' } @triplets;
  84         211  
914             # 'A' chooses 'B' or 'Q' when all other encoded-words have same enc.
915 42 100 66     302 if ($Params{Encoding} eq 'A' and $b and ! $q) {
    50 66        
      33        
      33        
916 7         11 $enc = 'B';
917             } elsif ($Params{Encoding} eq 'A' and ! $b and $q) {
918 0         0 $enc = 'Q';
919             # Otherwise, assuming 'Q', when characters to be encoded are more than
920             # 6th of total (plus a little fraction), 'B' will win.
921             # Note: This might give 'Q' so great advantage...
922             } else {
923 35         32 my @no_enc = grep { ! $_->[1] } @triplets;
  63         78  
924 35         33 my $total = length join('', map { $_->[0] } (@no_enc, @s_enc));
  63         102  
925 35         42 my $q = scalar(() = join('', map { $_->[0] } @s_enc) =~
  35         395  
926             m{[^- !*+/0-9A-Za-z]}g);
927 35 100       118 if ($total + 8 < $q * 6) {
928 21         30 $enc = 'B';
929             } else {
930 14         21 $enc = 'Q';
931             }
932             }
933 42         59 foreach (@triplets) {
934 84 100 100     259 $_->[1] = $enc if $_->[1] and $_->[1] eq 'S';
935             }
936             }
937              
938             # chop leading FWS
939 90   33     474 while (scalar(@triplets) and $triplets[0]->[0] =~ s/^[\r\n\t ]+//) {
940 0 0       0 shift @triplets unless length($triplets[0]->[0]);
941             }
942              
943             # Split long ``words''.
944 90         97 my @splitwords;
945             my $restlen;
946 90 50       174 if ($Params{MaxLineLen} < 0) {
947 0         0 @splitwords = @triplets;
948             } else {
949 90         98 $restlen = $firstlinelen;
950 90         117 foreach (@triplets) {
951 185         226 my ($s, $enc, $csetobj) = @$_;
952              
953 185         281 my @s = &_split($s, $enc, $csetobj, $restlen, $maxrestlen);
954 185         194 push @splitwords, @s;
955 185         175 my ($last, $lastenc, $lastcsetobj) = @{$s[-1]};
  185         280  
956 185         167 my $lastlen;
957 185 100       216 if ($lastenc) {
958 99         197 $lastlen = $lastcsetobj->encoded_header_len($last, $lastenc);
959             } else {
960 86         79 $lastlen = length($last);
961             }
962 185 100       1312 $restlen = $maxrestlen if scalar @s > 1; # has split; new line(s) fed
963 185         173 $restlen -= $lastlen;
964 185 100       597 $restlen = $maxrestlen if $restlen <= 1;
965             }
966             }
967              
968             # Do encoding.
969 90         81 my @lines;
970 90         73 $restlen = $firstlinelen;
971 90         127 foreach (@splitwords) {
972 279         314 my ($str, $encoding, $charsetobj) = @$_;
973 279 50       400 next unless length($str);
974              
975 279         280 my $s;
976 279 100       336 if (!$encoding) {
977 135         119 $s = $str;
978             } else {
979 144         205 $s = encode_mimeword($str, $encoding, $charsetobj);
980             }
981              
982 279 100 100     1550 my $spc = (scalar(@lines) and $lines[-1] =~ /[\r\n\t ]$/ or
983             $s =~ /^[\r\n\t ]/)? '': ' ';
984 279 100       664 if (!scalar(@lines)) {
    50          
    100          
985 90         148 push @lines, $s;
986             } elsif ($Params{MaxLineLen} < 0) {
987 0         0 $lines[-1] .= $spc.$s;
988             } elsif (length($lines[-1].$spc.$s) <= $restlen) {
989 91         201 $lines[-1] .= $spc.$s;
990             } else {
991 98 100       316 if ($lines[-1] =~ s/([\r\n\t ]+)$//) {
992 4         9 $s = $1.$s;
993             }
994 98         180 $s =~ s/^[\r\n]*[\t ]//; # strip only one WSP replaced by FWS
995 98         107 push @lines, $s;
996 98         150 $restlen = $maxrestlen;
997             }
998             }
999              
1000 90         1147 join($fwsbrk.$fwsspc, @lines);
1001             }
1002              
1003             #------------------------------
1004              
1005             # _split RAW, ENCODING, CHARSET_OBJECT, ROOM_OF_FIRST_LINE, MAXRESTLEN
1006             # Private: used by encode_mimewords() to split a string into
1007             # (encoded or non-encoded) words.
1008             # Returns an array of arrayrefs [SUBSTRING, ENCODING, CHARSET].
1009             sub _split {
1010 185     185   191 my $str = shift;
1011 185         151 my $encoding = shift;
1012 185         148 my $charset = shift;
1013 185         165 my $restlen = shift;
1014 185         151 my $maxrestlen = shift;
1015              
1016 185 50 33     387 if (!$charset->as_string or $charset->as_string eq '8BIT') {# Undecodable.
1017 0         0 $str =~ s/[\r\n]+[\t ]*|\x00/ /g; # Eliminate hostile characters.
1018 0         0 return ([$str, undef, $charset]);
1019             }
1020 185 100 66     1635 if (!$encoding and $charset->as_string eq 'US-ASCII') { # Pure ASCII.
1021 86         426 return &_split_ascii($str, $restlen, $maxrestlen);
1022             }
1023 99 50 50     183 if (!$charset->decoder and MIME::Charset::USE_ENCODE) { # Unsupported.
1024 0         0 return ([$str, $encoding, $charset]);
1025             }
1026              
1027 99         428 my (@splitwords, $ustr, $first);
1028 99         174 while (length($str)) {
1029 144 100       286 if ($charset->encoded_header_len($str, $encoding) <= $restlen) {
1030 98         1447 push @splitwords, [$str, $encoding, $charset];
1031 98         125 last;
1032             }
1033 46         1053 $ustr = $str;
1034 46 50 33     460 if (!(is_utf8($ustr) or $ustr =~ /$WIDECHAR/) and
      50        
1035             MIME::Charset::USE_ENCODE) {
1036 46         133 $ustr = $charset->decode($ustr);
1037             }
1038 46         1349 ($first, $str) = &_clip_unsafe($ustr, $encoding, $charset, $restlen);
1039             # retry splitting if failed
1040 46 100 66     264 if ($first and !$str and
      100        
1041             $maxrestlen < $charset->encoded_header_len($first, $encoding)) {
1042 4         57 ($first, $str) = &_clip_unsafe($ustr, $encoding, $charset,
1043             $maxrestlen);
1044             }
1045 46         114 push @splitwords, [$first, $encoding, $charset];
1046 46         172 $restlen = $maxrestlen;
1047             }
1048 99         205 return @splitwords;
1049             }
1050              
1051             # _split_ascii RAW, ROOM_OF_FIRST_LINE, MAXRESTLEN
1052             # Private: used by encode_mimewords() to split an US-ASCII string into
1053             # (encoded or non-encoded) words.
1054             # Returns an array of arrayrefs [SUBSTRING, undef, "US-ASCII"].
1055             sub _split_ascii {
1056 86     86   82 my $s = shift;
1057 86         69 my $restlen = shift;
1058 86         69 my $maxrestlen = shift;
1059 86   33     264 $restlen ||= $maxrestlen;
1060              
1061 86         62 my @splitwords;
1062 86         311 my $ascii = MIME::Charset->new("US-ASCII", Mapping => 'STANDARD');
1063 86         5030 foreach my $line (split(/(?:[\t ]*[\r\n]+)+/, $s)) {
1064 86         104 my $spc = '';
1065 86         405 foreach my $word (split(/([\t ]+)/, $line)) {
1066             # skip first garbage
1067 990 50 66     1912 next unless scalar(@splitwords) or defined $word;
1068 990 100       1906 if ($word =~ /[\t ]/) {
1069 467         383 $spc = $word;
1070 467         406 next;
1071             }
1072              
1073 523         533 my $cont = $spc.$word;
1074 523         384 my $elen = length($cont);
1075 523 100       672 next unless $elen;
1076 459 100       490 if (scalar(@splitwords)) {
1077             # Concatenate adjacent words so that encoded-word and
1078             # unencoded text will adjoin with separating whitespace.
1079 373 100       421 if ($elen <= $restlen) {
1080 324         388 $splitwords[-1]->[0] .= $cont;
1081 324         253 $restlen -= $elen;
1082             } else {
1083 49         78 push @splitwords, [$cont, undef, $ascii];
1084 49         48 $restlen = $maxrestlen - $elen;
1085             }
1086             } else {
1087 86         159 push @splitwords, [$cont, undef, $ascii];
1088 86         158 $restlen -= $elen;
1089             }
1090 459         528 $spc = '';
1091             }
1092 86 100       228 if ($spc) {
1093 30 50       54 if (scalar(@splitwords)) {
1094 30         76 $splitwords[-1]->[0] .= $spc;
1095 30         53 $restlen -= length($spc);
1096             } else { # only WSPs
1097 0         0 push @splitwords, [$spc, undef, $ascii];
1098 0         0 $restlen = $maxrestlen - length($spc);
1099             }
1100             }
1101             }
1102 86         249 return @splitwords;
1103             }
1104              
1105             # _clip_unsafe UNICODE, ENCODING, CHARSET_OBJECT, ROOM_OF_FIRST_LINE
1106             # Private: used by encode_mimewords() to bite off one encodable
1107             # ``word'' from a Unicode string.
1108             # Note: When Unicode/multibyte support is not enabled, character
1109             # boundaries of multibyte string shall be broken!
1110             sub _clip_unsafe {
1111 50     50   59 my $ustr = shift;
1112 50         86 my $encoding = shift;
1113 50         79 my $charset = shift;
1114 50         46 my $restlen = shift;
1115 50 50       127 return ("", "") unless length($ustr);
1116              
1117             # Seek maximal division point.
1118 50         65 my ($shorter, $longer) = (0, length($ustr));
1119 50         89 while ($shorter < $longer) {
1120 239         229 my $cur = ($shorter + $longer + 1) >> 1;
1121 239         397 my $enc = substr($ustr, 0, $cur);
1122 239         149 if (MIME::Charset::USE_ENCODE ne '') {
1123 239         418 $enc = $charset->undecode($enc);
1124             }
1125 239         7135 my $elen = $charset->encoded_header_len($enc, $encoding);
1126 239 100       3693 if ($elen <= $restlen) {
1127 129         229 $shorter = $cur;
1128             } else {
1129 110         211 $longer = $cur - 1;
1130             }
1131             }
1132              
1133             # Make sure that combined characters won't be divided.
1134 50         40 my ($fenc, $renc);
1135 50         53 my $max = length($ustr);
1136 50         37 while (1) {
1137 50         48 $@ = '';
1138 50         52 eval {
1139 50         120 ($fenc, $renc) =
1140             (substr($ustr, 0, $shorter), substr($ustr, $shorter));
1141 50         49 if (MIME::Charset::USE_ENCODE ne '') {
1142             # FIXME: croak if $renc =~ /^\p{M}/
1143 50         131 $fenc = $charset->undecode($fenc, FB_CROAK());
1144 50         1019 $renc = $charset->undecode($renc, FB_CROAK());
1145             }
1146             };
1147 50 50       1041 last unless ($@);
1148              
1149 0         0 $shorter++;
1150 0 0       0 unless ($shorter < $max) { # Unencodable character(s) may be included.
1151 0         0 return ($charset->undecode($ustr), "");
1152             }
1153             }
1154              
1155 50 100       113 if (length($fenc)) {
1156 46         235 return ($fenc, $renc);
1157             } else {
1158 4         16 return ($renc, "");
1159             }
1160             }
1161              
1162             #------------------------------
1163              
1164             # _getparams HASHREF, OPTS
1165             # Private: used to get option parameters.
1166             sub _getparams {
1167 162     162   195 my $params = shift;
1168 162         400 my %params = @_;
1169 162         151 my %Params;
1170             my %GotParams;
1171 162         245 foreach my $k (qw(NoDefault YesNo Others Obsoleted ToUpper)) {
1172 810   100     2031 $Params{$k} = $params{$k} || [];
1173             }
1174 162         366 foreach my $k (keys %$params) {
1175 338         252 my $supported = 0;
1176 338         296 foreach my $i (@{$Params{NoDefault}}, @{$Params{YesNo}},
  338         368  
  338         304  
  338         299  
1177 338         360 @{$Params{Others}}, @{$Params{Obsoleted}}) {
1178 1286 100       2074 if (lc $i eq lc $k) {
1179 338         415 $GotParams{$i} = $params->{$k};
1180 338         237 $supported = 1;
1181 338         306 last;
1182             }
1183             }
1184 338 50       630 carp "unknown or deprecated option ``$k''" unless $supported;
1185             }
1186             # get defaults
1187 162         198 foreach my $i (@{$Params{YesNo}}, @{$Params{Others}}) {
  162         194  
  162         213  
1188 954 100       1983 $GotParams{$i} = $Config->{$i} unless defined $GotParams{$i};
1189             }
1190             # yesno params
1191 162         168 foreach my $i (@{$Params{YesNo}}) {
  162         234  
1192 162 50 33     742 if (!$GotParams{$i} or uc $GotParams{$i} eq "NO") {
1193 0         0 $GotParams{$i} = "NO";
1194             } else {
1195 162         309 $GotParams{$i} = "YES";
1196             }
1197             }
1198             # normalize case
1199 162         158 foreach my $i (@{$Params{ToUpper}}) {
  162         219  
1200 594   66     1660 $GotParams{$i} &&= uc $GotParams{$i};
1201             }
1202 162         1055 return %GotParams;
1203             }
1204              
1205             #------------------------------
1206              
1207             =back
1208              
1209             =head2 Configuration Files
1210             B<**>
1211              
1212             Built-in defaults of option parameters for L
1213             (except 'Charset' option) and
1214             L can be overridden by configuration files:
1215             F and F.
1216             For more details read F.
1217              
1218             =head1 VERSION
1219              
1220             Consult C<$VERSION> variable.
1221              
1222             Development versions of this module may be found at
1223             L.
1224              
1225             =head1 SEE ALSO
1226              
1227             L,
1228             L
1229              
1230             =head1 AUTHORS
1231              
1232             The original version of function decode_mimewords() is derived from
1233             L module that was written by:
1234             Eryq (F), ZeeGee Software Inc (F).
1235             David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
1236              
1237             Other stuff are rewritten or added by:
1238             Hatuka*nezumi - IKEDA Soji .
1239              
1240             This program is free software; you can redistribute
1241             it and/or modify it under the same terms as Perl itself.
1242              
1243             =cut
1244              
1245             1;