File Coverage

blib/lib/MIME/EncWords.pm
Criterion Covered Total %
statement 387 451 85.8
branch 170 238 71.4
condition 118 180 65.5
subroutine 22 22 100.0
pod 3 3 100.0
total 700 894 78.3


line stmt bran cond sub pod time code
1             #-*- perl -*-
2              
3             package MIME::EncWords;
4             require 5.005;
5              
6             =head1 NAME
7              
8             MIME::EncWords - deal with RFC 2047 encoded words (improved)
9              
10             =head1 SYNOPSIS
11              
12             I is aimed to be another implimentation
13             of L so that it will achieve more exact conformance with
14             RFC 2047 (formerly RFC 1522) specifications. Additionally, it contains
15             some improvements.
16             Following synopsis and descriptions are inherited from its inspirer,
17             then added descriptions on improvements (B<**>) or changes and
18             clarifications (B<*>).>
19              
20             Before reading further, you should see L to make sure that
21             you understand where this module fits into the grand scheme of things.
22             Go on, do it now. I'll wait.
23              
24             Ready? Ok...
25              
26             use MIME::EncWords qw(:all);
27              
28             ### Decode the string into another string, forgetting the charsets:
29             $decoded = decode_mimewords(
30             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
31             );
32              
33             ### Split string into array of decoded [DATA,CHARSET] pairs:
34             @decoded = decode_mimewords(
35             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
36             );
37              
38             ### Encode a single unsafe word:
39             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
40              
41             ### Encode a string, trying to find the unsafe words inside it:
42             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
43              
44             =head1 DESCRIPTION
45              
46             Fellow Americans, you probably won't know what the hell this module
47             is for. Europeans, Russians, et al, you probably do. C<:-)>.
48              
49             For example, here's a valid MIME header you might get:
50              
51             From: =?US-ASCII?Q?Keith_Moore?=
52             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
53             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
54             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
55             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
56             =?US-ASCII?Q?.._cool!?=
57              
58             The fields basically decode to (sorry, I can only approximate the
59             Latin characters with 7 bit sequences /o and 'e):
60              
61             From: Keith Moore
62             To: Keld J/orn Simonsen
63             CC: Andr'e Pirard
64             Subject: If you can read this you understand the example... cool!
65              
66             B: Fellow Americans, Europeans, you probably won't know
67             what the hell this module is for. East Asians, et al, you probably do.
68             C<(^_^)>.
69              
70             For example, here's a valid MIME header you might get:
71              
72             Subject: =?EUC-KR?B?sNTAuLinKGxhemluZXNzKSwgwvzB9ri7seIoaW1w?=
73             =?EUC-KR?B?YXRpZW5jZSksILGzuLgoaHVicmlzKQ==?=
74              
75             The fields basically decode to (sorry, I cannot approximate the
76             non-Latin multibyte characters with any 7 bit sequences):
77              
78             Subject: ???(laziness), ????(impatience), ??(hubris)
79              
80             =head1 PUBLIC INTERFACE
81              
82             =over 4
83              
84             =cut
85              
86             ### Pragmas:
87 5     5   156129 use strict;
  5         10  
  5         230  
88 5     5   30 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA $Config);
  5         12  
  5         381  
89              
90             ### Exporting:
91 5     5   24 use Exporter;
  5         10  
  5         400  
92              
93             %EXPORT_TAGS = (all => [qw(decode_mimewords
94             encode_mimeword
95             encode_mimewords)]);
96             Exporter::export_ok_tags(qw(all));
97              
98             ### Inheritance:
99             @ISA = qw(Exporter);
100              
101             ### Other modules:
102 5     5   24 use Carp qw(croak carp);
  5         11  
  5         358  
103 5     5   2413 use MIME::Base64;
  5         3608  
  5         360  
104 5     5   2580 use MIME::Charset qw(:trans);
  5         102588  
  5         1261  
105              
106             my @ENCODE_SUBS = qw(FB_CROAK is_utf8 resolve_alias);
107             if (MIME::Charset::USE_ENCODE) {
108 5     5   31 eval "use ".MIME::Charset::USE_ENCODE." \@ENCODE_SUBS;";
  5         18  
  5         215  
109             if ($@) { # Perl 5.7.3 + Encode 0.40
110             eval "use ".MIME::Charset::USE_ENCODE." qw(is_utf8);";
111             require MIME::Charset::_Compat;
112             for my $sub (@ENCODE_SUBS) {
113 5     5   40 no strict "refs";
  5         8  
  5         543  
114             *{$sub} = \&{"MIME::Charset::_Compat::$sub"}
115             unless $sub eq 'is_utf8';
116             }
117             }
118             } else {
119             require Unicode::String;
120             require MIME::Charset::_Compat;
121             for my $sub (@ENCODE_SUBS) {
122 5     5   32 no strict "refs";
  5         13  
  5         33417  
123             *{$sub} = \&{"MIME::Charset::_Compat::$sub"};
124             }
125             }
126              
127             #------------------------------
128             #
129             # Globals...
130             #
131             #------------------------------
132              
133             ### The package version, both in 1.23 style *and* usable by MakeMaker:
134             $VERSION = '1.015.0';
135              
136             ### Public Configuration Attributes
137             $Config = {
138             %{$MIME::Charset::Config}, # Detect7bit, Replacement, Mapping
139             Charset => 'ISO-8859-1',
140             Encoding => 'A',
141             Field => undef,
142             Folding => "\n",
143             MaxLineLen => 76,
144             Minimal => 'YES',
145             };
146             eval {
147             local @INC = @INC;
148             pop @INC if $INC[-1] eq '.';
149             require MIME::EncWords::Defaults;
150             };
151              
152             ### Private Constants
153              
154             my $PRINTABLE = "\\x21-\\x7E";
155             #my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF";
156             my $NONPRINT = qr{[^$PRINTABLE]}; # Improvement: Unicode support.
157             my $UNSAFE = qr{[^\x01-\x20$PRINTABLE]};
158             my $WIDECHAR = qr{[^\x00-\xFF]};
159             my $ASCIITRANS = qr{^(?:HZ-GB-2312|UTF-7)$}i;
160             my $ASCIIINCOMPAT = qr{^UTF-(?:16|32)(?:BE|LE)?$}i;
161             my $DISPNAMESPECIAL = qr{[()<>\[\]:;@\\,."]}; # RFC5322 name-addr specials.
162              
163             #------------------------------
164              
165             # _utf_to_unicode CSETOBJ, STR
166             # Private: Convert UTF-16*/32* to Unicode or UTF-8.
167             sub _utf_to_unicode {
168 90     90   172 my $csetobj = shift;
169 90         157 my $str = shift;
170              
171 90 100       281 return $str if is_utf8($str);
172              
173 48         189 return $csetobj->decode($str)
174             if MIME::Charset::USE_ENCODE();
175              
176 0         0 my $cset = $csetobj->as_string;
177 0         0 my $unistr = Unicode::String->new();
178 0 0 0     0 if ($cset eq 'UTF-16' or $cset eq 'UTF-16BE') {
    0 0        
    0          
    0          
179 0         0 $unistr->utf16($str);
180             } elsif ($cset eq 'UTF-16LE') {
181 0         0 $unistr->utf16le($str);
182             } elsif ($cset eq 'UTF-32' or $cset eq 'UTF-32BE') {
183 0         0 $unistr->utf32($str);
184             } elsif ($cset eq 'UTF-32LE') {
185 0         0 $unistr->utf32le($str);
186             } else {
187 0         0 croak "unknown transformation '$cset'";
188             }
189 0         0 return $unistr->utf8;
190             }
191              
192             #------------------------------
193              
194             # _decode_B STRING
195             # Private: used by _decode_header() to decode "B" encoding.
196             # Improvement by this module: sanity check on encoded sequence.
197             sub _decode_B {
198 57     57   107 my $str = shift;
199 57 50 33     396 unless ((length($str) % 4 == 0) and
200             $str =~ m|^[A-Za-z0-9+/]+={0,2}$|) {
201 0         0 return undef;
202             }
203 57         249 return decode_base64($str);
204             }
205              
206             # _decode_Q STRING
207             # Private: used by _decode_header() to decode "Q" encoding, which is
208             # almost, but not exactly, quoted-printable. :-P
209             # Improvement by this module: sanity check on encoded sequence (>=1.012.3).
210             sub _decode_Q {
211 49     49   109 my $str = shift;
212 49 50       211 if ($str =~ /=(?![0-9a-fA-F][0-9a-fA-F])/) { #XXX:" " and "\t" are allowed
213 0         0 return undef;
214             }
215 49         180 $str =~ s/_/\x20/g; # RFC 2047, Q rule 2
216 49         184 $str =~ s/=([0-9a-fA-F]{2})/pack("C", hex($1))/ge; # RFC 2047, Q rule 1
  182         718  
217 49         139 $str;
218             }
219              
220             # _encode_B STRING
221             # Private: used by encode_mimeword() to encode "B" encoding.
222             sub _encode_B {
223 73     73   111 my $str = shift;
224 73         312 encode_base64($str, '');
225             }
226              
227             # _encode_Q STRING
228             # Private: used by encode_mimeword() to encode "Q" encoding, which is
229             # almost, but not exactly, quoted-printable. :-P
230             # Improvement by this module: Spaces are escaped by ``_''.
231             sub _encode_Q {
232 75     75   110 my $str = shift;
233             # Restrict characters to those listed in RFC 2047 section 5 (3)
234 75         325 $str =~ s{[^-!*+/0-9A-Za-z]}{
235 495 100       1766 $& eq "\x20"? "_": sprintf("=%02X", ord($&))
236             }eog;
237 75         206 $str;
238             }
239              
240             #------------------------------
241              
242             =item decode_mimewords ENCODED, [OPTS...]
243              
244             I
245             Go through the string looking for RFC 2047-style "Q"
246             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
247              
248             B splits the ENCODED string into a list of decoded
249             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
250             data are returned in a 1-element array C<[DATA]>, giving an effective
251             CHARSET of C.
252              
253             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
254             foreach (decode_mimewords($enc)) {
255             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
256             }
257              
258             B<**>
259             However, adjacent encoded-words with same charset will be concatenated
260             to handle multibyte sequences safely.
261              
262             B<**>
263             Language information defined by RFC2231, section 5 will be additional
264             third element, if any.
265              
266             B<*>
267             Whitespaces surrounding unencoded data will not be stripped so that
268             compatibility with L will be ensured.
269              
270             B joins the "data" elements of the above
271             list together, and returns that. I
272             and probably I what you want, but if you know that all charsets
273             in the ENCODED string are identical, it might be useful to you.
274             (Before you use this, please see L,
275             which is probably what you want.)
276             B<**>
277             See also "Charset" option below.
278              
279             In the event of a syntax error, $@ will be set to a description
280             of the error, but parsing will continue as best as possible (so as to
281             get I back when decoding headers).
282             $@ will be false if no error was detected.
283              
284             B<*>
285             Malformed encoded-words will be kept encoded.
286             In this case $@ will be set.
287              
288             Any arguments past the ENCODED string are taken to define a hash of options.
289             B<**>
290             When Unicode/multibyte support is disabled
291             (see L),
292             these options will not have any effects.
293              
294             =over 4
295              
296             =item Charset
297             B<**>
298              
299             Name of character set by which data elements in scalar context
300             will be converted.
301             The default is no conversion.
302             If this option is specified as special value C<"_UNICODE_">,
303             returned value will be Unicode string.
304              
305             B:
306             This feature is still information-lossy, I when C<"_UNICODE_"> is
307             specified.
308              
309             =item Detect7bit
310             B<**>
311              
312             Try to detect 7-bit charset on unencoded portions.
313             Default is C<"YES">.
314              
315             =cut
316              
317             #=item Field
318             #
319             #Name of the mail field this string came from. I
320              
321             =item Mapping
322             B<**>
323              
324             In scalar context, specify mappings actually used for charset names.
325             C<"EXTENDED"> uses extended mappings.
326             C<"STANDARD"> uses standardized strict mappings.
327             Default is C<"EXTENDED">.
328              
329             =back
330              
331             =cut
332              
333             sub decode_mimewords {
334 72     72 1 142561 my $encstr = shift;
335 72         226 my %params = @_;
336 72         399 my %Params = &_getparams(\%params,
337             NoDefault => [qw(Charset)], # default is no conv.
338             YesNo => [qw(Detect7bit)],
339             Others => [qw(Mapping)],
340             Obsoleted => [qw(Field)],
341             ToUpper => [qw(Charset Mapping)],
342             );
343             my $cset = MIME::Charset->new($Params{Charset},
344 72         604 Mapping => $Params{Mapping});
345             # unfolding: normalize linear-white-spaces and orphan newlines.
346 72 50       8519 $encstr =~ s/(?:[\r\n]+[\t ])*[\r\n]+([\t ]|\Z)/$1? " ": ""/eg;
  39         1017  
347 72         205 $encstr =~ s/[\r\n]+/ /g;
348              
349 72         127 my @tokens;
350 72         165 $@ = ''; ### error-return
351              
352             ### Decode:
353 72         143 my ($word, $charset, $language, $encoding, $enc, $dec);
354 72         154 my $spc = '';
355 72         252 pos($encstr) = 0;
356 72         147 while (1) {
357 265 100       683 last if (pos($encstr) >= length($encstr));
358 193         332 my $pos = pos($encstr); ### save it
359              
360             ### Case 1: are we looking at "=?..?..?="?
361 193 100       993 if ($encstr =~ m{\G # from where we left off..
362             =\?([^?]*) # "=?" + charset +
363             \?([bq]) # "?" + encoding +
364             \?([^?]+) # "?" + data maybe with spcs +
365             \?= # "?="
366             ([\r\n\t ]*)
367             }xgi) {
368 106         612 ($word, $charset, $encoding, $enc) = ($&, $1, lc($2), $3);
369 106         262 my $tspc = $4;
370              
371             # RFC 2231 section 5 extension
372 106 100       302 if ($charset =~ s/^([^\*]*)\*(.*)/$1/) {
373 4   50     16 $language = $2 || undef;
374 4   50     12 $charset ||= undef;
375             } else {
376 102         174 $language = undef;
377             }
378              
379 106 100       238 if ($encoding eq 'q') {
380 49         149 $dec = _decode_Q($enc);
381             } else {
382 57         142 $dec = _decode_B($enc);
383             }
384 106 50       253 unless (defined $dec) {
385 0         0 $@ .= qq|Illegal sequence in "$word" (pos $pos)\n|;
386 0         0 push @tokens, [$spc.$word];
387 0         0 $spc = '';
388 0         0 next;
389             }
390              
391 106         173 { local $@;
  106         168  
392 106 100 50     888 if (scalar(@tokens) and
    100 100        
    50 100        
      66        
      33        
      66        
393             lc($charset || "") eq lc($tokens[-1]->[1] || "") and
394             resolve_alias($charset) and
395             (!${tokens[-1]}[2] and !$language or
396             lc(${tokens[-1]}[2]) eq lc($language))) { # Concat words if possible.
397 24         10327 $tokens[-1]->[0] .= $dec;
398             } elsif ($language) {
399 4         13 push @tokens, [$dec, $charset, $language];
400             } elsif ($charset) {
401 78         274 push @tokens, [$dec, $charset];
402             } else {
403 0         0 push @tokens, [$dec];
404             }
405 106         258 $spc = $tspc;
406             }
407 106         226 next;
408             }
409              
410             ### Case 2: are we looking at a bad "=?..." prefix?
411             ### We need this to detect problems for case 3, which stops at "=?":
412 87         203 pos($encstr) = $pos; # reset the pointer.
413 87 100       255 if ($encstr =~ m{\G=\?}xg) {
414 6         17 $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
415 6         16 push @tokens, [$spc.'=?'];
416 6         10 $spc = '';
417 6         11 next;
418             }
419              
420             ### Case 3: are we looking at ordinary text?
421 81         173 pos($encstr) = $pos; # reset the pointer.
422 81 50       1008 if ($encstr =~ m{\G # from where we left off...
423             (.*? # shortest possible string,
424             \n*) # followed by 0 or more NLs,
425             (?=(\Z|=\?)) # terminated by "=?" or EOS
426             }xgs) {
427 81 50       243 length($1) or croak "MIME::EncWords: internal logic err: empty token\n";
428 81         297 push @tokens, [$spc.$1];
429 81         161 $spc = '';
430 81         155 next;
431             }
432              
433             ### Case 4: bug!
434 0         0 croak "MIME::EncWords: unexpected case:\n($encstr) pos $pos\n\t".
435             "Please alert developer.\n";
436             }
437 72 50       168 push @tokens, [$spc] if $spc;
438              
439             # Detect 7-bit charset
440 72 50       253 if ($Params{Detect7bit} ne "NO") {
441 72         97 local $@;
442 72         158 foreach my $t (@tokens) {
443 169 100 100     2135 unless ($t->[0] =~ $UNSAFE or $t->[1]) {
444 87         294 my $charset = MIME::Charset::_detect_7bit_charset($t->[0]);
445 87 50 33     8992 if ($charset and $charset ne &MIME::Charset::default()) {
446 0         0 $t->[1] = $charset;
447             }
448             }
449             }
450             }
451              
452 72 100       586 if (wantarray) {
453 24         179 @tokens;
454             } else {
455             join('', map {
456 48         138 &_convert($_->[0], $_->[1], $cset, $Params{Mapping})
457 111         561 } @tokens);
458             }
459             }
460              
461             #------------------------------
462              
463             # _convert RAW, FROMCHARSET, TOCHARSET, MAPPING
464             # Private: used by decode_mimewords() to convert string by other charset
465             # or to decode to Unicode.
466             # When source charset is unknown and Unicode string is requested, at first
467             # try well-formed UTF-8 then fallback to ISO-8859-1 so that almost all
468             # non-ASCII bytes will be preserved.
469             sub _convert($$$$) {
470 111     111   210 my $s = shift;
471 111         220 my $charset = shift;
472 111         162 my $cset = shift;
473 111         194 my $mapping = shift;
474 111 50       372 return $s unless &MIME::Charset::USE_ENCODE;
475 111 100       338 return $s unless $cset->as_string;
476 66 50 66     435 croak "unsupported charset ``".$cset->as_string."''"
477             unless $cset->decoder or $cset->as_string eq "_UNICODE_";
478              
479 66         657 local($@);
480 66         198 $charset = MIME::Charset->new($charset, Mapping => $mapping);
481 66 50 66     10400 if ($charset->as_string and $charset->as_string eq $cset->as_string) {
482 0         0 return $s;
483             }
484             # build charset object to transform string from $charset to $cset.
485 66         1883 $charset->encoder($cset);
486              
487 66         1090 my $converted = $s;
488 66 50 33     710 if (is_utf8($s) or $s =~ $WIDECHAR) {
    100          
    100          
489 0 0       0 if ($charset->output_charset ne "_UNICODE_") {
490 0         0 $converted = $charset->encode($s);
491             }
492             } elsif ($charset->output_charset eq "_UNICODE_") {
493 37 100       336 if (!$charset->decoder) {
494 18 50       152 if ($s =~ $UNSAFE) {
495 0         0 $@ = '';
496 0         0 eval {
497 0         0 $charset = MIME::Charset->new("UTF-8",
498             Mapping => 'STANDARD');
499 0         0 $converted = $charset->decode($converted, FB_CROAK());
500             };
501 0 0       0 if ($@) {
502 0         0 $converted = $s;
503 0         0 $charset = MIME::Charset->new("ISO-8859-1",
504             Mapping => 'STANDARD');
505 0         0 $converted = $charset->decode($converted, 0);
506             }
507             }
508             } else {
509 19         131 $converted = $charset->decode($s);
510             }
511             } elsif ($charset->decoder) {
512 18         275 $converted = $charset->encode($s);
513             }
514 66         1895 return $converted;
515             }
516              
517             #------------------------------
518              
519             =item encode_mimeword RAW, [ENCODING], [CHARSET]
520              
521             I
522             Encode a single RAW "word" that has unsafe characters.
523             The "word" will be encoded in its entirety.
524              
525             ### Encode "<>":
526             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
527              
528             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
529             B<**>
530             You may also specify it as ``special'' value: C<"S"> to choose shorter
531             one of either C<"Q"> or C<"B">.
532              
533             You may specify the CHARSET, which defaults to C.
534              
535             B<*>
536             Spaces will be escaped with ``_'' by C<"Q"> encoding.
537              
538             =cut
539              
540             sub encode_mimeword {
541 148     148 1 246 my $word = shift;
542 148   50     367 my $encoding = uc(shift || 'Q'); # not overridden.
543 148   50     420 my $charset = shift || 'ISO-8859-1'; # ditto.
544 148   50     460 my $language = uc(shift || ""); # ditto.
545              
546 148 50       316 if (ref $charset) {
547 148 50 33     1223 if (is_utf8($word) or $word =~ /$WIDECHAR/) {
548 0         0 $word = $charset->undecode($word, 0);
549             }
550 148         421 $charset = $charset->as_string;
551             } else {
552 0         0 $charset = uc($charset);
553             }
554 148         786 my $encstr;
555 148 100       336 if ($encoding eq 'Q') {
    50          
556 75         168 $encstr = &_encode_Q($word);
557             } elsif ($encoding eq "S") {
558 0         0 my ($B, $Q) = (&_encode_B($word), &_encode_Q($word));
559 0 0       0 if (length($B) < length($Q)) {
560 0         0 $encoding = "B";
561 0         0 $encstr = $B;
562             } else {
563 0         0 $encoding = "Q";
564 0         0 $encstr = $Q;
565             }
566             } else { # "B"
567 73         108 $encoding = "B";
568 73         142 $encstr = &_encode_B($word);
569             }
570              
571 148 50       288 if ($language) {
572 0         0 return "=?$charset*$language?$encoding?$encstr?=";
573             } else {
574 148         453 return "=?$charset?$encoding?$encstr?=";
575             }
576             }
577              
578             #------------------------------
579              
580             =item encode_mimewords RAW, [OPTS]
581              
582             I
583             Given a RAW string, try to find and encode all "unsafe" sequences
584             of characters:
585              
586             ### Encode a string with some unsafe "words":
587             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
588              
589             Returns the encoded string.
590              
591             B<**>
592             RAW may be a Unicode string when Unicode/multibyte support is enabled
593             (see L).
594             Furthermore, RAW may be a reference to that returned
595             by L on array context. In latter case "Charset"
596             option (see below) will be overridden (see also a note below).
597              
598             B:
599             B<*>
600             When RAW is an arrayref,
601             adjacent encoded-words (i.e. elements having non-ASCII charset element)
602             are concatenated. Then they are split taking
603             care of character boundaries of multibyte sequences when Unicode/multibyte
604             support is enabled.
605             Portions for unencoded data should include surrounding whitespace(s), or
606             they will be merged into adjoining encoded-word(s).
607              
608             Any arguments past the RAW string are taken to define a hash of options:
609              
610             =over 4
611              
612             =item Charset
613              
614             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
615             a.k.a. "Latin-1".
616              
617             =item Detect7bit
618             B<**>
619              
620             When "Encoding" option (see below) is specified as C<"a"> and "Charset"
621             option is unknown, try to detect 7-bit charset on given RAW string.
622             Default is C<"YES">.
623             When Unicode/multibyte support is disabled,
624             this option will not have any effects
625             (see L).
626              
627             =item Encoding
628              
629             The encoding to use, C<"q"> or C<"b">.
630             B<**>
631             You may also specify ``special'' values: C<"a"> will automatically choose
632             recommended encoding to use (with charset conversion if alternative
633             charset is recommended: see L);
634             C<"s"> will choose shorter one of either C<"q"> or C<"b">.
635             B:
636             B<*>
637             As of release 1.005, The default was changed from C<"q">
638             (the default on MIME::Words) to C<"a">.
639              
640             =item Field
641              
642             Name of the mail field this string will be used in.
643             B<**>
644             Length of mail field name will be considered in the first line of
645             encoded header.
646              
647             =item Folding
648             B<**>
649              
650             A Sequence to fold encoded lines. The default is C<"\n">.
651             If empty string C<""> is specified, encoded-words exceeding line length
652             (see L below) will be split by SPACE.
653              
654             B:
655             B<*>
656             Though RFC 5322 (formerly RFC 2822) states that the lines in
657             Internet messages are delimited by CRLF (C<"\r\n">),
658             this module chose LF (C<"\n">) as a default to keep backward compatibility.
659             When you use the default, you might need converting newlines
660             before encoded headers are thrown into session.
661              
662             =item Mapping
663             B<**>
664              
665             Specify mappings actually used for charset names.
666             C<"EXTENDED"> uses extended mappings.
667             C<"STANDARD"> uses standardized strict mappings.
668             The default is C<"EXTENDED">.
669             When Unicode/multibyte support is disabled,
670             this option will not have any effects
671             (see L).
672              
673             =item MaxLineLen
674             B<**>
675              
676             Maximum line length excluding newline.
677             The default is 76.
678             Negative value means unlimited line length (as of release 1.012.3).
679              
680             =item Minimal
681             B<**>
682              
683             Takes care of natural word separators (i.e. whitespaces)
684             in the text to be encoded.
685             If C<"NO"> is specified, this module will encode whole text
686             (if encoding needed) not regarding whitespaces;
687             encoded-words exceeding line length will be split based only on their
688             lengths.
689             Default is C<"YES"> by which minimal portions of text are encoded.
690             If C<"DISPNAME"> is specified, portions including special characters
691             described in RFC5322 (formerly RFC2822, RFC822) address specification
692             (section 3.4) are also encoded.
693             This is useful for encoding display-name of address fields.
694              
695             B:
696             As of release 0.040, default has been changed to C<"YES"> to ensure
697             compatibility with MIME::Words.
698             On earlier releases, this option was fixed to be C<"NO">.
699              
700             B:
701             C<"DISPNAME"> option was introduced at release 1.012.
702              
703             =item Replacement
704             B<**>
705              
706             See L.
707              
708             =back
709              
710             =cut
711              
712             sub encode_mimewords {
713 92     92 1 412218 my $words = shift;
714 92         351 my %params = @_;
715 92         538 my %Params = &_getparams(\%params,
716             YesNo => [qw(Detect7bit)],
717             Others => [qw(Charset Encoding Field Folding
718             Mapping MaxLineLen Minimal
719             Replacement)],
720             ToUpper => [qw(Charset Encoding Mapping Minimal
721             Replacement)],
722             );
723             croak "unsupported encoding ``$Params{Encoding}''"
724 92 50       598 unless $Params{Encoding} =~ /^[ABQS]$/;
725             # newline and following WSP
726 92         159 my ($fwsbrk, $fwsspc);
727 92 50       470 if ($Params{Folding} =~ m/^([\r\n]*)([\t ]?)$/) {
728 92         280 $fwsbrk = $1;
729 92   50     371 $fwsspc = $2 || " ";
730             } else {
731             croak sprintf "illegal folding sequence ``\\x%*v02X''", '\\x',
732 0         0 $Params{Folding};
733             }
734             # charset objects
735             my $charsetobj = MIME::Charset->new($Params{Charset},
736 92         498 Mapping => $Params{Mapping});
737 92         23775 my $ascii = MIME::Charset->new("US-ASCII", Mapping => 'STANDARD');
738 92         9228 $ascii->encoder($ascii);
739             # lengths
740             my $firstlinelen = $Params{MaxLineLen} -
741 92 100       1363 ($Params{Field}? length("$Params{Field}: "): 0);
742 92         175 my $maxrestlen = $Params{MaxLineLen} - length($fwsspc);
743             # minimal encoding flag
744 92 50       519 if (!$Params{Minimal}) {
    100          
745 0         0 $Params{Minimal} = 'NO';
746             } elsif ($Params{Minimal} !~ /^(NO|DISPNAME)$/) {
747 90         187 $Params{Minimal} = 'YES';
748             }
749             # unsafe ASCII sequences
750 92 50       1080 my $UNSAFEASCII = ($maxrestlen <= 1)?
751             qr{(?: =\? )}ox:
752             qr{(?: =\? | [$PRINTABLE]{$Params{MaxLineLen}} )}x;
753             $UNSAFEASCII = qr{(?: $DISPNAMESPECIAL | $UNSAFEASCII )}x
754 92 100       352 if $Params{Minimal} eq 'DISPNAME';
755              
756 92 100       253 unless (ref($words) eq "ARRAY") {
757             # workaround for UTF-16* & UTF-32*: force UTF-8.
758 68 100 100     227 if ($charsetobj->as_string
759             and $charsetobj->as_string =~ /$ASCIIINCOMPAT/) {
760 24         524 $words = _utf_to_unicode($charsetobj, $words);
761 24         741 $charsetobj = MIME::Charset->new('UTF-8');
762             }
763              
764 68         3075 my @words = ();
765             # unfolding: normalize linear-white-spaces and orphan newlines.
766 68 50       1393 $words =~ s/(?:[\r\n]+[\t ])*[\r\n]+([\t ]|\Z)/$1? " ": ""/eg;
  1         12  
767 68         207 $words =~ s/[\r\n]+/ /g;
768             # split if required
769 68 50       390 if ($Params{Minimal} =~ /YES|DISPNAME/) {
770 68         178 my ($spc, $unsafe_last) = ('', 0);
771 68         533 foreach my $w (split(/([\t ]+)/, $words)) {
772 658 50 66     1450 next unless scalar(@words) or length($w); # skip garbage
773 658 100       1558 if ($w =~ /[\t ]/) {
774 295         467 $spc = $w;
775 295         543 next;
776             }
777              
778             # workaround for ``ASCII transformation'' charsets
779 363         534 my $u = $w;
780 363 100 100     848 if ($charsetobj->as_string
781             and $charsetobj->as_string =~ /$ASCIITRANS/) {
782 6         65 if (MIME::Charset::USE_ENCODE) {
783 6 50 33     23 if (is_utf8($w) or $w =~ /$WIDECHAR/) {
784 0         0 $w = $charsetobj->undecode($u);
785             } else {
786 6         11 $u = $charsetobj->decode($w);
787             }
788             } elsif ($w =~ /[+~]/) { #FIXME: for pre-Encode environment
789             $u = "x$w";
790             }
791             }
792 363 100       4948 if (scalar(@words)) {
793 295 100 100     2860 if (($w =~ /$NONPRINT|$UNSAFEASCII/ or $u ne $w) xor
794             $unsafe_last) {
795 65 100       125 if ($unsafe_last) {
796 42         99 push @words, $spc.$w;
797             } else {
798 23         39 $words[-1] .= $spc;
799 23         37 push @words, $w;
800             }
801 65         116 $unsafe_last = not $unsafe_last;
802             } else {
803 230         570 $words[-1] .= $spc.$w;
804             }
805             } else {
806 68         213 push @words, $spc.$w;
807 68   66     797 $unsafe_last =
808             ($w =~ /$NONPRINT|$UNSAFEASCII/ or $u ne $w);
809             }
810 363         754 $spc = '';
811             }
812 68 50       231 if ($spc) {
813 0 0       0 if (scalar(@words)) {
814 0         0 $words[-1] .= $spc;
815             } else { # only WSPs
816 0         0 push @words, $spc;
817             }
818             }
819             } else {
820 0         0 @words = ($words);
821             }
822 68         157 $words = [map { [$_, $Params{Charset}] } @words];
  133         472  
823             }
824              
825             # Translate / concatenate words.
826 92         147 my @triplets;
827 92         216 foreach (@$words) {
828 217         534 my ($s, $cset) = @$_;
829 217 50       465 next unless length($s);
830             my $csetobj = MIME::Charset->new($cset || "",
831 217   100     760 Mapping => $Params{Mapping});
832              
833             # workaround for UTF-16*/UTF-32*: force UTF-8
834 217 100 100     23328 if ($csetobj->as_string and $csetobj->as_string =~ /$ASCIIINCOMPAT/) {
835 66         1468 $s = _utf_to_unicode($csetobj, $s);
836 66         780 $csetobj = MIME::Charset->new('UTF-8');
837             }
838              
839             # determine charset and encoding
840             # try defaults only if 7-bit charset detection is not required
841 217         7754 my $enc;
842 217         1250 my $obj = $csetobj;
843 217 100       490 unless ($obj->as_string) {
844 48 100 33     517 if ($Params{Encoding} ne "A" or $Params{Detect7bit} eq "NO" or
      66        
845             $s =~ /$UNSAFE/) {
846 6         8 $obj = $charsetobj;
847             }
848             }
849             ($s, $cset, $enc) =
850             $obj->header_encode($s,
851             Detect7bit => $Params{Detect7bit},
852             Replacement => $Params{Replacement},
853 217         1284 Encoding => $Params{Encoding});
854             # Resolve 'S' encoding based on global length. See (*).
855             $enc = 'S'
856             if defined $enc and
857             ($Params{Encoding} eq 'S' or
858 217 100 66     40693 $Params{Encoding} eq 'A' and $obj->header_encoding eq 'S');
      100        
859              
860             # pure ASCII
861 217 100 66     2691 if ($cset eq "US-ASCII" and !$enc and $s =~ /$UNSAFEASCII/) {
      100        
862             # pure ASCII with unsafe sequences should be encoded
863 7   66     21 $cset = $csetobj->output_charset ||
864             $charsetobj->output_charset ||
865             $ascii->output_charset;
866             $csetobj = MIME::Charset->new($cset,
867 7         90 Mapping => $Params{Mapping});
868             # Preserve original Encoding option unless it was 'A'.
869             $enc = ($Params{Encoding} eq 'A') ?
870             ($csetobj->header_encoding || 'Q') :
871 7 100 100     452 $Params{Encoding};
872             } else {
873             $csetobj = MIME::Charset->new($cset,
874 210         585 Mapping => $Params{Mapping});
875             }
876              
877             # Now no charset translations are needed.
878 217         16064 $csetobj->encoder($csetobj);
879              
880             # Concatenate adjacent ``words'' so that multibyte sequences will
881             # be handled safely.
882             # Note: Encoded-word and unencoded text must not adjoin without
883             # separating whitespace(s).
884 217 100       2940 if (scalar(@triplets)) {
885 125         180 my ($last, $lastenc, $lastcsetobj) = @{$triplets[-1]};
  125         309  
886 125 100 50     273 if ($csetobj->decoder and
    100 66        
    100 100        
      100        
      100        
      66        
      100        
      100        
      100        
887             ($lastcsetobj->as_string || "") eq $csetobj->as_string and
888             ($lastenc || "") eq ($enc || "")) {
889 26         467 $triplets[-1]->[0] .= $s;
890 26         131 next;
891             } elsif (!$lastenc and $enc and $last !~ /[\r\n\t ]$/) {
892 7 50       288 if ($last =~ /^(.*)([\r\n\t ])([$PRINTABLE]+)$/s) {
    0          
893 7         41 $triplets[-1]->[0] = $1.$2;
894 7         24 $s = $3.$s;
895             } elsif ($lastcsetobj->as_string eq "US-ASCII") {
896 0         0 $triplets[-1]->[0] .= $s;
897 0         0 $triplets[-1]->[1] = $enc;
898 0         0 $triplets[-1]->[2] = $csetobj;
899 0         0 next;
900             }
901             } elsif ($lastenc and !$enc and $s !~ /^[\r\n\t ]/) {
902 16 50       578 if ($s =~ /^([$PRINTABLE]+)([\r\n\t ])(.*)$/s) {
    0          
903 16         68 $triplets[-1]->[0] .= $1;
904 16         65 $s = $2.$3;
905             } elsif ($csetobj->as_string eq "US-ASCII") {
906 0         0 $triplets[-1]->[0] .= $s;
907 0         0 next;
908             }
909             }
910             }
911 191         2502 push @triplets, [$s, $enc, $csetobj];
912             }
913              
914             # (*) Resolve 'S' encoding based on global length.
915 92 100       194 my @s_enc = grep { $_->[1] and $_->[1] eq 'S' } @triplets;
  191         677  
916 92 100       210 if (scalar @s_enc) {
917 43         66 my $enc;
918 43 100       67 my $b = scalar grep { $_->[1] and $_->[1] eq 'B' } @triplets;
  87         265  
919 43 100       63 my $q = scalar grep { $_->[1] and $_->[1] eq 'Q' } @triplets;
  87         249  
920             # 'A' chooses 'B' or 'Q' when all other encoded-words have same enc.
921 43 100 66     360 if ($Params{Encoding} eq 'A' and $b and ! $q) {
    50 66        
      33        
      33        
922 7         16 $enc = 'B';
923             } elsif ($Params{Encoding} eq 'A' and ! $b and $q) {
924 0         0 $enc = 'Q';
925             # Otherwise, assuming 'Q', when characters to be encoded are more than
926             # 6th of total (plus a little fraction), 'B' will win.
927             # Note: This might give 'Q' so great advantage...
928             } else {
929 36         61 my @no_enc = grep { ! $_->[1] } @triplets;
  66         152  
930 36         82 my $total = length join('', map { $_->[0] } (@no_enc, @s_enc));
  66         169  
931 36         71 my $q = scalar(() = join('', map { $_->[0] } @s_enc) =~
  37         720  
932             m{[^- !*+/0-9A-Za-z]}g);
933 36 100       199 if ($total + 8 < $q * 6) {
934 21         48 $enc = 'B';
935             } else {
936 15         59 $enc = 'Q';
937             }
938             }
939 43         94 foreach (@triplets) {
940 87 100 100     326 $_->[1] = $enc if $_->[1] and $_->[1] eq 'S';
941             }
942             }
943              
944             # chop leading FWS
945 92   33     475 while (scalar(@triplets) and $triplets[0]->[0] =~ s/^[\r\n\t ]+//) {
946 0 0       0 shift @triplets unless length($triplets[0]->[0]);
947             }
948              
949             # Split long ``words''.
950 92         167 my @splitwords;
951             my $restlen;
952 92 50       201 if ($Params{MaxLineLen} < 0) {
953 0         0 @splitwords = @triplets;
954             } else {
955 92         139 $restlen = $firstlinelen;
956 92         159 foreach (@triplets) {
957 191         449 my ($s, $enc, $csetobj) = @$_;
958              
959 191         445 my @s = &_split($s, $enc, $csetobj, $restlen, $maxrestlen);
960 191         378 push @splitwords, @s;
961 191         282 my ($last, $lastenc, $lastcsetobj) = @{$s[-1]};
  191         445  
962 191         276 my $lastlen;
963 191 100       371 if ($lastenc) {
964 103         200 $lastlen = $lastcsetobj->encoded_header_len($last, $lastenc);
965             } else {
966 88         120 $lastlen = length($last);
967             }
968 191 100       2140 $restlen = $maxrestlen if scalar @s > 1; # has split; new line(s) fed
969 191         283 $restlen -= $lastlen;
970 191 100       628 $restlen = $maxrestlen if $restlen <= 1;
971             }
972             }
973              
974             # Do encoding.
975 92         151 my @lines;
976 92         131 $restlen = $firstlinelen;
977 92         224 foreach (@splitwords) {
978 285         661 my ($str, $encoding, $charsetobj) = @$_;
979 285 50       532 next unless length($str);
980              
981 285         372 my $s;
982 285 100       500 if (!$encoding) {
983 137         199 $s = $str;
984             } else {
985 148         321 $s = encode_mimeword($str, $encoding, $charsetobj);
986             }
987              
988 285 100 100     1729 my $spc = (scalar(@lines) and $lines[-1] =~ /[\r\n\t ]$/ or
989             $s =~ /^[\r\n\t ]/)? '': ' ';
990 285 100       897 if (!scalar(@lines)) {
    50          
    100          
991 92         250 push @lines, $s;
992             } elsif ($Params{MaxLineLen} < 0) {
993 0         0 $lines[-1] .= $spc.$s;
994             } elsif (length($lines[-1].$spc.$s) <= $restlen) {
995 95         279 $lines[-1] .= $spc.$s;
996             } else {
997 98 100       469 if ($lines[-1] =~ s/([\r\n\t ]+)$//) {
998 4         14 $s = $1.$s;
999             }
1000 98         307 $s =~ s/^[\r\n]*[\t ]//; # strip only one WSP replaced by FWS
1001 98         186 push @lines, $s;
1002 98         209 $restlen = $maxrestlen;
1003             }
1004             }
1005              
1006 92         1727 join($fwsbrk.$fwsspc, @lines);
1007             }
1008              
1009             #------------------------------
1010              
1011             # _split RAW, ENCODING, CHARSET_OBJECT, ROOM_OF_FIRST_LINE, MAXRESTLEN
1012             # Private: used by encode_mimewords() to split a string into
1013             # (encoded or non-encoded) words.
1014             # Returns an array of arrayrefs [SUBSTRING, ENCODING, CHARSET].
1015             sub _split {
1016 191     191   303 my $str = shift;
1017 191         316 my $encoding = shift;
1018 191         292 my $charset = shift;
1019 191         285 my $restlen = shift;
1020 191         251 my $maxrestlen = shift;
1021              
1022 191 50 33     489 if (!$charset->as_string or $charset->as_string eq '8BIT') {# Undecodable.
1023 0         0 $str =~ s/[\r\n]+[\t ]*|\x00/ /g; # Eliminate hostile characters.
1024 0         0 return ([$str, undef, $charset]);
1025             }
1026 191 100 66     2233 if (!$encoding and $charset->as_string eq 'US-ASCII') { # Pure ASCII.
1027 88         569 return &_split_ascii($str, $restlen, $maxrestlen);
1028             }
1029 103 50 50     231 if (!$charset->decoder and MIME::Charset::USE_ENCODE) { # Unsupported.
1030 0         0 return ([$str, $encoding, $charset]);
1031             }
1032              
1033 103         585 my (@splitwords, $ustr, $first);
1034 103         212 while (length($str)) {
1035 148 100       462 if ($charset->encoded_header_len($str, $encoding) <= $restlen) {
1036 102         2311 push @splitwords, [$str, $encoding, $charset];
1037 102         190 last;
1038             }
1039 46         1842 $ustr = $str;
1040 46 50 33     474 if (!(is_utf8($ustr) or $ustr =~ /$WIDECHAR/) and
      50        
1041             MIME::Charset::USE_ENCODE) {
1042 46         149 $ustr = $charset->decode($ustr);
1043             }
1044 46         1403 ($first, $str) = &_clip_unsafe($ustr, $encoding, $charset, $restlen);
1045             # retry splitting if failed
1046 46 100 66     234 if ($first and !$str and
      100        
1047             $maxrestlen < $charset->encoded_header_len($first, $encoding)) {
1048 4         77 ($first, $str) = &_clip_unsafe($ustr, $encoding, $charset,
1049             $maxrestlen);
1050             }
1051 46         136 push @splitwords, [$first, $encoding, $charset];
1052 46         111 $restlen = $maxrestlen;
1053             }
1054 103         268 return @splitwords;
1055             }
1056              
1057             # _split_ascii RAW, ROOM_OF_FIRST_LINE, MAXRESTLEN
1058             # Private: used by encode_mimewords() to split an US-ASCII string into
1059             # (encoded or non-encoded) words.
1060             # Returns an array of arrayrefs [SUBSTRING, undef, "US-ASCII"].
1061             sub _split_ascii {
1062 88     88   128 my $s = shift;
1063 88         128 my $restlen = shift;
1064 88         113 my $maxrestlen = shift;
1065 88   33     185 $restlen ||= $maxrestlen;
1066              
1067 88         151 my @splitwords;
1068 88         227 my $ascii = MIME::Charset->new("US-ASCII", Mapping => 'STANDARD');
1069 88         8465 foreach my $line (split(/(?:[\t ]*[\r\n]+)+/, $s)) {
1070 88         167 my $spc = '';
1071 88         632 foreach my $word (split(/([\t ]+)/, $line)) {
1072             # skip first garbage
1073 1006 50 66     2275 next unless scalar(@splitwords) or defined $word;
1074 1006 100       2214 if ($word =~ /[\t ]/) {
1075 475         713 $spc = $word;
1076 475         776 next;
1077             }
1078              
1079 531         864 my $cont = $spc.$word;
1080 531         774 my $elen = length($cont);
1081 531 100       975 next unless $elen;
1082 465 100       795 if (scalar(@splitwords)) {
1083             # Concatenate adjacent words so that encoded-word and
1084             # unencoded text will adjoin with separating whitespace.
1085 377 100       689 if ($elen <= $restlen) {
1086 328         582 $splitwords[-1]->[0] .= $cont;
1087 328         503 $restlen -= $elen;
1088             } else {
1089 49         137 push @splitwords, [$cont, undef, $ascii];
1090 49         82 $restlen = $maxrestlen - $elen;
1091             }
1092             } else {
1093 88         252 push @splitwords, [$cont, undef, $ascii];
1094 88         199 $restlen -= $elen;
1095             }
1096 465         813 $spc = '';
1097             }
1098 88 100       327 if ($spc) {
1099 32 50       68 if (scalar(@splitwords)) {
1100 32         65 $splitwords[-1]->[0] .= $spc;
1101 32         66 $restlen -= length($spc);
1102             } else { # only WSPs
1103 0         0 push @splitwords, [$spc, undef, $ascii];
1104 0         0 $restlen = $maxrestlen - length($spc);
1105             }
1106             }
1107             }
1108 88         366 return @splitwords;
1109             }
1110              
1111             # _clip_unsafe UNICODE, ENCODING, CHARSET_OBJECT, ROOM_OF_FIRST_LINE
1112             # Private: used by encode_mimewords() to bite off one encodable
1113             # ``word'' from a Unicode string.
1114             # Note: When Unicode/multibyte support is not enabled, character
1115             # boundaries of multibyte string shall be broken!
1116             sub _clip_unsafe {
1117 50     50   115 my $ustr = shift;
1118 50         122 my $encoding = shift;
1119 50         72 my $charset = shift;
1120 50         78 my $restlen = shift;
1121 50 50       131 return ("", "") unless length($ustr);
1122              
1123             # Seek maximal division point.
1124 50         162 my ($shorter, $longer) = (0, length($ustr));
1125 50         118 while ($shorter < $longer) {
1126 239         388 my $cur = ($shorter + $longer + 1) >> 1;
1127 239         536 my $enc = substr($ustr, 0, $cur);
1128 239         315 if (MIME::Charset::USE_ENCODE ne '') {
1129 239         543 $enc = $charset->undecode($enc);
1130             }
1131 239         8522 my $elen = $charset->encoded_header_len($enc, $encoding);
1132 239 100       6471 if ($elen <= $restlen) {
1133 129         329 $shorter = $cur;
1134             } else {
1135 110         261 $longer = $cur - 1;
1136             }
1137             }
1138              
1139             # Make sure that combined characters won't be divided.
1140 50         82 my ($fenc, $renc);
1141 50         102 my $max = length($ustr);
1142 50         70 while (1) {
1143 50         82 $@ = '';
1144 50         89 eval {
1145 50         227 ($fenc, $renc) =
1146             (substr($ustr, 0, $shorter), substr($ustr, $shorter));
1147 50         80 if (MIME::Charset::USE_ENCODE ne '') {
1148             # FIXME: croak if $renc =~ /^\p{M}/
1149 50         187 $fenc = $charset->undecode($fenc, FB_CROAK());
1150 50         1309 $renc = $charset->undecode($renc, FB_CROAK());
1151             }
1152             };
1153 50 50       1443 last unless ($@);
1154              
1155 0         0 $shorter++;
1156 0 0       0 unless ($shorter < $max) { # Unencodable character(s) may be included.
1157 0         0 return ($charset->undecode($ustr), "");
1158             }
1159             }
1160              
1161 50 100       109 if (length($fenc)) {
1162 46         187 return ($fenc, $renc);
1163             } else {
1164 4         15 return ($renc, "");
1165             }
1166             }
1167              
1168             #------------------------------
1169              
1170             # _getparams HASHREF, OPTS
1171             # Private: used to get option parameters.
1172             sub _getparams {
1173 164     164   288 my $params = shift;
1174 164         610 my %params = @_;
1175 164         293 my %Params;
1176             my %GotParams;
1177 164         360 foreach my $k (qw(NoDefault YesNo Others Obsoleted ToUpper)) {
1178 820   100     2556 $Params{$k} = $params{$k} || [];
1179             }
1180 164         468 foreach my $k (keys %$params) {
1181 342         559 my $supported = 0;
1182 342         479 foreach my $i (@{$Params{NoDefault}}, @{$Params{YesNo}},
  342         700  
  342         551  
1183 342         530 @{$Params{Others}}, @{$Params{Obsoleted}}) {
  342         602  
1184 1306 100       2917 if (lc $i eq lc $k) {
1185 342         787 $GotParams{$i} = $params->{$k};
1186 342         615 $supported = 1;
1187 342         562 last;
1188             }
1189             }
1190 342 50       830 carp "unknown or deprecated option ``$k''" unless $supported;
1191             }
1192             # get defaults
1193 164         303 foreach my $i (@{$Params{YesNo}}, @{$Params{Others}}) {
  164         285  
  164         347  
1194 972 100       2560 $GotParams{$i} = $Config->{$i} unless defined $GotParams{$i};
1195             }
1196             # yesno params
1197 164         303 foreach my $i (@{$Params{YesNo}}) {
  164         318  
1198 164 50 33     842 if (!$GotParams{$i} or uc $GotParams{$i} eq "NO") {
1199 0         0 $GotParams{$i} = "NO";
1200             } else {
1201 164         389 $GotParams{$i} = "YES";
1202             }
1203             }
1204             # normalize case
1205 164         238 foreach my $i (@{$Params{ToUpper}}) {
  164         336  
1206 604   66     2079 $GotParams{$i} &&= uc $GotParams{$i};
1207             }
1208 164         1555 return %GotParams;
1209             }
1210              
1211             #------------------------------
1212              
1213             =back
1214              
1215             =head2 Configuration Files
1216             B<**>
1217              
1218             Built-in defaults of option parameters for L
1219             (except 'Charset' option) and
1220             L can be overridden by configuration files:
1221             F and F.
1222             For more details read F.
1223              
1224             =head1 VERSION
1225              
1226             Consult C<$VERSION> variable.
1227              
1228             Development versions of this module may be found at
1229             L.
1230              
1231             =head1 SEE ALSO
1232              
1233             L,
1234             L
1235              
1236             =head1 AUTHORS
1237              
1238             The original version of function decode_mimewords() is derived from
1239             L module that was written by:
1240             Eryq (F), ZeeGee Software Inc (F).
1241             David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
1242              
1243             Other stuff are rewritten or added by:
1244             Hatuka*nezumi - IKEDA Soji .
1245              
1246             This program is free software; you can redistribute
1247             it and/or modify it under the same terms as Perl itself.
1248              
1249             =cut
1250              
1251             1;