File Coverage

lib/MIME/AltWords.pm
Criterion Covered Total %
statement 219 277 79.0
branch 101 126 80.1
condition 47 65 72.3
subroutine 37 38 97.3
pod 1 15 6.6
total 405 521 77.7


line stmt bran cond sub pod time code
1             #
2             # MIME/AltWords.pm -- a lot of fixes on MIME::Words
3             # by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4             # -- Fri Mar 31 18:41:14 CEST 2006
5             #
6             # Dat: this assumes Perl v5.8 or later
7             # Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8             # Dat: see `perldoc MIME::Words' for the original documentation
9             # Dat: a raw string has bytes in 0..255, and it is already encoded in some
10             # encoding
11             # SUXX: perldoc doesn't respect `=encoding utf-8'
12             # lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13             # !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14             # !! document all
15             # !! document test cases
16             # !! MANIFEST etc.
17             #
18              
19             package MIME::AltWords;
20 1     1   11 use v5.8; # Dat: Unicode string support etc.
  1         3  
21 1     1   4 use integer;
  1         1  
  1         5  
22 1     1   17 use strict;
  1         1  
  1         19  
23 1     1   247 use MIME::AltWords0; #use MIME::Words;
  1         2  
  1         23  
24 1     1   4 use MIME::Base64;
  1         1  
  1         31  
25 1     1   325 use Encode;
  1         7086  
  1         56  
26 1     1   6 use warnings;
  1         1  
  1         17  
27 1     1   4 use Exporter;
  1         1  
  1         26  
28 1     1   3 no warnings qw(prototype redefine);
  1         2  
  1         34  
29              
30             # vvv Dat: exports symbols from MIME::AltWords0 by default, works only
31             # without these
32             #use vars qw(@EXPORT_OK %EXPORT_TAGS);
33             #BEGIN {
34             # @EXPORT_OK=@MIME::AltWords0::EXPORT_OK;
35             # %EXPORT_TAGS=%MIME::AltWords0::EXPORT_TAGS;
36             ## die "@EXPORT_OK";
37             #}
38             #Exporter::export_ok_tags('all');
39              
40             =pod
41              
42             =encoding utf8
43              
44             =head1 NAME
45              
46             MIME::AltWords - properly deal with RFC-1522 encoded words
47              
48             =head1 SYNOPSIS
49              
50             The Perl module L is recommended for encoding and
51             decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
52             e-mail message headers (mostly Subject, From and To).
53              
54             L is similar to L in
55             L, but it provides an alternate implementation that follows the
56             MIME specification more carefully, and it is actually compatible with
57             existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
58             L extends the functionality of L (version
59             5.420) by adding more functions and more options to existing functions. The
60             original interface is changed in an upward-compatible way.
61              
62             Before reading further, you should see L to make sure that
63             you understand where this module fits into the grand scheme of things.
64             Go on, do it now. I'll wait.
65              
66             Ready? Ok...
67              
68             use MIME::AltWords qw(:all);
69            
70             ### Decode the string into another string, forgetting the charsets:
71             $decoded = decode_mimewords(
72             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
73             );
74            
75             ### Split string into array of decoded [DATA,CHARSET] pairs:
76             @decoded = decode_mimewords(
77             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
78             );
79            
80             ### Encode a single unsafe word:
81             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
82            
83             ### Encode a string, trying to find the unsafe words inside it:
84             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
85              
86              
87              
88             =head1 DESCRIPTION
89              
90             Fellow Americans, you probably won't know what the hell this module
91             is for. Europeans, Russians, et al, you probably do. C<:-)>.
92              
93             For example, here's a valid MIME header you might get:
94              
95             From: =?US-ASCII?Q?Keith_Moore?=
96             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
97             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
98             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
99             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
100             =?US-ASCII?Q?.._cool!?=
101              
102             The fields basically decode to (sorry, I can only approximate the
103             Latin characters with 7 bit sequences /o and 'e):
104              
105             From: Keith Moore
106             To: Keld J/orn Simonsen
107             CC: Andr'e Pirard
108             Subject: If you can read this you understand the example... cool!
109              
110              
111             =head1 PUBLIC INTERFACE
112              
113             =over 4
114              
115             =cut
116              
117              
118 1     1   4 use vars qw($NONPRINT $VERSION);
  1         1  
  1         37  
119              
120             ### The package version, both in 1.23 style *and* usable by MakeMaker:
121             BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
122 1     1   1311 $VERSION = "0.13"
123             }
124              
125             # Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
126             $NONPRINT=qr{(?:[^\x20-\x7E]|=)};
127             #$NONPRINT=qr{(?:[^\x20-\x7E]|[=](?=[?]))}; # Imp: is a bare `=' legal?
128              
129             #** @param $_[0] charset name
130             #** @return MIME canonical charset name
131             sub canonical_charset($) {
132 150     150 0 148 my $S=$_[0];
133 150 100       652 if ($S=~/\A(?:iso-?(?:8859-?)?|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
  79 100       217  
    100          
134 45         79 elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
135 3         4 elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
136 23         43 else { uc $S }
137             }
138              
139             #** @param $_[0] string.
140             #** @param $_[1] hashref. options
141             #** @param $_[2] string to append to
142             sub append_encoded_word($$$) {
143 97     97 0 193 my($word,$opts,$dst)=@_;
144 97 100       170 if ($opts->{Encoding} eq "B") {
    50          
145 29         64 $word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
146 29         46 $word=~s@\s+@@g;
147 29         39 $$dst.=$word
148             } elsif ($opts->{Encoding} eq "Q") {
149             # Dat: improved MIME::Words::_encode_Q
150 68 100       317 $word =~ s{( )|([_\?\=]|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
  306         1109  
151             : sprintf("=%02X", ord($2))}eog;
152 68         129 $$dst.=$word
153 0         0 } else { die }
154             undef
155 97         123 }
156              
157             #use vars qw($old_encode_mimewords);
158             #BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
159              
160             #** @param $_[0] string. Unicode
161             #** @param $_[1] hashref. options preprocessed by out encode_mimewords()
162             #** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
163             sub encode_mimeword1($$) {
164 90     90 0 144 my($src,$opts)=@_;
165             # Imp: warning if Encode::encode cannot represent character
166 90         93 my $dst="";
167 90         165 my $open="=?$opts->{Charset}?$opts->{Encoding}?";
168 90         85 my $maxlen=64; # Dat: good for a subject line !! 63 or 62
169             # ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
170             # ^^^ Dat: one quoted word shouldn't be too long
171             # ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
172 90         93 $maxlen-=length($open);
173 90 100       140 $maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
174             #print STDERR "($src) $maxlen\n";
175              
176 90         167 $src=Encode::encode($opts->{Charset},$src);
177 90 100       2860 if ($opts->{Shorts}) {
178 84         85 my $I=0;
179 84         133 while ($I
180             # Dat: split result for too long consecutive headers (i.e. long Subject: line)
181 91         92 my $J=$I+$maxlen;
182 91 100       124 $J=length($src) if $J>length($src);
183 91 100       135 if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
184 25 50       52 if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
185 0   0     0 $J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
186 0 0 0     0 $J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
187             # ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
188             }
189             }
190             # Imp: else: fix for other multibyte encodings
191 91         131 $dst.=$open;
192 91         91 my $addlen=-length($dst);
193 91         213 append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
194 91         126 $addlen+=length($dst);
195 91 100 100     247 if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
      66        
196             # Dat: too many hex `=..' triplets, become too long
197 7         9 my $K=length($dst);
198 7         15 while ($addlen>$maxlen) {
199 48 100       69 if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
  3         5  
  3         4  
  3         5  
200 45         40 else { $addlen--; $K--; $J-- }
  45         40  
  45         57  
201             }
202 7         12 substr($dst,$K)="";
203             # Imp: more efficient, don't process the same data many times
204             }
205 91         104 $dst.="?="; $dst.=$opts->{Space};
  91         89  
206 91         170 $I=$J;
207             }
208 6         10 } else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
  6         11  
  6         8  
209 90         132 $dst
210             }
211              
212             #** @returns the specified string quoted in double quotes. All characters
213             #** are printable ASCII.
214             sub dumpstr($) {
215 0     0 0 0 my $S=$_[0];
216 0         0 $S=~s@(["\\])|([^ -~])@
217 0 0       0 defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
218             : "\\$1" # Imp: Unicode chars
219             @ge;
220 0         0 "\"$S\"" #"
221             }
222              
223             #** Splits a string on spaces into lines so no line is longer than the maximum
224             #** (except if there is no space nearby).
225             #** Only the 1st space is converted to $_[2] at each break.
226             #** @param $_[0] string. to split
227             #** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
228             #** @param $_[2] chars to replace a space with -- not being added to the
229             #** maximum line length
230             sub split_words($$$) {
231 74     74 0 119 my($S,$maxlen,$nl)=@_;
232 74         65 my $lastpos=0; my $I=0; my $J;
  74         71  
  74         70  
233             #** Position after last space to split at, or $I
234             my $K;
235 74         75 my $ret="";
236 74         95 while (1) { # Imp: faster implementation
237 88         74 $K=$J=$I;
238 88   66     334 $J++ while $J
239 88 100 100     168 while ($K==$I ? ($J
240 2878         2708 my $C=substr($S,$J,1);
241 2878 50       3528 if ($C eq"\n") { $K=$I=++$J }
  0 100       0  
242 65         140 elsif ($C eq " ") { $K=++$J }
243 2813         3941 else { $J++ }
244             }
245 88 100 100     171 if ($K>$I and $J>$I+$maxlen) {
246 18         40 $ret.=substr($S,$I,$K-1-$I);
247 18         18 $ret.=$nl;
248 18         21 $I=$K; # Imp: skip more
249             }
250 88 100       121 if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
  74         124  
  74         84  
251             }
252             $ret
253 74         131 }
254              
255             =item encode_mimewords RAW, [OPTS]
256              
257             I
258             Given a RAW string, try to find and encode all "unsafe" sequences
259             of characters:
260              
261             ### Encode a string with some unsafe "words":
262             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
263              
264             Returns the encoded string.
265             Any arguments past the RAW string are taken to define a hash of options:
266              
267             =over 4
268              
269             =item Charset
270              
271             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
272             a.k.a. "Latin-1".
273              
274             =item Encoding
275              
276             The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
277              
278             =item Field
279              
280             Name of the mail field this string will be used in. I
281              
282             =back
283              
284             B this is a stable, tested, widely compatible solution. Strict
285             compliance with RFC-1522 (regarding the use of encoded words in message
286             headers), however, was not proven, but strings returned by this function
287             work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
288             recommended way is to use this function instead of C or
289             L.
290              
291             =cut
292              
293              
294             #** Dat: no prototype, because original encode_mimewords() doesn't have it
295             #** a prototype either
296             #** @param $_[0] string|raw string. raw
297             #** @param $_[1].. list of key-value pairs. options
298             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
299             #** Charset is now autodetected (not always ISO-8859-1)
300             #** New key: Raw:
301             #** -- 1 (default): true: $_[0] is already a raw, encoded string
302             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
303             # with Encode::encode(Charset)
304             #** New key: Shorts:
305             #** -- 1 (default for encode_mimewords)
306             #** -- 0 (default for encode_mimeword)
307             #** New key: Whole: (is respected iff Shorts==0)
308             #** -- 1 (default): quote the string as a whole word (not default in the original!)
309             #** -- 0: encode subwords
310             #** New key: Keeptrailnl
311             #** -- 0: treat trailing newline as unprintable
312             #** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
313             # keep trailing newline at the end
314             #** !! doc more, including new keys
315             sub encode_mimewords_low {
316 80     80 0 726 my($S,%opts)=@_;
317 80 50       157 return undef if !defined($S);
318             # die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
319             #die unless open LOG, ">> /tmp/b.log";
320             #die unless print LOG "=> ".dumpstr($S)."\n";
321             #die unless close LOG;
322             #$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
323 80 100       144 $opts{Raw}=1 if !defined $opts{Raw};
324 80 100       137 $opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
325 80 50       114 die if !defined $opts{Charset};
326 80         124 $opts{Charset}=canonical_charset($opts{Charset});
327 80 100       134 if ($opts{Raw}) { # Dat: improvement
328 46         46 $opts{Raw}=0;
329             #die if !defined $S;
330 46         90 $S=Encode::decode($opts{Charset}, $S);
331             # ^^^ Dat: better do a Unicode regexp match
332             }
333 79 100       1996 $opts{Encoding}=defined($opts{Encoding}) ? uc($opts{Encoding}) : "Q";
334 79 100       149 $opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
335 79 100 100     239 $opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
336             # ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
337             # bad, Pine doesn't display quoted-printable properly
338             # (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
339             # the other way;
340             # We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
341 79 100       114 $opts{Shorts}=1 if !defined $opts{Shorts};
342 79 100       109 $opts{Whole}=1 if !defined $opts{Whole};
343 79 50       118 $opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
344 79 50       120 $opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
345 79 100       102 $opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
346 79         79 my $toend="";
347 79 100 100     303 $toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
348 79 100       167 if (!$opts{Shorts}) {
    100          
349 6         10 $S=encode_mimeword1($S,\%opts)
350             } elsif ($opts{Whole}) {
351 46 100       157 if ($S=~/$NONPRINT/o) {
352 43         85 $S=encode_mimeword1($S,\%opts);
353 43         62 substr($S,-1)=""; # Dat: remove last space
354             }
355 46 50       96 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
356             } else {
357 27         28 my $lastpos=0;
358 27         145 while ($S=~/($NONPRINT[^ ]* *)/go) {
359             # ^^^ Dat: having ` *' is a must here, other clients just forget about it
360 41         99 my $I=pos($S)-length($1);
361 41   100     259 $I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
362 41         51 my $pos=pos($S); my $D;
  41         34  
363 41   33     318 1 while ($pos=pos($S)) and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and defined($D=$1)
      66        
      66        
364             and $S=~/\G[^ ]* */goc;
365 41 50       97 pos($S)=$pos if !defined $D;
366 41         64 my $srclen=pos($S)-$I;
367 41         67 my $src=substr($S,$I,$srclen);
368             ##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
369 41 50 66     287 if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and !defined($1)) {
      66        
      33        
370             ##print STDERR "Strip ending space\n";
371 29         58 substr($src,-1)=""; # Dat: see test case 'ignore_space'
372             }
373             # Dat: now pos($S) is invalid
374 41 50       68 die if 1>length($src);
375             ##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
376 41         73 my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
377             ##print STDERR substr($S,$I,$srclen),";;\n";
378 41         117 substr($S,$I,$srclen)=$dst; # Imp: do with less copying
379 41         227 $lastpos=pos($S)=$I+length($dst);
380             }
381             substr($S,-length($opts{Space}))="" if
382 27 100 66     95 0
383 27 50       62 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
384             }
385 79         101 $S.=$toend;
386             #$S=~s@ @\n @g; # !! debug
387             #die unless open LOG, ">> /tmp/b.log";
388             #die unless print LOG "T> ".dumpstr($S)."\n";
389             #die unless close LOG;
390 79         335 $S
391             }
392              
393             #use vars qw($old_encode_mimeword);
394             #BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
395              
396             =item encode_mimeword RAW, [ENCODING], [CHARSET]
397              
398             I
399             Encode a single RAW "word" that has unsafe characters.
400             The "word" will be encoded in its entirety.
401              
402             ### Encode "<>":
403             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
404              
405             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
406             You may specify the CHARSET, which defaults to C.
407              
408             =cut
409              
410              
411             #** Dat: no prototype, because original encode_mimeword() doesn't have it
412             #** a prototype either
413             #** @param $_[0] raw string. raw
414             #** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
415             #** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
416             #** not as in its documentation
417             sub encode_mimeword {
418             #sub encode_mimeword($;$$) {
419 6     6 1 17 encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
420             }
421              
422 1     1   6 use vars qw($old_decode_mimewords);
  1         2  
  1         38  
423 1     1   226 BEGIN { $old_decode_mimewords=\&MIME::AltWords::decode_mimewords_wantarray } #### pts #### AltWords.pm adds `_wantarray'
424              
425             #** Dat: function added by #### pts ####
426             #** @param $_[0] a mimewords-encoded string
427             #** @return a canonical encoding name with which the string can be re-encoded
428             sub get_best_decode_charset($) {
429 23     23 0 27 my $encodedstr=$_[0];
430 23         21 my @L;
431 23         32 for my $token (decode_mimewords($encodedstr)) {
432 45   100     100 my $charset=canonical_charset($token->[1] or "");
433 45 100 100     134 push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
      100        
434             }
435 23 100       55 @L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
436 23         46 $L[0]
437             }
438              
439             =item decode_mimewords ENCODED, [OPTS...]
440              
441             I
442             Go through the string looking for RFC-1522-style "Q"
443             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
444              
445             B splits the ENCODED string into a list of decoded
446             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
447             data are returned in a 1-element array C<[DATA]>, giving an effective
448             CHARSET of C.
449              
450             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
451             foreach (decode_mimewords($enc)) {
452             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
453             }
454              
455             B joins the "data" elements of the above list
456             together, and returns that. I it
457             sanitizes the returned string to use a specific, single charset, either
458             specified using the C option, or autodetecting one (ISO-8859-1,
459             ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
460             autodetection, C can be used to query the charset
461             autodetected.
462              
463             You might want to see L as an alternate of
464             L.
465              
466             In the event of a syntax error, $@ will be set to a description
467             of the error, but parsing will continue as best as possible (so as to
468             get I back when decoding headers).
469             $@ will be false if no error was detected.
470              
471             Any arguments past the ENCODED string are taken to define a hash of options:
472              
473             =over 4
474              
475             =item Field
476              
477             Name of the mail field this string came from. I
478              
479             =back
480              
481             =cut
482              
483             #** Dat: no prototype, because original decode_mimewords() doesn't have it
484             #** a prototype either
485             #** Dat: it is unsafe to use this without the Raw=>1 option. Search for
486             #** MIME::WordDecoder in `perldoc MIME::Words'.
487             #** @param $_[0] a mimewords-encoded string
488             #** @param $_[1]... list of options (key=>value pairs).
489             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
490             #** New key: Raw:
491             #** -- 1 (default): true: return a raw, encoded string. The encoding will
492             #** be Charset, or the one returned by get_best_decode_charset().
493             #** This is an improvement
494             #** by #### pts #### -- the original decode_mimewords() didn't return
495             #** the string in a consistent encoding.
496             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
497             #** with Encode::encode(Charset)
498             #** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
499             sub decode_mimewords_low {
500 50 100   50 0 108 return $old_decode_mimewords->(@_) if wantarray;
501 27         48 my($encodedstr,%opts)=@_;
502 27 100       48 $opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
503 27         24 my $ret='';
504             # vvv Dat: not mutually recursive, because get_best_decode_charset() calls
505             # decode_mimewords() in list context, so this line won't be
506             # reached.
507             $opts{Charset}=get_best_decode_charset($encodedstr) if
508 27 100 100     81 $opts{Raw} and !defined $opts{Charset};
509 27         29 my $S;
510 27         60 for my $token ($old_decode_mimewords->($encodedstr,%opts)) { # Dat: $charset in $token->[1]
511 55 100       121 $S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
512 55 100       1110 $S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
513 55         3753 $ret.=$S
514             }
515             $ret
516 27         108 }
517              
518 1     1   5 use vars qw(@encode_subject_opts);
  1         1  
  1         34  
519 1     1   67 BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
520              
521             #** Dat: function added by #### pts ####
522             #** @param $_[0] String. A mimewords-encoded e-mail subject.
523             #** @return String. better mimewords-encoded
524             sub fix_subject($) {
525 1     1 0 2 my $encodedstr=$_[0];
526 1         2 my $best_charset=get_best_decode_charset($encodedstr);
527 1         2 my $decoded=decode_mimewords($encodedstr, Raw=>0);
528 1         3 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
529             }
530              
531 1     1   5 use vars qw(@encode_addresses_opts);
  1         1  
  1         39  
532 1     1   125 BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
533              
534             #** Dat: function added by #### pts ####
535             #** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
536             #** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
537             #** @return String. better mimewords-encoded
538             sub fix_addresses($) {
539 3     3 0 25 my $encodedstr=$_[0];
540 3         7 my $best_charset=get_best_decode_charset($encodedstr);
541 3         6 my $decoded=decode_mimewords($encodedstr, Raw=>0);
542             #print STDERR "DE($decoded)\n";
543             #chomp $decoded;
544             #$decoded.=" alma ";
545 3         8 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
546             }
547              
548             #** Dat: function added by #### pts ####
549             #** @param $_[0] a Unicode string
550             #** @param $_[1] a charset
551             #** @return Boolean: is it encodable?
552             sub is_encodable($$) {
553 48     48 0 63 my $charset=uc$_[1];
554 48         49 my $S=$_[0]; # Dat: must be copied for Encode::encode
555 48 100 66     143 return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
556 45         47 eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
  45         80  
557 45 100       1415 $@ ? 0 : 1
558             }
559              
560             #** Please don't include US-ASCII.
561             #** Specify UTF-8 last (because get_best_encode_charset() ignores everything
562             #** after the first UTF-8).
563 1     1   6 use vars qw(@encode_charsets);
  1         1  
  1         35  
564 1     1   216 BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
565              
566             #** Dat: function added by #### pts ####
567             #** @param $_[0] string to try
568             sub get_best_encode_charset($) {
569 39     39 0 60 for my $charset (@encode_charsets) {
570 48 100       70 return $charset if is_encodable($_[0], $charset);
571             }
572 0         0 return 'UTF-8'
573             }
574              
575             #** Dat: function added by #### pts ####
576             #** @param $_[0] String. Perl Unicode-string
577             #** @param $_[1] encode mode: 'subject' or 'addresses'
578             #** @return String. better mimewords-encoded with the best charset
579             sub encode_unicode($$) {
580 6     6 0 9 my $str=$_[0];
581 6 100       14 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
582 6         9 my $best_charset=get_best_encode_charset($str);
583 6         13 encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
584             }
585              
586             #** Dat: function added by #### pts ####
587             #** @param $_[0] String. Perl 8-bit string, encoded in $charset
588             #** @param $_[1] encode mode: 'subject' or 'addresses'
589             #** @param $_[2] $charset;
590             #** @return String. better mimewords-encoded with the best charset
591             sub encode_8bit($$$) {
592 6     6 0 9 my $str=$_[0];
593 6         7 my $charset=$_[2];
594 6 100       13 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
595 6         10 $charset=canonical_charset($charset);
596 6         7 my $best_charset;
597 6         9 for my $charset2 (@encode_charsets) {
598 13 100       15 if (canonical_charset($charset2) eq $charset) {
599 5         6 $best_charset=$charset; last
600 5         7 }
601             }
602 6 100       11 $best_charset=canonical_charset(get_best_encode_charset($str)) if
603             !defined $best_charset;
604 6 100       15 ($charset eq $best_charset) ? # Imp: find badly encoded string...
605             encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
606             encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
607             }
608              
609             # --- Logging...
610              
611 1     1   16 BEGIN { *encode_mimewords=\&encode_mimewords_low }
612 1     1   27 BEGIN { *decode_mimewords=\&decode_mimewords_low }
613              
614 1 50   1   62 BEGIN { if ($main::DEBUG) {
615              
616             #use vars qw($orig_encode_mimewords $orig_decode_mimewords);
617 1     1   4 no warnings qw(prototype redefine);
  1         1  
  1         357  
618              
619             #BEGIN { $orig_encode_mimewords=\&encode_mimewords }
620             *encode_mimewords=sub {
621 0         0 require Carp;
622 0         0 my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
623 0         0 my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
624 0         0 $dump.=$ret."\n";
625 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
626 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
627 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
628 0         0 binmode($log, ':utf8');
629 0         0 print $log $dump;
630 0         0 Carp::cluck("^^^ encode_mimewords() ");
631 0         0 close $log;
632             }
633             $ret
634 0         0 };
  0         0  
635              
636             #BEGIN { $orig_decode_mimewords=\&decode_mimewords }
637             # Imp: copy prototype of original...
638             *decode_mimewords=sub {
639 0         0 require Carp;
640 0         0 my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
641 0 0       0 if (wantarray) {
642 0         0 my @L=(&decode_mimewords_low(@_));
643 0         0 $dump.="@L (ary)\n";
644 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
645 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
646 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
647 0         0 binmode($log, ':utf8');
648 0         0 print $log $dump;
649 0         0 Carp::cluck("^^^ decode_mimewords() ");
650 0         0 close $log;
651             }
652             @L
653 0         0 } else {
654 0         0 my $ret=decode_mimewords_low(@_);
655 0         0 $dump.=$ret."\n";
656 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
657 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
658 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
659 0         0 binmode($log, ':utf8');
660 0         0 print $log $dump;
661 0         0 Carp::cluck("^^^ decode_mimewords() ");
662 0         0 close $log;
663             }
664             $ret
665 0         0 }
666 0         0 };
667              
668             } }
669              
670             # ---
671              
672             =back
673              
674             =head1 NOTES
675              
676             Exports its principle functions by default, in keeping with
677             L and L.
678              
679             Doesn't depend on L or L. All the shared code is
680             copied to L, which is bundled.
681              
682             See also L for the previous version
683             of L integrated into the Sympa 4 mailing list software.
684              
685             =head1 AUTHOR
686              
687             L was written by
688             PĂŠter SzabĂł (F) in 2006, and it has been uploaded to CPAN on
689             2006-09-27.
690              
691             L uses code from L (in the file
692             C) and it uses documentation from L
693             (in the files C and C).
694              
695             Here is the original author and copyright information for L.
696              
697             Eryq (F), ZeeGee Software Inc (F).
698             David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
699              
700             All rights reserved. This program is free software; you can redistribute
701             it and/or modify it under the same terms as Perl itself.
702              
703             Thanks also to...
704              
705             Kent Boortz For providing the idea, and the baseline
706             RFC-1522-decoding code!
707             KJJ at PrimeNet For requesting that this be split into
708             its own module.
709             Stephane Barizien For reporting a nasty bug.
710              
711              
712             =head1 VERSION
713              
714             See $VERSION in C .
715              
716             =cut
717              
718             =begin testing
719              
720             is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
721             is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
722             is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
723             is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
724             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
725             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
726             "az ?rkiköt? földi adatai");
727             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
728             "az ?rkiköt? földi adatai");
729             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
730             "az űrkikötő földi adatai");
731             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
732             "az űrkikötő földi adatai");
733             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
734             "az űrkikötő földi adatai"); # Dat: guess Charset to ISO-8859-2
735             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
736             is(MIME::AltWords::encode_mimewords("az űrkikötő földi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
737             is(MIME::AltWords::encode_mimewords("az űrkikötő földi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
738             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
739             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
740             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
741             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
742             { my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
743             ok($@=~/^Wide character /); # Dat: Encode::decode fails
744             }
745             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
746             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
747             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
748             "ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
749             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
750             "=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
751             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
752             "ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
753             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
754             "ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
755             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
756             "=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
757             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
758             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
759             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
760             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
761              
762             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
763             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
764             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
765             "szóköz) kapcsolatban áll egy nem kizárással.",
766             Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
767             "A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
768              
769             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
770             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
771             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
772             "szóköz) kapcsolatban áll egy nem kizárással.",
773             Charset=>"ISO-8859-2",Raw=>0),
774             "=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
775             # vvv Dat: composing with Pine emits:
776             #is(MIME::AltWords::encode_mimewords("A keresési eredményekből bizonyos szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel előzi meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy szóköz) kapcsolatban áll egy nem kizárással.",
777             #"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
778             # =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
779             # =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
780             # =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
781             # =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
782             # =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
783              
784             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
785             Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
786             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
787             Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
788              
789             is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
790              
791             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
792             "=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
793             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
794             "=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
795             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
796             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
797             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
798             "=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
799              
800             SKIP: {
801             eval { require Mail::Address };
802             skip "Mail::Address not installed", 1 if $@;
803             my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
804             my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
805             $address="undef" if !defined $address;
806             is($address, "pts\@our.um");
807             }
808              
809             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
810             "\x{C5}\x{B1}", 'decode_mimewords()');
811             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
812             "\x{171}", 'decode_mimewords()');
813              
814             is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
815             'ISO-8859-2', 'get_best_decode_charset()');
816             is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
817             'UTF-8', 'get_best_decode_charset()');
818             is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
819              
820             is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
821             "=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
822             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
823             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
824             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
825             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
826             is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
827             "=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
828              
829             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
830             "f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
831             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
832             "f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
833             #die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
834             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
835             "Szabó Péter", 'decode_mimewords()');
836              
837             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
838             "f\x{C3}\x{A9}l", 'encode+decode mimewords');
839             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
840             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
841             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
842             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
843             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
844             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
845             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
846             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
847             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
848             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
849             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
850             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
851              
852             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
853             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
854             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
855             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
856             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
857             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
858              
859             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
860             "m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
861              
862             is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
863             is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
864             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
865             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
866              
867             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
868             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
869             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
870             is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
871              
872             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
873             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
874              
875             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
876             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
877             is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
878             is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
879             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
880             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
881              
882             is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
883             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
884              
885             is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
886             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
887              
888             is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
889             "=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
890              
891             is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
892             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld Jřrn Simonsen ', "MIME::Words test case 2");
893             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'André Pirard ', "MIME::Words test case 3");
894             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
895             is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
896             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
897             # vvv !! is this correct (space after \n)?
898             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
899             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
900              
901             my $in0 = Encode::encode("windows-1251", "\x{422}\x{435}\x{441}\x{442}\x{438}\x{440}\x{43e}\x{432}\x{430}\x{43d}\x{438}\x{435}");
902             my $out0b = "=?WINDOWS-1251?B?0uXx8ujw7uLg7ejl?=";
903             my $out0q = "=?WINDOWS-1251?Q?=D2=E5=F1=F2=E8=F0=EE=E2=E0=ED=E8=E5?=";
904             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"B"), $out0b);
905             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"Q"), $out0q);
906             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"q"), $out0q);
907             is(MIME::AltWords::encode_mimeword($in0, "B", "windows-1251"), $out0b);
908             is(MIME::AltWords::encode_mimeword($in0, "Q", "windows-1251"), $out0q);
909             is(MIME::AltWords::encode_mimeword($in0, "q", "windows-1251"), $out0q);
910             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"b"), $out0b);
911             is(MIME::AltWords::encode_mimeword($in0, "b", "windows-1251"), $out0b);
912              
913             =cut
914              
915             1