File Coverage

lib/MIME/AltWords.pm
Criterion Covered Total %
statement 245 318 77.0
branch 111 144 77.0
condition 47 65 72.3
subroutine 38 41 92.6
pod 1 16 6.2
total 442 584 75.6


line stmt bran cond sub pod time code
1             #
2             # MIME/AltWords.pm -- a lot of fixes on MIME::Words
3             # by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4             # -- Fri Mar 31 18:41:14 CEST 2006
5             #
6             # Dat: this assumes Perl v5.8 or later
7             # Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8             # Dat: see `perldoc MIME::Words' for the original documentation
9             # Dat: a raw string has bytes in 0..255, and it is already encoded in some
10             # encoding
11             # SUXX: perldoc doesn't respect `=encoding utf-8'
12             # lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13             # !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14             # !! document all
15             # !! document test cases
16             # !! MANIFEST etc.
17             #
18              
19             package MIME::AltWords;
20 1     1   17 use v5.8; # Dat: Unicode string support etc.
  1         3  
21 1     1   4 use integer;
  1         2  
  1         9  
22 1     1   20 use strict;
  1         1  
  1         18  
23 1     1   259 use MIME::Base64;
  1         541  
  1         59  
24 1     1   275 use MIME::QuotedPrint;
  1         177  
  1         40  
25 1     1   373 use Encode;
  1         7229  
  1         61  
26 1     1   7 use warnings;
  1         1  
  1         19  
27 1     1   4 use Exporter;
  1         2  
  1         29  
28 1     1   4 no warnings qw(prototype redefine);
  1         1  
  1         36  
29              
30             =pod
31              
32             =encoding utf8
33              
34             =head1 NAME
35              
36             MIME::AltWords - properly deal with RFC-1522 encoded words
37              
38             =head1 SYNOPSIS
39              
40             The Perl module L is recommended for encoding and
41             decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
42             e-mail message headers (mostly Subject, From and To).
43              
44             L is similar to L in
45             L, but it provides an alternate implementation that follows the
46             MIME specification more carefully, and it is actually compatible with
47             existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
48             L extends the functionality of L (version
49             5.420) by adding more functions and more options to existing functions. The
50             original interface is changed in an upward-compatible way.
51              
52             Before reading further, you should see L to make sure that
53             you understand where this module fits into the grand scheme of things.
54             Go on, do it now. I'll wait.
55              
56             Ready? Ok...
57              
58             use MIME::AltWords qw(:all);
59            
60             ### Decode the string into another string, forgetting the charsets:
61             $decoded = decode_mimewords(
62             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
63             );
64            
65             ### Split string into array of decoded [DATA,CHARSET] pairs:
66             @decoded = decode_mimewords(
67             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
68             );
69            
70             ### Encode a single unsafe word:
71             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
72            
73             ### Encode a string, trying to find the unsafe words inside it:
74             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
75              
76              
77              
78             =head1 DESCRIPTION
79              
80             Fellow Americans, you probably won't know what the hell this module
81             is for. Europeans, Russians, et al, you probably do. C<:-)>.
82              
83             For example, here's a valid MIME header you might get:
84              
85             From: =?US-ASCII?Q?Keith_Moore?=
86             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
87             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
88             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
89             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
90             =?US-ASCII?Q?.._cool!?=
91              
92             The fields basically decode to (sorry, I can only approximate the
93             Latin characters with 7 bit sequences /o and 'e):
94              
95             From: Keith Moore
96             To: Keld J/orn Simonsen
97             CC: Andr'e Pirard
98             Subject: If you can read this you understand the example... cool!
99              
100              
101             =head1 PUBLIC INTERFACE
102              
103             =over 4
104              
105             =cut
106              
107              
108 1     1   5 use vars qw($NONPRINT $VERSION);
  1         1  
  1         48  
109              
110             ### The package version, both in 1.23 style *and* usable by MakeMaker:
111             BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
112 1     1   2023 $VERSION = "0.14"
113             }
114              
115             # Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
116             $NONPRINT=qr{(?:[^\x20-\x7E]|=)};
117             #$NONPRINT=qr{(?:[^\x20-\x7E]|[=](?=[?]))}; # Imp: is a bare `=' legal?
118              
119             #** @param $_[0] charset name
120             #** @return MIME canonical charset name
121             sub canonical_charset($) {
122 150     150 0 162 my $S=$_[0];
123 150 100       674 if ($S=~/\A(?:iso-?(?:8859-?)?|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
  79 100       235  
    100          
124 45         84 elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
125 3         4 elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
126 23         47 else { uc $S }
127             }
128              
129             #** @param $_[0] string.
130             #** @param $_[1] hashref. options
131             #** @param $_[2] string to append to
132             sub append_encoded_word($$$) {
133 97     97 0 202 my($word,$opts,$dst)=@_;
134 97 100       168 if ($opts->{Encoding} eq "B") {
    50          
135 29         87 $word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
136 29         43 $word=~s@\s+@@g;
137 29         43 $$dst.=$word
138             } elsif ($opts->{Encoding} eq "Q") {
139             # Dat: improved MIME::Words::_encode_Q
140 68 100       311 $word =~ s{( )|([_\?\=]|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
  306         1111  
141             : sprintf("=%02X", ord($2))}eog;
142 68         119 $$dst.=$word
143 0         0 } else { die }
144             undef
145 97         150 }
146              
147             #use vars qw($old_encode_mimewords);
148             #BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
149              
150             #** @param $_[0] string. Unicode
151             #** @param $_[1] hashref. options preprocessed by out encode_mimewords()
152             #** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
153             sub encode_mimeword1($$) {
154 90     90 0 146 my($src,$opts)=@_;
155             # Imp: warning if Encode::encode cannot represent character
156 90         98 my $dst="";
157 90         161 my $open="=?$opts->{Charset}?$opts->{Encoding}?";
158 90         86 my $maxlen=64; # Dat: good for a subject line !! 63 or 62
159             # ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
160             # ^^^ Dat: one quoted word shouldn't be too long
161             # ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
162 90         101 $maxlen-=length($open);
163 90 100       149 $maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
164             #print STDERR "($src) $maxlen\n";
165              
166 90         196 $src=Encode::encode($opts->{Charset},$src);
167 90 100       3102 if ($opts->{Shorts}) {
168 84         78 my $I=0;
169 84         137 while ($I
170             # Dat: split result for too long consecutive headers (i.e. long Subject: line)
171 91         98 my $J=$I+$maxlen;
172 91 100       125 $J=length($src) if $J>length($src);
173 91 100       137 if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
174 25 50       57 if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
175 0   0     0 $J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
176 0 0 0     0 $J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
177             # ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
178             }
179             }
180             # Imp: else: fix for other multibyte encodings
181 91         126 $dst.=$open;
182 91         100 my $addlen=-length($dst);
183 91         233 append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
184 91         119 $addlen+=length($dst);
185 91 100 100     255 if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
      66        
186             # Dat: too many hex `=..' triplets, become too long
187 7         11 my $K=length($dst);
188 7         11 while ($addlen>$maxlen) {
189 48 100       63 if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
  3         4  
  3         4  
  3         4  
190 45         42 else { $addlen--; $K--; $J-- }
  45         42  
  45         54  
191             }
192 7         8 substr($dst,$K)="";
193             # Imp: more efficient, don't process the same data many times
194             }
195 91         97 $dst.="?="; $dst.=$opts->{Space};
  91         111  
196 91         157 $I=$J;
197             }
198 6         11 } else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
  6         11  
  6         7  
199 90         137 $dst
200             }
201              
202             #** @returns the specified string quoted in double quotes. All characters
203             #** are printable ASCII.
204             sub dumpstr($) {
205 0     0 0 0 my $S=$_[0];
206 0         0 $S=~s@(["\\])|([^ -~])@
207 0 0       0 defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
208             : "\\$1" # Imp: Unicode chars
209             @ge;
210 0         0 "\"$S\"" #"
211             }
212              
213             #** Splits a string on spaces into lines so no line is longer than the maximum
214             #** (except if there is no space nearby).
215             #** Only the 1st space is converted to $_[2] at each break.
216             #** @param $_[0] string. to split
217             #** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
218             #** @param $_[2] chars to replace a space with -- not being added to the
219             #** maximum line length
220             sub split_words($$$) {
221 74     74 0 111 my($S,$maxlen,$nl)=@_;
222 74         75 my $lastpos=0; my $I=0; my $J;
  74         69  
  74         78  
223             #** Position after last space to split at, or $I
224             my $K;
225 74         67 my $ret="";
226 74         63 while (1) { # Imp: faster implementation
227 88         83 $K=$J=$I;
228 88   66     341 $J++ while $J
229 88 100 100     178 while ($K==$I ? ($J
230 2878         2714 my $C=substr($S,$J,1);
231 2878 50       3466 if ($C eq"\n") { $K=$I=++$J }
  0 100       0  
232 65         178 elsif ($C eq " ") { $K=++$J }
233 2813         4105 else { $J++ }
234             }
235 88 100 100     178 if ($K>$I and $J>$I+$maxlen) {
236 18         30 $ret.=substr($S,$I,$K-1-$I);
237 18         17 $ret.=$nl;
238 18         16 $I=$K; # Imp: skip more
239             }
240 88 100       126 if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
  74         129  
  74         93  
241             }
242             $ret
243 74         148 }
244              
245             =item encode_mimewords RAW, [OPTS]
246              
247             I
248             Given a RAW string, try to find and encode all "unsafe" sequences
249             of characters:
250              
251             ### Encode a string with some unsafe "words":
252             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
253              
254             Returns the encoded string.
255             Any arguments past the RAW string are taken to define a hash of options:
256              
257             =over 4
258              
259             =item Charset
260              
261             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
262             a.k.a. "Latin-1".
263              
264             =item Encoding
265              
266             The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
267              
268             =item Field
269              
270             Name of the mail field this string will be used in. I
271              
272             =back
273              
274             B this is a stable, tested, widely compatible solution. Strict
275             compliance with RFC-1522 (regarding the use of encoded words in message
276             headers), however, was not proven, but strings returned by this function
277             work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
278             recommended way is to use this function instead of C or
279             L.
280              
281             =cut
282              
283              
284             #** Dat: no prototype, because original encode_mimewords() doesn't have it
285             #** a prototype either
286             #** @param $_[0] string|raw string. raw
287             #** @param $_[1].. list of key-value pairs. options
288             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
289             #** Charset is now autodetected (not always ISO-8859-1)
290             #** New key: Raw:
291             #** -- 1 (default): true: $_[0] is already a raw, encoded string
292             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
293             # with Encode::encode(Charset)
294             #** New key: Shorts:
295             #** -- 1 (default for encode_mimewords)
296             #** -- 0 (default for encode_mimeword)
297             #** New key: Whole: (is respected iff Shorts==0)
298             #** -- 1 (default): quote the string as a whole word (not default in the original!)
299             #** -- 0: encode subwords
300             #** New key: Keeptrailnl
301             #** -- 0: treat trailing newline as unprintable
302             #** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
303             # keep trailing newline at the end
304             #** !! doc more, including new keys
305             sub encode_mimewords_low {
306 80     80 0 808 my($S,%opts)=@_;
307 80 50       156 return undef if !defined($S);
308             # die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
309             #die unless open LOG, ">> /tmp/b.log";
310             #die unless print LOG "=> ".dumpstr($S)."\n";
311             #die unless close LOG;
312             #$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
313 80 100       142 $opts{Raw}=1 if !defined $opts{Raw};
314 80 100       155 $opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
315 80 50       119 die if !defined $opts{Charset};
316 80         118 $opts{Charset}=canonical_charset($opts{Charset});
317 80 100       132 if ($opts{Raw}) { # Dat: improvement
318 46         52 $opts{Raw}=0;
319             #die if !defined $S;
320 46         99 $S=Encode::decode($opts{Charset}, $S);
321             # ^^^ Dat: better do a Unicode regexp match
322             }
323 79 100       1918 $opts{Encoding}=defined($opts{Encoding}) ? uc($opts{Encoding}) : "Q";
324 79 100       150 $opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
325 79 100 100     241 $opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
326             # ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
327             # bad, Pine doesn't display quoted-printable properly
328             # (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
329             # the other way;
330             # We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
331 79 100       145 $opts{Shorts}=1 if !defined $opts{Shorts};
332 79 100       111 $opts{Whole}=1 if !defined $opts{Whole};
333 79 50       115 $opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
334 79 50       123 $opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
335 79 100       107 $opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
336 79         83 my $toend="";
337 79 100 100     334 $toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
338 79 100       150 if (!$opts{Shorts}) {
    100          
339 6         11 $S=encode_mimeword1($S,\%opts)
340             } elsif ($opts{Whole}) {
341 46 100       179 if ($S=~/$NONPRINT/o) {
342 43         88 $S=encode_mimeword1($S,\%opts);
343 43         69 substr($S,-1)=""; # Dat: remove last space
344             }
345 46 50       104 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
346             } else {
347 27         32 my $lastpos=0;
348 27         154 while ($S=~/($NONPRINT[^ ]* *)/go) {
349             # ^^^ Dat: having ` *' is a must here, other clients just forget about it
350 41         96 my $I=pos($S)-length($1);
351 41   100     284 $I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
352 41         51 my $pos=pos($S); my $D;
  41         36  
353 41   33     311 1 while ($pos=pos($S)) and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and defined($D=$1)
      66        
      66        
354             and $S=~/\G[^ ]* */goc;
355 41 50       90 pos($S)=$pos if !defined $D;
356 41         63 my $srclen=pos($S)-$I;
357 41         63 my $src=substr($S,$I,$srclen);
358             ##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
359 41 50 66     246 if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and !defined($1)) {
      66        
      33        
360             ##print STDERR "Strip ending space\n";
361 29         55 substr($src,-1)=""; # Dat: see test case 'ignore_space'
362             }
363             # Dat: now pos($S) is invalid
364 41 50       72 die if 1>length($src);
365             ##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
366 41         67 my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
367             ##print STDERR substr($S,$I,$srclen),";;\n";
368 41         116 substr($S,$I,$srclen)=$dst; # Imp: do with less copying
369 41         255 $lastpos=pos($S)=$I+length($dst);
370             }
371             substr($S,-length($opts{Space}))="" if
372 27 100 66     101 0
373 27 50       70 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
374             }
375 79         105 $S.=$toend;
376             #$S=~s@ @\n @g; # !! debug
377             #die unless open LOG, ">> /tmp/b.log";
378             #die unless print LOG "T> ".dumpstr($S)."\n";
379             #die unless close LOG;
380 79         357 $S
381             }
382              
383             #use vars qw($old_encode_mimeword);
384             #BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
385              
386             =item encode_mimeword RAW, [ENCODING], [CHARSET]
387              
388             I
389             Encode a single RAW "word" that has unsafe characters.
390             The "word" will be encoded in its entirety.
391              
392             ### Encode "<>":
393             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
394              
395             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
396             You may specify the CHARSET, which defaults to C.
397              
398             =cut
399              
400              
401             #** Dat: no prototype, because original encode_mimeword() doesn't have it
402             #** a prototype either
403             #** @param $_[0] raw string. raw
404             #** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
405             #** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
406             #** not as in its documentation
407             sub encode_mimeword {
408             #sub encode_mimeword($;$$) {
409 6     6 1 19 encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
410             }
411              
412             # ---
413              
414             # $MIME_WORDS VERSION = "5.420";
415              
416             # _decode_Q STRING
417             # Private: used by _decode_header() to decode "Q" encoding, which is
418             # almost, but not exactly, quoted-printable. :-P
419             sub _decode_Q {
420 34     34   38 my $str = shift;
421 34         78 $str =~ s/_/\x20/g; # RFC-1522, Q rule 2
422 34         95 $str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
  56         192  
423 34         64 $str;
424             }
425              
426             # _encode_Q STRING
427             # Private: used by _encode_header() to decode "Q" encoding, which is
428             # almost, but not exactly, quoted-printable. :-P
429             sub _encode_Q {
430 0     0   0 my $str = shift;
431 0         0 $str =~ s{([_\?\=\x00-\x1F\x7F-\xFF])}{sprintf("=%02X", ord($1))}eg;
  0         0  
432 0         0 $str;
433             }
434              
435             # _decode_B STRING
436             # Private: used by _decode_header() to decode "B" encoding.
437             sub _decode_B {
438 30     30   33 my $str = shift;
439 30         63 decode_base64($str);
440             }
441              
442             # _encode_B STRING
443             # Private: used by _decode_header() to decode "B" encoding.
444             sub _encode_B {
445 0     0   0 my $str = shift;
446 0         0 encode_base64($str, '');
447             }
448              
449             # Copied from MIME::Words.
450             sub decode_mimewords_wantarray_low {
451 50     50 0 51 my $encstr = shift;
452 50         72 my %params = @_;
453 50         49 my @tokens;
454 50         56 $@ = ''; ### error-return
455              
456             ### Collapse boundaries between adjacent encoded words:
457 50         132 $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
458 50         92 pos($encstr) = 0;
459             ### print STDOUT "ENC = [", $encstr, "]\n";
460              
461             ### Decode:
462 50         74 my ($charset, $encoding, $enc, $dec);
463 50         45 while (1) {
464 150 100       206 last if (pos($encstr) >= length($encstr));
465 100         93 my $pos = pos($encstr); ### save it
466              
467             ### Case 1: are we looking at "=?..?..?="?
468 100 100       320 if ($encstr =~ m{\G # from where we left off..
469             =\?([^?]*) # "=?" + charset +
470             \?([bq]) # "?" + encoding +
471             \?([^?]+) # "?" + data maybe with spcs +
472             \?= # "?="
473             }xgi) {
474 64         185 ($charset, $encoding, $enc) = ($1, lc($2), $3);
475 64 100       139 $dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc));
476 64         110 push @tokens, [$dec, $charset];
477 64         89 next;
478             }
479              
480             ### Case 2: are we looking at a bad "=?..." prefix?
481             ### We need this to detect problems for case 3, which stops at "=?":
482 36         49 pos($encstr) = $pos; # reset the pointer.
483 36 50       69 if ($encstr =~ m{\G=\?}xg) {
484 0         0 $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
485 0         0 push @tokens, ['=?'];
486 0         0 next;
487             }
488              
489             ### Case 3: are we looking at ordinary text?
490 36         42 pos($encstr) = $pos; # reset the pointer.
491 36 50       155 if ($encstr =~ m{\G # from where we left off...
492             ([\x00-\xFF]*? # shortest possible string,
493             \n*) # followed by 0 or more NLs,
494             (?=(\Z|=\?)) # terminated by "=?" or EOS
495             }xg) {
496 36 50       70 length($1) or die "MIME::AltWords: internal logic err: empty token\n";
497 36         67 push @tokens, [$1];
498 36         44 next;
499             }
500            
501 0 0       0 if ($encstr=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) { #### pts ####
502 0         0 $@.=qq|wide character in encoded string\n|;
503 0 0       0 push @tokens, [$1] if 0!=length($1);
504 0         0 next;
505             }
506              
507             ### Case 4: bug!
508 0         0 die "MIME::AltWords: unexpected case:\n($encstr) pos $pos\n\t".
509             "Please alert developer.\n";
510             }
511 50 50       146 return (wantarray ? @tokens : join('',map {$_->[0]} @tokens));
  0         0  
512             }
513              
514             #** Dat: function added by #### pts ####
515             #** @param $_[0] a mimewords-encoded string
516             #** @return a canonical encoding name with which the string can be re-encoded
517             sub get_best_decode_charset($) {
518 23     23 0 28 my $encodedstr=$_[0];
519 23         27 my @L;
520 23         38 for my $token (decode_mimewords($encodedstr)) {
521 45   100     111 my $charset=canonical_charset($token->[1] or "");
522 45 100 100     175 push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
      100        
523             }
524 23 100       58 @L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
525 23         59 $L[0]
526             }
527              
528             =item decode_mimewords ENCODED, [OPTS...]
529              
530             I
531             Go through the string looking for RFC-1522-style "Q"
532             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
533              
534             B splits the ENCODED string into a list of decoded
535             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
536             data are returned in a 1-element array C<[DATA]>, giving an effective
537             CHARSET of C.
538              
539             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
540             foreach (decode_mimewords($enc)) {
541             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
542             }
543              
544             B joins the "data" elements of the above list
545             together, and returns that. I it
546             sanitizes the returned string to use a specific, single charset, either
547             specified using the C option, or autodetecting one (ISO-8859-1,
548             ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
549             autodetection, C can be used to query the charset
550             autodetected.
551              
552             You might want to see L as an alternate of
553             L.
554              
555             In the event of a syntax error, $@ will be set to a description
556             of the error, but parsing will continue as best as possible (so as to
557             get I back when decoding headers).
558             $@ will be false if no error was detected.
559              
560             Any arguments past the ENCODED string are taken to define a hash of options:
561              
562             =over 4
563              
564             =item Field
565              
566             Name of the mail field this string came from. I
567              
568             =back
569              
570             =cut
571              
572             #** Dat: no prototype, because original decode_mimewords() doesn't have it
573             #** a prototype either
574             #** Dat: it is unsafe to use this without the Raw=>1 option. Search for
575             #** MIME::WordDecoder in `perldoc MIME::Words'.
576             #** @param $_[0] a mimewords-encoded string
577             #** @param $_[1]... list of options (key=>value pairs).
578             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
579             #** New key: Raw:
580             #** -- 1 (default): true: return a raw, encoded string. The encoding will
581             #** be Charset, or the one returned by get_best_decode_charset().
582             #** This is an improvement
583             #** by #### pts #### -- the original decode_mimewords() didn't return
584             #** the string in a consistent encoding.
585             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
586             #** with Encode::encode(Charset)
587             #** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
588             sub decode_mimewords_low {
589 50 100   50 0 98 return decode_mimewords_wantarray_low(@_) if wantarray;
590 27         47 my($encodedstr,%opts)=@_;
591 27 100       63 $opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
592 27         32 my $ret='';
593             # vvv Dat: not mutually recursive, because get_best_decode_charset() calls
594             # decode_mimewords() in list context, so this line won't be
595             # reached.
596             $opts{Charset}=get_best_decode_charset($encodedstr) if
597 27 100 100     91 $opts{Raw} and !defined $opts{Charset};
598 27         29 my $S;
599 27         62 for my $token (decode_mimewords_wantarray_low($encodedstr,%opts)) { # Dat: $charset in $token->[1]
600 55 100       136 $S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
601 55 100       1245 $S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
602 55         4072 $ret.=$S
603             }
604             $ret
605 27         122 }
606              
607 1     1   8 use vars qw(@encode_subject_opts);
  1         1  
  1         45  
608 1     1   68 BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
609              
610             #** Dat: function added by #### pts ####
611             #** @param $_[0] String. A mimewords-encoded e-mail subject.
612             #** @return String. better mimewords-encoded
613             sub fix_subject($) {
614 1     1 0 2 my $encodedstr=$_[0];
615 1         3 my $best_charset=get_best_decode_charset($encodedstr);
616 1         3 my $decoded=decode_mimewords($encodedstr, Raw=>0);
617 1         5 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
618             }
619              
620 1     1   5 use vars qw(@encode_addresses_opts);
  1         2  
  1         36  
621 1     1   121 BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
622              
623             #** Dat: function added by #### pts ####
624             #** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
625             #** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
626             #** @return String. better mimewords-encoded
627             sub fix_addresses($) {
628 3     3 0 5 my $encodedstr=$_[0];
629 3         5 my $best_charset=get_best_decode_charset($encodedstr);
630 3         6 my $decoded=decode_mimewords($encodedstr, Raw=>0);
631             #print STDERR "DE($decoded)\n";
632             #chomp $decoded;
633             #$decoded.=" alma ";
634 3         6 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
635             }
636              
637             #** Dat: function added by #### pts ####
638             #** @param $_[0] a Unicode string
639             #** @param $_[1] a charset
640             #** @return Boolean: is it encodable?
641             sub is_encodable($$) {
642 48     48 0 80 my $charset=uc$_[1];
643 48         49 my $S=$_[0]; # Dat: must be copied for Encode::encode
644 48 100 66     182 return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
645 45         55 eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
  45         96  
646 45 100       1652 $@ ? 0 : 1
647             }
648              
649             #** Please don't include US-ASCII.
650             #** Specify UTF-8 last (because get_best_encode_charset() ignores everything
651             #** after the first UTF-8).
652 1     1   6 use vars qw(@encode_charsets);
  1         2  
  1         33  
653 1     1   224 BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
654              
655             #** Dat: function added by #### pts ####
656             #** @param $_[0] string to try
657             sub get_best_encode_charset($) {
658 39     39 0 58 for my $charset (@encode_charsets) {
659 48 100       84 return $charset if is_encodable($_[0], $charset);
660             }
661 0         0 return 'UTF-8'
662             }
663              
664             #** Dat: function added by #### pts ####
665             #** @param $_[0] String. Perl Unicode-string
666             #** @param $_[1] encode mode: 'subject' or 'addresses'
667             #** @return String. better mimewords-encoded with the best charset
668             sub encode_unicode($$) {
669 6     6 0 9 my $str=$_[0];
670 6 100       14 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
671 6         12 my $best_charset=get_best_encode_charset($str);
672 6         14 encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
673             }
674              
675             #** Dat: function added by #### pts ####
676             #** @param $_[0] String. Perl 8-bit string, encoded in $charset
677             #** @param $_[1] encode mode: 'subject' or 'addresses'
678             #** @param $_[2] $charset;
679             #** @return String. better mimewords-encoded with the best charset
680             sub encode_8bit($$$) {
681 6     6 0 11 my $str=$_[0];
682 6         7 my $charset=$_[2];
683 6 100       20 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
684 6         15 $charset=canonical_charset($charset);
685 6         9 my $best_charset;
686 6         10 for my $charset2 (@encode_charsets) {
687 13 100       17 if (canonical_charset($charset2) eq $charset) {
688 5         7 $best_charset=$charset; last
689 5         7 }
690             }
691 6 100       13 $best_charset=canonical_charset(get_best_encode_charset($str)) if
692             !defined $best_charset;
693 6 100       18 ($charset eq $best_charset) ? # Imp: find badly encoded string...
694             encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
695             encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
696             }
697              
698             # --- Logging...
699              
700 1     1   19 BEGIN { *encode_mimewords=\&encode_mimewords_low }
701 1     1   33 BEGIN { *decode_mimewords=\&decode_mimewords_low }
702              
703 1 50   1   51 BEGIN { if ($main::DEBUG) {
704              
705             #use vars qw($orig_encode_mimewords $orig_decode_mimewords);
706 1     1   5 no warnings qw(prototype redefine);
  1         2  
  1         337  
707              
708             #BEGIN { $orig_encode_mimewords=\&encode_mimewords }
709             *encode_mimewords=sub {
710 0         0 require Carp;
711 0         0 my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
712 0         0 my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
713 0         0 $dump.=$ret."\n";
714 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
715 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
716 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
717 0         0 binmode($log, ':utf8');
718 0         0 print $log $dump;
719 0         0 Carp::cluck("^^^ encode_mimewords() ");
720 0         0 close $log;
721             }
722             $ret
723 0         0 };
  0         0  
724              
725             #BEGIN { $orig_decode_mimewords=\&decode_mimewords }
726             # Imp: copy prototype of original...
727             *decode_mimewords=sub {
728 0         0 require Carp;
729 0         0 my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
730 0 0       0 if (wantarray) {
731 0         0 my @L=(&decode_mimewords_low(@_));
732 0         0 $dump.="@L (ary)\n";
733 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
734 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
735 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
736 0         0 binmode($log, ':utf8');
737 0         0 print $log $dump;
738 0         0 Carp::cluck("^^^ decode_mimewords() ");
739 0         0 close $log;
740             }
741             @L
742 0         0 } else {
743 0         0 my $ret=decode_mimewords_low(@_);
744 0         0 $dump.=$ret."\n";
745 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
746 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
747 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
748 0         0 binmode($log, ':utf8');
749 0         0 print $log $dump;
750 0         0 Carp::cluck("^^^ decode_mimewords() ");
751 0         0 close $log;
752             }
753             $ret
754 0         0 }
755 0         0 };
756              
757             } }
758              
759             # ---
760              
761             =back
762              
763             =head1 NOTES
764              
765             Exports its principle functions by default, in keeping with
766             L and L.
767              
768             Doesn't depend on L or L.
769              
770             See also L for the previous version
771             of L integrated into the Sympa 4 mailing list software.
772              
773             =head1 AUTHOR
774              
775             L was written by
776             PĂŠter SzabĂł (F) in 2006, and it has been uploaded to CPAN on
777             2006-09-27.
778              
779             L uses code from L (in the function
780             C) and it uses documentation from L
781             (in the file C).
782              
783             Here is the original author and copyright information for L.
784              
785             Eryq (F), ZeeGee Software Inc (F).
786             David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
787              
788             All rights reserved. This program is free software; you can redistribute
789             it and/or modify it under the same terms as Perl itself.
790              
791             Thanks also to...
792              
793             Kent Boortz For providing the idea, and the baseline
794             RFC-1522-decoding code!
795             KJJ at PrimeNet For requesting that this be split into
796             its own module.
797             Stephane Barizien For reporting a nasty bug.
798              
799              
800             =head1 VERSION
801              
802             See $VERSION in C .
803              
804             =cut
805              
806             =begin testing
807              
808             is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
809             is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
810             is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
811             is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
812             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
813             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
814             "az ?rkiköt? földi adatai");
815             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
816             "az ?rkiköt? földi adatai");
817             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
818             "az űrkikötő földi adatai");
819             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
820             "az űrkikötő földi adatai");
821             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
822             "az űrkikötő földi adatai"); # Dat: guess Charset to ISO-8859-2
823             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
824             is(MIME::AltWords::encode_mimewords("az űrkikötő földi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
825             is(MIME::AltWords::encode_mimewords("az űrkikötő földi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
826             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
827             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
828             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
829             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
830             { my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
831             ok($@=~/^Wide character /); # Dat: Encode::decode fails
832             }
833             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
834             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
835             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
836             "ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
837             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
838             "=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
839             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
840             "ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
841             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
842             "ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
843             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
844             "=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
845             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
846             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
847             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
848             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
849              
850             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
851             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
852             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
853             "szóköz) kapcsolatban áll egy nem kizárással.",
854             Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
855             "A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
856              
857             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
858             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
859             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
860             "szóköz) kapcsolatban áll egy nem kizárással.",
861             Charset=>"ISO-8859-2",Raw=>0),
862             "=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
863             # vvv Dat: composing with Pine emits:
864             #is(MIME::AltWords::encode_mimewords("A keresési eredményekből bizonyos szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel előzi meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy szóköz) kapcsolatban áll egy nem kizárással.",
865             #"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
866             # =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
867             # =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
868             # =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
869             # =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
870             # =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
871              
872             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
873             Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
874             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
875             Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
876              
877             is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
878              
879             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
880             "=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
881             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
882             "=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
883             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
884             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
885             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
886             "=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
887              
888             SKIP: {
889             eval { require Mail::Address };
890             skip "Mail::Address not installed", 1 if $@;
891             my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
892             my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
893             $address="undef" if !defined $address;
894             is($address, "pts\@our.um");
895             }
896              
897             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
898             "\x{C5}\x{B1}", 'decode_mimewords()');
899             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
900             "\x{171}", 'decode_mimewords()');
901              
902             is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
903             'ISO-8859-2', 'get_best_decode_charset()');
904             is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
905             'UTF-8', 'get_best_decode_charset()');
906             is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
907              
908             is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
909             "=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
910             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
911             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
912             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
913             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
914             is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
915             "=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
916              
917             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
918             "f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
919             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
920             "f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
921             #die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
922             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
923             "Szabó Péter", 'decode_mimewords()');
924              
925             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
926             "f\x{C3}\x{A9}l", 'encode+decode mimewords');
927             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
928             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
929             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
930             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
931             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
932             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
933             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
934             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
935             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
936             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
937             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
938             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
939              
940             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
941             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
942             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
943             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
944             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
945             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
946              
947             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
948             "m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
949              
950             is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
951             is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
952             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
953             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
954              
955             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
956             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
957             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
958             is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
959              
960             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
961             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
962              
963             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
964             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
965             is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
966             is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
967             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
968             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
969              
970             is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
971             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
972              
973             is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
974             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
975              
976             is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
977             "=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
978              
979             is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
980             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld Jřrn Simonsen ', "MIME::Words test case 2");
981             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'André Pirard ', "MIME::Words test case 3");
982             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
983             is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
984             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
985             # vvv !! is this correct (space after \n)?
986             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
987             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
988              
989             my $in0 = Encode::encode("windows-1251", "\x{422}\x{435}\x{441}\x{442}\x{438}\x{440}\x{43e}\x{432}\x{430}\x{43d}\x{438}\x{435}");
990             my $out0b = "=?WINDOWS-1251?B?0uXx8ujw7uLg7ejl?=";
991             my $out0q = "=?WINDOWS-1251?Q?=D2=E5=F1=F2=E8=F0=EE=E2=E0=ED=E8=E5?=";
992             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"B"), $out0b);
993             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"Q"), $out0q);
994             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"q"), $out0q);
995             is(MIME::AltWords::encode_mimeword($in0, "B", "windows-1251"), $out0b);
996             is(MIME::AltWords::encode_mimeword($in0, "Q", "windows-1251"), $out0q);
997             is(MIME::AltWords::encode_mimeword($in0, "q", "windows-1251"), $out0q);
998             is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"b"), $out0b);
999             is(MIME::AltWords::encode_mimeword($in0, "b", "windows-1251"), $out0b);
1000              
1001             =cut
1002              
1003             1