File Coverage

lib/MIME/AltWords.pm

Criterion	Covered	Total	%
statement	245	318	77.0
branch	111	144	77.0
condition	47	65	72.3
subroutine	38	41	92.6
pod	1	16	6.2
total	442	584	75.6

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# MIME/AltWords.pm -- a lot of fixes on MIME::Words
3							# by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4							# -- Fri Mar 31 18:41:14 CEST 2006
5							#
6							# Dat: this assumes Perl v5.8 or later
7							# Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8							# Dat: see `perldoc MIME::Words' for the original documentation
9							# Dat: a raw string has bytes in 0..255, and it is already encoded in some
10							# encoding
11							# SUXX: perldoc doesn't respect `=encoding utf-8'
12							# lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13							# !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14							# !! document all
15							# !! document test cases
16							# !! MANIFEST etc.
17							#
18
19							package MIME::AltWords;
20	1			1		17	use v5.8; # Dat: Unicode string support etc.
	1					3
21	1			1		4	use integer;
	1					2
	1					9
22	1			1		20	use strict;
	1					1
	1					18
23	1			1		259	use MIME::Base64;
	1					541
	1					59
24	1			1		275	use MIME::QuotedPrint;
	1					177
	1					40
25	1			1		373	use Encode;
	1					7229
	1					61
26	1			1		7	use warnings;
	1					1
	1					19
27	1			1		4	use Exporter;
	1					2
	1					29
28	1			1		4	no warnings qw(prototype redefine);
	1					1
	1					36
29
30							=pod
31
32							=encoding utf8
33
34							=head1 NAME
35
36							MIME::AltWords - properly deal with RFC-1522 encoded words
37
38							=head1 SYNOPSIS
39
40							The Perl module L is recommended for encoding and
41							decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
42							e-mail message headers (mostly Subject, From and To).
43
44							L is similar to L in
45							L, but it provides an alternate implementation that follows the
46							MIME specification more carefully, and it is actually compatible with
47							existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
48							L extends the functionality of L (version
49							5.420) by adding more functions and more options to existing functions. The
50							original interface is changed in an upward-compatible way.
51
52							Before reading further, you should see L to make sure that
53							you understand where this module fits into the grand scheme of things.
54							Go on, do it now. I'll wait.
55
56							Ready? Ok...
57
58							use MIME::AltWords qw(:all);
59
60							### Decode the string into another string, forgetting the charsets:
61							$decoded = decode_mimewords(
62							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
63							);
64
65							### Split string into array of decoded [DATA,CHARSET] pairs:
66							@decoded = decode_mimewords(
67							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
68							);
69
70							### Encode a single unsafe word:
71							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
72
73							### Encode a string, trying to find the unsafe words inside it:
74							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
75
76
77
78							=head1 DESCRIPTION
79
80							Fellow Americans, you probably won't know what the hell this module
81							is for. Europeans, Russians, et al, you probably do. C<:-)>.
82
83							For example, here's a valid MIME header you might get:
84
85							From: =?US-ASCII?Q?Keith_Moore?=
86							To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
87							CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
88							Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
89							=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
90							=?US-ASCII?Q?.._cool!?=
91
92							The fields basically decode to (sorry, I can only approximate the
93							Latin characters with 7 bit sequences /o and 'e):
94
95							From: Keith Moore
96							To: Keld J/orn Simonsen
97							CC: Andr'e Pirard
98							Subject: If you can read this you understand the example... cool!
99
100
101							=head1 PUBLIC INTERFACE
102
103							=over 4
104
105							=cut
106
107
108	1			1		5	use vars qw($NONPRINT $VERSION);
	1					1
	1					48
109
110							### The package version, both in 1.23 style and usable by MakeMaker:
111							BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
112	1			1		2023	$VERSION = "0.14"
113							}
114
115							# Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
116							$NONPRINT=qr{(?:[^\x20-\x7E]\|=)};
117							#$NONPRINT=qr{(?:[^\x20-\x7E]\|[=](?=[?]))}; # Imp: is a bare `=' legal?
118
119							#** @param $_[0] charset name
120							#** @return MIME canonical charset name
121							sub canonical_charset($) {
122	150			150	0	162	my $S=$_[0];
123	150	100				674	if ($S=~/\A(?:iso-?(?:8859-?)?\|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
	79	100				235
		100
124	45					84	elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
125	3					4	elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
126	23					47	else { uc $S }
127							}
128
129							#** @param $_[0] string.
130							#** @param $_[1] hashref. options
131							#** @param $_[2] string to append to
132							sub append_encoded_word($$$) {
133	97			97	0	202	my($word,$opts,$dst)=@_;
134	97	100				168	if ($opts->{Encoding} eq "B") {
		50
135	29					87	$word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
136	29					43	$word=~s@\s+@@g;
137	29					43	$$dst.=$word
138							} elsif ($opts->{Encoding} eq "Q") {
139							# Dat: improved MIME::Words::_encode_Q
140	68	100				311	$word =~ s{( )\|([_\?\=]\|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
	306					1111
141							: sprintf("=%02X", ord($2))}eog;
142	68					119	$$dst.=$word
143	0					0	} else { die }
144							undef
145	97					150	}
146
147							#use vars qw($old_encode_mimewords);
148							#BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
149
150							#** @param $_[0] string. Unicode
151							#** @param $_[1] hashref. options preprocessed by out encode_mimewords()
152							#** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
153							sub encode_mimeword1($$) {
154	90			90	0	146	my($src,$opts)=@_;
155							# Imp: warning if Encode::encode cannot represent character
156	90					98	my $dst="";
157	90					161	my $open="=?$opts->{Charset}?$opts->{Encoding}?";
158	90					86	my $maxlen=64; # Dat: good for a subject line !! 63 or 62
159							# ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
160							# ^^^ Dat: one quoted word shouldn't be too long
161							# ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
162	90					101	$maxlen-=length($open);
163	90	100				149	$maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
164							#print STDERR "($src) $maxlen\n";
165
166	90					196	$src=Encode::encode($opts->{Charset},$src);
167	90	100				3102	if ($opts->{Shorts}) {
168	84					78	my $I=0;
169	84					137	while ($I
170							# Dat: split result for too long consecutive headers (i.e. long Subject: line)
171	91					98	my $J=$I+$maxlen;
172	91	100				125	$J=length($src) if $J>length($src);
173	91	100				137	if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
174	25	50				57	if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
175	0		0			0	$J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
176	0	0	0			0	$J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
177							# ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
178							}
179							}
180							# Imp: else: fix for other multibyte encodings
181	91					126	$dst.=$open;
182	91					100	my $addlen=-length($dst);
183	91					233	append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
184	91					119	$addlen+=length($dst);
185	91	100	100			255	if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
			66
186							# Dat: too many hex `=..' triplets, become too long
187	7					11	my $K=length($dst);
188	7					11	while ($addlen>$maxlen) {
189	48	100				63	if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
	3					4
	3					4
	3					4
190	45					42	else { $addlen--; $K--; $J-- }
	45					42
	45					54
191							}
192	7					8	substr($dst,$K)="";
193							# Imp: more efficient, don't process the same data many times
194							}
195	91					97	$dst.="?="; $dst.=$opts->{Space};
	91					111
196	91					157	$I=$J;
197							}
198	6					11	} else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
	6					11
	6					7
199	90					137	$dst
200							}
201
202							#** @returns the specified string quoted in double quotes. All characters
203							#** are printable ASCII.
204							sub dumpstr($) {
205	0			0	0	0	my $S=$_[0];
206	0					0	$S=~s@(["\\])\|([^ -~])@
207	0	0				0	defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
208							: "\\$1" # Imp: Unicode chars
209							@ge;
210	0					0	"\"$S\"" #"
211							}
212
213							#** Splits a string on spaces into lines so no line is longer than the maximum
214							#** (except if there is no space nearby).
215							#** Only the 1st space is converted to $_[2] at each break.
216							#** @param $_[0] string. to split
217							#** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
218							#** @param $_[2] chars to replace a space with -- not being added to the
219							#** maximum line length
220							sub split_words($$$) {
221	74			74	0	111	my($S,$maxlen,$nl)=@_;
222	74					75	my $lastpos=0; my $I=0; my $J;
	74					69
	74					78
223							#** Position after last space to split at, or $I
224							my $K;
225	74					67	my $ret="";
226	74					63	while (1) { # Imp: faster implementation
227	88					83	$K=$J=$I;
228	88		66			341	$J++ while $J
229	88	100	100			178	while ($K==$I ? ($J
230	2878					2714	my $C=substr($S,$J,1);
231	2878	50				3466	if ($C eq"\n") { $K=$I=++$J }
	0	100				0
232	65					178	elsif ($C eq " ") { $K=++$J }
233	2813					4105	else { $J++ }
234							}
235	88	100	100			178	if ($K>$I and $J>$I+$maxlen) {
236	18					30	$ret.=substr($S,$I,$K-1-$I);
237	18					17	$ret.=$nl;
238	18					16	$I=$K; # Imp: skip more
239							}
240	88	100				126	if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
	74					129
	74					93
241							}
242							$ret
243	74					148	}
244
245							=item encode_mimewords RAW, [OPTS]
246
247							I
248							Given a RAW string, try to find and encode all "unsafe" sequences
249							of characters:
250
251							### Encode a string with some unsafe "words":
252							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
253
254							Returns the encoded string.
255							Any arguments past the RAW string are taken to define a hash of options:
256
257							=over 4
258
259							=item Charset
260
261							Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
262							a.k.a. "Latin-1".
263
264							=item Encoding
265
266							The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
267
268							=item Field
269
270							Name of the mail field this string will be used in. I
271
272							=back
273
274							B this is a stable, tested, widely compatible solution. Strict
275							compliance with RFC-1522 (regarding the use of encoded words in message
276							headers), however, was not proven, but strings returned by this function
277							work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
278							recommended way is to use this function instead of C or
279							L.
280
281							=cut
282
283
284							#** Dat: no prototype, because original encode_mimewords() doesn't have it
285							#** a prototype either
286							#** @param $_[0] string\|raw string. raw
287							#** @param $_[1].. list of key-value pairs. options
288							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
289							#** Charset is now autodetected (not always ISO-8859-1)
290							#** New key: Raw:
291							#** -- 1 (default): true: $_[0] is already a raw, encoded string
292							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
293							# with Encode::encode(Charset)
294							#** New key: Shorts:
295							#** -- 1 (default for encode_mimewords)
296							#** -- 0 (default for encode_mimeword)
297							#** New key: Whole: (is respected iff Shorts==0)
298							#** -- 1 (default): quote the string as a whole word (not default in the original!)
299							#** -- 0: encode subwords
300							#** New key: Keeptrailnl
301							#** -- 0: treat trailing newline as unprintable
302							#** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
303							# keep trailing newline at the end
304							#** !! doc more, including new keys
305							sub encode_mimewords_low {
306	80			80	0	808	my($S,%opts)=@_;
307	80	50				156	return undef if !defined($S);
308							# die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
309							#die unless open LOG, ">> /tmp/b.log";
310							#die unless print LOG "=> ".dumpstr($S)."\n";
311							#die unless close LOG;
312							#$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
313	80	100				142	$opts{Raw}=1 if !defined $opts{Raw};
314	80	100				155	$opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
315	80	50				119	die if !defined $opts{Charset};
316	80					118	$opts{Charset}=canonical_charset($opts{Charset});
317	80	100				132	if ($opts{Raw}) { # Dat: improvement
318	46					52	$opts{Raw}=0;
319							#die if !defined $S;
320	46					99	$S=Encode::decode($opts{Charset}, $S);
321							# ^^^ Dat: better do a Unicode regexp match
322							}
323	79	100				1918	$opts{Encoding}=defined($opts{Encoding}) ? uc($opts{Encoding}) : "Q";
324	79	100				150	$opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
325	79	100	100			241	$opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
326							# ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
327							# bad, Pine doesn't display quoted-printable properly
328							# (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
329							# the other way;
330							# We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
331	79	100				145	$opts{Shorts}=1 if !defined $opts{Shorts};
332	79	100				111	$opts{Whole}=1 if !defined $opts{Whole};
333	79	50				115	$opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
334	79	50				123	$opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
335	79	100				107	$opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
336	79					83	my $toend="";
337	79	100	100			334	$toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
338	79	100				150	if (!$opts{Shorts}) {
		100
339	6					11	$S=encode_mimeword1($S,\%opts)
340							} elsif ($opts{Whole}) {
341	46	100				179	if ($S=~/$NONPRINT/o) {
342	43					88	$S=encode_mimeword1($S,\%opts);
343	43					69	substr($S,-1)=""; # Dat: remove last space
344							}
345	46	50				104	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
346							} else {
347	27					32	my $lastpos=0;
348	27					154	while ($S=~/($NONPRINT[^ ]* *)/go) {
349							# ^^^ Dat: having ` *' is a must here, other clients just forget about it
350	41					96	my $I=pos($S)-length($1);
351	41		100			284	$I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
352	41					51	my $pos=pos($S); my $D;
	41					36
353	41		33			311	1 while ($pos=pos($S)) and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and defined($D=$1)
			66
			66
354							and $S=~/\G[^ ]* */goc;
355	41	50				90	pos($S)=$pos if !defined $D;
356	41					63	my $srclen=pos($S)-$I;
357	41					63	my $src=substr($S,$I,$srclen);
358							##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
359	41	50	66			246	if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and !defined($1)) {
			66
			33
360							##print STDERR "Strip ending space\n";
361	29					55	substr($src,-1)=""; # Dat: see test case 'ignore_space'
362							}
363							# Dat: now pos($S) is invalid
364	41	50				72	die if 1>length($src);
365							##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
366	41					67	my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
367							##print STDERR substr($S,$I,$srclen),";;\n";
368	41					116	substr($S,$I,$srclen)=$dst; # Imp: do with less copying
369	41					255	$lastpos=pos($S)=$I+length($dst);
370							}
371							substr($S,-length($opts{Space}))="" if
372	27	100	66			101	0
373	27	50				70	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
374							}
375	79					105	$S.=$toend;
376							#$S=~s@ @\n @g; # !! debug
377							#die unless open LOG, ">> /tmp/b.log";
378							#die unless print LOG "T> ".dumpstr($S)."\n";
379							#die unless close LOG;
380	79					357	$S
381							}
382
383							#use vars qw($old_encode_mimeword);
384							#BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
385
386							=item encode_mimeword RAW, [ENCODING], [CHARSET]
387
388							I
389							Encode a single RAW "word" that has unsafe characters.
390							The "word" will be encoded in its entirety.
391
392							### Encode "<>":
393							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
394
395							You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
396							You may specify the CHARSET, which defaults to C.
397
398							=cut
399
400
401							#** Dat: no prototype, because original encode_mimeword() doesn't have it
402							#** a prototype either
403							#** @param $_[0] raw string. raw
404							#** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
405							#** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
406							#** not as in its documentation
407							sub encode_mimeword {
408							#sub encode_mimeword($;$$) {
409	6			6	1	19	encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
410							}
411
412							# ---
413
414							# $MIME_WORDS VERSION = "5.420";
415
416							# _decode_Q STRING
417							# Private: used by _decode_header() to decode "Q" encoding, which is
418							# almost, but not exactly, quoted-printable. :-P
419							sub _decode_Q {
420	34			34		38	my $str = shift;
421	34					78	$str =~ s/_/\x20/g; # RFC-1522, Q rule 2
422	34					95	$str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
	56					192
423	34					64	$str;
424							}
425
426							# _encode_Q STRING
427							# Private: used by _encode_header() to decode "Q" encoding, which is
428							# almost, but not exactly, quoted-printable. :-P
429							sub _encode_Q {
430	0			0		0	my $str = shift;
431	0					0	$str =~ s{([_\?\=\x00-\x1F\x7F-\xFF])}{sprintf("=%02X", ord($1))}eg;
	0					0
432	0					0	$str;
433							}
434
435							# _decode_B STRING
436							# Private: used by _decode_header() to decode "B" encoding.
437							sub _decode_B {
438	30			30		33	my $str = shift;
439	30					63	decode_base64($str);
440							}
441
442							# _encode_B STRING
443							# Private: used by _decode_header() to decode "B" encoding.
444							sub _encode_B {
445	0			0		0	my $str = shift;
446	0					0	encode_base64($str, '');
447							}
448
449							# Copied from MIME::Words.
450							sub decode_mimewords_wantarray_low {
451	50			50	0	51	my $encstr = shift;
452	50					72	my %params = @_;
453	50					49	my @tokens;
454	50					56	$@ = ''; ### error-return
455
456							### Collapse boundaries between adjacent encoded words:
457	50					132	$encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
458	50					92	pos($encstr) = 0;
459							### print STDOUT "ENC = [", $encstr, "]\n";
460
461							### Decode:
462	50					74	my ($charset, $encoding, $enc, $dec);
463	50					45	while (1) {
464	150	100				206	last if (pos($encstr) >= length($encstr));
465	100					93	my $pos = pos($encstr); ### save it
466
467							### Case 1: are we looking at "=?..?..?="?
468	100	100				320	if ($encstr =~ m{\G # from where we left off..
469							=\?([^?]*) # "=?" + charset +
470							\?([bq]) # "?" + encoding +
471							\?([^?]+) # "?" + data maybe with spcs +
472							\?= # "?="
473							}xgi) {
474	64					185	($charset, $encoding, $enc) = ($1, lc($2), $3);
475	64	100				139	$dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc));
476	64					110	push @tokens, [$dec, $charset];
477	64					89	next;
478							}
479
480							### Case 2: are we looking at a bad "=?..." prefix?
481							### We need this to detect problems for case 3, which stops at "=?":
482	36					49	pos($encstr) = $pos; # reset the pointer.
483	36	50				69	if ($encstr =~ m{\G=\?}xg) {
484	0					0	$@ .= qq\|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n\|;
485	0					0	push @tokens, ['=?'];
486	0					0	next;
487							}
488
489							### Case 3: are we looking at ordinary text?
490	36					42	pos($encstr) = $pos; # reset the pointer.
491	36	50				155	if ($encstr =~ m{\G # from where we left off...
492							([\x00-\xFF]*? # shortest possible string,
493							\n*) # followed by 0 or more NLs,
494							(?=(\Z\|=\?)) # terminated by "=?" or EOS
495							}xg) {
496	36	50				70	length($1) or die "MIME::AltWords: internal logic err: empty token\n";
497	36					67	push @tokens, [$1];
498	36					44	next;
499							}
500
501	0	0				0	if ($encstr=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) { #### pts ####
502	0					0	$@.=qq\|wide character in encoded string\n\|;
503	0	0				0	push @tokens, [$1] if 0!=length($1);
504	0					0	next;
505							}
506
507							### Case 4: bug!
508	0					0	die "MIME::AltWords: unexpected case:\n($encstr) pos $pos\n\t".
509							"Please alert developer.\n";
510							}
511	50	50				146	return (wantarray ? @tokens : join('',map {$_->[0]} @tokens));
	0					0
512							}
513
514							#** Dat: function added by #### pts ####
515							#** @param $_[0] a mimewords-encoded string
516							#** @return a canonical encoding name with which the string can be re-encoded
517							sub get_best_decode_charset($) {
518	23			23	0	28	my $encodedstr=$_[0];
519	23					27	my @L;
520	23					38	for my $token (decode_mimewords($encodedstr)) {
521	45		100			111	my $charset=canonical_charset($token->[1] or "");
522	45	100	100			175	push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
			100
523							}
524	23	100				58	@L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
525	23					59	$L[0]
526							}
527
528							=item decode_mimewords ENCODED, [OPTS...]
529
530							I
531							Go through the string looking for RFC-1522-style "Q"
532							(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
533
534							B splits the ENCODED string into a list of decoded
535							C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
536							data are returned in a 1-element array C<[DATA]>, giving an effective
537							CHARSET of C.
538
539							$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
540							foreach (decode_mimewords($enc)) {
541							print "", ($_[1] \|\| 'US-ASCII'), ": ", $_[0], "\n";
542							}
543
544							B joins the "data" elements of the above list
545							together, and returns that. I it
546							sanitizes the returned string to use a specific, single charset, either
547							specified using the C option, or autodetecting one (ISO-8859-1,
548							ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
549							autodetection, C can be used to query the charset
550							autodetected.
551
552							You might want to see L as an alternate of
553							L.
554
555							In the event of a syntax error, $@ will be set to a description
556							of the error, but parsing will continue as best as possible (so as to
557							get I back when decoding headers).
558							$@ will be false if no error was detected.
559
560							Any arguments past the ENCODED string are taken to define a hash of options:
561
562							=over 4
563
564							=item Field
565
566							Name of the mail field this string came from. I
567
568							=back
569
570							=cut
571
572							#** Dat: no prototype, because original decode_mimewords() doesn't have it
573							#** a prototype either
574							#** Dat: it is unsafe to use this without the Raw=>1 option. Search for
575							#** MIME::WordDecoder in `perldoc MIME::Words'.
576							#** @param $_[0] a mimewords-encoded string
577							#** @param $_[1]... list of options (key=>value pairs).
578							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
579							#** New key: Raw:
580							#** -- 1 (default): true: return a raw, encoded string. The encoding will
581							#** be Charset, or the one returned by get_best_decode_charset().
582							#** This is an improvement
583							#** by #### pts #### -- the original decode_mimewords() didn't return
584							#** the string in a consistent encoding.
585							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
586							#** with Encode::encode(Charset)
587							#** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
588							sub decode_mimewords_low {
589	50	100		50	0	98	return decode_mimewords_wantarray_low(@_) if wantarray;
590	27					47	my($encodedstr,%opts)=@_;
591	27	100				63	$opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
592	27					32	my $ret='';
593							# vvv Dat: not mutually recursive, because get_best_decode_charset() calls
594							# decode_mimewords() in list context, so this line won't be
595							# reached.
596							$opts{Charset}=get_best_decode_charset($encodedstr) if
597	27	100	100			91	$opts{Raw} and !defined $opts{Charset};
598	27					29	my $S;
599	27					62	for my $token (decode_mimewords_wantarray_low($encodedstr,%opts)) { # Dat: $charset in $token->[1]
600	55	100				136	$S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
601	55	100				1245	$S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
602	55					4072	$ret.=$S
603							}
604							$ret
605	27					122	}
606
607	1			1		8	use vars qw(@encode_subject_opts);
	1					1
	1					45
608	1			1		68	BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
609
610							#** Dat: function added by #### pts ####
611							#** @param $_[0] String. A mimewords-encoded e-mail subject.
612							#** @return String. better mimewords-encoded
613							sub fix_subject($) {
614	1			1	0	2	my $encodedstr=$_[0];
615	1					3	my $best_charset=get_best_decode_charset($encodedstr);
616	1					3	my $decoded=decode_mimewords($encodedstr, Raw=>0);
617	1					5	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
618							}
619
620	1			1		5	use vars qw(@encode_addresses_opts);
	1					2
	1					36
621	1			1		121	BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
622
623							#** Dat: function added by #### pts ####
624							#** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
625							#** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
626							#** @return String. better mimewords-encoded
627							sub fix_addresses($) {
628	3			3	0	5	my $encodedstr=$_[0];
629	3					5	my $best_charset=get_best_decode_charset($encodedstr);
630	3					6	my $decoded=decode_mimewords($encodedstr, Raw=>0);
631							#print STDERR "DE($decoded)\n";
632							#chomp $decoded;
633							#$decoded.=" alma ";
634	3					6	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
635							}
636
637							#** Dat: function added by #### pts ####
638							#** @param $_[0] a Unicode string
639							#** @param $_[1] a charset
640							#** @return Boolean: is it encodable?
641							sub is_encodable($$) {
642	48			48	0	80	my $charset=uc$_[1];
643	48					49	my $S=$_[0]; # Dat: must be copied for Encode::encode
644	48	100	66			182	return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
645	45					55	eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
	45					96
646	45	100				1652	$@ ? 0 : 1
647							}
648
649							#** Please don't include US-ASCII.
650							#** Specify UTF-8 last (because get_best_encode_charset() ignores everything
651							#** after the first UTF-8).
652	1			1		6	use vars qw(@encode_charsets);
	1					2
	1					33
653	1			1		224	BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
654
655							#** Dat: function added by #### pts ####
656							#** @param $_[0] string to try
657							sub get_best_encode_charset($) {
658	39			39	0	58	for my $charset (@encode_charsets) {
659	48	100				84	return $charset if is_encodable($_[0], $charset);
660							}
661	0					0	return 'UTF-8'
662							}
663
664							#** Dat: function added by #### pts ####
665							#** @param $_[0] String. Perl Unicode-string
666							#** @param $_[1] encode mode: 'subject' or 'addresses'
667							#** @return String. better mimewords-encoded with the best charset
668							sub encode_unicode($$) {
669	6			6	0	9	my $str=$_[0];
670	6	100				14	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
671	6					12	my $best_charset=get_best_encode_charset($str);
672	6					14	encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
673							}
674
675							#** Dat: function added by #### pts ####
676							#** @param $_[0] String. Perl 8-bit string, encoded in $charset
677							#** @param $_[1] encode mode: 'subject' or 'addresses'
678							#** @param $_[2] $charset;
679							#** @return String. better mimewords-encoded with the best charset
680							sub encode_8bit($$$) {
681	6			6	0	11	my $str=$_[0];
682	6					7	my $charset=$_[2];
683	6	100				20	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
684	6					15	$charset=canonical_charset($charset);
685	6					9	my $best_charset;
686	6					10	for my $charset2 (@encode_charsets) {
687	13	100				17	if (canonical_charset($charset2) eq $charset) {
688	5					7	$best_charset=$charset; last
689	5					7	}
690							}
691	6	100				13	$best_charset=canonical_charset(get_best_encode_charset($str)) if
692							!defined $best_charset;
693	6	100				18	($charset eq $best_charset) ? # Imp: find badly encoded string...
694							encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
695							encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
696							}
697
698							# --- Logging...
699
700	1			1		19	BEGIN { *encode_mimewords=\&encode_mimewords_low }
701	1			1		33	BEGIN { *decode_mimewords=\&decode_mimewords_low }
702
703	1	50		1		51	BEGIN { if ($main::DEBUG) {
704
705							#use vars qw($orig_encode_mimewords $orig_decode_mimewords);
706	1			1		5	no warnings qw(prototype redefine);
	1					2
	1					337
707
708							#BEGIN { $orig_encode_mimewords=\&encode_mimewords }
709							*encode_mimewords=sub {
710	0					0	require Carp;
711	0					0	my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
712	0					0	my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
713	0					0	$dump.=$ret."\n";
714	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
715	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
716	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
717	0					0	binmode($log, ':utf8');
718	0					0	print $log $dump;
719	0					0	Carp::cluck("^^^ encode_mimewords() ");
720	0					0	close $log;
721							}
722							$ret
723	0					0	};
	0					0
724
725							#BEGIN { $orig_decode_mimewords=\&decode_mimewords }
726							# Imp: copy prototype of original...
727							*decode_mimewords=sub {
728	0					0	require Carp;
729	0					0	my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
730	0	0				0	if (wantarray) {
731	0					0	my @L=(&decode_mimewords_low(@_));
732	0					0	$dump.="@L (ary)\n";
733	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
734	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
735	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
736	0					0	binmode($log, ':utf8');
737	0					0	print $log $dump;
738	0					0	Carp::cluck("^^^ decode_mimewords() ");
739	0					0	close $log;
740							}
741							@L
742	0					0	} else {
743	0					0	my $ret=decode_mimewords_low(@_);
744	0					0	$dump.=$ret."\n";
745	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
746	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
747	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
748	0					0	binmode($log, ':utf8');
749	0					0	print $log $dump;
750	0					0	Carp::cluck("^^^ decode_mimewords() ");
751	0					0	close $log;
752							}
753							$ret
754	0					0	}
755	0					0	};
756
757							} }
758
759							# ---
760
761							=back
762
763							=head1 NOTES
764
765							Exports its principle functions by default, in keeping with
766							L and L.
767
768							Doesn't depend on L or L.
769
770							See also L for the previous version
771							of L integrated into the Sympa 4 mailing list software.
772
773							=head1 AUTHOR
774
775							L was written by
776							Péter Szabó (F) in 2006, and it has been uploaded to CPAN on
777							2006-09-27.
778
779							L uses code from L (in the function
780							C) and it uses documentation from L
781							(in the file C).
782
783							Here is the original author and copyright information for L.
784
785							Eryq (F), ZeeGee Software Inc (F).
786							David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
787
788							All rights reserved. This program is free software; you can redistribute
789							it and/or modify it under the same terms as Perl itself.
790
791							Thanks also to...
792
793							Kent Boortz For providing the idea, and the baseline
794							RFC-1522-decoding code!
795							KJJ at PrimeNet For requesting that this be split into
796							its own module.
797							Stephane Barizien For reporting a nasty bug.
798
799
800							=head1 VERSION
801
802							See $VERSION in C .
803
804							=cut
805
806							=begin testing
807
808							is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
809							is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
810							is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
811							is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
812							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
813							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
814							"az ?rkik�t? f�ldi adatai");
815							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
816							"az ?rkik�t? f�ldi adatai");
817							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
818							"az �rkik�t� f�ldi adatai");
819							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
820							"az �rkik�t� f�ldi adatai");
821							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
822							"az �rkik�t� f�ldi adatai"); # Dat: guess Charset to ISO-8859-2
823							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
824							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
825							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
826							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
827							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
828							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
829							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
830							{ my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
831							ok($@=~/^Wide character /); # Dat: Encode::decode fails
832							}
833							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
834							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
835							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
836							"ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
837							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
838							"=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
839							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
840							"ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
841							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
842							"ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
843							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
844							"=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
845							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
846							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
847							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
848							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
849
850							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
851							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
852							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
853							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
854							Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
855							"A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
856
857							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
858							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
859							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
860							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
861							Charset=>"ISO-8859-2",Raw=>0),
862							"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
863							# vvv Dat: composing with Pine emits:
864							#is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb�l bizonyos szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el�zi meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
865							#"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
866							# =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
867							# =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
868							# =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
869							# =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
870							# =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
871
872							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
873							Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
874							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
875							Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
876
877							is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
878
879							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
880							"=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
881							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
882							"=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
883							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
884							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
885							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
886							"=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
887
888							SKIP: {
889							eval { require Mail::Address };
890							skip "Mail::Address not installed", 1 if $@;
891							my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
892							my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
893							$address="undef" if !defined $address;
894							is($address, "pts\@our.um");
895							}
896
897							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
898							"\x{C5}\x{B1}", 'decode_mimewords()');
899							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
900							"\x{171}", 'decode_mimewords()');
901
902							is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
903							'ISO-8859-2', 'get_best_decode_charset()');
904							is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
905							'UTF-8', 'get_best_decode_charset()');
906							is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
907
908							is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
909							"=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
910							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
911							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
912							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
913							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
914							is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
915							"=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
916
917							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
918							"f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
919							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
920							"f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
921							#die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
922							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
923							"Szab� P�ter", 'decode_mimewords()');
924
925							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
926							"f\x{C3}\x{A9}l", 'encode+decode mimewords');
927							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
928							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
929							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
930							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
931							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
932							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
933							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
934							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
935							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
936							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
937							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
938							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
939
940							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
941							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
942							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
943							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
944							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
945							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
946
947							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
948							"m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
949
950							is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
951							is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
952							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
953							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
954
955							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
956							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
957							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
958							is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
959
960							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
961							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
962
963							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
964							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
965							is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
966							is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
967							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
968							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
969
970							is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
971							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
972
973							is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
974							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
975
976							is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
977							"=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
978
979							is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
980							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld J�rn Simonsen ', "MIME::Words test case 2");
981							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'Andr� Pirard ', "MIME::Words test case 3");
982							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
983							is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
984							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
985							# vvv !! is this correct (space after \n)?
986							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
987							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
988
989							my $in0 = Encode::encode("windows-1251", "\x{422}\x{435}\x{441}\x{442}\x{438}\x{440}\x{43e}\x{432}\x{430}\x{43d}\x{438}\x{435}");
990							my $out0b = "=?WINDOWS-1251?B?0uXx8ujw7uLg7ejl?=";
991							my $out0q = "=?WINDOWS-1251?Q?=D2=E5=F1=F2=E8=F0=EE=E2=E0=ED=E8=E5?=";
992							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"B"), $out0b);
993							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"Q"), $out0q);
994							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"q"), $out0q);
995							is(MIME::AltWords::encode_mimeword($in0, "B", "windows-1251"), $out0b);
996							is(MIME::AltWords::encode_mimeword($in0, "Q", "windows-1251"), $out0q);
997							is(MIME::AltWords::encode_mimeword($in0, "q", "windows-1251"), $out0q);
998							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"b"), $out0b);
999							is(MIME::AltWords::encode_mimeword($in0, "b", "windows-1251"), $out0b);
1000
1001							=cut
1002
1003							1