File Coverage

lib/MIME/AltWords0.pm

Criterion	Covered	Total	%
statement	51	66	77.2
branch	10	18	55.5
condition			n/a
subroutine	10	12	83.3
pod	0	1	0.0
total	71	97	73.2

line	stmt	bran	sub	pod	time	code
1						package MIME::AltWords;
2						# Dat: `package MIME::AltWords0' would be incorrect. We don't need the zero.
3						# copy of package MIME::Words (version 5.420);
4						# at Wed Sep 27 07:52:29 CEST 2006
5
6						=head1 NAME
7
8						MIME::AltWords0 - copy of MIME::Words (not for direct use)
9
10
11						=head1 SYNOPSIS
12
13						L is an auxilary package used by L.
14
15						If you want to encode or decode MIME words (such as
16						C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in e-mail message headers (mostly
17						Subject, From and To), the recommended Perl module to use is
18						L. Please don't use L (which is
19						useless by itself anyway) or L (version 5.420 has several
20						serious bugs both with encoding and decoding).
21
22						Most users shouldn't read on, but they should read L
23						instead.
24
25						=head1 DESCRIPTION
26
27						The following functions have been moved to L:
28						decode_mimewords() (some code stays here as decode_mimewords_wantarray()),
29
30						use MIME::AltWords0 qw(:all);
31
32						### Split string into array of decoded [DATA,CHARSET] pairs:
33						@decoded = decode_mimewords_wantarray(
34						'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
35						);
36
37						Fellow Americans, you probably won't know what the hell this module
38						is for. Europeans, Russians, et al, you probably do. C<:-)>.
39
40						For example, here's a valid MIME header you might get:
41
42						From: =?US-ASCII?Q?Keith_Moore?=
43						To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
44						CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
45						Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
46						=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
47						=?US-ASCII?Q?.._cool!?=
48
49						The fields basically decode to (sorry, I can only approximate the
50						Latin characters with 7 bit sequences /o and 'e):
51
52						From: Keith Moore
53						To: Keld J/orn Simonsen
54						CC: Andr'e Pirard
55						Subject: If you can read this you understand the example... cool!
56
57
58						=head1 PUBLIC INTERFACE
59
60						=over 4
61
62						=cut
63
64						require 5.001;
65
66						### Pragmas:
67	1		1		4	use strict;
	1				1
	1				29
68	1		1		4	use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
	1				2
	1				69
69
70						### Exporting:
71	1		1		4	use Exporter;
	1				1
	1				77
72						%EXPORT_TAGS = (all => [qw(decode_mimewords encode_mimewords encode_mimeword
73						)]);
74						Exporter::export_ok_tags('all');
75
76						### Inheritance:
77						@ISA = qw(Exporter);
78
79						### Other modules:
80	1		1		762	use MIME::Base64;
	1				657
	1				53
81	1		1		713	use MIME::QuotedPrint;
	1				180
	1				585
82
83
84
85						#------------------------------
86						#
87						# Globals...
88						#
89						#------------------------------
90
91						### The package version, both in 1.23 style and usable by MakeMaker:
92						$VERSION = "5.420";
93
94	1		1		5	use vars qw($NONPRINT); #### pts ####
	1				1
	1				644
95
96						### Nonprintables (controls + x7F + 8bit):
97						$NONPRINT = "\\x00-\\x1F\\x7F-\\xFF";
98
99
100						#------------------------------
101
102						# _decode_Q STRING
103						# Private: used by _decode_header() to decode "Q" encoding, which is
104						# almost, but not exactly, quoted-printable. :-P
105						sub _decode_Q {
106	34		34		45	my $str = shift;
107	34				92	$str =~ s/_/\x20/g; # RFC-1522, Q rule 2
108	34				120	$str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
	56				215
109	34				76	$str;
110						}
111
112						# _encode_Q STRING
113						# Private: used by _encode_header() to decode "Q" encoding, which is
114						# almost, but not exactly, quoted-printable. :-P
115						sub _encode_Q {
116	0		0		0	my $str = shift;
117	0				0	$str =~ s{([_\?\=$NONPRINT])}{sprintf("=%02X", ord($1))}eog;
	0				0
118	0				0	$str;
119						}
120
121						# _decode_B STRING
122						# Private: used by _decode_header() to decode "B" encoding.
123						sub _decode_B {
124	30		30		36	my $str = shift;
125	30				91	decode_base64($str);
126						}
127
128						# _encode_B STRING
129						# Private: used by _decode_header() to decode "B" encoding.
130						sub _encode_B {
131	0		0		0	my $str = shift;
132	0				0	encode_base64($str, '');
133						}
134
135
136
137						#------------------------------
138
139						=item decode_mimewords_wantarray ENCODED, [OPTS...]
140
141						I
142						Go through the string looking for RFC-1522-style "Q"
143						(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
144
145						B splits the ENCODED string into a list of decoded
146						C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
147						data are returned in a 1-element array C<[DATA]>, giving an effective
148						CHARSET of C.
149
150						$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
151						foreach (decode_mimewords($enc)) {
152						print "", ($_[1] \|\| 'US-ASCII'), ": ", $_[0], "\n";
153						}
154
155						B the result is undocumented, please see
156						L instead.
157
158						In the event of a syntax error, $@ will be set to a description
159						of the error, but parsing will continue as best as possible (so as to
160						get I back when decoding headers).
161						$@ will be false if no error was detected.
162
163						Any arguments past the ENCODED string are taken to define a hash of options:
164
165						=over 4
166
167						=item Field
168
169						Name of the mail field this string came from. I
170
171						=back
172
173						=cut
174
175						sub decode_mimewords_wantarray {
176	50		50	0	64	my $encstr = shift;
177	50				98	my %params = @_;
178	50				50	my @tokens;
179	50				55	$@ = ''; ### error-return
180
181						### Collapse boundaries between adjacent encoded words:
182	50				143	$encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
183	50				111	pos($encstr) = 0;
184						### print STDOUT "ENC = [", $encstr, "]\n";
185
186						### Decode:
187	50				68	my ($charset, $encoding, $enc, $dec);
188	50				48	while (1) {
189	150	100			318	last if (pos($encstr) >= length($encstr));
190	100				104	my $pos = pos($encstr); ### save it
191
192						### Case 1: are we looking at "=?..?..?="?
193	100	100			364	if ($encstr =~ m{\G # from where we left off..
194						=\?([^?]*) # "=?" + charset +
195						\?([bq]) # "?" + encoding +
196						\?([^?]+) # "?" + data maybe with spcs +
197						\?= # "?="
198						}xgi) {
199	64				192	($charset, $encoding, $enc) = ($1, lc($2), $3);
200	64	100			168	$dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc));
201	64				144	push @tokens, [$dec, $charset];
202	64				92	next;
203						}
204
205						### Case 2: are we looking at a bad "=?..." prefix?
206						### We need this to detect problems for case 3, which stops at "=?":
207	36				67	pos($encstr) = $pos; # reset the pointer.
208	36	50			85	if ($encstr =~ m{\G=\?}xg) {
209	0				0	$@ .= qq\|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n\|;
210	0				0	push @tokens, ['=?'];
211	0				0	next;
212						}
213
214						### Case 3: are we looking at ordinary text?
215	36				51	pos($encstr) = $pos; # reset the pointer.
216	36	50			232	if ($encstr =~ m{\G # from where we left off...
217						([\x00-\xFF]*? # shortest possible string,
218						\n*) # followed by 0 or more NLs,
219						(?=(\Z\|=\?)) # terminated by "=?" or EOS
220						}xg) {
221	36	50			79	length($1) or die "MIME::AltWords0: internal logic err: empty token\n";
222	36				85	push @tokens, [$1];
223	36				47	next;
224						}
225
226	0	0			0	if ($encstr=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) { #### pts ####
227	0				0	$@.=qq\|wide character in encoded string\n\|;
228	0	0			0	push @tokens, [$1] if 0!=length($1);
229	0				0	next;
230						}
231
232						### Case 4: bug!
233	0				0	die "MIME::AltWords0: unexpected case:\n($encstr) pos $pos\n\t".
234						"Please alert developer.\n";
235						}
236	50	50			256	return (wantarray ? @tokens : join('',map {$_->[0]} @tokens));
	0
237						}
238
239						#------------------------------
240
241						# vvv buggy implementation of encode_mimeword() commented out,
242						# see MIME::AltWords
243
244						=begin comment
245
246						=item encode_mimeword RAW, [ENCODING], [CHARSET]
247
248						I
249						Encode a single RAW "word" that has unsafe characters.
250						The "word" will be encoded in its entirety.
251
252						### Encode "<>":
253						$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
254
255						You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
256						You may specify the CHARSET, which defaults to C.
257
258						=cut
259
260						=begin commentcode
261
262						sub encode_mimeword {
263						my $word = shift;
264						my $encoding = uc(shift \|\| 'Q');
265						my $charset = uc(shift \|\| 'ISO-8859-1');
266						my $encfunc = (($encoding eq 'Q') ? \&_encode_Q : \&_encode_B);
267						"=?$charset?$encoding?" . &$encfunc($word) . "?=";
268						}
269
270						=cut
271
272						#------------------------------
273
274						# vvv buggy implementation of encode_mimeword() commented out,
275						# see MIME::AltWords
276
277						=begin comment
278
279						=item encode_mimewords RAW, [OPTS]
280
281						I
282						Given a RAW string, try to find and encode all "unsafe" sequences
283						of characters:
284
285						### Encode a string with some unsafe "words":
286						$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
287
288						Returns the encoded string.
289						Any arguments past the RAW string are taken to define a hash of options:
290
291						=over 4
292
293						=item Charset
294
295						Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
296						a.k.a. "Latin-1".
297
298						=item Encoding
299
300						The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
301
302						=item Field
303
304						Name of the mail field this string will be used in. I
305
306						=back
307
308						B this is a quick-and-dirty solution, intended for character
309						sets which overlap ASCII. B
310						rules regarding the use of encoded words in message headers>.
311						You may want to roll your own variant,
312						using C, for your application.
313						I
314
315						=cut
316
317						=begin commentcode
318
319						sub encode_mimewords {
320						my ($rawstr, %params) = @_;
321						my $charset = $params{Charset} \|\| 'ISO-8859-1';
322						my $encoding = lc($params{Encoding} \|\| 'q');
323
324						### Encode any "words" with unsafe characters.
325						### We limit such words to 18 characters, to guarantee that the
326						### worst-case encoding give us no more than 54 + ~10 < 75 characters
327						my $word;
328						$rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
329						$word = $1;
330						(($word !~ /[$NONPRINT]/o)
331						? $word ### no unsafe chars
332						: encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
333						}xeg;
334						$rawstr;
335						}
336
337						1;
338						__END__