File Coverage

lib/MIME/AltWords0.pm
Criterion Covered Total %
statement 51 66 77.2
branch 10 18 55.5
condition n/a
subroutine 10 12 83.3
pod 0 1 0.0
total 71 97 73.2


line stmt bran cond sub pod time code
1             package MIME::AltWords;
2             # Dat: `package MIME::AltWords0' would be incorrect. We don't need the zero.
3             # copy of package MIME::Words (version 5.420);
4             # at Wed Sep 27 07:52:29 CEST 2006
5              
6             =head1 NAME
7              
8             MIME::AltWords0 - copy of MIME::Words (not for direct use)
9              
10              
11             =head1 SYNOPSIS
12              
13             L is an auxilary package used by L.
14              
15             If you want to encode or decode MIME words (such as
16             C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in e-mail message headers (mostly
17             Subject, From and To), the recommended Perl module to use is
18             L. Please don't use L (which is
19             useless by itself anyway) or L (version 5.420 has several
20             serious bugs both with encoding and decoding).
21              
22             Most users shouldn't read on, but they should read L
23             instead.
24              
25             =head1 DESCRIPTION
26              
27             The following functions have been moved to L:
28             decode_mimewords() (some code stays here as decode_mimewords_wantarray()),
29              
30             use MIME::AltWords0 qw(:all);
31            
32             ### Split string into array of decoded [DATA,CHARSET] pairs:
33             @decoded = decode_mimewords_wantarray(
34             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
35             );
36            
37             Fellow Americans, you probably won't know what the hell this module
38             is for. Europeans, Russians, et al, you probably do. C<:-)>.
39              
40             For example, here's a valid MIME header you might get:
41              
42             From: =?US-ASCII?Q?Keith_Moore?=
43             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
44             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
45             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
46             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
47             =?US-ASCII?Q?.._cool!?=
48              
49             The fields basically decode to (sorry, I can only approximate the
50             Latin characters with 7 bit sequences /o and 'e):
51              
52             From: Keith Moore
53             To: Keld J/orn Simonsen
54             CC: Andr'e Pirard
55             Subject: If you can read this you understand the example... cool!
56              
57              
58             =head1 PUBLIC INTERFACE
59              
60             =over 4
61              
62             =cut
63              
64             require 5.001;
65              
66             ### Pragmas:
67 1     1   4 use strict;
  1         1  
  1         29  
68 1     1   4 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
  1         2  
  1         69  
69              
70             ### Exporting:
71 1     1   4 use Exporter;
  1         1  
  1         77  
72             %EXPORT_TAGS = (all => [qw(decode_mimewords encode_mimewords encode_mimeword
73             )]);
74             Exporter::export_ok_tags('all');
75              
76             ### Inheritance:
77             @ISA = qw(Exporter);
78              
79             ### Other modules:
80 1     1   762 use MIME::Base64;
  1         657  
  1         53  
81 1     1   713 use MIME::QuotedPrint;
  1         180  
  1         585  
82              
83              
84              
85             #------------------------------
86             #
87             # Globals...
88             #
89             #------------------------------
90              
91             ### The package version, both in 1.23 style *and* usable by MakeMaker:
92             $VERSION = "5.420";
93              
94 1     1   5 use vars qw($NONPRINT); #### pts ####
  1         1  
  1         644  
95              
96             ### Nonprintables (controls + x7F + 8bit):
97             $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF";
98              
99              
100             #------------------------------
101              
102             # _decode_Q STRING
103             # Private: used by _decode_header() to decode "Q" encoding, which is
104             # almost, but not exactly, quoted-printable. :-P
105             sub _decode_Q {
106 34     34   45 my $str = shift;
107 34         92 $str =~ s/_/\x20/g; # RFC-1522, Q rule 2
108 34         120 $str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
  56         215  
109 34         76 $str;
110             }
111              
112             # _encode_Q STRING
113             # Private: used by _encode_header() to decode "Q" encoding, which is
114             # almost, but not exactly, quoted-printable. :-P
115             sub _encode_Q {
116 0     0   0 my $str = shift;
117 0         0 $str =~ s{([_\?\=$NONPRINT])}{sprintf("=%02X", ord($1))}eog;
  0         0  
118 0         0 $str;
119             }
120              
121             # _decode_B STRING
122             # Private: used by _decode_header() to decode "B" encoding.
123             sub _decode_B {
124 30     30   36 my $str = shift;
125 30         91 decode_base64($str);
126             }
127              
128             # _encode_B STRING
129             # Private: used by _decode_header() to decode "B" encoding.
130             sub _encode_B {
131 0     0   0 my $str = shift;
132 0         0 encode_base64($str, '');
133             }
134              
135              
136              
137             #------------------------------
138              
139             =item decode_mimewords_wantarray ENCODED, [OPTS...]
140              
141             I
142             Go through the string looking for RFC-1522-style "Q"
143             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
144              
145             B splits the ENCODED string into a list of decoded
146             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
147             data are returned in a 1-element array C<[DATA]>, giving an effective
148             CHARSET of C.
149              
150             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
151             foreach (decode_mimewords($enc)) {
152             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
153             }
154              
155             B the result is undocumented, please see
156             L instead.
157              
158             In the event of a syntax error, $@ will be set to a description
159             of the error, but parsing will continue as best as possible (so as to
160             get I back when decoding headers).
161             $@ will be false if no error was detected.
162              
163             Any arguments past the ENCODED string are taken to define a hash of options:
164              
165             =over 4
166              
167             =item Field
168              
169             Name of the mail field this string came from. I
170              
171             =back
172              
173             =cut
174              
175             sub decode_mimewords_wantarray {
176 50     50 0 64 my $encstr = shift;
177 50         98 my %params = @_;
178 50         50 my @tokens;
179 50         55 $@ = ''; ### error-return
180              
181             ### Collapse boundaries between adjacent encoded words:
182 50         143 $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
183 50         111 pos($encstr) = 0;
184             ### print STDOUT "ENC = [", $encstr, "]\n";
185              
186             ### Decode:
187 50         68 my ($charset, $encoding, $enc, $dec);
188 50         48 while (1) {
189 150 100       318 last if (pos($encstr) >= length($encstr));
190 100         104 my $pos = pos($encstr); ### save it
191              
192             ### Case 1: are we looking at "=?..?..?="?
193 100 100       364 if ($encstr =~ m{\G # from where we left off..
194             =\?([^?]*) # "=?" + charset +
195             \?([bq]) # "?" + encoding +
196             \?([^?]+) # "?" + data maybe with spcs +
197             \?= # "?="
198             }xgi) {
199 64         192 ($charset, $encoding, $enc) = ($1, lc($2), $3);
200 64 100       168 $dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc));
201 64         144 push @tokens, [$dec, $charset];
202 64         92 next;
203             }
204              
205             ### Case 2: are we looking at a bad "=?..." prefix?
206             ### We need this to detect problems for case 3, which stops at "=?":
207 36         67 pos($encstr) = $pos; # reset the pointer.
208 36 50       85 if ($encstr =~ m{\G=\?}xg) {
209 0         0 $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
210 0         0 push @tokens, ['=?'];
211 0         0 next;
212             }
213              
214             ### Case 3: are we looking at ordinary text?
215 36         51 pos($encstr) = $pos; # reset the pointer.
216 36 50       232 if ($encstr =~ m{\G # from where we left off...
217             ([\x00-\xFF]*? # shortest possible string,
218             \n*) # followed by 0 or more NLs,
219             (?=(\Z|=\?)) # terminated by "=?" or EOS
220             }xg) {
221 36 50       79 length($1) or die "MIME::AltWords0: internal logic err: empty token\n";
222 36         85 push @tokens, [$1];
223 36         47 next;
224             }
225            
226 0 0       0 if ($encstr=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) { #### pts ####
227 0         0 $@.=qq|wide character in encoded string\n|;
228 0 0       0 push @tokens, [$1] if 0!=length($1);
229 0         0 next;
230             }
231              
232             ### Case 4: bug!
233 0         0 die "MIME::AltWords0: unexpected case:\n($encstr) pos $pos\n\t".
234             "Please alert developer.\n";
235             }
236 50 50       256 return (wantarray ? @tokens : join('',map {$_->[0]} @tokens));
  0            
237             }
238              
239             #------------------------------
240              
241             # vvv buggy implementation of encode_mimeword() commented out,
242             # see MIME::AltWords
243              
244             =begin comment
245              
246             =item encode_mimeword RAW, [ENCODING], [CHARSET]
247              
248             I
249             Encode a single RAW "word" that has unsafe characters.
250             The "word" will be encoded in its entirety.
251              
252             ### Encode "<>":
253             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
254              
255             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
256             You may specify the CHARSET, which defaults to C.
257              
258             =cut
259              
260             =begin commentcode
261              
262             sub encode_mimeword {
263             my $word = shift;
264             my $encoding = uc(shift || 'Q');
265             my $charset = uc(shift || 'ISO-8859-1');
266             my $encfunc = (($encoding eq 'Q') ? \&_encode_Q : \&_encode_B);
267             "=?$charset?$encoding?" . &$encfunc($word) . "?=";
268             }
269              
270             =cut
271              
272             #------------------------------
273              
274             # vvv buggy implementation of encode_mimeword() commented out,
275             # see MIME::AltWords
276              
277             =begin comment
278              
279             =item encode_mimewords RAW, [OPTS]
280              
281             I
282             Given a RAW string, try to find and encode all "unsafe" sequences
283             of characters:
284              
285             ### Encode a string with some unsafe "words":
286             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
287              
288             Returns the encoded string.
289             Any arguments past the RAW string are taken to define a hash of options:
290              
291             =over 4
292              
293             =item Charset
294              
295             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
296             a.k.a. "Latin-1".
297              
298             =item Encoding
299              
300             The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
301              
302             =item Field
303              
304             Name of the mail field this string will be used in. I
305              
306             =back
307              
308             B this is a quick-and-dirty solution, intended for character
309             sets which overlap ASCII. B
310             rules regarding the use of encoded words in message headers>.
311             You may want to roll your own variant,
312             using C, for your application.
313             I
314              
315             =cut
316              
317             =begin commentcode
318              
319             sub encode_mimewords {
320             my ($rawstr, %params) = @_;
321             my $charset = $params{Charset} || 'ISO-8859-1';
322             my $encoding = lc($params{Encoding} || 'q');
323              
324             ### Encode any "words" with unsafe characters.
325             ### We limit such words to 18 characters, to guarantee that the
326             ### worst-case encoding give us no more than 54 + ~10 < 75 characters
327             my $word;
328             $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
329             $word = $1;
330             (($word !~ /[$NONPRINT]/o)
331             ? $word ### no unsafe chars
332             : encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
333             }xeg;
334             $rawstr;
335             }
336              
337             1;
338             __END__