File Coverage

blib/lib/Encode/MIME/Header.pm

Criterion	Covered	Total	%
statement	139	151	92.0
branch	73	100	73.0
condition	63	84	75.0
subroutine	19	21	90.4
pod	4	4	100.0
total	298	360	82.7

line	stmt	bran	cond	sub	pod	time	code
1							package Encode::MIME::Header;
2	5			5		80032	use strict;
	5					18
	5					179
3	5			5		38	use warnings;
	5					20
	5					465
4
5							our $VERSION = do { my @r = ( q$Revision: 2.26 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6
7	5			5		41	use Carp ();
	5					16
	5					89
8	5			5		647	use Encode ();
	5					24
	5					118
9	5			5		2814	use MIME::Base64 ();
	5					3858
	5					635
10
11							my %seed = (
12							decode_b => 1, # decodes 'B' encoding ?
13							decode_q => 1, # decodes 'Q' encoding ?
14							encode => 'B', # encode with 'B' or 'Q' ?
15							charset => 'UTF-8', # encode charset
16							bpl => 75, # bytes per line
17							);
18
19							my @objs;
20
21							push @objs, bless {
22							%seed,
23							Name => 'MIME-Header',
24							} => __PACKAGE__;
25
26							push @objs, bless {
27							%seed,
28							decode_q => 0,
29							Name => 'MIME-B',
30							} => __PACKAGE__;
31
32							push @objs, bless {
33							%seed,
34							decode_b => 0,
35							encode => 'Q',
36							Name => 'MIME-Q',
37							} => __PACKAGE__;
38
39							Encode::define_encoding($_, $_->{Name}) foreach @objs;
40
41	5			5		46	use parent qw(Encode::Encoding);
	5					14
	5					56
42
43	0			0	1	0	sub needs_lines { 1 }
44	0			0	1	0	sub perlio_ok { 0 }
45
46							# RFC 2047 and RFC 2231 grammar
47							my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\\|\}~]+/;
48							my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49							my $re_encoding = qr/[QqBb]/;
50							my $re_encoded_text = qr/[^\?]*/;
51							my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52							my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53							my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54
55							# in strict mode check also for valid base64 characters and also for valid quoted printable codes
56							my $re_encoding_strict_b = qr/[Bb]/;
57							my $re_encoding_strict_q = qr/[Qq]/;
58							my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59							my $re_encoded_text_strict_q = qr/(?:[^\?\s=]\|=[0-9A-Fa-f]{2})*/;
60							my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61							my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62
63							my $re_newline = qr/(?:\r\n\|[\r\n])/;
64
65							# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66							# except in comments when separator between words and comment round brackets can be omitted
67							my $re_word_begin_strict = qr/(?:(?:[ \t]\|\A)$?\|(?:[^\\]\|\A)$\()/;
68							my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69							my $re_word_end_strict = qr/(?:\)$\|$?(?:$re_newline?[ \t]\|\z))/;
70
71							my $re_match = qr/()((?:$re_encoded_word\s)$re_encoded_word)()/;
72							my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73
74							my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75							my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76
77							our $STRICT_DECODE = 0;
78
79							sub decode($$;$) {
80	204			204	1	679	my ($obj, $str, $chk) = @_;
81	204	100				675	return undef unless defined $str;
82
83	200	100				579	my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84	200	100				497	my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85
86	200					381	my $stop = 0;
87	200					571	my $output = substr($str, 0, 0); # to propagate taintedness
88
89							# decode each line separately, match whole continuous folded line at one call
90	200		66			29975	1 while not $stop and $str =~ s{^((?:[^\r\n](?:$re_newline[ \t])?))($re_newline)?}{
91
92	416					3423	my $line = $1;
93	416	100				1297	my $sep = defined $2 ? $2 : '';
94
95	416	100	100			1691	$stop = 1 unless length($line) or length($sep);
96
97							# NOTE: this code partially could break $chk support
98							# in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99							# fixes breaking inside multi-byte characters
100	416		100			840461	1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101
102							# process sequence of encoded MIME words at once
103	416		100			115583	1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104
105	197					772	my $begin = $1 . $2;
106	197					980	my $words = $3;
107
108	197					464	$begin =~ tr/\r\n//d;
109	197					448	$output .= $begin;
110
111							# decode one MIME word
112	197		100			2369	1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113
114	50224					206611	$output .= $1;
115	50224					140903	my $orig = $2;
116	50224					114426	my $charset = $3;
117	50224					189881	my ($mime_enc, $text) = split /\?/, $5;
118
119	50224					120767	$text =~ tr/\r\n//d;
120
121	50224					168898	my $enc = Encode::find_mime_encoding($charset);
122
123							# in non strict mode allow also perl encoding aliases
124	50224	100	100			144644	if ( not defined $enc and not $STRICT_DECODE ) {
125							# make sure that decoded string will be always strict UTF-8
126	6	100				25	$charset = 'UTF-8' if lc($charset) eq 'utf8';
127	6					19	$enc = Encode::find_encoding($charset);
128							}
129
130	50224	100				121820	if ( not defined $enc ) {
131	13	50	66			77	Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::DIE_ON_ERR;
132	13	50	66			73	Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::WARN_ON_ERR;
133	13	100	100			74	$stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
134	13	100				114	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		100
135	13	100				187	$stop ? $orig : '';
136							} else {
137	50211	100	66			279030	if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
		50	33
138	46					163	my $decoded = _decode_b($enc, $text, $chk);
139	46	50	33			233	$stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
			33
140	46	50				215	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		50
141	46	50				674	$stop ? $orig : '';
142							} elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143	50165					131192	my $decoded = _decode_q($enc, $text, $chk);
144	50165	100	66			205179	$stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
			66
145	50165	50				175669	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		100
146	50165	100				2790054	$stop ? $orig : '';
147							} else {
148	0	0	0			0	Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::DIE_ON_ERR;
149	0	0	0			0	Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::WARN_ON_ERR;
150	0	0	0			0	$stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
151	0	0				0	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		0
152	0	0				0	$stop ? $orig : '';
153							}
154							}
155
156							}se;
157
158	197	100				621	if ( not $stop ) {
159	193					440	$output .= $words;
160	193					467	$words = '';
161							}
162
163	197					2075	$words;
164
165							}se;
166
167	416	100				1196	if ( not $stop ) {
168	216					5374	$line =~ tr/\r\n//d;
169	216					3294	$output .= $line . $sep;
170	216					497	$line = '';
171	216					420	$sep = '';
172							}
173
174	416					4040	$line . $sep;
175
176							}se;
177
178	200	100	100			1090	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
179	200					1152	return $output;
180							}
181
182							sub _decode_b {
183	46			46		149	my ($enc, $text, $chk) = @_;
184							# MIME::Base64::decode ignores everything after a '=' padding character
185							# in non strict mode split string after each sequence of padding characters and decode each substring
186							my $octets = $STRICT_DECODE ?
187							MIME::Base64::decode($text) :
188	46	100				415	join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
	46					317
189	46					188	return _decode_octets($enc, $octets, $chk);
190							}
191
192							sub _decode_q {
193	50165			50165		116325	my ($enc, $text, $chk) = @_;
194	50165					114742	$text =~ s/_/ /go;
195	50165					89731	$text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
	700					2410
196	50165					132269	return _decode_octets($enc, $text, $chk);
197							}
198
199							sub _decode_octets {
200	50211			50211		120819	my ($enc, $octets, $chk) = @_;
201	50211	100	100			228941	$chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
202	50211					288424	my $output = $enc->decode($octets, $chk);
203	50211	100	100			243776	return undef if not ref $chk and $chk and $octets ne '';
			100
204	50209					145358	return $output;
205							}
206
207							sub encode($$;$) {
208	60			60	1	234	my ($obj, $str, $chk) = @_;
209	60	100				242	return undef unless defined $str;
210	57					235	my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
211	57	100	100			407	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
212	57					366	return $output . substr($str, 0, 0); # to propagate taintedness
213							}
214
215							sub _fold_line {
216	57			57		191	my ($obj, $line) = @_;
217	57					166	my $bpl = $obj->{bpl};
218	57					140	my $output = '';
219
220	57					199	while ( length($line) ) {
221	114	50				848	if ( $line =~ s/^(.{0,$bpl})(\s\|\z)// ) {
		0
222	114					362	$output .= $1;
223	114	100				542	$output .= "\r\n" . $2 if length($line);
224							} elsif ( $line =~ s/(\s)(.*)$// ) {
225	0					0	$output .= $line;
226	0					0	$line = $2;
227	0	0				0	$output .= "\r\n" . $1 if length($line);
228							} else {
229	0					0	$output .= $line;
230	0					0	last;
231							}
232							}
233
234	57					196	return $output;
235							}
236
237							sub _encode_string {
238	57			57		222	my ($obj, $str, $chk) = @_;
239	57	50				239	my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
240	57					238	my $enc = Encode::find_mime_encoding($obj->{charset});
241	57	100	100			361	my $enc_chk = (not ref $chk and $chk) ? ($chk \| Encode::LEAVE_SRC) : $chk;
242	57					217	my @result = ();
243	57					162	my $octets = '';
244	57					411	while ( length( my $chr = substr($str, 0, 1, '') ) ) {
245	1256					4189	my $seq = $enc->encode($chr, $enc_chk);
246	1256	100				3247	if ( not length($seq) ) {
247	2					9	substr($str, 0, 0, $chr);
248	2					8	last;
249							}
250	1254	100				3313	if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
251	57					162	push @result, $obj->_encode_word($octets);
252	57					104	$octets = '';
253							}
254	1254					5195	$octets .= $seq;
255							}
256	57	50				289	length($octets) and push @result, $obj->_encode_word($octets);
257	57	100	100			428	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
258	57					358	return join(' ', @result);
259							}
260
261							sub _encode_word {
262	114			114		298	my ($obj, $octets) = @_;
263	114					253	my $charset = $obj->{charset};
264	114					227	my $encode = $obj->{encode};
265	114	100				376	my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
266	114					514	return "=?$charset?$encode?$text?=";
267							}
268
269							sub _encoded_word_len {
270	1254			1254		2703	my ($obj, $octets) = @_;
271	1254					2293	my $charset = $obj->{charset};
272	1254					2260	my $encode = $obj->{encode};
273	1254	100				3304	my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
274	1254					4580	return length("=?$charset?$encode??=") + $text_len;
275							}
276
277							sub _encode_b {
278	72			72		156	my ($octets) = @_;
279	72					362	return MIME::Base64::encode($octets, '');
280							}
281
282							sub _encoded_b_len {
283	848			848		1690	my ($octets) = @_;
284	848					2178	return ( length($octets) + 2 ) / 3 * 4;
285							}
286
287							my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
288
289							sub _encode_q {
290	42			42		97	my ($octets) = @_;
291	42					242	$octets =~ s{($re_invalid_q_char)}{
292	566					1433	join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
	566					2084
293							}egox;
294	42					145	$octets =~ s/ /_/go;
295	42					117	return $octets;
296							}
297
298							sub _encoded_q_len {
299	406			406		798	my ($octets) = @_;
300	406					2099	my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
301	406					1117	return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
302							}
303
304							1;
305							__END__