File Coverage

blib/lib/Encode/MIME/Header.pm

Criterion	Covered	Total	%
statement	139	151	92.0
branch	73	100	73.0
condition	63	84	75.0
subroutine	19	21	90.4
pod	4	4	100.0
total	298	360	82.7

line	stmt	bran	cond	sub	pod	time	code
1							package Encode::MIME::Header;
2	5			5		39001	use strict;
	5					14
	5					146
3	5			5		30	use warnings;
	5					12
	5					414
4
5							our $VERSION = do { my @r = ( q$Revision: 2.27 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6
7	5			5		33	use Carp ();
	5					9
	5					68
8	5			5		365	use Encode ();
	5					11
	5					81
9	5			5		2165	use MIME::Base64 ();
	5					2840
	5					538
10
11							my %seed = (
12							decode_b => 1, # decodes 'B' encoding ?
13							decode_q => 1, # decodes 'Q' encoding ?
14							encode => 'B', # encode with 'B' or 'Q' ?
15							charset => 'UTF-8', # encode charset
16							bpl => 75, # bytes per line
17							);
18
19							my @objs;
20
21							push @objs, bless {
22							%seed,
23							Name => 'MIME-Header',
24							} => __PACKAGE__;
25
26							push @objs, bless {
27							%seed,
28							decode_q => 0,
29							Name => 'MIME-B',
30							} => __PACKAGE__;
31
32							push @objs, bless {
33							%seed,
34							decode_b => 0,
35							encode => 'Q',
36							Name => 'MIME-Q',
37							} => __PACKAGE__;
38
39							Encode::define_encoding($_, $_->{Name}) foreach @objs;
40
41	5			5		36	use parent qw(Encode::Encoding);
	5					10
	5					41
42
43	0			0	1	0	sub needs_lines { 1 }
44	0			0	1	0	sub perlio_ok { 0 }
45
46							# RFC 2047 and RFC 2231 grammar
47							my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\\|\}~]+/;
48							my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49							my $re_encoding = qr/[QqBb]/;
50							my $re_encoded_text = qr/[^\?]*/;
51							my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52							my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53							my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54
55							# in strict mode check also for valid base64 characters and also for valid quoted printable codes
56							my $re_encoding_strict_b = qr/[Bb]/;
57							my $re_encoding_strict_q = qr/[Qq]/;
58							my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59							my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]\|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
60							my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61							my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62
63							my $re_newline = qr/(?:\r\n\|[\r\n])/;
64
65							# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66							# except in comments when separator between words and comment round brackets can be omitted
67							my $re_word_begin_strict = qr/(?:(?:[ \t]\|\A)$?\|(?:[^\\]\|\A)$\()/;
68							my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69							my $re_word_end_strict = qr/(?:\)$\|$?(?:$re_newline?[ \t]\|\z))/;
70
71							my $re_match = qr/()((?:$re_encoded_word\s)$re_encoded_word)()/;
72							my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73
74							my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75							my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76
77							our $STRICT_DECODE = 0;
78
79							sub decode($$;$) {
80	206			206	1	504	my ($obj, $str, $chk) = @_;
81	206	100				495	return undef unless defined $str;
82
83	202	100				466	my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84	202	100				375	my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85
86	202					287	my $stop = 0;
87	202					441	my $output = substr($str, 0, 0); # to propagate taintedness
88
89							# decode each line separately, match whole continuous folded line at one call
90	202		66			21551	1 while not $stop and $str =~ s{^((?:[^\r\n](?:$re_newline[ \t])?))($re_newline)?}{
91
92	420					2223	my $line = $1;
93	420	100				905	my $sep = defined $2 ? $2 : '';
94
95	420	100	100			1451	$stop = 1 unless length($line) or length($sep);
96
97							# NOTE: this code partially could break $chk support
98							# in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99							# fixes breaking inside multi-byte characters
100	420		100			595224	1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101
102							# process sequence of encoded MIME words at once
103	420		100			88902	1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104
105	198					577	my $begin = $1 . $2;
106	198					676	my $words = $3;
107
108	198					368	$begin =~ tr/\r\n//d;
109	198					307	$output .= $begin;
110
111							# decode one MIME word
112	198		100			1638	1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113
114	50225					155185	$output .= $1;
115	50225					100937	my $orig = $2;
116	50225					81662	my $charset = $3;
117	50225					132442	my ($mime_enc, $text) = split /\?/, $5;
118
119	50225					90650	$text =~ tr/\r\n//d;
120
121	50225					123531	my $enc = Encode::find_mime_encoding($charset);
122
123							# in non strict mode allow also perl encoding aliases
124	50225	100	100			108546	if ( not defined $enc and not $STRICT_DECODE ) {
125							# make sure that decoded string will be always strict UTF-8
126	6	100				19	$charset = 'UTF-8' if lc($charset) eq 'utf8';
127	6					14	$enc = Encode::find_encoding($charset);
128							}
129
130	50225	100				95080	if ( not defined $enc ) {
131	13	50	66			55	Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::DIE_ON_ERR;
132	13	50	66			50	Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::WARN_ON_ERR;
133	13	100	100			49	$stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
134	13	100				68	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		100
135	13	100				119	$stop ? $orig : '';
136							} else {
137	50212	100	66			206951	if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
		50	33
138	46					106	my $decoded = _decode_b($enc, $text, $chk);
139	46	50	33			163	$stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
			33
140	46	50				146	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		50
141	46	50				495	$stop ? $orig : '';
142							} elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143	50166					95373	my $decoded = _decode_q($enc, $text, $chk);
144	50166	100	66			148634	$stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
			66
145	50166	50				124808	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		100
146	50166	100				1961904	$stop ? $orig : '';
147							} else {
148	0	0	0			0	Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::DIE_ON_ERR;
149	0	0	0			0	Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::WARN_ON_ERR;
150	0	0	0			0	$stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
151	0	0				0	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		0
152	0	0				0	$stop ? $orig : '';
153							}
154							}
155
156							}se;
157
158	198	100				529	if ( not $stop ) {
159	194					332	$output .= $words;
160	194					347	$words = '';
161							}
162
163	198					1601	$words;
164
165							}se;
166
167	420	100				902	if ( not $stop ) {
168	218					4471	$line =~ tr/\r\n//d;
169	218					2250	$output .= $line . $sep;
170	218					374	$line = '';
171	218					320	$sep = '';
172							}
173
174	420					2802	$line . $sep;
175
176							}se;
177
178	202	100	100			835	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
179	202					654	return $output;
180							}
181
182							sub _decode_b {
183	46			46		93	my ($enc, $text, $chk) = @_;
184							# MIME::Base64::decode ignores everything after a '=' padding character
185							# in non strict mode split string after each sequence of padding characters and decode each substring
186							my $octets = $STRICT_DECODE ?
187							MIME::Base64::decode($text) :
188	46	100				292	join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
	46					210
189	46					125	return _decode_octets($enc, $octets, $chk);
190							}
191
192							sub _decode_q {
193	50166			50166		85680	my ($enc, $text, $chk) = @_;
194	50166					85898	$text =~ s/_/ /go;
195	50166					66951	$text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
	700					1876
196	50166					97792	return _decode_octets($enc, $text, $chk);
197							}
198
199							sub _decode_octets {
200	50212			50212		79787	my ($enc, $octets, $chk) = @_;
201	50212	100	100			183582	$chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
202	50212					193220	my $output = $enc->decode($octets, $chk);
203	50212	100	100			197387	return undef if not ref $chk and $chk and $octets ne '';
			100
204	50210					104490	return $output;
205							}
206
207							sub encode($$;$) {
208	60			60	1	143	my ($obj, $str, $chk) = @_;
209	60	100				171	return undef unless defined $str;
210	57					155	my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
211	57	100	100			263	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
212	57					236	return $output . substr($str, 0, 0); # to propagate taintedness
213							}
214
215							sub _fold_line {
216	57			57		124	my ($obj, $line) = @_;
217	57					97	my $bpl = $obj->{bpl};
218	57					92	my $output = '';
219
220	57					129	while ( length($line) ) {
221	114	50				634	if ( $line =~ s/^(.{0,$bpl})(\s\|\z)// ) {
		0
222	114					265	$output .= $1;
223	114	100				360	$output .= "\r\n" . $2 if length($line);
224							} elsif ( $line =~ s/(\s)(.*)$// ) {
225	0					0	$output .= $line;
226	0					0	$line = $2;
227	0	0				0	$output .= "\r\n" . $1 if length($line);
228							} else {
229	0					0	$output .= $line;
230	0					0	last;
231							}
232							}
233
234	57					136	return $output;
235							}
236
237							sub _encode_string {
238	57			57		120	my ($obj, $str, $chk) = @_;
239	57	50				148	my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
240	57					144	my $enc = Encode::find_mime_encoding($obj->{charset});
241	57	100	100			256	my $enc_chk = (not ref $chk and $chk) ? ($chk \| Encode::LEAVE_SRC) : $chk;
242	57					119	my @result = ();
243	57					93	my $octets = '';
244	57					303	while ( length( my $chr = substr($str, 0, 1, '') ) ) {
245	1256					3254	my $seq = $enc->encode($chr, $enc_chk);
246	1256	100				2594	if ( not length($seq) ) {
247	2					5	substr($str, 0, 0, $chr);
248	2					5	last;
249							}
250	1254	100				2684	if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
251	57					118	push @result, $obj->_encode_word($octets);
252	57					86	$octets = '';
253							}
254	1254					3872	$octets .= $seq;
255							}
256	57	50				181	length($octets) and push @result, $obj->_encode_word($octets);
257	57	100	100			270	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
258	57					232	return join(' ', @result);
259							}
260
261							sub _encode_word {
262	114			114		207	my ($obj, $octets) = @_;
263	114					183	my $charset = $obj->{charset};
264	114					165	my $encode = $obj->{encode};
265	114	100				266	my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
266	114					362	return "=?$charset?$encode?$text?=";
267							}
268
269							sub _encoded_word_len {
270	1254			1254		2152	my ($obj, $octets) = @_;
271	1254					1790	my $charset = $obj->{charset};
272	1254					1670	my $encode = $obj->{encode};
273	1254	100				2616	my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
274	1254					3611	return length("=?$charset?$encode??=") + $text_len;
275							}
276
277							sub _encode_b {
278	72			72		116	my ($octets) = @_;
279	72					254	return MIME::Base64::encode($octets, '');
280							}
281
282							sub _encoded_b_len {
283	848			848		1291	my ($octets) = @_;
284	848					1630	return ( length($octets) + 2 ) / 3 * 4;
285							}
286
287							my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
288
289							sub _encode_q {
290	42			42		73	my ($octets) = @_;
291	42					196	$octets =~ s{($re_invalid_q_char)}{
292	566					1234	join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
	566					1786
293							}egox;
294	42					103	$octets =~ s/ /_/go;
295	42					97	return $octets;
296							}
297
298							sub _encoded_q_len {
299	406			406		647	my ($octets) = @_;
300	406					1605	my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
301	406					1125	return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
302							}
303
304							1;
305							__END__