File Coverage

blib/lib/Encode/MIME/Header.pm

Criterion	Covered	Total	%
statement	142	154	92.2
branch	73	104	70.1
condition	75	108	69.4
subroutine	19	21	90.4
pod	4	4	100.0
total	313	391	80.0

line	stmt	bran	cond	sub	pod	time	code
1							package Encode::MIME::Header;
2	5			5		82232	use strict;
	5					14
	5					130
3	5			5		22	use warnings;
	5					6
	5					334
4
5							our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6
7	5			5		28	use Carp ();
	5					8
	5					59
8	5			5		299	use Encode ();
	5					8
	5					81
9	5			5		1159	use MIME::Base64 ();
	5					2342
	5					517
10
11							my %seed = (
12							decode_b => 1, # decodes 'B' encoding ?
13							decode_q => 1, # decodes 'Q' encoding ?
14							encode => 'B', # encode with 'B' or 'Q' ?
15							charset => 'UTF-8', # encode charset
16							bpl => 75, # bytes per line
17							);
18
19							my @objs;
20
21							push @objs, bless {
22							%seed,
23							Name => 'MIME-Header',
24							} => __PACKAGE__;
25
26							push @objs, bless {
27							%seed,
28							decode_q => 0,
29							Name => 'MIME-B',
30							} => __PACKAGE__;
31
32							push @objs, bless {
33							%seed,
34							decode_b => 0,
35							encode => 'Q',
36							Name => 'MIME-Q',
37							} => __PACKAGE__;
38
39							Encode::define_encoding($_, $_->{Name}) foreach @objs;
40
41	5			5		35	use parent qw(Encode::Encoding);
	5					11
	5					30
42
43	0			0	1	0	sub needs_lines { 1 }
44	0			0	1	0	sub perlio_ok { 0 }
45
46							# RFC 2047 and RFC 2231 grammar
47							my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\\|\}~]+/;
48							my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49							my $re_encoding = qr/[QqBb]/;
50							my $re_encoded_text = qr/[^\?]*/;
51							my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52							my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53							my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54
55							# in strict mode check also for valid base64 characters and also for valid quoted printable codes
56							my $re_encoding_strict_b = qr/[Bb]/;
57							my $re_encoding_strict_q = qr/[Qq]/;
58							my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59							my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]\|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
60							my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61							my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b\|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62
63							my $re_newline = qr/(?:\r\n\|[\r\n])/;
64
65							# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66							# except in comments when separator between words and comment round brackets can be omitted
67							my $re_word_begin_strict = qr/(?:(?:[ \t]\|\A)$?\|(?:[^\\]\|\A)$\()/;
68							my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69							my $re_word_end_strict = qr/(?:\)$\|$?(?:$re_newline?[ \t]\|\z))/;
70
71							my $re_match = qr/()((?:$re_encoded_word\s)$re_encoded_word)()/;
72							my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73
74							my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75							my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76
77							our $STRICT_DECODE = 0;
78
79							sub decode($$;$) {
80	206			206	1	1487	my ($obj, $str, $chk) = @_;
81	206	100				427	return undef unless defined $str;
82
83	202	100				422	my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84	202	100				359	my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85
86	202					310	my $stop = 0;
87	202					452	my $output = substr($str, 0, 0); # to propagate taintedness
88
89							# decode each line separately, match whole continuous folded line at one call
90	202		66			23137	1 while not $stop and $str =~ s{^((?:[^\r\n](?:$re_newline[ \t])?))($re_newline)?}{
91
92	420					2220	my $line = $1;
93	420	100				961	my $sep = defined $2 ? $2 : '';
94
95	420	100	100			1225	$stop = 1 unless length($line) or length($sep);
96
97							# NOTE: this code partially could break $chk support
98							# in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99							# fixes breaking inside multi-byte characters
100	420		100			613732	1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101
102							# process sequence of encoded MIME words at once
103	420		100			86353	1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104
105	198					618	my $begin = $1 . $2;
106	198					591	my $words = $3;
107
108	198					351	$begin =~ tr/\r\n//d;
109	198					377	$output .= $begin;
110
111							# decode one MIME word
112	198		100			1849	1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113
114	50225					140990	$output .= $1;
115	50225					93086	my $orig = $2;
116	50225					70713	my $charset = $3;
117	50225					126096	my ($mime_enc, $text) = split /\?/, $5;
118
119	50225					82712	$text =~ tr/\r\n//d;
120
121	50225					105111	my $enc = Encode::find_mime_encoding($charset);
122
123							# in non strict mode allow also perl encoding aliases
124	50225	100	100			85413	if ( not defined $enc and not $STRICT_DECODE ) {
125							# make sure that decoded string will be always strict UTF-8
126	6	100				24	$charset = 'UTF-8' if lc($charset) eq 'utf8';
127	6					17	$enc = Encode::find_encoding($charset);
128							}
129
130	50225	100				79365	if ( not defined $enc ) {
131	13	50	100			57	Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
			66
132	13	50	100			49	Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
			66
133	13	100	100			71	$stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
			100
134	13	100				79	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		100
135	13	100				126	$stop ? $orig : '';
136							} else {
137	50212	100	66			160814	if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
		50	33
138	46					110	my $decoded = _decode_b($enc, $text, $chk);
139	46	0	33			140	$stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
			33
			33
140	46	50				153	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		50
141	46	50				454	$stop ? $orig : '';
142							} elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143	50166					78095	my $decoded = _decode_q($enc, $text, $chk);
144	50166	50	66			110210	$stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
			66
			66
145	50166	50				103659	$output .= (defined $decoded ? $decoded : $text) unless $stop;
		100
146	50166	100				1942716	$stop ? $orig : '';
147							} else {
148	0	0	0			0	Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
			0
149	0	0	0			0	Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
			0
150	0	0	0			0	$stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
			0
151	0	0				0	$output .= ($output =~ /(?:\A\|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
		0
152	0	0				0	$stop ? $orig : '';
153							}
154							}
155
156							}se;
157
158	198	100				420	if ( not $stop ) {
159	194					316	$output .= $words;
160	194					344	$words = '';
161							}
162
163	198					1613	$words;
164
165							}se;
166
167	420	100				804	if ( not $stop ) {
168	218					4005	$line =~ tr/\r\n//d;
169	218					1675	$output .= $line . $sep;
170	218					411	$line = '';
171	218					320	$sep = '';
172							}
173
174	420					3185	$line . $sep;
175
176							}se;
177
178	202	100	100			696	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
179	202					668	return $output;
180							}
181
182							sub _decode_b {
183	46			46		91	my ($enc, $text, $chk) = @_;
184							# MIME::Base64::decode ignores everything after a '=' padding character
185							# in non strict mode split string after each sequence of padding characters and decode each substring
186							my $octets = $STRICT_DECODE ?
187							MIME::Base64::decode($text) :
188	46	100				313	join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
	46					199
189	46					114	return _decode_octets($enc, $octets, $chk);
190							}
191
192							sub _decode_q {
193	50166			50166		77019	my ($enc, $text, $chk) = @_;
194	50166					74366	$text =~ s/_/ /go;
195	50166					55188	$text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
	700					2062
196	50166					73647	return _decode_octets($enc, $text, $chk);
197							}
198
199							sub _decode_octets {
200	50212			50212		67565	my ($enc, $octets, $chk) = @_;
201	50212	50				75387	$chk = 0 unless defined $chk;
202	50212	100	100			121503	$chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
203	50212					175596	my $output = $enc->decode($octets, $chk);
204	50212	100	100			137083	return undef if not ref $chk and $chk and $octets ne '';
			100
205	50210					91183	return $output;
206							}
207
208							sub encode($$;$) {
209	60			60	1	123	my ($obj, $str, $chk) = @_;
210	60	100				126	return undef unless defined $str;
211	57					123	my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
212	57	100	100			239	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
213	57					222	return $output . substr($str, 0, 0); # to propagate taintedness
214							}
215
216							sub _fold_line {
217	57			57		115	my ($obj, $line) = @_;
218	57					86	my $bpl = $obj->{bpl};
219	57					75	my $output = '';
220
221	57					115	while ( length($line) ) {
222	114	50				644	if ( $line =~ s/^(.{0,$bpl})(\s\|\z)// ) {
		0
223	114					248	$output .= $1;
224	114	100				290	$output .= "\r\n" . $2 if length($line);
225							} elsif ( $line =~ s/(\s)(.*)$// ) {
226	0					0	$output .= $line;
227	0					0	$line = $2;
228	0	0				0	$output .= "\r\n" . $1 if length($line);
229							} else {
230	0					0	$output .= $line;
231	0					0	last;
232							}
233							}
234
235	57					126	return $output;
236							}
237
238							sub _encode_string {
239	57			57		103	my ($obj, $str, $chk) = @_;
240	57	50				148	my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
241	57					106	my $enc = Encode::find_mime_encoding($obj->{charset});
242	57					92	my $enc_chk = $chk;
243	57	50				111	$enc_chk = 0 unless defined $enc_chk;
244	57	100	100			187	$enc_chk \|= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
245	57					93	my @result = ();
246	57					83	my $octets = '';
247	57					284	while ( length( my $chr = substr($str, 0, 1, '') ) ) {
248	1256					3001	my $seq = $enc->encode($chr, $enc_chk);
249	1256	100				2100	if ( not length($seq) ) {
250	2					6	substr($str, 0, 0, $chr);
251	2					4	last;
252							}
253	1254	100				2335	if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
254	57					97	push @result, $obj->_encode_word($octets);
255	57					77	$octets = '';
256							}
257	1254					3616	$octets .= $seq;
258							}
259	57	50				145	length($octets) and push @result, $obj->_encode_word($octets);
260	57	100	100			249	$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
			100
261	57					237	return join(' ', @result);
262							}
263
264							sub _encode_word {
265	114			114		199	my ($obj, $octets) = @_;
266	114					145	my $charset = $obj->{charset};
267	114					139	my $encode = $obj->{encode};
268	114	100				216	my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
269	114					344	return "=?$charset?$encode?$text?=";
270							}
271
272							sub _encoded_word_len {
273	1254			1254		1860	my ($obj, $octets) = @_;
274	1254					1653	my $charset = $obj->{charset};
275	1254					1460	my $encode = $obj->{encode};
276	1254	100				2363	my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
277	1254					3063	return length("=?$charset?$encode??=") + $text_len;
278							}
279
280							sub _encode_b {
281	72			72		98	my ($octets) = @_;
282	72					206	return MIME::Base64::encode($octets, '');
283							}
284
285							sub _encoded_b_len {
286	848			848		1184	my ($octets) = @_;
287	848					1448	return ( length($octets) + 2 ) / 3 * 4;
288							}
289
290							my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
291
292							sub _encode_q {
293	42			42		59	my ($octets) = @_;
294	42					163	$octets =~ s{($re_invalid_q_char)}{
295	566					1040	join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
	566					1679
296							}egox;
297	42					108	$octets =~ s/ /_/go;
298	42					86	return $octets;
299							}
300
301							sub _encoded_q_len {
302	406			406		575	my ($octets) = @_;
303	406					1634	my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
304	406					814	return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
305							}
306
307							1;
308							__END__