File Coverage

blib/lib/Encode/MIME/Header.pm
Criterion Covered Total %
statement 142 154 92.2
branch 73 104 70.1
condition 75 108 69.4
subroutine 19 21 90.4
pod 4 4 100.0
total 313 391 80.0


line stmt bran cond sub pod time code
1             package Encode::MIME::Header;
2 5     5   82232 use strict;
  5         14  
  5         130  
3 5     5   22 use warnings;
  5         6  
  5         334  
4              
5             our $VERSION = do { my @r = ( q$Revision: 2.28 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6              
7 5     5   28 use Carp ();
  5         8  
  5         59  
8 5     5   299 use Encode ();
  5         8  
  5         81  
9 5     5   1159 use MIME::Base64 ();
  5         2342  
  5         517  
10              
11             my %seed = (
12             decode_b => 1, # decodes 'B' encoding ?
13             decode_q => 1, # decodes 'Q' encoding ?
14             encode => 'B', # encode with 'B' or 'Q' ?
15             charset => 'UTF-8', # encode charset
16             bpl => 75, # bytes per line
17             );
18              
19             my @objs;
20              
21             push @objs, bless {
22             %seed,
23             Name => 'MIME-Header',
24             } => __PACKAGE__;
25              
26             push @objs, bless {
27             %seed,
28             decode_q => 0,
29             Name => 'MIME-B',
30             } => __PACKAGE__;
31              
32             push @objs, bless {
33             %seed,
34             decode_b => 0,
35             encode => 'Q',
36             Name => 'MIME-Q',
37             } => __PACKAGE__;
38              
39             Encode::define_encoding($_, $_->{Name}) foreach @objs;
40              
41 5     5   35 use parent qw(Encode::Encoding);
  5         11  
  5         30  
42              
43 0     0 1 0 sub needs_lines { 1 }
44 0     0 1 0 sub perlio_ok { 0 }
45              
46             # RFC 2047 and RFC 2231 grammar
47             my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
48             my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49             my $re_encoding = qr/[QqBb]/;
50             my $re_encoded_text = qr/[^\?]*/;
51             my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52             my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53             my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54              
55             # in strict mode check also for valid base64 characters and also for valid quoted printable codes
56             my $re_encoding_strict_b = qr/[Bb]/;
57             my $re_encoding_strict_q = qr/[Qq]/;
58             my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59             my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
60             my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61             my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62              
63             my $re_newline = qr/(?:\r\n|[\r\n])/;
64              
65             # in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66             # except in comments when separator between words and comment round brackets can be omitted
67             my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
68             my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69             my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
70              
71             my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
72             my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73              
74             my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75             my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76              
77             our $STRICT_DECODE = 0;
78              
79             sub decode($$;$) {
80 206     206 1 1487 my ($obj, $str, $chk) = @_;
81 206 100       427 return undef unless defined $str;
82              
83 202 100       422 my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84 202 100       359 my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85              
86 202         310 my $stop = 0;
87 202         452 my $output = substr($str, 0, 0); # to propagate taintedness
88              
89             # decode each line separately, match whole continuous folded line at one call
90 202   66     23137 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
91              
92 420         2220 my $line = $1;
93 420 100       961 my $sep = defined $2 ? $2 : '';
94              
95 420 100 100     1225 $stop = 1 unless length($line) or length($sep);
96              
97             # NOTE: this code partially could break $chk support
98             # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99             # fixes breaking inside multi-byte characters
100 420   100     613732 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101              
102             # process sequence of encoded MIME words at once
103 420   100     86353 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104              
105 198         618 my $begin = $1 . $2;
106 198         591 my $words = $3;
107              
108 198         351 $begin =~ tr/\r\n//d;
109 198         377 $output .= $begin;
110              
111             # decode one MIME word
112 198   100     1849 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113              
114 50225         140990 $output .= $1;
115 50225         93086 my $orig = $2;
116 50225         70713 my $charset = $3;
117 50225         126096 my ($mime_enc, $text) = split /\?/, $5;
118              
119 50225         82712 $text =~ tr/\r\n//d;
120              
121 50225         105111 my $enc = Encode::find_mime_encoding($charset);
122              
123             # in non strict mode allow also perl encoding aliases
124 50225 100 100     85413 if ( not defined $enc and not $STRICT_DECODE ) {
125             # make sure that decoded string will be always strict UTF-8
126 6 100       24 $charset = 'UTF-8' if lc($charset) eq 'utf8';
127 6         17 $enc = Encode::find_encoding($charset);
128             }
129              
130 50225 100       79365 if ( not defined $enc ) {
131 13 50 100     57 Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
      66        
132 13 50 100     49 Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
      66        
133 13 100 100     71 $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
      100        
134 13 100       79 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    100          
135 13 100       126 $stop ? $orig : '';
136             } else {
137 50212 100 66     160814 if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
    50 33        
138 46         110 my $decoded = _decode_b($enc, $text, $chk);
139 46 0 33     140 $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
      33        
      33        
140 46 50       153 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    50          
141 46 50       454 $stop ? $orig : '';
142             } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143 50166         78095 my $decoded = _decode_q($enc, $text, $chk);
144 50166 50 66     110210 $stop = 1 if not defined $decoded and not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
      66        
      66        
145 50166 50       103659 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    100          
146 50166 100       1942716 $stop ? $orig : '';
147             } else {
148 0 0 0     0 Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::DIE_ON_ERR;
      0        
149 0 0 0     0 Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk and $chk & Encode::WARN_ON_ERR;
      0        
150 0 0 0     0 $stop = 1 if not ref $chk and $chk and $chk & Encode::RETURN_ON_ERR;
      0        
151 0 0       0 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    0          
152 0 0       0 $stop ? $orig : '';
153             }
154             }
155              
156             }se;
157              
158 198 100       420 if ( not $stop ) {
159 194         316 $output .= $words;
160 194         344 $words = '';
161             }
162              
163 198         1613 $words;
164              
165             }se;
166              
167 420 100       804 if ( not $stop ) {
168 218         4005 $line =~ tr/\r\n//d;
169 218         1675 $output .= $line . $sep;
170 218         411 $line = '';
171 218         320 $sep = '';
172             }
173              
174 420         3185 $line . $sep;
175              
176             }se;
177              
178 202 100 100     696 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
179 202         668 return $output;
180             }
181              
182             sub _decode_b {
183 46     46   91 my ($enc, $text, $chk) = @_;
184             # MIME::Base64::decode ignores everything after a '=' padding character
185             # in non strict mode split string after each sequence of padding characters and decode each substring
186             my $octets = $STRICT_DECODE ?
187             MIME::Base64::decode($text) :
188 46 100       313 join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
  46         199  
189 46         114 return _decode_octets($enc, $octets, $chk);
190             }
191              
192             sub _decode_q {
193 50166     50166   77019 my ($enc, $text, $chk) = @_;
194 50166         74366 $text =~ s/_/ /go;
195 50166         55188 $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
  700         2062  
196 50166         73647 return _decode_octets($enc, $text, $chk);
197             }
198              
199             sub _decode_octets {
200 50212     50212   67565 my ($enc, $octets, $chk) = @_;
201 50212 50       75387 $chk = 0 unless defined $chk;
202 50212 100 100     121503 $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
203 50212         175596 my $output = $enc->decode($octets, $chk);
204 50212 100 100     137083 return undef if not ref $chk and $chk and $octets ne '';
      100        
205 50210         91183 return $output;
206             }
207              
208             sub encode($$;$) {
209 60     60 1 123 my ($obj, $str, $chk) = @_;
210 60 100       126 return undef unless defined $str;
211 57         123 my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
212 57 100 100     239 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
213 57         222 return $output . substr($str, 0, 0); # to propagate taintedness
214             }
215              
216             sub _fold_line {
217 57     57   115 my ($obj, $line) = @_;
218 57         86 my $bpl = $obj->{bpl};
219 57         75 my $output = '';
220              
221 57         115 while ( length($line) ) {
222 114 50       644 if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
    0          
223 114         248 $output .= $1;
224 114 100       290 $output .= "\r\n" . $2 if length($line);
225             } elsif ( $line =~ s/(\s)(.*)$// ) {
226 0         0 $output .= $line;
227 0         0 $line = $2;
228 0 0       0 $output .= "\r\n" . $1 if length($line);
229             } else {
230 0         0 $output .= $line;
231 0         0 last;
232             }
233             }
234              
235 57         126 return $output;
236             }
237              
238             sub _encode_string {
239 57     57   103 my ($obj, $str, $chk) = @_;
240 57 50       148 my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
241 57         106 my $enc = Encode::find_mime_encoding($obj->{charset});
242 57         92 my $enc_chk = $chk;
243 57 50       111 $enc_chk = 0 unless defined $enc_chk;
244 57 100 100     187 $enc_chk |= Encode::LEAVE_SRC if not ref $enc_chk and $enc_chk;
245 57         93 my @result = ();
246 57         83 my $octets = '';
247 57         284 while ( length( my $chr = substr($str, 0, 1, '') ) ) {
248 1256         3001 my $seq = $enc->encode($chr, $enc_chk);
249 1256 100       2100 if ( not length($seq) ) {
250 2         6 substr($str, 0, 0, $chr);
251 2         4 last;
252             }
253 1254 100       2335 if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
254 57         97 push @result, $obj->_encode_word($octets);
255 57         77 $octets = '';
256             }
257 1254         3616 $octets .= $seq;
258             }
259 57 50       145 length($octets) and push @result, $obj->_encode_word($octets);
260 57 100 100     249 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
261 57         237 return join(' ', @result);
262             }
263              
264             sub _encode_word {
265 114     114   199 my ($obj, $octets) = @_;
266 114         145 my $charset = $obj->{charset};
267 114         139 my $encode = $obj->{encode};
268 114 100       216 my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
269 114         344 return "=?$charset?$encode?$text?=";
270             }
271              
272             sub _encoded_word_len {
273 1254     1254   1860 my ($obj, $octets) = @_;
274 1254         1653 my $charset = $obj->{charset};
275 1254         1460 my $encode = $obj->{encode};
276 1254 100       2363 my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
277 1254         3063 return length("=?$charset?$encode??=") + $text_len;
278             }
279              
280             sub _encode_b {
281 72     72   98 my ($octets) = @_;
282 72         206 return MIME::Base64::encode($octets, '');
283             }
284              
285             sub _encoded_b_len {
286 848     848   1184 my ($octets) = @_;
287 848         1448 return ( length($octets) + 2 ) / 3 * 4;
288             }
289              
290             my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
291              
292             sub _encode_q {
293 42     42   59 my ($octets) = @_;
294 42         163 $octets =~ s{($re_invalid_q_char)}{
295 566         1040 join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
  566         1679  
296             }egox;
297 42         108 $octets =~ s/ /_/go;
298 42         86 return $octets;
299             }
300              
301             sub _encoded_q_len {
302 406     406   575 my ($octets) = @_;
303 406         1634 my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
304 406         814 return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
305             }
306              
307             1;
308             __END__