File Coverage

blib/lib/Encode/MIME/Header.pm
Criterion Covered Total %
statement 139 151 92.0
branch 73 100 73.0
condition 63 84 75.0
subroutine 19 21 90.4
pod 4 4 100.0
total 298 360 82.7


line stmt bran cond sub pod time code
1             package Encode::MIME::Header;
2 5     5   80032 use strict;
  5         18  
  5         179  
3 5     5   38 use warnings;
  5         20  
  5         465  
4              
5             our $VERSION = do { my @r = ( q$Revision: 2.26 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6              
7 5     5   41 use Carp ();
  5         16  
  5         89  
8 5     5   647 use Encode ();
  5         24  
  5         118  
9 5     5   2814 use MIME::Base64 ();
  5         3858  
  5         635  
10              
11             my %seed = (
12             decode_b => 1, # decodes 'B' encoding ?
13             decode_q => 1, # decodes 'Q' encoding ?
14             encode => 'B', # encode with 'B' or 'Q' ?
15             charset => 'UTF-8', # encode charset
16             bpl => 75, # bytes per line
17             );
18              
19             my @objs;
20              
21             push @objs, bless {
22             %seed,
23             Name => 'MIME-Header',
24             } => __PACKAGE__;
25              
26             push @objs, bless {
27             %seed,
28             decode_q => 0,
29             Name => 'MIME-B',
30             } => __PACKAGE__;
31              
32             push @objs, bless {
33             %seed,
34             decode_b => 0,
35             encode => 'Q',
36             Name => 'MIME-Q',
37             } => __PACKAGE__;
38              
39             Encode::define_encoding($_, $_->{Name}) foreach @objs;
40              
41 5     5   46 use parent qw(Encode::Encoding);
  5         14  
  5         56  
42              
43 0     0 1 0 sub needs_lines { 1 }
44 0     0 1 0 sub perlio_ok { 0 }
45              
46             # RFC 2047 and RFC 2231 grammar
47             my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
48             my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49             my $re_encoding = qr/[QqBb]/;
50             my $re_encoded_text = qr/[^\?]*/;
51             my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52             my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53             my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54              
55             # in strict mode check also for valid base64 characters and also for valid quoted printable codes
56             my $re_encoding_strict_b = qr/[Bb]/;
57             my $re_encoding_strict_q = qr/[Qq]/;
58             my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59             my $re_encoded_text_strict_q = qr/(?:[^\?\s=]|=[0-9A-Fa-f]{2})*/;
60             my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61             my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62              
63             my $re_newline = qr/(?:\r\n|[\r\n])/;
64              
65             # in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66             # except in comments when separator between words and comment round brackets can be omitted
67             my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
68             my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69             my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
70              
71             my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
72             my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73              
74             my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75             my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76              
77             our $STRICT_DECODE = 0;
78              
79             sub decode($$;$) {
80 204     204 1 679 my ($obj, $str, $chk) = @_;
81 204 100       675 return undef unless defined $str;
82              
83 200 100       579 my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84 200 100       497 my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85              
86 200         381 my $stop = 0;
87 200         571 my $output = substr($str, 0, 0); # to propagate taintedness
88              
89             # decode each line separately, match whole continuous folded line at one call
90 200   66     29975 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
91              
92 416         3423 my $line = $1;
93 416 100       1297 my $sep = defined $2 ? $2 : '';
94              
95 416 100 100     1691 $stop = 1 unless length($line) or length($sep);
96              
97             # NOTE: this code partially could break $chk support
98             # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99             # fixes breaking inside multi-byte characters
100 416   100     840461 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101              
102             # process sequence of encoded MIME words at once
103 416   100     115583 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104              
105 197         772 my $begin = $1 . $2;
106 197         980 my $words = $3;
107              
108 197         464 $begin =~ tr/\r\n//d;
109 197         448 $output .= $begin;
110              
111             # decode one MIME word
112 197   100     2369 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113              
114 50224         206611 $output .= $1;
115 50224         140903 my $orig = $2;
116 50224         114426 my $charset = $3;
117 50224         189881 my ($mime_enc, $text) = split /\?/, $5;
118              
119 50224         120767 $text =~ tr/\r\n//d;
120              
121 50224         168898 my $enc = Encode::find_mime_encoding($charset);
122              
123             # in non strict mode allow also perl encoding aliases
124 50224 100 100     144644 if ( not defined $enc and not $STRICT_DECODE ) {
125             # make sure that decoded string will be always strict UTF-8
126 6 100       25 $charset = 'UTF-8' if lc($charset) eq 'utf8';
127 6         19 $enc = Encode::find_encoding($charset);
128             }
129              
130 50224 100       121820 if ( not defined $enc ) {
131 13 50 66     77 Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::DIE_ON_ERR;
132 13 50 66     73 Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::WARN_ON_ERR;
133 13 100 100     74 $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
134 13 100       114 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    100          
135 13 100       187 $stop ? $orig : '';
136             } else {
137 50211 100 66     279030 if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
    50 33        
138 46         163 my $decoded = _decode_b($enc, $text, $chk);
139 46 50 33     233 $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
      33        
140 46 50       215 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    50          
141 46 50       674 $stop ? $orig : '';
142             } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143 50165         131192 my $decoded = _decode_q($enc, $text, $chk);
144 50165 100 66     205179 $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
      66        
145 50165 50       175669 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    100          
146 50165 100       2790054 $stop ? $orig : '';
147             } else {
148 0 0 0     0 Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::DIE_ON_ERR;
149 0 0 0     0 Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::WARN_ON_ERR;
150 0 0 0     0 $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
151 0 0       0 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    0          
152 0 0       0 $stop ? $orig : '';
153             }
154             }
155              
156             }se;
157              
158 197 100       621 if ( not $stop ) {
159 193         440 $output .= $words;
160 193         467 $words = '';
161             }
162              
163 197         2075 $words;
164              
165             }se;
166              
167 416 100       1196 if ( not $stop ) {
168 216         5374 $line =~ tr/\r\n//d;
169 216         3294 $output .= $line . $sep;
170 216         497 $line = '';
171 216         420 $sep = '';
172             }
173              
174 416         4040 $line . $sep;
175              
176             }se;
177              
178 200 100 100     1090 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
179 200         1152 return $output;
180             }
181              
182             sub _decode_b {
183 46     46   149 my ($enc, $text, $chk) = @_;
184             # MIME::Base64::decode ignores everything after a '=' padding character
185             # in non strict mode split string after each sequence of padding characters and decode each substring
186             my $octets = $STRICT_DECODE ?
187             MIME::Base64::decode($text) :
188 46 100       415 join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
  46         317  
189 46         188 return _decode_octets($enc, $octets, $chk);
190             }
191              
192             sub _decode_q {
193 50165     50165   116325 my ($enc, $text, $chk) = @_;
194 50165         114742 $text =~ s/_/ /go;
195 50165         89731 $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
  700         2410  
196 50165         132269 return _decode_octets($enc, $text, $chk);
197             }
198              
199             sub _decode_octets {
200 50211     50211   120819 my ($enc, $octets, $chk) = @_;
201 50211 100 100     228941 $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
202 50211         288424 my $output = $enc->decode($octets, $chk);
203 50211 100 100     243776 return undef if not ref $chk and $chk and $octets ne '';
      100        
204 50209         145358 return $output;
205             }
206              
207             sub encode($$;$) {
208 60     60 1 234 my ($obj, $str, $chk) = @_;
209 60 100       242 return undef unless defined $str;
210 57         235 my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
211 57 100 100     407 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
212 57         366 return $output . substr($str, 0, 0); # to propagate taintedness
213             }
214              
215             sub _fold_line {
216 57     57   191 my ($obj, $line) = @_;
217 57         166 my $bpl = $obj->{bpl};
218 57         140 my $output = '';
219              
220 57         199 while ( length($line) ) {
221 114 50       848 if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
    0          
222 114         362 $output .= $1;
223 114 100       542 $output .= "\r\n" . $2 if length($line);
224             } elsif ( $line =~ s/(\s)(.*)$// ) {
225 0         0 $output .= $line;
226 0         0 $line = $2;
227 0 0       0 $output .= "\r\n" . $1 if length($line);
228             } else {
229 0         0 $output .= $line;
230 0         0 last;
231             }
232             }
233              
234 57         196 return $output;
235             }
236              
237             sub _encode_string {
238 57     57   222 my ($obj, $str, $chk) = @_;
239 57 50       239 my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
240 57         238 my $enc = Encode::find_mime_encoding($obj->{charset});
241 57 100 100     361 my $enc_chk = (not ref $chk and $chk) ? ($chk | Encode::LEAVE_SRC) : $chk;
242 57         217 my @result = ();
243 57         162 my $octets = '';
244 57         411 while ( length( my $chr = substr($str, 0, 1, '') ) ) {
245 1256         4189 my $seq = $enc->encode($chr, $enc_chk);
246 1256 100       3247 if ( not length($seq) ) {
247 2         9 substr($str, 0, 0, $chr);
248 2         8 last;
249             }
250 1254 100       3313 if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
251 57         162 push @result, $obj->_encode_word($octets);
252 57         104 $octets = '';
253             }
254 1254         5195 $octets .= $seq;
255             }
256 57 50       289 length($octets) and push @result, $obj->_encode_word($octets);
257 57 100 100     428 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
258 57         358 return join(' ', @result);
259             }
260              
261             sub _encode_word {
262 114     114   298 my ($obj, $octets) = @_;
263 114         253 my $charset = $obj->{charset};
264 114         227 my $encode = $obj->{encode};
265 114 100       376 my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
266 114         514 return "=?$charset?$encode?$text?=";
267             }
268              
269             sub _encoded_word_len {
270 1254     1254   2703 my ($obj, $octets) = @_;
271 1254         2293 my $charset = $obj->{charset};
272 1254         2260 my $encode = $obj->{encode};
273 1254 100       3304 my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
274 1254         4580 return length("=?$charset?$encode??=") + $text_len;
275             }
276              
277             sub _encode_b {
278 72     72   156 my ($octets) = @_;
279 72         362 return MIME::Base64::encode($octets, '');
280             }
281              
282             sub _encoded_b_len {
283 848     848   1690 my ($octets) = @_;
284 848         2178 return ( length($octets) + 2 ) / 3 * 4;
285             }
286              
287             my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
288              
289             sub _encode_q {
290 42     42   97 my ($octets) = @_;
291 42         242 $octets =~ s{($re_invalid_q_char)}{
292 566         1433 join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
  566         2084  
293             }egox;
294 42         145 $octets =~ s/ /_/go;
295 42         117 return $octets;
296             }
297              
298             sub _encoded_q_len {
299 406     406   798 my ($octets) = @_;
300 406         2099 my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
301 406         1117 return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
302             }
303              
304             1;
305             __END__