File Coverage

blib/lib/Encode/MIME/Header.pm
Criterion Covered Total %
statement 139 151 92.0
branch 73 100 73.0
condition 63 84 75.0
subroutine 19 21 90.4
pod 4 4 100.0
total 298 360 82.7


line stmt bran cond sub pod time code
1             package Encode::MIME::Header;
2 5     5   39001 use strict;
  5         14  
  5         146  
3 5     5   30 use warnings;
  5         12  
  5         414  
4              
5             our $VERSION = do { my @r = ( q$Revision: 2.27 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
6              
7 5     5   33 use Carp ();
  5         9  
  5         68  
8 5     5   365 use Encode ();
  5         11  
  5         81  
9 5     5   2165 use MIME::Base64 ();
  5         2840  
  5         538  
10              
11             my %seed = (
12             decode_b => 1, # decodes 'B' encoding ?
13             decode_q => 1, # decodes 'Q' encoding ?
14             encode => 'B', # encode with 'B' or 'Q' ?
15             charset => 'UTF-8', # encode charset
16             bpl => 75, # bytes per line
17             );
18              
19             my @objs;
20              
21             push @objs, bless {
22             %seed,
23             Name => 'MIME-Header',
24             } => __PACKAGE__;
25              
26             push @objs, bless {
27             %seed,
28             decode_q => 0,
29             Name => 'MIME-B',
30             } => __PACKAGE__;
31              
32             push @objs, bless {
33             %seed,
34             decode_b => 0,
35             encode => 'Q',
36             Name => 'MIME-Q',
37             } => __PACKAGE__;
38              
39             Encode::define_encoding($_, $_->{Name}) foreach @objs;
40              
41 5     5   36 use parent qw(Encode::Encoding);
  5         10  
  5         41  
42              
43 0     0 1 0 sub needs_lines { 1 }
44 0     0 1 0 sub perlio_ok { 0 }
45              
46             # RFC 2047 and RFC 2231 grammar
47             my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
48             my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
49             my $re_encoding = qr/[QqBb]/;
50             my $re_encoded_text = qr/[^\?]*/;
51             my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
52             my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
53             my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
54              
55             # in strict mode check also for valid base64 characters and also for valid quoted printable codes
56             my $re_encoding_strict_b = qr/[Bb]/;
57             my $re_encoding_strict_q = qr/[Qq]/;
58             my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
59             my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
60             my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
61             my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
62              
63             my $re_newline = qr/(?:\r\n|[\r\n])/;
64              
65             # in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
66             # except in comments when separator between words and comment round brackets can be omitted
67             my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
68             my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
69             my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
70              
71             my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
72             my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
73              
74             my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
75             my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
76              
77             our $STRICT_DECODE = 0;
78              
79             sub decode($$;$) {
80 206     206 1 504 my ($obj, $str, $chk) = @_;
81 206 100       495 return undef unless defined $str;
82              
83 202 100       466 my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
84 202 100       375 my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
85              
86 202         287 my $stop = 0;
87 202         441 my $output = substr($str, 0, 0); # to propagate taintedness
88              
89             # decode each line separately, match whole continuous folded line at one call
90 202   66     21551 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
91              
92 420         2223 my $line = $1;
93 420 100       905 my $sep = defined $2 ? $2 : '';
94              
95 420 100 100     1451 $stop = 1 unless length($line) or length($sep);
96              
97             # NOTE: this code partially could break $chk support
98             # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
99             # fixes breaking inside multi-byte characters
100 420   100     595224 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
101              
102             # process sequence of encoded MIME words at once
103 420   100     88902 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
104              
105 198         577 my $begin = $1 . $2;
106 198         676 my $words = $3;
107              
108 198         368 $begin =~ tr/\r\n//d;
109 198         307 $output .= $begin;
110              
111             # decode one MIME word
112 198   100     1638 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
113              
114 50225         155185 $output .= $1;
115 50225         100937 my $orig = $2;
116 50225         81662 my $charset = $3;
117 50225         132442 my ($mime_enc, $text) = split /\?/, $5;
118              
119 50225         90650 $text =~ tr/\r\n//d;
120              
121 50225         123531 my $enc = Encode::find_mime_encoding($charset);
122              
123             # in non strict mode allow also perl encoding aliases
124 50225 100 100     108546 if ( not defined $enc and not $STRICT_DECODE ) {
125             # make sure that decoded string will be always strict UTF-8
126 6 100       19 $charset = 'UTF-8' if lc($charset) eq 'utf8';
127 6         14 $enc = Encode::find_encoding($charset);
128             }
129              
130 50225 100       95080 if ( not defined $enc ) {
131 13 50 66     55 Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::DIE_ON_ERR;
132 13 50 66     50 Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::WARN_ON_ERR;
133 13 100 100     49 $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
134 13 100       68 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    100          
135 13 100       119 $stop ? $orig : '';
136             } else {
137 50212 100 66     206951 if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
    50 33        
138 46         106 my $decoded = _decode_b($enc, $text, $chk);
139 46 50 33     163 $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
      33        
140 46 50       146 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    50          
141 46 50       495 $stop ? $orig : '';
142             } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
143 50166         95373 my $decoded = _decode_q($enc, $text, $chk);
144 50166 100 66     148634 $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
      66        
145 50166 50       124808 $output .= (defined $decoded ? $decoded : $text) unless $stop;
    100          
146 50166 100       1961904 $stop ? $orig : '';
147             } else {
148 0 0 0     0 Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::DIE_ON_ERR;
149 0 0 0     0 Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::WARN_ON_ERR;
150 0 0 0     0 $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
151 0 0       0 $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
    0          
152 0 0       0 $stop ? $orig : '';
153             }
154             }
155              
156             }se;
157              
158 198 100       529 if ( not $stop ) {
159 194         332 $output .= $words;
160 194         347 $words = '';
161             }
162              
163 198         1601 $words;
164              
165             }se;
166              
167 420 100       902 if ( not $stop ) {
168 218         4471 $line =~ tr/\r\n//d;
169 218         2250 $output .= $line . $sep;
170 218         374 $line = '';
171 218         320 $sep = '';
172             }
173              
174 420         2802 $line . $sep;
175              
176             }se;
177              
178 202 100 100     835 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
179 202         654 return $output;
180             }
181              
182             sub _decode_b {
183 46     46   93 my ($enc, $text, $chk) = @_;
184             # MIME::Base64::decode ignores everything after a '=' padding character
185             # in non strict mode split string after each sequence of padding characters and decode each substring
186             my $octets = $STRICT_DECODE ?
187             MIME::Base64::decode($text) :
188 46 100       292 join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
  46         210  
189 46         125 return _decode_octets($enc, $octets, $chk);
190             }
191              
192             sub _decode_q {
193 50166     50166   85680 my ($enc, $text, $chk) = @_;
194 50166         85898 $text =~ s/_/ /go;
195 50166         66951 $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
  700         1876  
196 50166         97792 return _decode_octets($enc, $text, $chk);
197             }
198              
199             sub _decode_octets {
200 50212     50212   79787 my ($enc, $octets, $chk) = @_;
201 50212 100 100     183582 $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
202 50212         193220 my $output = $enc->decode($octets, $chk);
203 50212 100 100     197387 return undef if not ref $chk and $chk and $octets ne '';
      100        
204 50210         104490 return $output;
205             }
206              
207             sub encode($$;$) {
208 60     60 1 143 my ($obj, $str, $chk) = @_;
209 60 100       171 return undef unless defined $str;
210 57         155 my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
211 57 100 100     263 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
212 57         236 return $output . substr($str, 0, 0); # to propagate taintedness
213             }
214              
215             sub _fold_line {
216 57     57   124 my ($obj, $line) = @_;
217 57         97 my $bpl = $obj->{bpl};
218 57         92 my $output = '';
219              
220 57         129 while ( length($line) ) {
221 114 50       634 if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
    0          
222 114         265 $output .= $1;
223 114 100       360 $output .= "\r\n" . $2 if length($line);
224             } elsif ( $line =~ s/(\s)(.*)$// ) {
225 0         0 $output .= $line;
226 0         0 $line = $2;
227 0 0       0 $output .= "\r\n" . $1 if length($line);
228             } else {
229 0         0 $output .= $line;
230 0         0 last;
231             }
232             }
233              
234 57         136 return $output;
235             }
236              
237             sub _encode_string {
238 57     57   120 my ($obj, $str, $chk) = @_;
239 57 50       148 my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
240 57         144 my $enc = Encode::find_mime_encoding($obj->{charset});
241 57 100 100     256 my $enc_chk = (not ref $chk and $chk) ? ($chk | Encode::LEAVE_SRC) : $chk;
242 57         119 my @result = ();
243 57         93 my $octets = '';
244 57         303 while ( length( my $chr = substr($str, 0, 1, '') ) ) {
245 1256         3254 my $seq = $enc->encode($chr, $enc_chk);
246 1256 100       2594 if ( not length($seq) ) {
247 2         5 substr($str, 0, 0, $chr);
248 2         5 last;
249             }
250 1254 100       2684 if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
251 57         118 push @result, $obj->_encode_word($octets);
252 57         86 $octets = '';
253             }
254 1254         3872 $octets .= $seq;
255             }
256 57 50       181 length($octets) and push @result, $obj->_encode_word($octets);
257 57 100 100     270 $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
      100        
258 57         232 return join(' ', @result);
259             }
260              
261             sub _encode_word {
262 114     114   207 my ($obj, $octets) = @_;
263 114         183 my $charset = $obj->{charset};
264 114         165 my $encode = $obj->{encode};
265 114 100       266 my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
266 114         362 return "=?$charset?$encode?$text?=";
267             }
268              
269             sub _encoded_word_len {
270 1254     1254   2152 my ($obj, $octets) = @_;
271 1254         1790 my $charset = $obj->{charset};
272 1254         1670 my $encode = $obj->{encode};
273 1254 100       2616 my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
274 1254         3611 return length("=?$charset?$encode??=") + $text_len;
275             }
276              
277             sub _encode_b {
278 72     72   116 my ($octets) = @_;
279 72         254 return MIME::Base64::encode($octets, '');
280             }
281              
282             sub _encoded_b_len {
283 848     848   1291 my ($octets) = @_;
284 848         1630 return ( length($octets) + 2 ) / 3 * 4;
285             }
286              
287             my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
288              
289             sub _encode_q {
290 42     42   73 my ($octets) = @_;
291 42         196 $octets =~ s{($re_invalid_q_char)}{
292 566         1234 join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
  566         1786  
293             }egox;
294 42         103 $octets =~ s/ /_/go;
295 42         97 return $octets;
296             }
297              
298             sub _encoded_q_len {
299 406     406   647 my ($octets) = @_;
300 406         1605 my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
301 406         1125 return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
302             }
303              
304             1;
305             __END__