File Coverage

blib/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
Criterion Covered Total %
statement 45 294 15.3
branch 1 174 0.5
condition 1 101 0.9
subroutine 10 30 33.3
pod 7 19 36.8
total 64 618 10.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             MIMEEval - perform various tests against MIME structure and body
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::MIMEEval
25              
26             body NAME_OF_RULE eval:check_for_mime
27             body NAME_OF_RULE eval:check_for_mime_html
28             body NAME_OF_RULE eval:check_for_mime_html_only
29             body NAME_OF_RULE eval:check_mime_multipart_ratio
30             body NAME_OF_RULE eval:check_msg_parse_flags
31             body NAME_OF_RULE eval:check_for_ascii_text_illegal
32             body NAME_OF_RULE eval:check_abundant_unicode_ratio
33             body NAME_OF_RULE eval:check_for_faraway_charset
34             body NAME_OF_RULE eval:check_for_uppercase
35             body NAME_OF_RULE eval:check_ma_non_text
36             body NAME_OF_RULE eval:check_base64_length
37             body NAME_OF_RULE eval:check_qp_ratio
38              
39             =head1 DESCRIPTION
40              
41             Perform various tests against MIME structure and body.
42              
43             =cut
44              
45              
46             use strict;
47 22     22   162 use warnings;
  22         52  
  22         627  
48 22     22   111 # use bytes;
  22         50  
  22         693  
49             use re 'taint';
50 22     22   128  
  22         51  
  22         737  
51             use Mail::SpamAssassin::Plugin;
52 22     22   129 use Mail::SpamAssassin::Locales;
  22         45  
  22         581  
53 22     22   116 use Mail::SpamAssassin::Constants qw(:sa CHARSETS_LIKELY_TO_FP_AS_CAPS);
  22         44  
  22         576  
54 22     22   109 use Mail::SpamAssassin::Util qw(untaint_var);
  22         50  
  22         3544  
55 22     22   159 use Mail::SpamAssassin::Logger;
  22         38  
  22         1101  
56 22     22   145  
  22         66  
  22         77224  
57             our @ISA = qw(Mail::SpamAssassin::Plugin);
58              
59             # constructor: register the eval rule
60             my $class = shift;
61             my $mailsaobject = shift;
62 63     63 1 212  
63 63         132 # some boilerplate...
64             $class = ref($class) || $class;
65             my $self = $class->SUPER::new($mailsaobject);
66 63   33     397 bless ($self, $class);
67 63         324  
68 63         166 # the important bit!
69             $self->register_eval_rule("check_for_mime");
70             $self->register_eval_rule("check_for_mime_html");
71 63         254 $self->register_eval_rule("check_for_mime_html_only");
72 63         215 $self->register_eval_rule("check_mime_multipart_ratio");
73 63         216 $self->register_eval_rule("check_msg_parse_flags");
74 63         239 $self->register_eval_rule("check_for_ascii_text_illegal");
75 63         234 $self->register_eval_rule("check_abundant_unicode_ratio");
76 63         193 $self->register_eval_rule("check_for_faraway_charset");
77 63         170 $self->register_eval_rule("check_for_uppercase");
78 63         177 $self->register_eval_rule("check_ma_non_text");
79 63         223 $self->register_eval_rule("check_base64_length");
80 63         186 $self->register_eval_rule("check_qp_ratio");
81 63         168  
82 63         170 return $self;
83             }
84 63         551  
85             ###########################################################################
86              
87             my ($self, $str) = @_;
88              
89             # TODO: I suspect a tr// trick may be faster here
90 0     0 0 0 my $numhis = () = ($str =~ /[\200-\377]/g);
91             my $numlos = length($str) - $numhis;
92              
93 0         0 ($numlos <= $numhis && $numhis > 3);
94 0         0 }
95              
96 0 0       0 =over 4
97              
98             =item has_check_for_ascii_text_illegal
99              
100             Adds capability check for "if can()" for check_for_ascii_text_illegal
101              
102             =cut
103              
104              
105             =item check_for_ascii_text_illegal
106              
107 0     0 1 0 If a MIME part claims to be text/plain or text/plain;charset=us-ascii and the Content-Transfer-Encoding is 7bit (either explicitly or by default), then we should enforce the actual text being only TAB, NL, SPACE through TILDE, i.e. all 7bit characters excluding NO-WS-CTL (per RFC-2822).
108              
109             All mainstream MTAs get this right.
110              
111             =cut
112              
113             my ($self, $pms) = @_;
114              
115             $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
116             return 0 unless exists $pms->{mime_ascii_text_illegal};
117             return ($pms->{mime_ascii_text_illegal} > 0);
118 0     0 1 0 }
119              
120 0 0       0 =item has_check_abundant_unicode_ratio
121 0 0       0  
122 0         0 Adds capability check for "if can()" for check_abundant_unicode_ratio
123              
124             =cut
125              
126              
127             =item check_abundant_unicode_ratio
128              
129             A MIME part claiming to be text/plain and containing Unicode characters must be encoded as quoted-printable or base64, or use UTF data coding (typically with 8bit encoding). Any message in 7bit or 8bit encoding containing (HTML) Unicode entities will not render them as Unicode, but literally.
130              
131 0     0 1 0 Thus a few such sequences might occur on a mailing list of developers discussing such characters, but a message with a high density of such characters is likely spam.
132              
133             =cut
134              
135             my ($self, $pms, undef, $ratio) = @_;
136              
137             # validate ratio?
138             return 0 unless ($ratio =~ /^\d{0,3}\.\d{1,3}$/);
139              
140             $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
141             return 0 unless exists $pms->{mime_text_unicode_ratio};
142 0     0 1 0 return ($pms->{mime_text_unicode_ratio} >= $ratio);
143             }
144              
145 0 0       0 my ($self, $pms, $body) = @_;
146              
147 0 0       0 my $type = $pms->get('Content-Type',undef);
148 0 0       0  
149 0         0 my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
150              
151             return 0 if grep { $_ eq "all" } @locales;
152              
153 0     0 0 0 $type = get_charset_from_ct_line($type) if defined $type;
154              
155 0         0 if (defined $type &&
156             !Mail::SpamAssassin::Locales::is_charset_ok_for_locales
157 0         0 ($type, @locales))
158             {
159 0 0       0 # sanity check. Some charsets (e.g. koi8-r) include the ASCII
  0         0  
160             # 7-bit charset as well, so make sure we actually have a high
161 0 0       0 # number of 8-bit chars in the body text first.
162              
163 0 0 0     0 $body = join("\n", @$body);
164             if ($self->are_more_high_bits_set ($body)) {
165             return 1;
166             }
167             }
168              
169             0;
170             }
171 0         0  
172 0 0       0 my ($self, $pms, undef, $test) = @_;
173 0         0  
174             $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
175             return 0 unless exists $pms->{$test};
176             return $pms->{$test};
177 0         0 }
178              
179             # any text/html MIME part
180             my ($self, $pms) = @_;
181 0     0 0 0  
182             my $ctype = $pms->get('Content-Type');
183 0 0       0 return 1 if $ctype =~ m{^text/html}i;
184 0 0       0  
185 0         0 $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
186             return 0 unless exists $pms->{mime_body_html_count};
187             return ($pms->{mime_body_html_count} > 0);
188             }
189              
190 0     0 0 0 # HTML without some other type of MIME text part
191             my ($self, $pms) = @_;
192 0         0  
193 0 0       0 my $ctype = $pms->get('Content-Type');
194             return 1 if $ctype =~ m{^text/html}i;
195 0 0       0  
196 0 0       0 $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
197 0         0 return 0 unless exists $pms->{mime_body_html_count};
198             return 0 unless exists $pms->{mime_body_text_count};
199             return ($pms->{mime_body_html_count} > 0 &&
200             $pms->{mime_body_text_count} == 0);
201             }
202 0     0 0 0  
203             my ($self, $pms, undef, $min, $max) = @_;
204 0         0  
205 0 0       0 $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
206             return 0 unless exists $pms->{mime_multipart_ratio};
207 0 0       0 return ($pms->{mime_multipart_ratio} >= $min &&
208 0 0       0 $pms->{mime_multipart_ratio} < $max);
209 0 0       0 }
210              
211 0   0     0 my ($self, $pms, $ctype, $cte, $cd, $charset, $name) = @_;
212              
213             $charset ||= '';
214              
215 0     0 0 0 if ($ctype eq 'text/html') {
216             $pms->{mime_body_html_count}++;
217 0 0       0 }
218 0 0       0 elsif ($ctype =~ m@^text@i) {
219             $pms->{mime_body_text_count}++;
220 0   0     0 }
221              
222             if ($cte =~ /base64/) {
223             $pms->{mime_base64_count}++;
224 0     0   0 }
225             elsif ($cte =~ /quoted-printable/) {
226 0   0     0 $pms->{mime_qp_count}++;
227             }
228 0 0       0  
    0          
229 0         0 if ($cd && $cd =~ /attachment/) {
230             $pms->{mime_attachment}++;
231             }
232 0         0  
233             if ($ctype =~ /^text/ &&
234             $cte =~ /base64/ &&
235 0 0       0 (!$charset || $charset =~ /(?:us-ascii|ansi_x3\.4-1968|iso-ir-6|ansi_x3\.4-1986|iso_646\.irv:1991|ascii|iso646-us|us|ibm367|cp367|csascii)/) &&
    0          
236 0         0 !($cd && $cd =~ /^(?:attachment|inline)/))
237             {
238             $pms->{mime_base64_encoded_text} = 1;
239 0         0 }
240              
241             if ($charset =~ /iso-\S+-\S+\b/i &&
242 0 0 0     0 $charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
243 0         0 {
244             $pms->{mime_bad_iso_charset} = 1;
245             }
246 0 0 0     0  
      0        
      0        
      0        
      0        
247             # MIME_BASE64_LATIN: now a zero-hitter
248             # if (!$name &&
249             # $cte =~ /base64/ &&
250             # $charset =~ /\b(?:us-ascii|iso-8859-(?:[12349]|1[0345])|windows-(?:125[0247]))\b/)
251 0         0 # {
252             # $pms->{mime_base64_latin} = 1;
253             # }
254 0 0 0     0  
255             # MIME_QP_NO_CHARSET: now a zero-hitter
256             # if ($cte =~ /quoted-printable/ && $cd =~ /inline/ && !$charset) {
257 0         0 # $pms->{mime_qp_inline_no_charset} = 1;
258             # }
259              
260             # MIME_HTML_NO_CHARSET: now a zero-hitter
261             # if ($ctype eq 'text/html' &&
262             # !(defined($charset) && $charset) &&
263             # !($cd && $cd =~ /^(?:attachment|inline)/))
264             # {
265             # $pms->{mime_html_no_charset} = 1;
266             # }
267              
268             if ($charset =~ /[a-z]/i) {
269             if (defined $pms->{mime_html_charsets}) {
270             $pms->{mime_html_charsets} .= " ".$charset;
271             } else {
272             $pms->{mime_html_charsets} = $charset;
273             }
274              
275             if (! $pms->{mime_faraway_charset}) {
276             my @l = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
277              
278             if (!(grep { $_ eq "all" } @l) &&
279             !Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @l))
280             {
281 0 0       0 dbg ("mimeeval: $charset is not ok for @l");
282 0 0       0 $pms->{mime_faraway_charset} = 1;
283 0         0 }
284             }
285 0         0 }
286             }
287              
288 0 0       0 my ($self, $pms) = @_;
289 0         0  
290             # MIME status
291 0 0 0     0 my $where = -1; # -1 = start, 0 = nowhere, 1 = header, 2 = body
292             my $qp_bytes = 0; # total bytes in QP regions
293             my $qp_count = 0; # QP-encoded bytes in QP regions
294 0         0 my @part_bytes; # MIME part total bytes
295 0         0 my @part_type; # MIME part types
296              
297             my $normal_chars = 0; # MIME text bytes that aren't encoded
298             my $unicode_chars = 0; # MIME text bytes that are unicode entities
299              
300             # MIME header information
301             my $part = -1; # MIME part index
302 0     0   0  
303             # indicate the scan has taken place
304             $pms->{mime_checked_attachments} = 1;
305 0         0  
306 0         0 # results
307 0         0 # $pms->{mime_base64_blanks} = 0; # expensive to determine, no longer avail
308 0         0 $pms->{mime_base64_count} = 0;
309             $pms->{mime_base64_encoded_text} = 0;
310             # $pms->{mime_base64_illegal} = 0;
311 0         0 # $pms->{mime_base64_latin} = 0;
312 0         0 $pms->{mime_body_html_count} = 0;
313             $pms->{mime_body_text_count} = 0;
314             $pms->{mime_faraway_charset} = 0;
315 0         0 # $pms->{mime_html_no_charset} = 0;
316             $pms->{mime_missing_boundary} = 0;
317             $pms->{mime_multipart_alternative} = 0;
318 0         0 $pms->{mime_multipart_ratio} = 1.0;
319             $pms->{mime_qp_count} = 0;
320             # $pms->{mime_qp_illegal} = 0;
321             # $pms->{mime_qp_inline_no_charset} = 0;
322 0         0 $pms->{mime_qp_long_line} = 0;
323 0         0 $pms->{mime_qp_ratio} = 0;
324             $pms->{mime_ascii_text_illegal} = 0;
325             $pms->{mime_text_unicode_ratio} = 0;
326 0         0  
327 0         0 # Get all parts ...
328 0         0 foreach my $p ($pms->{msg}->find_parts(qr/./)) {
329             # message headers
330 0         0 my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
331 0         0  
332 0         0 if ($ctype eq 'multipart/alternative') {
333 0         0 $pms->{mime_multipart_alternative} = 1;
334             }
335              
336 0         0 my $cte = $p->get_header('Content-Transfer-Encoding') || '';
337 0         0 chomp($cte = defined($cte) ? lc $cte : "");
338 0         0  
339 0         0 my $cd = $p->get_header('Content-Disposition') || '';
340             chomp($cd = defined($cd) ? lc $cd : "");
341              
342 0         0 $charset = lc $charset if ($charset);
343             $name = lc $name if ($name);
344 0         0  
345             $self->_check_mime_header($pms, $ctype, $cte, $cd, $charset, $name);
346 0 0       0  
347 0         0 # If we're not in a leaf node in the tree, there will be no raw
348             # section, so skip it.
349             if (! $p->is_leaf()) {
350 0   0     0 next;
351 0 0       0 }
352              
353 0   0     0 $part++;
354 0 0       0 $part_type[$part] = $ctype;
355             $part_bytes[$part] = 0 if $cd !~ /attachment/;
356 0 0       0  
357 0 0       0 my $cte_is_base64 = $cte =~ /base64/i;
358             my $previous = '';
359 0         0 foreach (@{$p->raw()}) {
360              
361             # if ($cte_is_base64) {
362             # if ($previous =~ /^\s*$/ && /^\s*$/) { # expensive, avoid!
363 0 0       0 # $pms->{mime_base64_blanks} = 1; # never used, don't bother
364 0         0 # }
365             # # MIME_BASE64_ILLEGAL: now a zero-hitter
366             # # if (m@[^A-Za-z0-9+/=\n]@ || /=[^=\s]/) {
367 0         0 # # $pms->{mime_base64_illegal} = 1;
368 0         0 # # }
369 0 0       0 # }
370              
371 0         0 # if ($pms->{mime_html_no_charset} && $ctype eq 'text/html' && defined $charset) {
372 0         0 # $pms->{mime_html_no_charset} = 0;
373 0         0 # }
  0         0  
374             if ($pms->{mime_multipart_alternative} && $cd !~ /attachment/ &&
375             ($ctype eq 'text/plain' || $ctype eq 'text/html')) {
376             $part_bytes[$part] += length;
377             }
378              
379             if ($where != 1 && $cte eq "quoted-printable" && ! /^SPAM: /) {
380             # RFC 5322: Each line SHOULD be no more than 78 characters,
381             # excluding the CRLF.
382             # RFC 2045: The Quoted-Printable encoding REQUIRES that
383             # encoded lines be no more than 76 characters long.
384             # Bug 5491: 6% of email classified as HAM by SA triggered the
385             # MIME_QP_LONG_LINE rule. Apple Mail can generate a QP-line
386             # that is 2 chars too long. Same goes for Outlook Web Access.
387             # lines include one trailing \n character
388 0 0 0     0 # if (length > 76+1) { # conforms to RFC 5322 and RFC 2045
      0        
      0        
389             if (length > 78+1) { # conforms to RFC 5322 only, not RFC 2045
390 0         0 $pms->{mime_qp_long_line} = 1;
391             }
392             $qp_bytes += length;
393 0 0 0     0  
      0        
394             # MIME_QP_DEFICIENT: zero-hitter now
395              
396             # check for illegal substrings (RFC 2045), hexadecimal values 7F-FF and
397             # control characters other than TAB, or CR and LF as parts of CRLF pairs
398             # if (!$pms->{mime_qp_illegal} && /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/)
399             # {
400             # $pms->{mime_qp_illegal} = 1;
401             # }
402              
403 0 0       0 # count excessive QP bytes
404 0         0 if (index($_, '=') != -1) {
405             # whoever wrote this next line is an evil hacker -- jm
406 0         0 my $qp = () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
407             if ($qp) {
408             $qp_count += $qp;
409             # tabs and spaces at end of encoded line are okay. Also, multiple
410             # whitespace at the end of a line are OK, like ">=20=20=20=20=20=20".
411             my ($trailing) = m/((?:=09|=20)+)\s*$/g;
412             if ($trailing) {
413             $qp_count -= (length($trailing) / 3);
414             }
415             }
416             }
417             }
418 0 0       0  
419             # if our charset is ASCII, this should only contain 7-bit characters
420 0         0 # except NUL or a free-standing CR. anything else is a violation of
421 0 0       0 # the definition of charset="us-ascii".
422 0         0 if ($ctype eq 'text/plain' && (!defined $charset || $charset eq 'us-ascii')) {
423             # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
424             if (m/[\x00\x0d\x80-\xff]+/) {
425 0         0 if (would_log('dbg', 'eval')) {
426 0 0       0 my $str = $_;
427 0         0 $str =~ s/([\x00\x0d\x80-\xff]+)/'<' . unpack('H*', $1) . '>'/eg;
428             dbg("check: ascii_text_illegal: matches " . $str . "\n");
429             }
430             $pms->{mime_ascii_text_illegal}++;
431             }
432             }
433              
434             # if we're text/plain, we should never see unicode escapes in this
435             # format, especially not for 7bit or 8bit.
436 0 0 0     0 if ($ctype eq 'text/plain' && ($cte eq '' || $cte eq '7bit' || $cte eq '8bit')) {
      0        
437             my ($text, $subs) = $_;
438 0 0       0  
439 0 0       0 $subs = $text =~ s/&#x[0-9A-F]{4};//g;
440 0         0 $normal_chars += length($text);
441 0         0 $unicode_chars += $subs;
  0         0  
442 0         0  
443             if ($subs && would_log('dbg', 'eval')) {
444 0         0 my $str = $_;
445             $str = substr($str, 0, 512) . '...' if (length($str) > 512);
446             dbg("check: abundant_unicode: " . $str . " (" . $subs . ")\n");
447             }
448             }
449              
450 0 0 0     0 $previous = $_;
      0        
451 0         0 }
452             }
453 0         0  
454 0         0 if ($qp_bytes) {
455 0         0 $pms->{mime_qp_ratio} = $qp_count / $qp_bytes;
456             $pms->{mime_qp_count} = $qp_count;
457 0 0 0     0 $pms->{mime_qp_bytes} = $qp_bytes;
458 0         0 }
459 0 0       0  
460 0         0 if ($normal_chars) {
461             $pms->{mime_text_unicode_ratio} = $unicode_chars / $normal_chars;
462             }
463              
464 0         0 if ($pms->{mime_multipart_alternative}) {
465             my $text;
466             my $html;
467             # bug 4207: we want the size of the last parts
468 0 0       0 for (my $i = $part; $i >= 0; $i--) {
469 0         0 next if !defined $part_bytes[$i];
470 0         0 if (!defined($html) && $part_type[$i] eq 'text/html') {
471 0         0 $html = $part_bytes[$i];
472             }
473             elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
474 0 0       0 $text = $part_bytes[$i];
475 0         0 }
476             last if (defined($html) && defined($text));
477             }
478 0 0       0 if (defined($text) && defined($html) && $html > 0) {
479 0         0 $pms->{mime_multipart_ratio} = ($text / $html);
480             }
481             }
482 0         0  
483 0 0       0 # Look to see if any multipart boundaries are not "balanced"
484 0 0 0     0 foreach my $val (values %{$pms->{msg}->{mime_boundary_state}}) {
    0 0        
485 0         0 if ($val != 0) {
486             $pms->{mime_missing_boundary} = 1;
487             last;
488 0         0 }
489             }
490 0 0 0     0 }
491              
492 0 0 0     0 =item has_check_qp_ratio
      0        
493 0         0  
494             Adds capability check for "if can()" for check_qp_ratio
495              
496             =cut
497              
498 0         0  
  0         0  
499 0 0       0 =item check_qp_ratio
500 0         0  
501 0         0 Takes a minimum ratio to use in the eval to see if there is any spamminess to the ratio of
502             quoted-printable to total bytes in an email.
503              
504             =back
505              
506             =cut
507              
508             my ($self, $pms, undef, $min) = @_;
509              
510             $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
511             return 0 unless exists $pms->{mime_qp_ratio};
512 0     0 1 0  
513             my $qp_ratio = $pms->{mime_qp_ratio};
514              
515             dbg("eval: qp_ratio - %s - check for min of %s", $qp_ratio, $min);
516              
517             return (defined $qp_ratio && $qp_ratio >= $min) ? 1 : 0;
518             }
519              
520              
521             my($self, $pms, $type, $type2) = @_;
522             $type = $type2 if ref($type);
523             return defined $pms->{msg}->{$type};
524 0     0 1 0 }
525              
526 0 0       0 my ($self, $pms, $body, $min, $max) = @_;
527 0 0       0 local ($_);
528              
529 0         0 if (exists $pms->{uppercase}) {
530             return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
531 0         0 }
532              
533 0 0 0     0 if ($self->body_charset_is_likely_to_fp($pms)) {
534             $pms->{uppercase} = 0; return 0;
535             }
536              
537             # Dec 20 2002 jm: trade off some speed for low memory footprint, by
538 81     81 0 307 # iterating over the array computing sums, instead of joining the
539 81 50       325 # array into a giant string and working from that.
540 81         1267  
541             my $len = 0;
542             my $lower = 0;
543             my $upper = 0;
544 0     0 0   foreach (@{$body}) {
545 0           # examine lines in the body that have an intermediate space
546             next unless /\S\s+\S/;
547 0 0         # strip out lingering base64 (currently possible for forwarded messages)
548 0   0       next if /^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;
549              
550             my $line = $_; # copy so we don't muck up the original
551 0 0          
552 0           # remove shift-JIS charset codes
  0            
553             $line =~ s/\x1b\$B.*\x1b\(B//gs;
554              
555             $len += length($line);
556              
557             # count numerals as lower case, otherwise 'date|mail' is spam
558             $lower += ($line =~ tr/a-z0-9//d);
559 0           $upper += ($line =~ tr/A-Z//);
560 0           }
561 0            
562 0           # report only on mails above a minimum size; otherwise one
  0            
563             # or two acronyms can throw it off
564 0 0         if ($len < 200) {
565             $pms->{uppercase} = 0;
566 0 0         return 0;
567             }
568 0           if (($upper + $lower) == 0) {
569             $pms->{uppercase} = 0;
570             } else {
571 0           $pms->{uppercase} = ($upper / ($upper + $lower)) * 100;
572             }
573 0            
574             return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
575             }
576 0            
577 0           my ($self, $pms) = @_;
578              
579             # check for charsets where this test will FP -- iso-2022-jp, gb2312,
580             # koi8-r etc.
581             #
582 0 0         $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
583 0           my @charsets;
584 0           my $type = $pms->get('Content-Type',undef);
585             $type = get_charset_from_ct_line($type) if defined $type;
586 0 0         push (@charsets, $type) if defined $type;
587 0           if (defined $pms->{mime_html_charsets}) {
588             push (@charsets, split(' ', $pms->{mime_html_charsets}));
589 0           }
590              
591             my $CHARSETS_LIKELY_TO_FP_AS_CAPS = CHARSETS_LIKELY_TO_FP_AS_CAPS;
592 0   0       foreach my $charset (@charsets) {
593             if ($charset =~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/) {
594             return 1;
595             }
596 0     0 0   }
597             return 0;
598             }
599              
600             my $type = shift;
601 0 0         if (!defined $type) { return; }
602 0           if ($type =~ /charset="([^"]+)"/i) { return $1; }
603 0           if ($type =~ /charset='([^']+)'/i) { return $1; }
604 0 0         if ($type =~ /charset=(\S+)/i) { return $1; }
605 0 0         return;
606 0 0         }
607 0            
608             # came up on the users@ list, look for multipart/alternative parts which
609             # include non-text parts -- skip certain types which occur normally in ham
610 0           my($self, $pms) = @_;
611 0            
612 0 0         foreach my $map ($pms->{msg}->find_parts(qr@^multipart/alternative$@i)) {
613 0           foreach my $p ($map->find_parts(qr/./, 1, 0)) {
614             next if (lc $p->{'type'} eq 'multipart/related');
615             next if (lc $p->{'type'} eq 'application/rtf');
616 0           next if ($p->{'type'} =~ m@^text/@i);
617             return 1;
618             }
619             }
620 0     0 0  
621 0 0         return 0;
  0            
622 0 0         }
  0            
623 0 0          
  0            
624 0 0         my $self = shift;
  0            
625 0           my $pms = shift;
626             shift; # body array, unnecessary
627             my $min = shift;
628             my $max = shift;
629              
630             if (!defined $pms->{base64_length}) {
631 0     0 0   $pms->{base64_length} = $self->_check_base64_length($pms->{msg});
632             }
633 0            
634 0           return 0 if (defined $max && $pms->{base64_length} > $max);
635 0 0         return $pms->{base64_length} >= $min;
636 0 0         }
637 0 0          
638 0           my $self = shift;
639             my $msg = shift;
640              
641             my $result = 0;
642 0            
643             foreach my $p ($msg->find_parts(qr@.@, 1)) {
644             my $ctype=
645             Mail::SpamAssassin::Util::parse_content_type($p->get_header('content-type'));
646 0     0 0    
647 0           # FPs from Google Calendar invites, etc.
648 0           # perhaps just limit to test, and image?
649 0           next if ($ctype eq 'application/ics');
650 0            
651             my $cte = lc($p->get_header('content-transfer-encoding') || '');
652 0 0         next if ($cte !~ /^base64$/);
653 0           foreach my $l ( @{$p->raw()} ) {
654             $result = length $l if length $l > $result;
655             }
656 0 0 0       }
657 0          
658             return $result;
659             }
660              
661 0     0     1;