File Coverage

blib/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
Criterion Covered Total %
statement 47 274 17.1
branch 5 146 3.4
condition 3 29 10.3
subroutine 9 16 56.2
pod 4 9 44.4
total 68 474 14.3


line stmt bran cond sub pod time code
1             package Mail::SpamAssassin::Plugin::FreeMail;
2 22     22   149 use strict;
  22         51  
  22         722  
3 22     22   120 use warnings;
  22         47  
  22         1420  
4             my $VERSION = 2.002;
5              
6             ### About:
7             #
8             # If From-address is freemail, and Reply-To or address found in mail body is
9             # a different freemail address, return success. Good sign of Nigerian scams
10             # etc. Test idea from Marc Perkel.
11             #
12             # Also separate functions to check various portions of message for freemails.
13             #
14              
15             ### Install:
16             #
17             # Please add loadplugin to init.pre (so it's loaded before cf files!):
18             #
19             # loadplugin Mail::SpamAssassin::Plugin::FreeMail FreeMail.pm
20             #
21              
22             ### Supported .cf clauses:
23             #
24             # freemail_domains domain ...
25             #
26             # List of domains to be used in checks.
27             #
28             # Regexp is not supported, but following wildcards work:
29             #
30             # ? for single character (does not match a dot)
31             # * for multiple characters (does not match a dot)
32             #
33             # For example:
34             # freemail_domains hotmail.com hotmail.co.?? yahoo.* yahoo.*.*
35             #
36             # freemail_whitelist email/domain ...
37             #
38             # Emails or domains listed here are ignored (pretend they arent
39             # freemail). No wildcards!
40             #
41             # header FREEMAIL_REPLYTO eval:check_freemail_replyto(['option'])
42             #
43             # Checks/compares freemail addresses found from headers and body.
44             #
45             # Possible options:
46             #
47             # replyto From: or body address is different than Reply-To
48             # (this is the default)
49             # reply as above, but if no Reply-To header is found,
50             # compares From: and body
51             #
52             # header FREEMAIL_FROM eval:check_freemail_from(['regex'])
53             #
54             # Checks all possible "from" headers to see if sender is freemail.
55             # Uses SA all_from_addrs() function (includes 'Resent-From', 'From',
56             # 'EnvelopeFrom' etc).
57             #
58             # Add optional regex to match the found email address(es). For example,
59             # to see if user ends in digit: check_freemail_from('\d@')
60             #
61             # If you use multiple check_freemail_from rules with regexes, remember
62             # that they might hit different emails from different heades. To match
63             # a certain header only, use check_freemail_header.
64             #
65             # header FREEMAIL_HDRX eval:check_freemail_header('header' [, 'regex'])
66             #
67             # Searches defined header for freemail address. Optional regex to match
68             # the found address (like in check_freemail_from).
69             #
70             # header FREEMAIL_BODY eval:check_freemail_body(['regex'])
71             #
72             # Searches body for freemail address. With optional regex to match.
73             #
74              
75             ### Changelog:
76             #
77             # 1.995 - public beta version, revamped whole code, moved default
78             # domains to separate file: http://sa.hege.li/freemail_domains.cf
79             # 1.996 - fix freemail_skip_bulk_envfrom
80             # 1.997 - set freemail_skip_when_over_max to 1 by default
81             # 1.998 - don't warn about missing freemail_domains when linting
82             # 1.999 - default whitelist undisclosed-recipient@yahoo.com etc
83             # 2.000 - some cleaning up
84             # 2.001 - fix freemail_whitelist
85             # 2.002 - _add_desc -> _got_hit, fix description email append bug
86             #
87              
88             ### Blah:
89             #
90             # Author: Henrik Krohns <sa@hege.li>
91             # Copyright 2009 Henrik Krohns
92             #
93             # <@LICENSE>
94             # Licensed to the Apache Software Foundation (ASF) under one or more
95             # contributor license agreements. See the NOTICE file distributed with
96             # this work for additional information regarding copyright ownership.
97             # The ASF licenses this file to you under the Apache License, Version 2.0
98             # (the "License"); you may not use this file except in compliance with
99             # the License. You may obtain a copy of the License at:
100             #
101             # http://www.apache.org/licenses/LICENSE-2.0
102             #
103             # Unless required by applicable law or agreed to in writing, software
104             # distributed under the License is distributed on an "AS IS" BASIS,
105             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
106             # See the License for the specific language governing permissions and
107             # limitations under the License.
108             # </@LICENSE>
109             #
110              
111 22     22   146 use Mail::SpamAssassin::Plugin;
  22         42  
  22         616  
112 22     22   138 use Mail::SpamAssassin::PerMsgStatus;
  22         41  
  22         70047  
113              
114             our @ISA = qw(Mail::SpamAssassin::Plugin);
115              
116             # default email whitelist
117             our $email_whitelist = qr/
118             ^(?:
119             abuse|support|sales|info|helpdesk|contact|kontakt
120             | (?:post|host|domain)master
121             | undisclosed.* # yahoo.com etc(?)
122             | request-[a-f0-9]{16} # live.com
123             | bounced?- # yahoo.com etc
124             | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
125             | .+=.+=.+ # gmail forward
126             )\@
127             /xi;
128              
129             # skip replyto check when envelope sender is
130             # allow <> for now
131             # no re "strict"; # since perl 5.21.8: Ranges of ASCII printables...
132             our $skip_replyto_envfrom = qr/
133             (?:
134             ^(?:post|host|domain)master
135             | ^double-bounce
136             | ^(?:sentto|owner|return|(?:gr)?bounced?)-.+
137             | -(?:request|bounces?|admin|owner)
138             | \b(?:do[._-t]?)?no[._-t]?repl(?:y|ies)
139             | .+=.+
140             )\@
141             /xi;
142              
143 63     63 1 452 sub dbg { Mail::SpamAssassin::Plugin::dbg ("FreeMail: @_"); }
144              
145             sub new {
146 63     63 1 297 my ($class, $mailsa) = @_;
147              
148 63   33     475 $class = ref($class) || $class;
149 63         387 my $self = $class->SUPER::new($mailsa);
150 63         170 bless ($self, $class);
151              
152 63         294 $self->{freemail_available} = 1;
153 63         343 $self->set_config($mailsa->{conf});
154 63         442 $self->register_eval_rule("check_freemail_replyto");
155 63         236 $self->register_eval_rule("check_freemail_from");
156 63         217 $self->register_eval_rule("check_freemail_header");
157 63         243 $self->register_eval_rule("check_freemail_body");
158              
159             # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
160             # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
161             # full email regex v0.02
162 63         120678 $self->{email_regex} = qr/
163             (?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
164             (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
165             ( # capture email
166             [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
167             (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
168             \@
169             (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
170             $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
171             )
172             (?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
173             /xi;
174              
175 63         1076 return $self;
176             }
177              
178             sub set_config {
179 63     63 0 202 my ($self, $conf) = @_;
180 63         161 my @cmds;
181 63         359 push(@cmds, {
182             setting => 'freemail_max_body_emails',
183             default => 5,
184             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
185             }
186             );
187 63         377 push(@cmds, {
188             setting => 'freemail_max_body_freemails',
189             default => 3,
190             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
191             }
192             );
193 63         326 push(@cmds, {
194             setting => 'freemail_skip_when_over_max',
195             default => 1,
196             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
197             }
198             );
199 63         316 push(@cmds, {
200             setting => 'freemail_skip_bulk_envfrom',
201             default => 1,
202             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
203             }
204             );
205 63         257 push(@cmds, {
206             setting => 'freemail_add_describe_email',
207             default => 1,
208             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
209             }
210             );
211 63         337 $conf->{parser}->register_commands(\@cmds);
212             }
213              
214             sub parse_config {
215 5     5 1 9 my ($self, $opts) = @_;
216              
217 5 50       12 if ($opts->{key} eq "freemail_domains") {
218 0         0 foreach my $temp (split(/\s+/, $opts->{value})) {
219 0 0       0 if ($temp =~ /^[a-z0-9.*?-]+$/i) {
220 0         0 my $value = lc($temp);
221 0 0       0 if ($value =~ /[*?]/) { # separate wildcard list
222 0         0 $self->{freemail_temp_wc}{$value} = 1;
223             }
224             else {
225 0         0 $self->{freemail_domains}{$value} = 1;
226             }
227             }
228             else {
229 0         0 warn("invalid freemail_domains: $temp");
230             }
231             }
232 0         0 $self->inhibit_further_callbacks();
233 0         0 return 1;
234             }
235              
236 5 50       13 if ($opts->{key} eq "freemail_whitelist") {
237 0         0 foreach my $temp (split(/\s+/, $opts->{value})) {
238 0         0 my $value = lc($temp);
239 0 0       0 if ($value =~ /\w[.@]\w/) {
240 0         0 $self->{freemail_whitelist}{$value} = 1;
241             }
242             else {
243 0         0 warn("invalid freemail_whitelist: $temp");
244             }
245             }
246 0         0 $self->inhibit_further_callbacks();
247 0         0 return 1;
248             }
249              
250 5         10 return 0;
251             }
252              
253             sub finish_parsing_end {
254 63     63 1 203 my ($self, $opts) = @_;
255              
256 63         147 my $wcount = 0;
257 63 50       266 if (defined $self->{freemail_temp_wc}) {
258 0         0 my @domains;
259 0         0 foreach my $value (keys %{$self->{freemail_temp_wc}}) {
  0         0  
260 0         0 $value =~ s/\./\\./g;
261 0         0 $value =~ s/\?/./g;
262 0         0 $value =~ s/\*/[^.]*/g;
263 0         0 push(@domains, $value);
264             }
265 0         0 my $doms = join('|', @domains);
266 0         0 $self->{freemail_domains_re} = qr/\@(?:${doms})$/;
267 0         0 $wcount = scalar @domains;
268 0         0 undef %{$self->{freemail_temp_wc}};
  0         0  
269             }
270              
271 63         139 my $count = scalar keys %{$self->{freemail_domains}};
  63         319  
272 63 50       271 if ($count + $wcount) {
273 0         0 dbg("loaded freemail_domains entries: $count normal, $wcount wildcard");
274             }
275             else {
276 63 50 100     486 if ($self->{main}->{lint_rules} ||1) {
277 63         328 dbg("no freemail_domains entries defined, disabling plugin");
278             }
279             else {
280 0         0 warn("no freemail_domains entries defined, disabling plugin");
281             }
282 63         215 $self->{freemail_available} = 0;
283             }
284              
285 63         190 return 0;
286             }
287              
288             sub _is_freemail {
289 0     0     my ($self, $email) = @_;
290              
291 0 0         return 0 if $email eq '';
292              
293 0 0         if (defined $self->{freemail_whitelist}{$email}) {
294 0           dbg("whitelisted email: $email");
295 0           return 0;
296             }
297              
298 0           my $domain = $email;
299 0           $domain =~ s/.*\@//;
300              
301 0 0         if (defined $self->{freemail_whitelist}{$domain}) {
302 0           dbg("whitelisted domain: $domain");
303 0           return 0;
304             }
305 0 0         if ($email =~ $email_whitelist) {
306 0           dbg("whitelisted email, default: $email");
307 0           return 0;
308             }
309 0 0 0       if (defined $self->{freemail_domains}{$domain}
      0        
310             or ( defined $self->{freemail_domains_re}
311             and $email =~ $self->{freemail_domains_re} )) {
312 0           return 1;
313             }
314              
315 0           return 0;
316             }
317              
318             sub _parse_body {
319 0     0     my ($self, $pms) = @_;
320              
321             # Parse body
322 0 0         if (not defined $pms->{freemail_cache}{body}) {
323 0           %{$pms->{freemail_cache}{body}} = ();
  0            
324 0           my %seen;
325             my @body_emails;
326             # get all <a href="mailto:", since they don't show up on stripped_body
327 0           my $parsed = $pms->get_uri_detail_list();
328 0           while (my($uri, $info) = each %{$parsed}) {
  0            
329 0 0 0       if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
330 0 0         if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
331 0           my $email = lc($1);
332 0 0         push(@body_emails, $email) unless defined $seen{$email};
333 0           $seen{$email} = 1;
334 0 0         last if scalar @body_emails >= 20; # sanity
335             }
336             }
337             }
338             # scan stripped normalized body
339             # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
340 0           my $body = $pms->get_decoded_stripped_body_text_array();
341 0           BODY: foreach (@$body) {
342             # strip urls with possible emails inside
343 0           s{<?https?://\S{0,255}(?:\@|%40)\S{0,255}}{ }gi;
344             # strip emails contained in <>, not mailto:
345             # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
346 0           s{<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)}{ }gi;
347 0           while (/$self->{email_regex}/g) {
348 0           my $email = lc($1);
349 0 0         utf8::encode($email) if utf8::is_utf8($email); # chars to UTF-8
350 0 0         push(@body_emails, $email) unless $seen{$email};
351 0           $seen{$email} = 1;
352 0 0         last BODY if @body_emails >= 40; # sanity
353             }
354             }
355 0           my $count_all = 0;
356 0           my $count_fm = 0;
357 0           foreach my $email (@body_emails) { # as UTF-8 octets
358 0 0         if (++$count_all == $pms->{main}->{conf}->{freemail_max_body_emails}) {
359 0 0         if ($pms->{main}->{conf}->{freemail_skip_when_over_max}) {
360 0           $pms->{freemail_skip_body} = 1;
361 0           dbg("too many unique emails found from body");
362 0           return 0;
363             }
364             }
365 0 0         next unless $self->_is_freemail($email);
366 0 0         if (++$count_fm == $pms->{main}->{conf}->{freemail_max_body_freemails}) {
367 0 0         if ($pms->{main}->{conf}->{freemail_skip_when_over_max}) {
368 0           $pms->{freemail_skip_body} = 1;
369 0           dbg("too many unique freemails found from body");
370 0           return 0;
371             }
372             }
373 0           $pms->{freemail_cache}{body}{$email} = 1;
374             }
375 0           dbg("all body freemails: ".join(', ', keys %{$pms->{freemail_cache}{body}}))
376 0 0         if scalar keys %{$pms->{freemail_cache}{body}};
  0            
377             }
378              
379 0 0         if (defined $pms->{freemail_skip_body}) {
380 0           dbg("[cached] body email limit exceeded, skipping");
381 0           return 0;
382             }
383              
384 0           return 1;
385             }
386              
387             sub _got_hit {
388 0     0     my ($self, $pms, $email, $desc) = @_;
389              
390 0           my $rulename = $pms->get_current_eval_rule_name();
391              
392 0 0         if (defined $pms->{conf}->{descriptions}->{$rulename}) {
393 0           $desc = $pms->{conf}->{descriptions}->{$rulename};
394             }
395              
396 0 0         if ($pms->{main}->{conf}->{freemail_add_describe_email}) {
397 0           $email =~ s/\@/[at]/g;
398 0           $pms->got_hit($rulename, "", description => $desc." ($email)", ruletype => 'eval');
399             }
400             else {
401 0           $pms->got_hit($rulename, "", description => $desc, ruletype => 'eval');
402             }
403             }
404              
405             sub check_freemail_header {
406 0     0 0   my ($self, $pms, $header, $regex) = @_;
407              
408 0 0         return 0 unless $self->{freemail_available};
409              
410 0           my $rulename = $pms->get_current_eval_rule_name();
411 0 0         dbg("RULE ($rulename) check_freemail_header".(defined $regex ? " regex:$regex" : ""));
412              
413 0 0         unless (defined $header) {
414 0           warn("check_freemail_header needs argument");
415 0           return 0;
416             }
417              
418 0           my $re;
419 0 0         if (defined $regex) {
420 0           $re = eval { qr/$regex/; };
  0            
421 0 0         if ($@) {
422 0           warn("invalid regex: $@");
423 0           return 0;
424             }
425             }
426              
427 0           my @emails = map (lc, $pms->{main}->find_all_addrs_in_line ($pms->get($header)));
428              
429 0 0         if (!scalar (@emails)) {
430 0           dbg("header $header not found from mail");
431 0           return 0;
432             }
433 0           dbg("addresses from header $header: ".join(';',@emails));
434              
435 0           foreach my $email (@emails) {
436 0 0         if ($self->_is_freemail($email)) {
437 0 0         if (defined $re) {
438 0 0         next unless $email =~ $re;
439 0           dbg("HIT! $email is freemail and matches regex");
440             }
441             else {
442 0           dbg("HIT! $email is freemail");
443             }
444 0           $self->_got_hit($pms, $email, "Header $header is freemail");
445 0           return 1;
446             }
447             }
448              
449 0           return 0;
450             }
451              
452             sub check_freemail_body {
453 0     0 0   my ($self, $pms, $regex) = @_;
454              
455 0 0         return 0 unless $self->{freemail_available};
456              
457 0           my $rulename = $pms->get_current_eval_rule_name();
458 0 0         dbg("RULE ($rulename) check_freemail_body".(defined $regex ? " regex:$regex" : ""));
459              
460 0 0         return 0 unless $self->_parse_body($pms);
461              
462 0           my $re;
463 0 0         if (defined $regex) {
464 0           $re = eval { qr/$regex/; };
  0            
465 0 0         if ($@) {
466 0           warn("invalid regex: $@");
467 0           return 0;
468             }
469             }
470              
471 0 0         if (defined $re) {
    0          
472 0           foreach my $email (keys %{$pms->{freemail_cache}{body}}) {
  0            
473 0 0         if ($email =~ $re) {
474 0           dbg("HIT! email from body is freemail and matches regex: $email");
475 0           $self->_got_hit($pms, $email, "Email from body is freemail");
476 0           return 0;
477             }
478             }
479             }
480 0           elsif (scalar keys %{$pms->{freemail_cache}{body}}) {
481 0           my $emails = join(', ', keys %{$pms->{freemail_cache}{body}});
  0            
482 0           dbg("HIT! body has freemails: $emails");
483 0           $self->_got_hit($pms, $emails, "Body contains freemails");
484 0           return 0;
485             }
486              
487 0           return 0;
488             }
489              
490             sub check_freemail_from {
491 0     0 0   my ($self, $pms, $regex) = @_;
492              
493 0 0         return 0 unless $self->{freemail_available};
494              
495 0           my $rulename = $pms->get_current_eval_rule_name();
496 0 0         dbg("RULE ($rulename) check_freemail_from".(defined $regex ? " regex:$regex" : ""));
497              
498 0           my $re;
499 0 0         if (defined $regex) {
500 0           $re = eval { qr/$regex/; };
  0            
501 0 0 0       if ($@ or not defined $re) {
502 0           warn("invalid regex: $@");
503 0           return 0;
504             }
505             }
506              
507 0           my %from_addrs = map { lc($_) => 1 } ($pms->all_from_addrs());
  0            
508 0           delete $from_addrs{''}; # no empty ones thx
509              
510 0 0         unless (scalar keys %from_addrs) {
511 0           dbg("no from-addresses found to check");
512 0           return 0;
513             }
514              
515 0           dbg("all from-addresses: ".join(', ', keys %from_addrs));
516              
517 0           foreach my $email (keys %from_addrs) {
518 0 0         next unless $self->_is_freemail($email);
519 0 0         if (defined $re) {
520 0 0         next unless $email =~ $re;
521 0           dbg("HIT! $email is freemail and matches regex");
522             }
523             else {
524 0           dbg("HIT! $email is freemail");
525             }
526 0           $self->_got_hit($pms, $email, "Sender address is freemail");
527 0           return 0;
528             }
529              
530 0           return 0;
531             }
532              
533             sub check_freemail_replyto {
534 0     0 0   my ($self, $pms, $what) = @_;
535              
536 0 0         return 0 unless $self->{freemail_available};
537              
538 0           my $rulename = $pms->get_current_eval_rule_name();
539 0           dbg("RULE ($rulename) check_freemail_replyto");
540              
541 0 0         if (defined $what) {
542 0 0 0       if ($what ne 'replyto' and $what ne 'reply') {
543 0           warn("invalid check_freemail_replyto option: $what");
544 0           return 0;
545             }
546             }
547             else {
548 0           $what = 'replyto';
549             }
550              
551             # Skip mailing-list etc looking requests, mostly FPs from them
552 0 0         if ($pms->{main}->{conf}->{freemail_skip_bulk_envfrom}) {
553 0           my $envfrom = lc($pms->get("EnvelopeFrom"));
554 0 0         if ($envfrom =~ $skip_replyto_envfrom) {
555 0           dbg("envelope sender looks bulk, skipping check: $envfrom");
556 0           return 0;
557             }
558             }
559              
560 0           my $from = lc($pms->get("From:addr"));
561 0           my $replyto = lc($pms->get("Reply-To:addr"));
562 0           my $from_is_fm = $self->_is_freemail($from);
563 0           my $replyto_is_fm = $self->_is_freemail($replyto);
564              
565 0 0         dbg("From address: $from") if $from ne '';
566 0 0         dbg("Reply-To address: $replyto") if $replyto ne '';
567              
568 0 0 0       if ($from_is_fm and $replyto_is_fm and ($from ne $replyto)) {
      0        
569 0           dbg("HIT! From and Reply-To are different freemails");
570 0           $self->_got_hit($pms, "$from, $replyto", "From and Reply-To are different freemails");
571 0           return 0;
572             }
573              
574 0 0         if ($what eq 'replyto') {
    0          
575 0 0         if (!$replyto_is_fm) {
576 0           dbg("Reply-To is not freemail, skipping check");
577 0           return 0;
578             }
579             }
580             elsif ($what eq 'reply') {
581 0 0 0       if ($replyto ne '' and !$replyto_is_fm) {
    0          
582 0           dbg("Reply-To defined and is not freemail, skipping check");
583 0           return 0;
584             }
585             elsif (!$from_is_fm) {
586 0           dbg("No Reply-To and From is not freemail, skipping check");
587 0           return 0;
588             }
589             }
590 0 0         my $reply = $replyto_is_fm ? $replyto : $from;
591              
592 0 0         return 0 unless $self->_parse_body($pms);
593            
594             # Compare body to headers
595 0 0         if (scalar keys %{$pms->{freemail_cache}{body}}) {
  0            
596 0 0         my $check = $what eq 'replyto' ? $replyto : $reply;
597 0           dbg("comparing $check to body freemails");
598 0           foreach my $email (keys %{$pms->{freemail_cache}{body}}) {
  0            
599 0 0         if ($email ne $check) {
600 0           dbg("HIT! $check and $email are different freemails");
601 0           $self->_got_hit($pms, "$check, $email", "Different freemails in reply header and body");
602 0           return 0;
603             }
604             }
605             }
606              
607 0           return 0;
608             }
609              
610             1;