File Coverage

blib/lib/Mail/SpamAssassin/Plugin/URIDetail.pm
Criterion Covered Total %
statement 31 134 23.1
branch 0 52 0.0
condition 1 48 2.0
subroutine 8 11 72.7
pod 1 4 25.0
total 41 249 16.4


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17             #
18             # TODO: where are the tests?
19              
20             =head1 NAME
21              
22             URIDetail - test URIs using detailed URI information
23              
24             =head1 SYNOPSIS
25              
26             This plugin creates a new rule test type, known as "uri_detail". These
27             rules apply to all URIs found in the message.
28              
29             loadplugin Mail::SpamAssassin::Plugin::URIDetail
30              
31             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
32              
33             The format for defining a rule is as follows:
34              
35             uri_detail SYMBOLIC_TEST_NAME key1 =~ /value1/ key2 !~ /value2/ ...
36              
37             Supported keys are:
38              
39             C<raw> is the raw URI prior to any cleaning
40             (e.g. "http://spamassassin.apache%2Eorg/").
41              
42             C<type> is the tag(s) which referenced the raw_uri. I<parsed> is a
43             faked type which specifies that the raw_uri was parsed from the
44             rendered text.
45              
46             C<cleaned> is a list including the raw URI and various cleaned
47             versions of the raw URI (http://spamassassin.apache%2Eorg/,
48             http://spamassassin.apache.org/).
49              
50             C<text> is the anchor text(s) (text between <a> and </a>) that
51             linked to the raw URI.
52              
53             C<domain> is the domain(s) found in the cleaned URIs.
54              
55             Example rule for matching a URI where the raw URI matches "%2Ebar",
56             the domain "bar.com" is found, and the type is "a" (an anchor tag).
57              
58             uri_detail TEST1 raw =~ /%2Ebar/ domain =~ /^bar\.com$/ type =~ /^a$/
59              
60             Example rule to look for suspicious "https" links:
61              
62             uri_detail FAKE_HTTPS text =~ /\bhttps:/ cleaned !~ /\bhttps:/
63              
64             Regular expressions should be delimited by slashes.
65              
66             =cut
67              
68             package Mail::SpamAssassin::Plugin::URIDetail;
69 22     22   147 use Mail::SpamAssassin::Plugin;
  22         49  
  22         697  
70 22     22   118 use Mail::SpamAssassin::Logger;
  22         48  
  22         1329  
71 22     22   198 use Mail::SpamAssassin::Util qw(untaint_var);
  22         50  
  22         995  
72              
73 22     22   149 use strict;
  22         61  
  22         580  
74 22     22   114 use warnings;
  22         44  
  22         706  
75             # use bytes;
76 22     22   119 use re 'taint';
  22         45  
  22         28004  
77              
78             our @ISA = qw(Mail::SpamAssassin::Plugin);
79              
80             # constructor
81             sub new {
82 63     63 1 191 my $class = shift;
83 63         146 my $mailsaobject = shift;
84              
85             # some boilerplate...
86 63   33     473 $class = ref($class) || $class;
87 63         364 my $self = $class->SUPER::new($mailsaobject);
88 63         163 bless ($self, $class);
89              
90 63         289 $self->register_eval_rule("check_uri_detail");
91              
92 63         298 $self->set_config($mailsaobject->{conf});
93              
94 63         549 return $self;
95             }
96              
97             sub set_config {
98 63     63 0 164 my ($self, $conf) = @_;
99 63         153 my @cmds;
100              
101 63         121 my $pluginobj = $self; # allow use inside the closure below
102              
103             push (@cmds, {
104             setting => 'uri_detail',
105             is_priv => 1,
106             code => sub {
107 0     0   0 my ($self, $key, $value, $line) = @_;
108              
109 0 0       0 if ($value !~ /^(\S+)\s+(.+)$/) {
110 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
111             }
112 0         0 my $name = $1;
113 0         0 my $def = $2;
114 0         0 my $added_criteria = 0;
115              
116             # if this matches a regex, it strips slashes
117 0         0 while ($def =~ m{\b(\w+)\b\s*([\=\!]\~)\s*((?:/.*?/|m(\W).*?\4)[imsx]*)(?=\s|$)}g) {
118 0         0 my $target = $1;
119 0         0 my $op = $2;
120 0         0 my $pattern = $3;
121              
122 0 0       0 if ($target !~ /^(?:raw|type|cleaned|text|domain)$/) {
123 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
124             }
125 0 0       0 if ($conf->{parser}->is_delimited_regexp_valid($name, $pattern)) {
126 0         0 $pattern = $pluginobj->make_qr($pattern);
127             }
128             else {
129 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
130             }
131              
132 0         0 dbg("config: uri_detail adding ($target $op /$pattern/) to $name");
133 0         0 $conf->{parser}->{conf}->{uri_detail}->{$name}->{$target} =
134             [$op, $pattern];
135 0         0 $added_criteria = 1;
136             }
137              
138 0 0       0 if ($added_criteria) {
139 0         0 dbg("config: uri_detail added $name\n");
140 0         0 $conf->{parser}->add_test($name, 'check_uri_detail()', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
141             }
142             else {
143 0         0 warn "config: failed to add invalid rule $name";
144 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
145             }
146             }
147 63         772 });
148            
149 63         354 $conf->{parser}->register_commands(\@cmds);
150             }
151              
152             sub check_uri_detail {
153 0     0 0   my ($self, $permsg) = @_;
154              
155 0           my %uri_detail = %{ $permsg->get_uri_detail_list() };
  0            
156              
157 0           while (my ($raw, $info) = each %uri_detail) {
158 0           my $test = $permsg->{current_rule_name};
159              
160 0           dbg("uri: running $test\n");
161              
162 0           my $rule = $permsg->{conf}->{uri_detail}->{$test};
163              
164 0 0         if (exists $rule->{raw}) {
165 0           my($op,$patt) = @{$rule->{raw}};
  0            
166 0 0 0       if ( ($op eq '=~' && $raw =~ /$patt/) ||
      0        
      0        
167             ($op eq '!~' && $raw !~ /$patt/) ) {
168 0           dbg("uri: raw matched: '%s' %s /%s/", $raw,$op,$patt);
169             } else {
170 0           next;
171             }
172             }
173              
174 0 0         if (exists $rule->{type}) {
175 0 0         next unless $info->{types};
176 0           my($op,$patt) = @{$rule->{type}};
  0            
177 0           my $match;
178 0           for my $text (keys %{ $info->{types} }) {
  0            
179 0 0 0       if ( ($op eq '=~' && $text =~ /$patt/) ||
      0        
      0        
180 0           ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
  0            
181             }
182 0 0         next unless defined $match;
183 0           dbg("uri: type matched: '%s' %s /%s/", $match,$op,$patt);
184             }
185              
186 0 0         if (exists $rule->{cleaned}) {
187 0 0         next unless $info->{cleaned};
188 0           my($op,$patt) = @{$rule->{cleaned}};
  0            
189 0           my $match;
190 0           for my $text (@{ $info->{cleaned} }) {
  0            
191 0 0 0       if ( ($op eq '=~' && $text =~ /$patt/) ||
      0        
      0        
192 0           ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
  0            
193             }
194 0 0         next unless defined $match;
195 0           dbg("uri: cleaned matched: '%s' %s /%s/", $match,$op,$patt);
196             }
197              
198 0 0         if (exists $rule->{text}) {
199 0 0         next unless $info->{anchor_text};
200 0           my($op,$patt) = @{$rule->{text}};
  0            
201 0           my $match;
202 0           for my $text (@{ $info->{anchor_text} }) {
  0            
203 0 0 0       if ( ($op eq '=~' && $text =~ /$patt/) ||
      0        
      0        
204 0           ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
  0            
205             }
206 0 0         next unless defined $match;
207 0           dbg("uri: text matched: '%s' %s /%s/", $match,$op,$patt);
208             }
209              
210 0 0         if (exists $rule->{domain}) {
211 0 0         next unless $info->{domains};
212 0           my($op,$patt) = @{$rule->{domain}};
  0            
213 0           my $match;
214 0           for my $text (keys %{ $info->{domains} }) {
  0            
215 0 0 0       if ( ($op eq '=~' && $text =~ /$patt/) ||
      0        
      0        
216 0           ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
  0            
217             }
218 0 0         next unless defined $match;
219 0           dbg("uri: domain matched: '%s' %s /%s/", $match,$op,$patt);
220             }
221              
222 0 0         if (would_log('dbg', 'rules') > 1) {
223 0           dbg("uri: criteria for $test met");
224             }
225            
226 0           $permsg->got_hit($test);
227              
228             # reset hash
229 0           keys %uri_detail;
230              
231 0           return 0;
232             }
233              
234 0           return 0;
235             }
236              
237             # ---------------------------------------------------------------------------
238              
239             # turn "/foobar/i" into qr/(?i)foobar/
240             sub make_qr {
241 0     0 0   my ($self, $pattern) = @_;
242              
243 0           my $re_delim;
244 0 0         if ($pattern =~ s/^m(\W)//) { # m!foo/bar!
245 0           $re_delim = $1;
246             } else { # /foo\/bar/ or !foo/bar!
247 0           $pattern =~ s/^(\W)//; $re_delim = $1;
  0            
248             }
249 0 0         if (!$re_delim) {
250 0           return;
251             }
252              
253 0           $pattern =~ s/${re_delim}([imsx]*)$//;
254              
255 0           my $mods = $1;
256 0 0         if ($mods) { $pattern = "(?".$mods.")".$pattern; }
  0            
257              
258 0           return qr/$pattern/;
259             }
260              
261             # ---------------------------------------------------------------------------
262              
263             1;