File Coverage

blib/lib/Mail/SpamAssassin/Plugin/URIDetail.pm
Criterion Covered Total %
statement 31 135 22.9
branch 0 54 0.0
condition 1 57 1.7
subroutine 8 11 72.7
pod 1 4 25.0
total 41 261 15.7


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17             #
18             # TODO: where are the tests?
19              
20             =head1 NAME
21              
22             URIDetail - test URIs using detailed URI information
23              
24             =head1 SYNOPSIS
25              
26             This plugin creates a new rule test type, known as "uri_detail". These
27             rules apply to all URIs found in the message.
28              
29             loadplugin Mail::SpamAssassin::Plugin::URIDetail
30              
31             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
32              
33             The format for defining a rule is as follows:
34              
35             uri_detail SYMBOLIC_TEST_NAME key1 =~ /value1/ key2 !~ /value2/ ...
36              
37             Supported keys are:
38              
39             C<raw> is the raw URI prior to any cleaning
40             (e.g. "http://spamassassin.apache%2Eorg/").
41              
42             C<type> is the tag(s) which referenced the raw_uri. I<parsed> is a
43             faked type which specifies that the raw_uri was parsed from the
44             rendered text.
45              
46             C<cleaned> is a list including the raw URI and various cleaned
47             versions of the raw URI (http://spamassassin.apache%2Eorg/,
48             https://spamassassin.apache.org/).
49              
50             C<text> is the anchor text(s) (text between <a> and </a>) that
51             linked to the raw URI.
52              
53             C<domain> is the domain(s) found in the cleaned URIs, as trimmed to
54             registrar boundary by Mail::SpamAssassin::Util::RegistrarBoundaries(3).
55              
56             C<host> is the full host(s) in the cleaned URIs. (Supported since SA 3.4.5)
57              
58             Example rule for matching a URI where the raw URI matches "%2Ebar",
59             the domain "bar.com" is found, and the type is "a" (an anchor tag).
60              
61             uri_detail TEST1 raw =~ /%2Ebar/ domain =~ /^bar\.com$/ type =~ /^a$/
62              
63             Example rule to look for suspicious "https" links:
64              
65             uri_detail FAKE_HTTPS text =~ /\bhttps:/ cleaned !~ /\bhttps:/
66              
67             Regular expressions should be delimited by slashes.
68              
69             =cut
70              
71             use Mail::SpamAssassin::Plugin;
72 22     22   146 use Mail::SpamAssassin::Logger;
  22         42  
  22         654  
73 22     22   118 use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
  22         41  
  22         1256  
74 22     22   147  
  22         37  
  22         1052  
75             use strict;
76 22     22   127 use warnings;
  22         41  
  22         1301  
77 22     22   129 # use bytes;
  22         34  
  22         602  
78             use re 'taint';
79 22     22   139  
  22         50  
  22         24160  
80             our @ISA = qw(Mail::SpamAssassin::Plugin);
81              
82             # constructor
83             my $class = shift;
84             my $mailsaobject = shift;
85 63     63 1 209  
86 63         165 # some boilerplate...
87             $class = ref($class) || $class;
88             my $self = $class->SUPER::new($mailsaobject);
89 63   33     393 bless ($self, $class);
90 63         335  
91 63         161 $self->register_eval_rule("check_uri_detail");
92              
93 63         272 $self->set_config($mailsaobject->{conf});
94              
95 63         310 return $self;
96             }
97 63         604  
98             my ($self, $conf) = @_;
99             my @cmds;
100              
101 63     63 0 154 my $pluginobj = $self; # allow use inside the closure below
102 63         133  
103             push (@cmds, {
104 63         112 setting => 'uri_detail',
105             is_priv => 1,
106             code => sub {
107             my ($self, $key, $value, $line) = @_;
108              
109             if ($value !~ /^(\S+)\s+(.+)$/) {
110 0     0   0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
111             }
112 0 0       0 my $name = $1;
113 0         0 my $def = $2;
114             my $added_criteria = 0;
115 0         0  
116 0         0 # if this matches a regex, it strips slashes
117 0         0 while ($def =~ m{\b(\w+)\b\s*([\=\!]\~)\s*((?:/.*?/|m(\W).*?\4)[imsx]*)(?=\s|$)}g) {
118             my $target = $1;
119             my $op = $2;
120 0         0 my $pattern = $3;
121 0         0  
122 0         0 if ($target !~ /^(?:raw|type|cleaned|text|domain|host)$/) {
123 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
124             }
125 0 0       0  
126 0         0 my ($rec, $err) = compile_regexp($pattern, 1);
127             if (!$rec) {
128             dbg("config: uri_detail invalid regexp '$pattern': $err");
129 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
130 0 0       0 }
131 0         0  
132 0         0 dbg("config: uri_detail adding ($target $op /$rec/) to $name");
133             $conf->{parser}->{conf}->{uri_detail}->{$name}->{$target} =
134             [$op, $rec];
135 0         0 $added_criteria = 1;
136 0         0 }
137              
138 0         0 if ($added_criteria) {
139             dbg("config: uri_detail added $name\n");
140             $conf->{parser}->add_test($name, 'check_uri_detail()',
141 0 0       0 $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
142 0         0 }
143 0         0 else {
144             warn "config: failed to add invalid rule $name";
145             return $Mail::SpamAssassin::Conf::INVALID_VALUE;
146             }
147 0         0 }
148 0         0 });
149            
150             $conf->{parser}->register_commands(\@cmds);
151 63         678 }
152              
153 63         323 my ($self, $permsg) = @_;
154              
155             my %uri_detail = %{ $permsg->get_uri_detail_list() };
156              
157 0     0 0   while (my ($raw, $info) = each %uri_detail) {
158             my $test = $permsg->{current_rule_name};
159 0            
  0            
160             dbg("uri: running $test\n");
161 0            
162 0           my $rule = $permsg->{conf}->{uri_detail}->{$test};
163              
164 0           if (exists $rule->{raw}) {
165             my($op,$patt) = @{$rule->{raw}};
166 0           if ( ($op eq '=~' && $raw =~ $patt) ||
167             ($op eq '!~' && $raw !~ $patt) ) {
168 0 0         dbg("uri: raw matched: '%s' %s /%s/", $raw,$op,$patt);
169 0           } else {
  0            
170 0 0 0       next;
      0        
      0        
171             }
172 0           }
173              
174 0           if (exists $rule->{type}) {
175             next unless $info->{types};
176             my($op,$patt) = @{$rule->{type}};
177             my $match;
178 0 0         for my $text (keys %{ $info->{types} }) {
179 0 0         if ( ($op eq '=~' && $text =~ $patt) ||
180 0           ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
  0            
181 0           }
182 0           next unless defined $match;
  0            
183 0 0 0       dbg("uri: type matched: '%s' %s /%s/", $match,$op,$patt);
      0        
      0        
184 0           }
  0            
185              
186 0 0         if (exists $rule->{cleaned}) {
187 0           next unless $info->{cleaned};
188             my($op,$patt) = @{$rule->{cleaned}};
189             my $match;
190 0 0         for my $text (@{ $info->{cleaned} }) {
191 0 0         if ( ($op eq '=~' && $text =~ $patt) ||
192 0           ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
  0            
193 0           }
194 0           next unless defined $match;
  0            
195 0 0 0       dbg("uri: cleaned matched: '%s' %s /%s/", $match,$op,$patt);
      0        
      0        
196 0           }
  0            
197              
198 0 0         if (exists $rule->{text}) {
199 0           next unless $info->{anchor_text};
200             my($op,$patt) = @{$rule->{text}};
201             my $match;
202 0 0         for my $text (@{ $info->{anchor_text} }) {
203 0 0         if ( ($op eq '=~' && $text =~ $patt) ||
204 0           ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
  0            
205 0           }
206 0           next unless defined $match;
  0            
207 0 0 0       dbg("uri: text matched: '%s' %s /%s/", $match,$op,$patt);
      0        
      0        
208 0           }
  0            
209              
210 0 0         if (exists $rule->{domain}) {
211 0           next unless $info->{domains};
212             my($op,$patt) = @{$rule->{domain}};
213             my $match;
214 0 0         for my $text (keys %{ $info->{domains} }) {
215 0 0         if ( ($op eq '=~' && $text =~ $patt) ||
216 0           ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
  0            
217 0           }
218 0           next unless defined $match;
  0            
219 0 0 0       dbg("uri: domain matched: '%s' %s /%s/", $match,$op,$patt);
      0        
      0        
220 0           }
  0            
221              
222 0 0         if (exists $rule->{host}) {
223 0           next unless $info->{hosts};
224             my($op,$patt) = @{$rule->{host}};
225             my $match;
226 0 0         for my $text (keys %{ $info->{hosts} }) {
227 0 0         if ( ($op eq '=~' && $text =~ $patt) ||
228 0           ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
  0            
229 0           }
230 0           next unless defined $match;
  0            
231 0 0 0       dbg("uri: host matched: '%s' %s /%s/", $match,$op,$patt);
      0        
      0        
232 0           }
  0            
233              
234 0 0         if (would_log('dbg', 'rules') > 1) {
235 0           dbg("uri: criteria for $test met");
236             }
237            
238 0 0         $permsg->got_hit($test);
239 0            
240             # reset hash
241             keys %uri_detail;
242 0            
243             return 0;
244             }
245 0            
246             return 0;
247 0           }
248              
249             # ---------------------------------------------------------------------------
250 0            
251              
252             1;