File Coverage

blib/lib/Mail/SpamAssassin/Plugin/ASN.pm
Criterion Covered Total %
statement 47 185 25.4
branch 3 90 3.3
condition 2 56 3.5
subroutine 11 18 61.1
pod 2 5 40.0
total 65 354 18.3


line stmt bran cond sub pod time code
1             # SpamAssassin - ASN Lookup Plugin
2             #
3             # <@LICENSE>
4             # Licensed to the Apache Software Foundation (ASF) under one or more
5             # contributor license agreements. See the NOTICE file distributed with
6             # this work for additional information regarding copyright ownership.
7             # The ASF licenses this file to you under the Apache License, Version 2.0
8             # (the "License"); you may not use this file except in compliance with
9             # the License. You may obtain a copy of the License at:
10             #
11             # http://www.apache.org/licenses/LICENSE-2.0
12             #
13             # Unless required by applicable law or agreed to in writing, software
14             # distributed under the License is distributed on an "AS IS" BASIS,
15             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16             # See the License for the specific language governing permissions and
17             # limitations under the License.
18             # </@LICENSE>
19             #
20             ###########################################################################
21             #
22             # Originated by Matthias Leisi, 2006-12-15 (SpamAssassin enhancement #4770).
23             # Modifications by D. Stussy, 2010-12-15 (SpamAssassin enhancement #6484):
24             #
25             # Since SA 3.4.0 a fixed text prefix (such as AS) to each ASN is configurable
26             # through an asn_prefix directive. Its value is 'AS' by default for backward
27             # compatibility with SA 3.3.*, but is rather redundant and can be set to an
28             # empty string for clarity if desired.
29             #
30             # Enhanced TXT-RR decoding for alternative formats from other DNS zones.
31             # Some of the supported formats of TXT RR are (quoted strings here represent
32             # individual string fields in a TXT RR):
33             # "1103" "192.88.99.0" "24"
34             # "559 1103 1239 1257 1299 | 192.88.99.0/24 | US | iana | 2001-06-01"
35             # "192.88.99.0/24 | AS1103 | SURFnet, The Netherlands | 2002-10-15 | EU"
36             # "15169 | 2a00:1450::/32 | IE | ripencc | 2009-10-05"
37             # "as1103"
38             # Multiple routes are sometimes provided by returning multiple TXT records
39             # (e.g. from cymru.com). This form of a response is handled as well.
40             #
41             # Some zones also support IPv6 lookups, for example:
42             # asn_lookup_ipv6 origin6.asn.cymru.com [_ASN_ _ASNCIDR_]
43              
44             =head1 NAME
45              
46             Mail::SpamAssassin::Plugin::ASN - SpamAssassin plugin to look up the
47             Autonomous System Number (ASN) of the connecting IP address.
48              
49             =head1 SYNOPSIS
50              
51             loadplugin Mail::SpamAssassin::Plugin::ASN
52              
53             asn_lookup asn.routeviews.org _ASN_ _ASNCIDR_
54            
55             asn_lookup_ipv6 origin6.asn.cymru.com _ASN_ _ASNCIDR_
56              
57             add_header all ASN _ASN_ _ASNCIDR_
58              
59             header TEST_AS1234 X-ASN =~ /^1234$/
60              
61             =head1 DESCRIPTION
62              
63             This plugin uses DNS lookups to the services of an external DNS zone such
64             as the one at C<http://www.routeviews.org/> to do the actual work. Please make
65             sure that your use of the plugin does not overload their infrastructure -
66             this generally means that B<you should not use this plugin in a
67             high-volume environment> or that you should use a local mirror of the
68             zone (see C<ftp://ftp.routeviews.org/dnszones/>). Other similar zones
69             may also be used.
70              
71             =head1 TEMPLATE TAGS
72              
73             This plugin allows you to create template tags containing the connecting
74             IP's AS number and route info for that AS number.
75              
76             The default config will add a header field that looks like this:
77              
78             X-Spam-ASN: AS24940 213.239.192.0/18
79              
80             where "24940" is the ASN and "213.239.192.0/18" is the route
81             announced by that ASN where the connecting IP address came from.
82             If the AS announces multiple networks (more/less specific), they will
83             all be added to the C<_ASNCIDR_> tag, separated by spaces, e.g.:
84              
85             X-Spam-ASN: AS1680 89.138.0.0/15 89.139.0.0/16
86              
87             Note that the literal "AS" before the ASN in the _ASN_ tag is configurable
88             through the I<asn_prefix> directive and may be set to an empty string.
89              
90             =head1 CONFIGURATION
91              
92             The standard ruleset contains a configuration that will add a header field
93             containing ASN data to scanned messages. The bayes tokenizer will use the
94             added header field for bayes calculations, and thus affect which BAYES_* rule
95             will trigger for a particular message.
96              
97             B<Note> that in most cases you should not score on the ASN data directly.
98             Bayes learning will probably trigger on the _ASNCIDR_ tag, but probably not
99             very well on the _ASN_ tag alone.
100              
101             =head1 SEE ALSO
102              
103             http://www.routeviews.org/ - all data regarding routing, ASNs, etc....
104              
105             http://issues.apache.org/SpamAssassin/show_bug.cgi?id=4770 -
106             SpamAssassin Issue #4770 concerning this plugin
107              
108             =head1 STATUS
109              
110             No in-depth analysis of the usefulness of bayes tokenization of ASN data has
111             been performed.
112              
113             =cut
114              
115              
116             use strict;
117 20     20   144 use warnings;
  20         43  
  20         632  
118 20     20   113 use re 'taint';
  20         41  
  20         600  
119 20     20   111  
  20         39  
  20         624  
120             use Mail::SpamAssassin::Plugin;
121 20     20   127 use Mail::SpamAssassin::Logger;
  20         67  
  20         480  
122 20     20   101 use Mail::SpamAssassin::Util qw(reverse_ip_address);
  20         39  
  20         1138  
123 20     20   133 use Mail::SpamAssassin::Dns;
  20         41  
  20         843  
124 20     20   132 use Mail::SpamAssassin::Constants qw(:ip);
  20         43  
  20         730  
125 20     20   123  
  20         36  
  20         40418  
126             our @ISA = qw(Mail::SpamAssassin::Plugin);
127              
128             our $txtdata_can_provide_a_list;
129              
130             my $IPV4_ADDRESS = IPV4_ADDRESS;
131              
132             my ($class, $mailsa) = @_;
133             $class = ref($class) || $class;
134 61     61 1 248 my $self = $class->SUPER::new($mailsa);
135 61   33     392 bless ($self, $class);
136 61         288  
137 61         166 $self->set_config($mailsa->{conf});
138              
139 61         323 #$txtdata_can_provide_a_list = Net::DNS->VERSION >= 0.69;
140             #more robust version check from Damyan Ivanov - Bug 7095
141             $txtdata_can_provide_a_list = version->parse(Net::DNS->VERSION) >= version->parse('0.69');
142              
143 61         1468 return $self;
144             }
145 61         698  
146             ###########################################################################
147              
148             my ($self, $conf) = @_;
149             my @cmds;
150              
151 61     61 0 174 =head1 ADMINISTRATOR SETTINGS
152 61         114  
153             =over 4
154              
155             =item asn_lookup asn-zone.example.com [ _ASNTAG_ _ASNCIDRTAG_ ]
156              
157             Use this to look up the ASN info in the specified zone for the first external
158             IPv4 address and add the AS number to the first specified tag and routing info
159             to the second specified tag.
160              
161             If no tags are specified the AS number will be added to the _ASN_ tag and the
162             routing info will be added to the _ASNCIDR_ tag. You must specify either none
163             or both of the tag names. Tag names must start and end with an underscore.
164              
165             If two or more I<asn_lookup>s use the same set of template tags, the results of
166             their lookups will be appended to each other in the template tag values in no
167             particular order. Duplicate results will be omitted when combining results.
168             In a similar fashion, you can also use the same template tag for both the AS
169             number tag and the routing info tag.
170              
171             Examples:
172              
173             asn_lookup asn.routeviews.org
174              
175             asn_lookup asn.routeviews.org _ASN_ _ASNCIDR_
176             asn_lookup myview.example.com _MYASN_ _MYASNCIDR_
177              
178             asn_lookup asn.routeviews.org _COMBINEDASN_ _COMBINEDASNCIDR_
179             asn_lookup myview.example.com _COMBINEDASN_ _COMBINEDASNCIDR_
180              
181             asn_lookup in1tag.example.net _ASNDATA_ _ASNDATA_
182              
183             =item asn_lookup_ipv6 asn-zone6.example.com [_ASN_ _ASNCIDR_]
184              
185             Use specified zone for lookups of IPv6 addresses. If zone supports both
186             IPv4 and IPv6 queries, use both asn_lookup and asn_lookup_ipv6 for the same
187             zone.
188              
189             =item clear_asn_lookups
190              
191             Removes any previously declared I<asn_lookup> and I<asn_lookup_ipv6> entries from the list of queries.
192              
193             =item asn_prefix 'prefix_string' (default: 'AS')
194              
195             The string specified in the argument is prepended to each ASN when storing
196             it as a tag. This prefix is rather redundant, but its default value 'AS'
197             is kept for backward compatibility with versions of SpamAssassin earlier
198             than 3.4.0. A sensible setting is an empty string. The argument may be (but
199             need not be) enclosed in single or double quotes for clarity.
200              
201             =back
202              
203             =cut
204              
205             push (@cmds, {
206             setting => 'asn_lookup',
207             is_admin => 1,
208             code => sub {
209             my ($conf, $key, $value, $line) = @_;
210             unless (defined $value && $value !~ /^$/) {
211             return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
212 0     0   0 }
213 0 0 0     0 local($1,$2,$3);
214 0         0 unless ($value =~ /^(\S+?)\.?(?:\s+_(\S+)_\s+_(\S+)_)?$/) {
215             return $Mail::SpamAssassin::Conf::INVALID_VALUE;
216 0         0 }
217 0 0       0 my ($zone, $asn_tag, $route_tag) = ($1, $2, $3);
218 0         0 $asn_tag = 'ASN' if !defined $asn_tag;
219             $route_tag = 'ASNCIDR' if !defined $route_tag;
220 0         0 push @{$conf->{asnlookups}},
221 0 0       0 { zone=>$zone, asn_tag=>$asn_tag, route_tag=>$route_tag };
222 0 0       0 }
223 0         0 });
  0         0  
224              
225             push (@cmds, {
226 61         573 setting => 'asn_lookup_ipv6',
227             is_admin => 1,
228             code => sub {
229             my ($conf, $key, $value, $line) = @_;
230             unless (defined $value && $value !~ /^$/) {
231             return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
232 0     0   0 }
233 0 0 0     0 local($1,$2,$3);
234 0         0 unless ($value =~ /^(\S+?)\.?(?:\s+_(\S+)_\s+_(\S+)_)?$/) {
235             return $Mail::SpamAssassin::Conf::INVALID_VALUE;
236 0         0 }
237 0 0       0 my ($zone, $asn_tag, $route_tag) = ($1, $2, $3);
238 0         0 $asn_tag = 'ASN' if !defined $asn_tag;
239             $route_tag = 'ASNCIDR' if !defined $route_tag;
240 0         0 push @{$conf->{asnlookups_ipv6}},
241 0 0       0 { zone=>$zone, asn_tag=>$asn_tag, route_tag=>$route_tag };
242 0 0       0 }
243 0         0 });
  0         0  
244              
245             push (@cmds, {
246 61         477 setting => 'clear_asn_lookups',
247             is_admin => 1,
248             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NOARGS,
249             code => sub {
250             my ($conf, $key, $value, $line) = @_;
251             if (defined $value && $value ne '') {
252             return $Mail::SpamAssassin::Conf::INVALID_VALUE;
253 0     0   0 }
254 0 0 0     0 delete $conf->{asnlookups};
255 0         0 delete $conf->{asnlookups_ipv6};
256             }
257 0         0 });
258 0         0  
259             push (@cmds, {
260 61         474 setting => 'asn_prefix',
261             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
262             default => 'AS',
263             code => sub {
264             my ($conf, $key, $value, $line) = @_;
265             $value = '' if !defined $value;
266             local($1,$2);
267 0     0   0 $value = $2 if $value =~ /^(['"])(.*)\1\z/; # strip quotes if any
268 0 0       0 $conf->{$key} = $value; # keep tainted
269 0         0 }
270 0 0       0 });
271 0         0  
272             $conf->{parser}->register_commands(\@cmds);
273 61         438 }
274              
275 61         281 # ---------------------------------------------------------------------------
276              
277             my ($self, $opts) = @_;
278              
279             my $pms = $opts->{permsgstatus};
280             my $conf = $self->{main}->{conf};
281 81     81 1 173  
282             if (!$pms->is_dns_available()) {
283 81         169 dbg("asn: DNS is not available, skipping ASN checks");
284 81         168 return;
285             }
286 81 100       217  
287 77         250 if (!$conf->{asnlookups} && !$conf->{asnlookups_ipv6}) {
288 77         222 dbg("asn: no asn_lookups configured, skipping ASN lookups");
289             return;
290             }
291 4 50 33     47  
292 4         13 # initialize the tag data so that if no result is returned from the DNS
293 4         9 # query we won't end up with a missing tag. Don't use $pms->set_tag()
294             # here to avoid triggering any tag-dependent action unnecessarily
295             if ($conf->{asnlookups}) {
296             foreach my $entry (@{$conf->{asnlookups}}) {
297             $pms->{tag_data}->{$entry->{asn_tag}} ||= '';
298             $pms->{tag_data}->{$entry->{route_tag}} ||= '';
299 0 0         }
300 0           }
  0            
301 0   0       if ($conf->{asnlookups_ipv6}) {
302 0   0       foreach my $entry (@{$conf->{asnlookups_ipv6}}) {
303             $pms->{tag_data}->{$entry->{asn_tag}} ||= '';
304             $pms->{tag_data}->{$entry->{route_tag}} ||= '';
305 0 0         }
306 0           }
  0            
307 0   0        
308 0   0       # get reversed IP address of last external relay to lookup
309             # don't return until we've initialized the template tags
310             my $relay = $pms->{relays_external}->[0];
311             if (!defined $relay) {
312             dbg("asn: no first external relay IP available, skipping ASN check");
313             return;
314 0           } elsif ($relay->{ip_private}) {
315 0 0         dbg("asn: first external relay is a private IP, skipping ASN check");
    0          
316 0           return;
317 0           }
318              
319 0           my $ip = $relay->{ip};
320 0           my $reversed_ip = reverse_ip_address($ip);
321             if (defined $reversed_ip) {
322             dbg("asn: using first external relay IP for lookups: %s", $ip);
323 0           } else {
324 0           dbg("asn: could not parse first external relay IP: %s, skipping", $ip);
325 0 0         return;
326 0           }
327              
328 0           my $lookup_zone;
329 0           if ($ip =~ /^$IPV4_ADDRESS$/o) {
330             if (!defined $conf->{asnlookups}) {
331             dbg("asn: asn_lookup for IPv4 not defined, skipping");
332 0           return;
333 0 0         }
334 0 0         $lookup_zone = "asnlookups";
335 0           } else {
336 0           if (!defined $conf->{asnlookups_ipv6}) {
337             dbg("asn: asn_lookup_ipv6 for IPv6 not defined, skipping");
338 0           return;
339             }
340 0 0         $lookup_zone = "asnlookups_ipv6";
341 0           }
342 0          
343             # we use arrays and array indices rather than hashes and hash keys
344 0           # in case someone wants the same zone added to multiple sets of tags
345             my $index = 0;
346             foreach my $entry (@{$conf->{$lookup_zone}}) {
347             # do the DNS query, have the callback process the result
348             my $zone_index = $index;
349 0           my $zone = $reversed_ip . '.' . $entry->{zone};
350 0           my $key = "asnlookup-${lookup_zone}-${zone_index}-".$entry->{zone};
  0            
351             my $ent = $pms->{async}->bgsend_and_start_lookup($zone, 'TXT', undef,
352 0           { type => 'ASN', key => $key, zone => $lookup_zone },
353 0           sub { my($ent, $pkt) = @_;
354 0           $self->process_dns_result($pms, $pkt, $zone_index, $lookup_zone) },
355             master_deadline => $pms->{master_deadline}
356             );
357 0     0     $pms->register_async_rule_start($key) if $ent;
358 0           $index++;
359             }
360 0           }
361 0 0          
362 0           #
363             # TXT-RR format of response:
364             # 3 fields, each as one TXT RR <character-string> (RFC 1035): ASN IP MASK
365             # The latter two fields are combined to create a CIDR.
366             # or: At least 2 fields made of a single or multiple
367             # <character-string>s, fields are separated by a vertical bar.
368             # They will be the ASN and CIDR fields in any order.
369             # If only one field is returned, it is the ASN. There will
370             # be no CIDR field in that case.
371             #
372             my ($self, $pms, $pkt, $zone_index, $lookup_zone) = @_;
373              
374             my $conf = $self->{main}->{conf};
375              
376             my $zone = $conf->{$lookup_zone}[$zone_index]->{zone};
377 0     0 0   my $asn_tag = $conf->{$lookup_zone}[$zone_index]->{asn_tag};
378             my $route_tag = $conf->{$lookup_zone}[$zone_index]->{route_tag};
379 0            
380             my($any_asn_updates, $any_route_updates, $tag_value);
381 0            
382 0           my(@asn_tag_data, %asn_tag_data_seen);
383 0           $tag_value = $pms->get_tag($asn_tag);
384             if (defined $tag_value) {
385 0           my $prefix = $pms->{conf}->{asn_prefix};
386             if (defined $prefix && $prefix ne '') {
387 0           # must strip prefix before splitting on whitespace
388 0           $tag_value =~ s/(^| )\Q$prefix\E(?=\d+)/$1/gs;
389 0 0         }
390 0           @asn_tag_data = split(/ /,$tag_value);
391 0 0 0       %asn_tag_data_seen = map(($_,1), @asn_tag_data);
392             }
393 0            
394             my(@route_tag_data, %route_tag_data_seen);
395 0           $tag_value = $pms->get_tag($route_tag);
396 0           if (defined $tag_value) {
397             @route_tag_data = split(/ /,$tag_value);
398             %route_tag_data_seen = map(($_,1), @route_tag_data);
399 0           }
400 0            
401 0 0         # NOTE: $pkt will be undef if the DNS query was aborted (e.g. timed out)
402 0           my @answer = !defined $pkt ? () : $pkt->answer;
403 0            
404             foreach my $rr (@answer) {
405             #dbg("asn: %s: lookup result packet: %s", $zone, $rr->string);
406             next if $rr->type ne 'TXT';
407 0 0         my @strings = $txtdata_can_provide_a_list ? $rr->txtdata :
408             $rr->char_str_list; # historical
409 0           next if !@strings;
410             for (@strings) { utf8::encode($_) if utf8::is_utf8($_) }
411 0 0          
412 0 0         my @items;
413             if (@strings > 1 && join('',@strings) !~ m{\|}) {
414 0 0         # routeviews.org style, multiple string fields in a TXT RR
415 0 0         @items = @strings;
  0            
416             if (@items >= 3 && $items[1] !~ m{/} && $items[2] =~ /^\d+\z/) {
417 0           $items[1] .= '/' . $items[2]; # append the net mask length to route
418 0 0 0       }
419             } else {
420 0           # cymru.com and spameatingmonkey.net style, or just a single field
421 0 0 0       @items = split(/\s*\|\s*/, join(' ',@strings));
      0        
422 0           }
423              
424             my(@route_value, @asn_value);
425             if (@items && $items[0] =~ /(?: (?:^|\s+) (?:AS)? \d+ )+ \z/xsi) {
426 0           # routeviews.org and cymru.com style, ASN is the first field,
427             # possibly a whitespace-separated list (e.g. cymru.com)
428             @asn_value = split(' ',$items[0]);
429 0           @route_value = split(' ',$items[1]) if @items >= 2;
430 0 0 0       } elsif (@items > 1 && $items[1] =~ /(?: (?:^|\s+) (?:AS)? \d+ )+ \z/xsi) {
    0 0        
431             # spameatingmonkey.net style, ASN is the second field
432             @asn_value = split(' ',$items[1]);
433 0           @route_value = split(' ',$items[0]);
434 0 0         } else {
435             dbg("asn: unparseable response: %s", join(' ', map("\"$_\"",@strings)));
436             }
437 0            
438 0           foreach my $route (@route_value) {
439             if (!defined $route || $route eq '') {
440 0           # ignore, just in case
441             } elsif ($route =~ m{/0+\z}) {
442             # unassigned/unannounced address space
443 0           } elsif ($route_tag_data_seen{$route}) {
444 0 0 0       dbg("asn: %s duplicate route %s", $route_tag, $route);
    0          
    0          
445             } else {
446             dbg("asn: %s added route %s", $route_tag, $route);
447             push(@route_tag_data, $route);
448             $route_tag_data_seen{$route} = 1;
449 0           $any_route_updates = 1;
450             }
451 0           }
452 0            
453 0           foreach my $asn (@asn_value) {
454 0           $asn =~ s/^AS(?=\d+)//si;
455             if (!$asn || $asn == 4294967295) {
456             # unassigned/unannounced address space
457             } elsif ($asn_tag_data_seen{$asn}) {
458 0           dbg("asn: %s duplicate asn %s", $asn_tag, $asn);
459 0           } else {
460 0 0 0       dbg("asn: %s added asn %s", $asn_tag, $asn);
    0          
461             push(@asn_tag_data, $asn);
462             $asn_tag_data_seen{$asn} = 1;
463 0           $any_asn_updates = 1;
464             }
465 0           }
466 0           }
467 0            
468 0           if ($any_asn_updates && @asn_tag_data) {
469             $pms->{msg}->put_metadata('X-ASN', join(' ',@asn_tag_data));
470             my $prefix = $pms->{conf}->{asn_prefix};
471             if (defined $prefix && $prefix ne '') { s/^/$prefix/ for @asn_tag_data }
472             $pms->set_tag($asn_tag,
473 0 0 0       @asn_tag_data == 1 ? $asn_tag_data[0] : \@asn_tag_data);
474 0           }
475 0           if ($any_route_updates && @route_tag_data) {
476 0 0 0       # Bayes already has X-ASN, Route is pointless duplicate, skip
  0            
477 0 0         #$pms->{msg}->put_metadata('X-ASN-Route', join(' ',@route_tag_data));
478             $pms->set_tag($route_tag,
479             @route_tag_data == 1 ? $route_tag_data[0] : \@route_tag_data);
480 0 0 0       }
481             }
482              
483 0 0         # Version features
484              
485             1;