File Coverage

blib/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
Criterion Covered Total %
statement 238 450 52.8
branch 77 188 40.9
condition 24 71 33.8
subroutine 20 40 50.0
pod 2 18 11.1
total 361 767 47.0


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             URIDNSBL - look up URLs against DNS blocklists
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25             uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26              
27             =head1 DESCRIPTION
28              
29             This works by analysing message text and HTML for URLs, extracting host
30             names from those, then querying various DNS blocklists for either:
31             IP addresses of these hosts (uridnsbl,a) or their nameservers (uridnsbl,ns),
32             or domain names of these hosts (urirhsbl), or domain names of their
33             nameservers (urinsrhsbl, urifullnsrhsbl).
34              
35             =head1 USER SETTINGS
36              
37             =over 4
38              
39             =item skip_uribl_checks ( 0 | 1 ) (default: 0)
40              
41             Turning on the skip_uribl_checks setting will disable the URIDNSBL plugin.
42              
43             By default, SpamAssassin will run URI DNSBL checks. Individual URI blocklists
44             may be disabled selectively by setting a score of a corresponding rule to 0
45             or through the uridnsbl_skip_domain parameter.
46              
47             See also a related configuration parameter skip_rbl_checks,
48             which controls the DNSEval plugin (documented in the Conf man page).
49              
50             =back
51              
52             =over 4
53              
54             =item uridnsbl_skip_domain domain1 domain2 ...
55              
56             Specify a domain, or a number of domains, which should be skipped for the
57             URIBL checks. This is very useful to specify very common domains which are
58             not going to be listed in URIBLs.
59              
60             =back
61              
62             =over 4
63              
64             =item clear_uridnsbl_skip_domain [domain1 domain2 ...]
65              
66             If no argument is given, then clears the entire list of domains declared
67             by I<uridnsbl_skip_domain> configuration directives so far. Any subsequent
68             I<uridnsbl_skip_domain> directives will start creating a new list of skip
69             domains.
70              
71             When given a list of domains as arguments, only the specified domains
72             are removed from the list of skipped domains.
73              
74             =back
75              
76             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
77              
78             =over 4
79              
80             =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
81              
82             Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
83             used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
84             is the type of lookup (B<TXT> or B<A>). Note that you must also
85             define a body-eval rule calling C<check_uridnsbl()> to use this.
86              
87             This works by collecting domain names from URLs and querying DNS
88             blocklists with an IP address of host names found in URLs or with
89             IP addresses of their name servers, according to tflags as follows.
90              
91             If the corresponding body rule has a tflag 'a', the DNS blocklist will
92             be queried with an IP address of a host found in URLs.
93              
94             If the corresponding body rule has a tflag 'ns', DNS will be queried
95             for name servers (NS records) of a domain name found in URLs, then
96             these name server names will be resolved to their IP addresses, which
97             in turn will be sent to DNS blocklist.
98              
99             Tflags directive may specify either 'a' or 'ns' or both flags. In absence
100             of any of these two flags, a default is a 'ns', which is compatible with
101             pre-3.4 versions of SpamAssassin.
102              
103             The choice of tflags must correspond to the policy and expected use of
104             each DNS blocklist and is normally not a local decision. As an example,
105             a blocklist expecting queries resulting from an 'a' tflag is a
106             "black_a.txt" ( http://www.uribl.com/datasets.shtml ).
107              
108             Example:
109              
110             uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
111             body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
112             describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
113             tflags URIBL_SBLXBL net ns
114              
115             =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
116              
117             Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
118             name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
119             and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
120              
121             Tflags 'ns' and 'a' on a corresponding body rule are recognized and have
122             the same meaning as in the uridnsbl directive.
123              
124             C<subtest> is a sub-test to run against the returned data. The sub-test may
125             be in one of the following forms: m, n1-n2, or n/m, where n,n1,n2,m can be
126             any of: decimal digits, 0x followed by up to 8 hexadecimal digits, or an IPv4
127             address in quad-dot form. The 'A' records (IPv4 dotted address) as returned
128             by DNSBLs lookups are converted into a numerical form (r) and checked against
129             the specified sub-test as follows:
130             for a range n1-n2 the following must be true: (r >= n1 && r <= n2);
131             for a n/m form the following must be true: (r & m) == (n & m);
132             for a single value in quad-dot form the following must be true: r == n;
133             for a single decimal or hex form the following must be true:
134             ((r & n) != 0) && ((r & 0xff000000) == 0x7f000000), i.e. within 127.0.0.0/8
135              
136             Some typical examples of a sub-test are: 127.0.1.2, 127.0.1.20-127.0.1.39,
137             127.0.1.0/255.255.255.0, 0.0.0.16/0.0.0.16, 0x10/0x10, 16, 0x10 .
138              
139             Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
140             C<check_uridnsbl()> to use this.
141              
142             Example:
143              
144             uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
145             uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
146              
147             =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
148              
149             Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
150             to be used, C<rhsbl_zone> is the zone to look up domain names in, and
151             C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
152             define a body-eval rule calling C<check_uridnsbl()> to use this.
153              
154             An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
155             URI using the domain C<foo.com> will cause a lookup of
156             C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
157             used in the URIBL lookup, so the domain C<foo.bar.com> will look up
158             C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
159             C<bar.co.uk.uriblzone.net>.
160              
161             If an URI consists of an IP address instead of a hostname, the IP address is
162             looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
163              
164             Example:
165              
166             urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
167              
168             =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
169              
170             Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
171             name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
172             in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
173              
174             C<subtest> is a sub-test to run against the returned data. The sub-test may
175             be in one of the following forms: m, n1-n2, or n/m, where n,n1,n2,m can be
176             any of: decimal digits, 0x followed by up to 8 hexadecimal digits, or an IPv4
177             address in quad-dot form. The 'A' records (IPv4 dotted address) as returned
178             by DNSBLs lookups are converted into a numerical form (r) and checked against
179             the specified sub-test as follows:
180             for a range n1-n2 the following must be true: (r >= n1 && r <= n2);
181             for a n/m form the following must be true: (r & m) == (n & m);
182             for a single value in quad-dot form the following must be true: r == n;
183             for a single decimal or hex form the following must be true:
184             ((r & n) != 0) && ((r & 0xff000000) == 0x7f000000), i.e. within 127.0.0.0/8
185              
186             Some typical examples of a sub-test are: 127.0.1.2, 127.0.1.20-127.0.1.39,
187             127.2.3.0/255.255.255.0, 0.0.0.16/0.0.0.16, 0x10/0x10, 16, 0x10 .
188              
189             Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
190             C<check_uridnsbl()> to use this.
191              
192             Example:
193              
194             urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
195             urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
196              
197             =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
198              
199             Perform a RHSBL-style domain lookup against the contents of the NS records
200             for each URI. In other words, a URI using the domain C<foo.com> will cause
201             an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
202             that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
203             are stripped from both the domain used in the URI, and the domain in the
204             lookup.
205              
206             C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
207             to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
208             B<A>).
209              
210             Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
211             C<check_uridnsbl()> to use this.
212              
213             =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
214              
215             Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
216             C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
217             to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
218             B<A>). C<subtest> is the sub-test to run against the returned data; see
219             <urirhssub>.
220              
221             Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
222             C<check_uridnsbl()> to use this.
223              
224             =item urifullnsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
225              
226             Perform a RHSBL-style domain lookup against the contents of the NS records for
227             each URI. In other words, a URI using the domain C<foo.com> will cause an NS
228             lookup to take place; assuming that domain has an NS of C<ns0.bar.com>, that
229             will cause a lookup of C<ns0.bar.com.uriblzone.net>. Note that hostnames are
230             stripped from the domain used in the URI.
231              
232             C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
233             to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
234             B<A>).
235              
236             Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
237             C<check_uridnsbl()> to use this.
238              
239             =item urifullnsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
240              
241             Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
242             C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
243             to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
244             B<A>). C<subtest> is the sub-test to run against the returned data; see
245             <urirhssub>.
246              
247             Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
248             C<check_uridnsbl()> to use this.
249              
250             =item tflags NAME_OF_RULE ips_only
251              
252             Only URIs containing IP addresses as the "host" component will be matched
253             against the named "urirhsbl"/"urirhssub" rule.
254              
255             =item tflags NAME_OF_RULE domains_only
256              
257             Only URIs containing a non-IP-address "host" component will be matched against
258             the named "urirhsbl"/"urirhssub" rule.
259              
260             =item tflags NAME_OF_RULE ns
261              
262             The 'ns' flag may be applied to rules corresponding to uridnsbl and uridnssub
263             directives. Host names from URLs will be mapped to their name server IP
264             addresses (a NS lookup followed by an A lookup), which in turn will be sent
265             to blocklists. This is a default when neither 'a' nor 'ns' flags are specified.
266              
267             =item tflags NAME_OF_RULE a
268              
269             The 'a' flag may be applied to rules corresponding to uridnsbl and uridnssub
270             directives. Host names from URLs will be mapped to their IP addresses, which
271             will be sent to blocklists. When both 'ns' and 'a' flags are specified,
272             both queries will be performed.
273              
274             =back
275              
276             =head1 ADMINISTRATOR SETTINGS
277              
278             =over 4
279              
280             =item uridnsbl_max_domains N (default: 20)
281              
282             The maximum number of domains to look up.
283              
284             =item parse_dkim_uris ( 0 / 1 )
285              
286             Include DKIM uris in lookups. This option is documented in
287             Mail::SpamAssassin::Conf.
288              
289             =back
290              
291             =head1 NOTES
292              
293             The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
294             option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
295              
296             =cut
297              
298             package Mail::SpamAssassin::Plugin::URIDNSBL;
299              
300 21     21   168 use Mail::SpamAssassin::Plugin;
  21         46  
  21         1000  
301 21     21   153 use Mail::SpamAssassin::Constants qw(:ip);
  21         46  
  21         3509  
302 21     21   161 use Mail::SpamAssassin::Util;
  21         53  
  21         896  
303 21     21   129 use Mail::SpamAssassin::Logger;
  21         40  
  21         1328  
304 21     21   140 use strict;
  21         73  
  21         507  
305 21     21   127 use warnings;
  21         68  
  21         849  
306             # use bytes;
307 21     21   139 use re 'taint';
  21         52  
  21         1335  
308              
309             our @ISA = qw(Mail::SpamAssassin::Plugin);
310              
311 21     21   152 use constant LOG_COMPLETION_TIMES => 0;
  21         53  
  21         121345  
312              
313             # constructor
314             sub new {
315 62     62 1 259 my $class = shift;
316 62         151 my $samain = shift;
317              
318             # some boilerplate...
319 62   33     518 $class = ref($class) || $class;
320 62         647 my $self = $class->SUPER::new($samain);
321 62         157 bless ($self, $class);
322              
323             # this can be effectively global, at least in each process, safely
324              
325 62         295 $self->{finished} = { };
326              
327 62         419 $self->register_eval_rule ("check_uridnsbl");
328 62         372 $self->set_config($samain->{conf});
329              
330 62         708 return $self;
331             }
332              
333             # this is just a placeholder; in fact the results are dealt with later
334             sub check_uridnsbl {
335 88     88 0 1326 return 0;
336             }
337              
338             # ---------------------------------------------------------------------------
339              
340             # once the metadata is parsed, we can access the URI list. So start off
341             # the lookups here!
342             sub parsed_metadata {
343 81     81 1 282 my ($self, $opts) = @_;
344 81         205 my $pms = $opts->{permsgstatus};
345 81         212 my $conf = $pms->{conf};
346              
347 81 50       276 return 0 if $conf->{skip_uribl_checks};
348 81 100       516 return 0 if !$pms->is_dns_available();
349              
350 4         15 $pms->{'uridnsbl_activerules'} = { };
351 4         26 $pms->{'uridnsbl_hits'} = { };
352 4         11 $pms->{'uridnsbl_seen_lookups'} = { };
353              
354             # only hit DNSBLs for active rules (defined and score != 0)
355 4         10 $pms->{'uridnsbl_active_rules_rhsbl'} = { };
356 4         22 $pms->{'uridnsbl_active_rules_rhsbl_ipsonly'} = { };
357 4         13 $pms->{'uridnsbl_active_rules_rhsbl_domsonly'} = { };
358 4         11 $pms->{'uridnsbl_active_rules_nsrhsbl'} = { };
359 4         12 $pms->{'uridnsbl_active_rules_fullnsrhsbl'} = { };
360 4         13 $pms->{'uridnsbl_active_rules_nsrevipbl'} = { };
361 4         12 $pms->{'uridnsbl_active_rules_arevipbl'} = { };
362              
363 4         9 foreach my $rulename (keys %{$conf->{uridnsbls}}) {
  4         39  
364 88 50       203 next unless ($conf->is_rule_active('body_evals',$rulename));
365              
366 88         143 my $rulecf = $conf->{uridnsbls}->{$rulename};
367 88         208 my $tflags = $conf->{tflags}->{$rulename};
368 88 100       194 $tflags = '' if !defined $tflags;
369 88         194 my %tfl = map { ($_,1) } split(' ',$tflags);
  76         236  
370              
371 88         190 my $is_rhsbl = $rulecf->{is_rhsbl};
372 88 50 33     376 if ( $is_rhsbl && $tfl{'ips_only'}) {
    100 33        
    50          
    0          
    0          
373 0         0 $pms->{uridnsbl_active_rules_rhsbl_ipsonly}->{$rulename} = 1;
374             } elsif ($is_rhsbl && $tfl{'domains_only'}) {
375 76         246 $pms->{uridnsbl_active_rules_rhsbl_domsonly}->{$rulename} = 1;
376             } elsif ($is_rhsbl) {
377 12         39 $pms->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
378             } elsif ($rulecf->{is_fullnsrhsbl}) {
379 0         0 $pms->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename} = 1;
380             } elsif ($rulecf->{is_nsrhsbl}) {
381 0         0 $pms->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
382             } else { # just a plain dnsbl rule (IP based), not a RHS rule (name-based)
383 0 0       0 if ($tfl{'a'}) { # tflag 'a' explicitly
384 0         0 $pms->{uridnsbl_active_rules_arevipbl}->{$rulename} = 1;
385             }
386 0 0 0     0 if ($tfl{'ns'} || !$tfl{'a'}) { # tflag 'ns' explicitly, or default
387 0         0 $pms->{uridnsbl_active_rules_nsrevipbl}->{$rulename} = 1;
388             }
389             }
390             }
391              
392             # get all domains in message
393              
394             # don't keep dereferencing this
395 4         17 my $skip_domains = $conf->{uridnsbl_skip_domains};
396 4 50       16 $skip_domains = {} if !$skip_domains;
397              
398             # list of hashes to use in order
399 4         8 my @uri_ordered;
400              
401             # Generate the full list of html-parsed domains.
402 4         20 my $uris = $pms->get_uri_detail_list();
403              
404             # go from uri => info to uri_ordered
405             # 0: a
406             # 1: form
407             # 2: img
408             # 3: !a_empty
409             # 4: parsed
410             # 5: a_empty
411 4         11 while (my($uri, $info) = each %{$uris}) {
  11         37  
412             # we want to skip mailto: uris
413 7 50       22 next if ($uri =~ /^mailto:/i);
414              
415             # no hosts/domains were found via this uri, so skip
416 7 50       20 next unless ($info->{hosts});
417              
418 7         10 my $entry = 3;
419              
420 7 50 33     37 if ($info->{types}->{a}) {
    50          
    50          
    50          
421 0         0 $entry = 5;
422              
423             # determine a vs a_empty
424 0         0 foreach my $at (@{$info->{anchor_text}}) {
  0         0  
425 0 0       0 if (length $at) {
426 0         0 $entry = 0;
427 0         0 last;
428             }
429             }
430             }
431             elsif ($info->{types}->{form}) {
432 0         0 $entry = 1;
433             }
434             elsif ($info->{types}->{img}) {
435 0         0 $entry = 2;
436             }
437 7         28 elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
438 7         11 $entry = 4;
439             }
440              
441             # take the usable domains and add them to the ordered list
442 7         16 while (my($host,$domain) = each( %{$info->{hosts}} )) {
  14         55  
443 7 50       18 if ($skip_domains->{$domain}) {
444 0         0 dbg("uridnsbl: domain $domain in skip list, host $host");
445             } else {
446             # use hostname as a key, and drag along the stripped domain name part
447 7         21 $uri_ordered[$entry]->{$host} = $domain;
448             }
449             }
450             }
451              
452             # at this point, @uri_ordered is an ordered array of hostname hashes
453              
454 4         8 my %hostlist; # keys are host names, values are their domain parts
455              
456 4         10 my $umd = $conf->{uridnsbl_max_domains};
457 4   66     26 while (keys %hostlist < $umd && @uri_ordered) {
458 15         71 my $array = shift @uri_ordered;
459 15 100       50 next unless $array;
460              
461             # run through and find the new domains in this grouping
462 3         11 my @hosts = grep(!$hostlist{$_}, keys %{$array});
  3         21  
463 3 50       26 next unless @hosts;
464              
465             # the new hosts are all useful, just add them in
466 3 50       21 if (keys(%hostlist) + @hosts <= $umd) {
467 3         9 foreach my $host (@hosts) {
468 7         33 $hostlist{$host} = $array->{$host};
469             }
470             }
471             else {
472 0         0 dbg("uridnsbl: more than $umd URIs, picking a subset");
473             # trim down to a limited number - pick randomly
474 0   0     0 while (@hosts && keys %hostlist < $umd) {
475 0         0 my $r = int rand(scalar @hosts);
476 0         0 my $picked_host = splice(@hosts, $r, 1);
477 0         0 $hostlist{$picked_host} = $array->{$picked_host};
478             }
479             }
480             }
481              
482 4         23 my @hnames = sort keys %hostlist;
483 4 100       32 $pms->set_tag('URIHOSTS',
484             @hnames == 1 ? $hnames[0] : \@hnames);
485 4         13 my @dnames = do { my %seen; grep { !$seen{$_}++ } sort values %hostlist };
  4         6  
  4         17  
  7         29  
486 4 100       22 $pms->set_tag('URIDOMAINS',
487             @dnames == 1 ? $dnames[0] : \@dnames);
488              
489             # and query
490 4         23 $self->query_hosts_or_domains($pms, \%hostlist);
491              
492 4         25 return 1;
493             }
494              
495             # Accepts argument in one of the following forms: m, n1-n2, or n/m,
496             # where n,n1,n2,m can be any of: decimal digits, 0x followed by up to 8
497             # hexadecimal digits, or an IPv4 address in quad-dot form. The argument
498             # is checked for syntax (undef is returned on syntax errors), hex numbers
499             # are converted to decimal, and quad-dot is converted to decimal, then
500             # reassembled into original string delimited by '-' or '/'. As a special
501             # backward compatibility measure, a single quad-dot (with no second number)
502             # is converted into n-n, to distinguish it from a traditional mask-only form.
503             #
504             # In practice, arguments like the following are anticipated:
505             # 127.0.1.2 (same as 127.0.1.2-127.0.1.2 or 127.0.1.2/255.255.255.255)
506             # 127.0.1.20-127.0.1.39 (= 0x7f000114-0x7f000127 or 2130706708-2130706727)
507             # 0.0.0.16/0.0.0.16 (same as 0x10/0x10 or 16/0x10 or 16/16)
508             # 16 (traditional style mask-only, same as 0x10)
509             #
510             sub parse_and_canonicalize_subtest {
511 22     22 0 45 my($subtest) = @_;
512 22         29 my $digested_subtest;
513              
514 22         70 local($1,$2,$3);
515 22 50       178 if ($subtest =~ m{^ ([^/-]+) (?: ([/-]) (.+) )? \z}xs) {
516 22         84 my($n1,$delim,$n2) = ($1,$2,$3);
517 22         35 my $any_quad_dot;
518 22         37 for ($n1,$n2) {
519 44 100       241 if (!defined $_) {
    100          
    100          
    50          
520             # ok, $n2 may not exist
521             } elsif (/^\d{1,10}\z/) {
522             # ok, already a decimal number
523             } elsif (/^0x[0-9a-zA-Z]{1,8}\z/) {
524 5         20 $_ = hex($_); # hex -> number
525             } elsif (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/) {
526 26         89 $_ = Mail::SpamAssassin::Util::my_inet_aton($_); # quad-dot -> number
527 26         80 $any_quad_dot = 1;
528             } else {
529 0         0 return;
530             }
531             }
532 22 100       128 $digested_subtest = defined $n2 ? $n1.$delim.$n2
    100          
533             : $any_quad_dot ? $n1.'-'.$n1 : "$n1";
534             }
535 22         91 return $digested_subtest;
536             }
537              
538             sub set_config {
539 62     62 0 177 my($self, $conf) = @_;
540 62         167 my @cmds;
541              
542 62         378 push(@cmds, {
543             setting => 'skip_uribl_checks',
544             default => 0,
545             type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
546             });
547              
548 62         354 push(@cmds, {
549             setting => 'uridnsbl_max_domains',
550             is_admin => 1,
551             default => 20,
552             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
553             });
554              
555             push (@cmds, {
556             setting => 'uridnsbl',
557             is_priv => 1,
558             code => sub {
559 0     0   0 my ($self, $key, $value, $line) = @_;
560 0         0 local($1,$2,$3);
561 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
    0          
562 0         0 my $rulename = $1;
563 0         0 my $zone = $2;
564 0         0 my $type = $3;
565 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
566 0         0 $self->{uridnsbls}->{$rulename} = {
567             zone => $zone, type => $type,
568             is_rhsbl => 0
569             };
570             }
571             elsif ($value =~ /^$/) {
572 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
573             }
574             else {
575 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
576             }
577             }
578 62         631 });
579              
580             push (@cmds, {
581             setting => 'uridnssub',
582             is_priv => 1,
583             code => sub {
584 0     0   0 my ($self, $key, $value, $line) = @_;
585 0         0 local($1,$2,$3,$4);
586 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*?)\s*$/) {
    0          
587 0         0 my $rulename = $1;
588 0         0 my $zone = $2;
589 0         0 my $type = $3;
590 0         0 my $subrule = $4;
591 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
592 0         0 $subrule = parse_and_canonicalize_subtest($subrule);
593 0 0       0 defined $subrule or return $Mail::SpamAssassin::Conf::INVALID_VALUE;
594 0         0 $self->{uridnsbls}->{$rulename} = {
595             zone => $zone, type => $type,
596             is_rhsbl => 0, subtest => $subrule,
597             };
598             }
599             elsif ($value =~ /^$/) {
600 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
601             }
602             else {
603 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
604             }
605             }
606 62         515 });
607              
608             push (@cmds, {
609             setting => 'urirhsbl',
610             is_priv => 1,
611             code => sub {
612 0     0   0 my ($self, $key, $value, $line) = @_;
613 0         0 local($1,$2,$3);
614 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
    0          
615 0         0 my $rulename = $1;
616 0         0 my $zone = $2;
617 0         0 my $type = $3;
618 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
619 0         0 $self->{uridnsbls}->{$rulename} = {
620             zone => $zone, type => $type,
621             is_rhsbl => 1
622             };
623             }
624             elsif ($value =~ /^$/) {
625 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
626             }
627             else {
628 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
629             }
630             }
631 62         490 });
632              
633             push (@cmds, {
634             setting => 'urirhssub',
635             is_priv => 1,
636             code => sub {
637 22     22   67 my ($self, $key, $value, $line) = @_;
638 22         76 local($1,$2,$3,$4);
639 22 50       139 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*?)\s*$/) {
    0          
640 22         59 my $rulename = $1;
641 22         45 my $zone = $2;
642 22         43 my $type = $3;
643 22         41 my $subrule = $4;
644 22         43 $zone =~ s/\.\z//; # strip a redundant trailing dot
645 22         48 $subrule = parse_and_canonicalize_subtest($subrule);
646 22 50       66 defined $subrule or return $Mail::SpamAssassin::Conf::INVALID_VALUE;
647 22         225 $self->{uridnsbls}->{$rulename} = {
648             zone => $zone, type => $type,
649             is_rhsbl => 1, subtest => $subrule,
650             };
651             }
652             elsif ($value =~ /^$/) {
653 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
654             }
655             else {
656 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
657             }
658             }
659 62         555 });
660              
661             push (@cmds, {
662             setting => 'urinsrhsbl',
663             is_priv => 1,
664             code => sub {
665 0     0   0 my ($self, $key, $value, $line) = @_;
666 0         0 local($1,$2,$3);
667 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
    0          
668 0         0 my $rulename = $1;
669 0         0 my $zone = $2;
670 0         0 my $type = $3;
671 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
672 0         0 $self->{uridnsbls}->{$rulename} = {
673             zone => $zone, type => $type,
674             is_nsrhsbl => 1
675             };
676             }
677             elsif ($value =~ /^$/) {
678 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
679             }
680             else {
681 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
682             }
683             }
684 62         513 });
685              
686             push (@cmds, {
687             setting => 'urinsrhssub',
688             is_priv => 1,
689             code => sub {
690 0     0   0 my ($self, $key, $value, $line) = @_;
691 0         0 local($1,$2,$3,$4);
692 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*?)\s*$/) {
    0          
693 0         0 my $rulename = $1;
694 0         0 my $zone = $2;
695 0         0 my $type = $3;
696 0         0 my $subrule = $4;
697 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
698 0         0 $subrule = parse_and_canonicalize_subtest($subrule);
699 0 0       0 defined $subrule or return $Mail::SpamAssassin::Conf::INVALID_VALUE;
700 0         0 $self->{uridnsbls}->{$rulename} = {
701             zone => $zone, type => $type,
702             is_nsrhsbl => 1, subtest => $subrule,
703             };
704             }
705             elsif ($value =~ /^$/) {
706 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
707             }
708             else {
709 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
710             }
711             }
712 62         539 });
713              
714             push (@cmds, {
715             setting => 'urifullnsrhsbl',
716             is_priv => 1,
717             code => sub {
718 0     0   0 my ($self, $key, $value, $line) = @_;
719 0         0 local($1,$2,$3);
720 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
    0          
721 0         0 my $rulename = $1;
722 0         0 my $zone = $2;
723 0         0 my $type = $3;
724 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
725 0         0 $self->{uridnsbls}->{$rulename} = {
726             zone => $zone, type => $type,
727             is_fullnsrhsbl => 1
728             };
729             }
730             elsif ($value =~ /^$/) {
731 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
732             }
733             else {
734 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
735             }
736             }
737 62         514 });
738              
739             push (@cmds, {
740             setting => 'urifullnsrhssub',
741             is_priv => 1,
742             code => sub {
743 0     0   0 my ($self, $key, $value, $line) = @_;
744 0         0 local($1,$2,$3,$4);
745 0 0       0 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*?)\s*$/) {
    0          
746 0         0 my $rulename = $1;
747 0         0 my $zone = $2;
748 0         0 my $type = $3;
749 0         0 my $subrule = $4;
750 0         0 $zone =~ s/\.\z//; # strip a redundant trailing dot
751 0         0 $subrule = parse_and_canonicalize_subtest($subrule);
752 0 0       0 defined $subrule or return $Mail::SpamAssassin::Conf::INVALID_VALUE;
753 0         0 $self->{uridnsbls}->{$rulename} = {
754             zone => $zone, type => $type,
755             is_fullnsrhsbl => 1, subtest => $subrule,
756             };
757             }
758             elsif ($value =~ /^$/) {
759 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
760             }
761             else {
762 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
763             }
764             }
765 62         541 });
766              
767             push (@cmds, {
768             setting => 'uridnsbl_skip_domain',
769             default => {},
770             type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
771             code => sub {
772 0     0   0 my ($self, $key, $value, $line) = @_;
773 0 0       0 if ($value =~ /^$/) {
774 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
775             }
776 0         0 foreach my $domain (split(/\s+/, $value)) {
777 0         0 $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
778             }
779             }
780 62         553 });
781              
782             push (@cmds, {
783             setting => 'clear_uridnsbl_skip_domain',
784             type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
785             code => sub {
786 0     0   0 my ($self, $key, $value, $line) = @_;
787 0 0 0     0 if (!defined $value || $value eq '') {
788             # clear the entire list
789 0         0 $self->{uridnsbl_skip_domains} = {};
790             } else {
791 0         0 foreach my $domain (split(/\s+/, $value)) {
792 0         0 delete $self->{uridnsbl_skip_domains}->{lc $domain};
793             }
794             }
795             }
796 62         556 });
797              
798             # obsolete
799             push(@cmds, {
800             setting => 'uridnsbl_timeout',
801             code => sub {
802             # not a lint_warn(), since it's pretty harmless and we don't want
803             # to break stuff like sa-update
804 0     0   0 warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
805 0         0 return 0;
806             }
807 62         460 });
808              
809 62         409 $conf->{parser}->register_commands(\@cmds);
810             }
811              
812             # ---------------------------------------------------------------------------
813              
814             sub query_hosts_or_domains {
815 4     4 0 11 my ($self, $pms, $hosthash_ref) = @_;
816 4         9 my $conf = $pms->{conf};
817 4         9 my $seen_lookups = $pms->{'uridnsbl_seen_lookups'};
818              
819 4         10 my $rhsblrules = $pms->{uridnsbl_active_rules_rhsbl};
820 4         9 my $rhsbliprules = $pms->{uridnsbl_active_rules_rhsbl_ipsonly};
821 4         8 my $rhsbldomrules = $pms->{uridnsbl_active_rules_rhsbl_domsonly};
822 4         8 my $nsrhsblrules = $pms->{uridnsbl_active_rules_nsrhsbl};
823 4         9 my $fullnsrhsblrules = $pms->{uridnsbl_active_rules_fullnsrhsbl};
824 4         9 my $nsreviprules = $pms->{uridnsbl_active_rules_nsrevipbl};
825 4         7 my $areviprules = $pms->{uridnsbl_active_rules_arevipbl};
826              
827 4         21 while (my($host,$domain) = each(%$hosthash_ref)) {
828 7         18 $domain = lc $domain; # just in case
829 7         14 $host = lc $host;
830 7         31 dbg("uridnsbl: considering host=$host, domain=$domain");
831 7         24 my $obj = { dom => $domain };
832              
833 7         16 my ($is_ip, $single_dnsbl);
834 7 100       27 if ($host =~ /^\d+\.\d+\.\d+\.\d+$/) {
835 1         4 my $IPV4_ADDRESS = IPV4_ADDRESS;
836 1         3 my $IP_PRIVATE = IP_PRIVATE;
837             # only look up the IP if it is public and valid
838 1 50 33     322 if ($host =~ /^$IPV4_ADDRESS$/o && $host !~ /^$IP_PRIVATE$/o) {
839 1         5 my $obj = { dom => $host };
840 1         8 $self->lookup_dnsbl_for_ip($pms, $obj, $host);
841             # and check the IP in RHSBLs too
842 1         4 local($1,$2,$3,$4);
843 1 50       7 if ($host =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
844 1         6 $domain = "$4.$3.$2.$1";
845 1         3 $single_dnsbl = 1;
846 1         6 $is_ip = 1;
847             }
848             }
849             }
850             else {
851 6         12 $single_dnsbl = 1;
852             }
853              
854 7 50       64 if ($single_dnsbl) {
855             # rule names which look up a domain in the basic RHSBL subset
856 7         14 my @rhsblrules = keys %{$rhsblrules};
  7         24  
857              
858             # and add the "domains_only" and "ips_only" subsets as appropriate
859 7 100       18 if ($is_ip) {
860 1         5 push @rhsblrules, keys %{$rhsbliprules};
  1         3  
861             } else {
862 6         11 push @rhsblrules, keys %{$rhsbldomrules};
  6         37  
863             }
864              
865 7         21 foreach my $rulename (@rhsblrules) {
866 135         277 my $rulecf = $conf->{uridnsbls}->{$rulename};
867             $self->lookup_single_dnsbl($pms, $obj, $rulename,
868 135         481 $domain, $rulecf->{zone}, $rulecf->{type});
869              
870             # note that these rules are now underway. important: unless the
871             # rule hits, in the current design, these will not be considered
872             # "finished" until harvest_dnsbl_queries() completes
873 135         359 $pms->register_async_rule_start($rulename);
874             }
875              
876             # perform NS+A or A queries to look up the domain in the non-RHSBL subset,
877             # but only if there are active reverse-IP-URIBL rules
878 7 100       36 if ($host !~ /^\d+\.\d+\.\d+\.\d+$/) {
879 6 50 33     61 if ( !$seen_lookups->{'NS:'.$domain} &&
      33        
880             (%$nsreviprules || %$nsrhsblrules || %$fullnsrhsblrules) ) {
881 0         0 $seen_lookups->{'NS:'.$domain} = 1;
882 0         0 $self->lookup_domain_ns($pms, $obj, $domain);
883             }
884 6 50 33     54 if (%$areviprules && !$seen_lookups->{'A:'.$host}) {
885 0         0 $seen_lookups->{'A:'.$host} = 1;
886 0         0 my $obj = { dom => $host, is_arevip => 1 };
887 0         0 $self->lookup_a_record($pms, $obj, $host);
888 0         0 $pms->register_async_rule_start($_) for keys %$areviprules;
889             }
890             }
891             }
892             }
893             }
894              
895             # ---------------------------------------------------------------------------
896              
897             sub lookup_domain_ns {
898 0     0 0 0 my ($self, $pms, $obj, $dom) = @_;
899              
900 0         0 my $key = "NS:" . $dom;
901 0         0 my $ent = {
902             key => $key, zone => $dom, obj => $obj, type => "URI-NS",
903             };
904             # dig $dom ns
905             $ent = $pms->{async}->bgsend_and_start_lookup(
906             $dom, 'NS', undef, $ent,
907 0     0   0 sub { my ($ent2,$pkt) = @_;
908 0         0 $self->complete_ns_lookup($pms, $ent2, $pkt, $dom) },
909 0         0 master_deadline => $pms->{master_deadline} );
910              
911 0         0 return $ent;
912             }
913              
914             sub complete_ns_lookup {
915 0     0 0 0 my ($self, $pms, $ent, $pkt, $dom) = @_;
916              
917 0 0       0 if (!$pkt) {
918             # $pkt will be undef if the DNS query was aborted (e.g. timed out)
919 0         0 dbg("uridnsbl: complete_ns_lookup aborted %s", $ent->{key});
920 0         0 return;
921             }
922              
923 0         0 dbg("uridnsbl: complete_ns_lookup %s", $ent->{key});
924 0         0 my $conf = $pms->{conf};
925 0         0 my @answer = $pkt->answer;
926              
927 0         0 my $IPV4_ADDRESS = IPV4_ADDRESS;
928 0         0 my $IP_PRIVATE = IP_PRIVATE;
929 0         0 my $nsrhsblrules = $pms->{uridnsbl_active_rules_nsrhsbl};
930 0         0 my $fullnsrhsblrules = $pms->{uridnsbl_active_rules_fullnsrhsbl};
931 0         0 my $seen_lookups = $pms->{'uridnsbl_seen_lookups'};
932              
933 0         0 my $j = 0;
934 0         0 foreach my $rr (@answer) {
935 0         0 $j++;
936 0         0 my $str = $rr->string;
937 0 0 0     0 next unless (defined($str) && defined($dom));
938 0         0 dbg("uridnsbl: got($j) NS for $dom: $str");
939              
940 0 0       0 if ($rr->type eq 'NS') {
941 0         0 my $nsmatch = lc $rr->nsdname; # available since at least Net::DNS 0.14
942 0         0 my $nsrhblstr = $nsmatch;
943 0         0 my $fullnsrhblstr = $nsmatch;
944              
945 0 0       0 if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+$/) {
946             # only look up the IP if it is public and valid
947 0 0 0     0 if ($nsmatch =~ /^$IPV4_ADDRESS$/o && $nsmatch !~ /^$IP_PRIVATE$/o) {
948 0         0 $self->lookup_dnsbl_for_ip($pms, $ent->{obj}, $nsmatch);
949             }
950 0         0 $nsrhblstr = $nsmatch;
951             }
952             else {
953 0 0       0 if (!$seen_lookups->{'A:'.$nsmatch}) {
954 0         0 $seen_lookups->{'A:'.$nsmatch} = 1;
955 0         0 $self->lookup_a_record($pms, $ent->{obj}, $nsmatch);
956             }
957 0         0 $nsrhblstr = $self->{main}->{registryboundaries}->trim_domain($nsmatch);
958             }
959              
960 0         0 foreach my $rulename (keys %{$nsrhsblrules}) {
  0         0  
961 0         0 my $rulecf = $conf->{uridnsbls}->{$rulename};
962             $self->lookup_single_dnsbl($pms, $ent->{obj}, $rulename,
963 0         0 $nsrhblstr, $rulecf->{zone}, $rulecf->{type});
964              
965 0         0 $pms->register_async_rule_start($rulename);
966             }
967              
968 0         0 foreach my $rulename (keys %{$fullnsrhsblrules}) {
  0         0  
969 0         0 my $rulecf = $conf->{uridnsbls}->{$rulename};
970             $self->lookup_single_dnsbl($pms, $ent->{obj}, $rulename,
971 0         0 $fullnsrhblstr, $rulecf->{zone}, $rulecf->{type});
972              
973 0         0 $pms->register_async_rule_start($rulename);
974             }
975             }
976             }
977             }
978              
979             # ---------------------------------------------------------------------------
980              
981             sub lookup_a_record {
982 0     0 0 0 my ($self, $pms, $obj, $hname) = @_;
983              
984 0         0 my $key = "A:" . $hname;
985 0         0 my $ent = {
986             key => $key, zone => $hname, obj => $obj, type => "URI-A",
987             };
988             # dig $hname a
989             $ent = $pms->{async}->bgsend_and_start_lookup(
990             $hname, 'A', undef, $ent,
991 0     0   0 sub { my ($ent2,$pkt) = @_;
992 0         0 $self->complete_a_lookup($pms, $ent2, $pkt, $hname) },
993 0         0 master_deadline => $pms->{master_deadline} );
994              
995 0         0 return $ent;
996             }
997              
998             sub complete_a_lookup {
999 0     0 0 0 my ($self, $pms, $ent, $pkt, $hname) = @_;
1000              
1001 0 0       0 if (!$pkt) {
1002             # $pkt will be undef if the DNS query was aborted (e.g. timed out)
1003 0         0 dbg("uridnsbl: complete_a_lookup aborted %s", $ent->{key});
1004 0         0 return;
1005             }
1006 0         0 dbg("uridnsbl: complete_a_lookup %s", $ent->{key});
1007 0         0 my $j = 0;
1008 0         0 my @answer = $pkt->answer;
1009 0         0 foreach my $rr (@answer) {
1010 0         0 $j++;
1011 0         0 my $str = $rr->string;
1012 0 0       0 if (!defined $hname) {
    0          
1013 0         0 warn "complete_a_lookup-1: $j, (hname is undef), $str";
1014             } elsif (!defined $str) {
1015 0         0 warn "complete_a_lookup-2: $j, $hname, (str is undef)";
1016 0         0 next;
1017             }
1018 0         0 dbg("uridnsbl: complete_a_lookup got(%d) A for %s: %s", $j,$hname,$str);
1019              
1020 0 0       0 if ($rr->type eq 'A') {
1021 0         0 my $ip_address = $rr->rdatastr;
1022 0         0 $self->lookup_dnsbl_for_ip($pms, $ent->{obj}, $ip_address);
1023             }
1024             }
1025             }
1026              
1027             # ---------------------------------------------------------------------------
1028              
1029             sub lookup_dnsbl_for_ip {
1030 1     1 0 4 my ($self, $pms, $obj, $ip) = @_;
1031              
1032 1         7 local($1,$2,$3,$4);
1033 1         6 $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
1034 1         7 my $revip = "$4.$3.$2.$1";
1035              
1036 1         2 my $conf = $pms->{conf};
1037              
1038 1         2 my @rulenames;
1039 1 50       5 if ($obj->{is_arevip}) {
1040 0         0 @rulenames = keys %{$pms->{uridnsbl_active_rules_arevipbl}};
  0         0  
1041             } else {
1042 1         2 @rulenames = keys %{$pms->{uridnsbl_active_rules_nsrevipbl}};
  1         5  
1043             }
1044 1         7 foreach my $rulename (@rulenames) {
1045 0         0 my $rulecf = $conf->{uridnsbls}->{$rulename};
1046              
1047 0   0     0 my $tflags = $conf->{tflags}->{$rulename} || '';
1048             # ips_only/domains_only lookups should not act on this kind of BL
1049 0 0       0 next if $tflags =~ /\b(?:ips_only|domains_only)\b/;
1050              
1051             $self->lookup_single_dnsbl($pms, $obj, $rulename,
1052 0         0 $revip, $rulecf->{zone}, $rulecf->{type});
1053             }
1054             }
1055              
1056             sub lookup_single_dnsbl {
1057 135     135 0 574 my ($self, $pms, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
1058              
1059 135         508 my $qkey = "$rulename:$lookupstr:$dnsbl:$qtype";
1060 135 50       454 return if exists $pms->{uridnsbl_seen_lookups}{$qkey};
1061 135         350 $pms->{uridnsbl_seen_lookups}{$qkey} = 1;
1062              
1063 135         375 my $key = "DNSBL:" . $lookupstr . ':' . $dnsbl;
1064 135         552 my $ent = {
1065             key => $key, zone => $dnsbl, obj => $obj, type => 'URI-DNSBL',
1066             rulename => $rulename,
1067             };
1068             $ent = $pms->{async}->bgsend_and_start_lookup(
1069             $lookupstr.".".$dnsbl, $qtype, undef, $ent,
1070 135     135   225 sub { my ($ent2,$pkt) = @_;
1071 135         308 $self->complete_dnsbl_lookup($pms, $ent2, $pkt) },
1072 135         929 master_deadline => $pms->{master_deadline} );
1073              
1074 135         392 return $ent;
1075             }
1076              
1077             sub complete_dnsbl_lookup {
1078 135     135 0 223 my ($self, $pms, $ent, $pkt) = @_;
1079              
1080 135 50       271 if (!$pkt) {
1081             # $pkt will be undef if the DNS query was aborted (e.g. timed out)
1082             dbg("uridnsbl: complete_dnsbl_lookup aborted %s %s",
1083 0         0 $ent->{rulename}, $ent->{key});
1084 0         0 return;
1085             }
1086              
1087 135         338 dbg("uridnsbl: complete_dnsbl_lookup %s %s", $ent->{rulename}, $ent->{key});
1088 135         217 my $conf = $pms->{conf};
1089              
1090 135         273 my $zone = $ent->{zone};
1091 135         253 my $dom = $ent->{obj}->{dom};
1092 135         198 my $rulename = $ent->{rulename};
1093 135         224 my $rulecf = $conf->{uridnsbls}->{$rulename};
1094              
1095 135         215 my @subtests;
1096 135         317 my @answer = $pkt->answer;
1097 135         778 foreach my $rr (@answer)
1098             {
1099 126         182 my($rdatastr,$rdatanum);
1100 126         316 my $rr_type = $rr->type;
1101              
1102 126 50       1361 if ($rr_type eq 'A') {
    0          
1103             # Net::DNS::RR::A::address() is available since Net::DNS 0.69
1104 126 50       561 $rdatastr = $rr->UNIVERSAL::can('address') ? $rr->address
1105             : $rr->rdatastr;
1106 126 50       1998 if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/) {
1107 126         352 $rdatanum = Mail::SpamAssassin::Util::my_inet_aton($rdatastr);
1108             }
1109             } elsif ($rr_type eq 'TXT') {
1110             # txtdata returns a non- zone-file-format encoded result, unlike rdatastr;
1111             # avoid space-separated RDATA <character-string> fields if possible;
1112             # txtdata provides a list of strings in list context since Net::DNS 0.69
1113 0         0 $rdatastr = join('',$rr->txtdata);
1114             } else {
1115 0         0 next;
1116             }
1117              
1118 126         373 my $subtest = $rulecf->{subtest};
1119              
1120 126 50       669 dbg("uridnsbl: %s . %s -> %s, %s%s",
1121             $dom, $zone, $rdatastr, $rulename,
1122             !defined $subtest ? '' : ', subtest:'.$subtest);
1123              
1124 126         184 my $match;
1125 126 50       779 if (!defined $subtest) {
    50          
    50          
1126             # this zone is a simple rule, not a set of subrules
1127             # skip any A record that isn't on 127/8
1128 0 0 0     0 if ($rr_type eq 'A' && $rdatastr !~ /^127\./) {
1129 0         0 warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
1130             $pkt->header->id." rr=".$rr->string);
1131 0         0 next;
1132             }
1133 0         0 $match = 1;
1134             } elsif ($subtest eq $rdatastr) {
1135 0         0 $match = 1;
1136             } elsif ($subtest =~ m{^ (\d+) (?: ([/-]) (\d+) )? \z}x) {
1137 126         528 my($n1,$delim,$n2) = ($1,$2,$3);
1138 126 50 100     679 $match =
    100 100        
    100          
1139             !defined $n2 ? ($rdatanum & $n1) && # mask only
1140             (($rdatanum & 0xff000000) == 0x7f000000) # 127/8
1141             : $delim eq '-' ? $rdatanum >= $n1 && $rdatanum <= $n2 # range
1142             : $delim eq '/' ? ($rdatanum & $n2) == (int($n1) & $n2) # value/mask
1143             : 0; # notice int($n1) to fix perl ~5.14 taint bug (Bug 7725)
1144              
1145 126 100       836 dbg("uridnsbl: %s . %s -> %s, %s, %08x %s %s",
    100          
    100          
1146             $dom, $zone, $rdatastr, $rulename, $rdatanum,
1147             !defined $n2 ? sprintf('& %08x', $n1)
1148             : $n1 == $n2 ? sprintf('== %08x', $n1)
1149             : sprintf('%08x%s%08x', $n1,$delim,$n2),
1150             $match ? 'match' : 'no');
1151             }
1152 126 100       587 $self->got_dnsbl_hit($pms, $ent, $rdatastr, $dom, $rulename) if $match;
1153             }
1154             }
1155              
1156             sub got_dnsbl_hit {
1157 42     42 0 131 my ($self, $pms, $ent, $str, $dom, $rulename) = @_;
1158              
1159 42         101 $str =~ s/\s+/ /gs; # long whitespace => short
1160 42         194 dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
1161              
1162 42 100       115 if (!defined $pms->{uridnsbl_hits}->{$rulename}) {
1163 34         84 $pms->{uridnsbl_hits}->{$rulename} = { };
1164             };
1165 42         98 $pms->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
1166              
1167 42 50 33     495 if ( $pms->{uridnsbl_active_rules_nsrevipbl}->{$rulename}
      33        
      33        
      66        
      66        
      33        
1168             || $pms->{uridnsbl_active_rules_arevipbl}->{$rulename}
1169             || $pms->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
1170             || $pms->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename}
1171             || $pms->{uridnsbl_active_rules_rhsbl}->{$rulename}
1172             || $pms->{uridnsbl_active_rules_rhsbl_ipsonly}->{$rulename}
1173             || $pms->{uridnsbl_active_rules_rhsbl_domsonly}->{$rulename})
1174             {
1175             # TODO: this needs to handle multiple domain hits per rule
1176 42         153 $pms->clear_test_state();
1177 42         69 my $uris = join (' ', keys %{$pms->{uridnsbl_hits}->{$rulename}});
  42         144  
1178 42         201 $pms->test_log ("URIs: $uris");
1179 42         128 $pms->got_hit ($rulename, "");
1180              
1181             # note that this rule has completed (since it got at least 1 hit)
1182 42         142 $pms->register_async_rule_finish($rulename);
1183             }
1184             }
1185              
1186             # ---------------------------------------------------------------------------
1187              
1188             # capability checks for "if can()":
1189             #
1190 0     0 0   sub has_tflags_domains_only { 1 }
1191 0     0 0   sub has_subtest_for_ranges { 1 }
1192 0     0 0   sub has_uridnsbl_for_a { 1 } # uridnsbl rules recognize tflags 'a' and 'ns'
1193 0     0 0   sub has_uridnsbl_a_ns { 1 } # has an actually working 'a' flag, unlike above :-(
1194              
1195             1;