File Coverage

blib/lib/Mail/SpamAssassin/Conf.pm
Criterion Covered Total %
statement 548 970 56.4
branch 118 388 30.4
condition 37 146 25.3
subroutine 59 112 52.6
pod 0 48 0.0
total 762 1664 45.7


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Conf - SpamAssassin configuration file
21              
22             =head1 SYNOPSIS
23              
24             # a comment
25              
26             rewrite_header Subject *****SPAM*****
27              
28             full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i
29             describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618
30              
31             header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i
32             describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters
33              
34             score A_HREF_TO_REMOVE 2.0
35              
36             lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com
37              
38             lang pt_BR report O programa detetor de Spam ZOE [...]
39              
40             =head1 DESCRIPTION
41              
42             SpamAssassin is configured using traditional UNIX-style configuration files,
43             loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin>
44             directories.
45              
46             The following web page lists the most important configuration settings
47             used to configure SpamAssassin; novices are encouraged to read it first:
48              
49             http://wiki.apache.org/spamassassin/ImportantInitialConfigItems
50              
51             =head1 FILE FORMAT
52              
53             The C<#> character starts a comment, which continues until end of line.
54             B<NOTE:> if the C<#> character is to be used as part of a rule or
55             configuration option, it must be escaped with a backslash. i.e.: C<\#>
56              
57             Whitespace in the files is not significant, but please note that starting a
58             line with whitespace is deprecated, as we reserve its use for multi-line rule
59             definitions, at some point in the future.
60              
61             Currently, each rule or configuration setting must fit on one line; multi-line
62             settings are not supported yet.
63              
64             File and directory paths can use C<~> to refer to the user's home
65             directory, but no other shell-style path extensions such as globbing or
66             C<~user/> are supported.
67              
68             Where appropriate below, default values are listed in parentheses.
69              
70             Test names ("SYMBOLIC_TEST_NAME") can only contain alphanumerics/underscores,
71             cannot start with a digit, and must be less than 128 characters.
72              
73             =head1 USER PREFERENCES
74              
75             The following options can be used in both site-wide (C<local.cf>) and
76             user-specific (C<user_prefs>) configuration files to customize how
77             SpamAssassin handles incoming email messages.
78              
79             =cut
80              
81             package Mail::SpamAssassin::Conf;
82              
83 40     40   259 use strict;
  40         67  
  40         1131  
84 40     40   199 use warnings;
  40         113  
  40         1149  
85             # use bytes;
86 40     40   206 use re 'taint';
  40         68  
  40         1266  
87              
88 40     40   12470 use Mail::SpamAssassin::NetSet;
  40         140  
  40         1645  
89 40     40   251 use Mail::SpamAssassin::Constants qw(:sa :ip);
  40         85  
  40         7656  
90 40     40   15346 use Mail::SpamAssassin::Conf::Parser;
  40         130  
  40         1583  
91 40     40   304 use Mail::SpamAssassin::Logger;
  40         89  
  40         2486  
92 40     40   298 use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
  40         83  
  40         1755  
93 40     40   298 use File::Spec;
  40         78  
  40         604882  
94              
95             our @ISA = qw();
96              
97             our $COLLECT_REGRESSION_TESTS; # Used only for unit tests.
98              
99             # odd => eval test. Not constants so they can be shared with Parser
100             # TODO: move to Constants.pm?
101             our $TYPE_HEAD_TESTS = 0x0008;
102             our $TYPE_HEAD_EVALS = 0x0009;
103             our $TYPE_BODY_TESTS = 0x000a;
104             our $TYPE_BODY_EVALS = 0x000b;
105             our $TYPE_FULL_TESTS = 0x000c;
106             our $TYPE_FULL_EVALS = 0x000d;
107             our $TYPE_RAWBODY_TESTS = 0x000e;
108             our $TYPE_RAWBODY_EVALS = 0x000f;
109             our $TYPE_URI_TESTS = 0x0010;
110             our $TYPE_URI_EVALS = 0x0011;
111             our $TYPE_META_TESTS = 0x0012;
112             our $TYPE_RBL_EVALS = 0x0013;
113             our $TYPE_EMPTY_TESTS = 0x0014;
114              
115             my @rule_types = ("body_tests", "uri_tests", "uri_evals",
116             "head_tests", "head_evals", "body_evals", "full_tests",
117             "full_evals", "rawbody_tests", "rawbody_evals",
118             "rbl_evals", "meta_tests");
119              
120             #Removed $VERSION per BUG 6422
121             #$VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later
122              
123             # these are variables instead of constants so that other classes can
124             # access them; if they're constants, they'd have to go in Constants.pm
125             # TODO: move to Constants.pm?
126             our $CONF_TYPE_STRING = 1;
127             our $CONF_TYPE_BOOL = 2;
128             our $CONF_TYPE_NUMERIC = 3;
129             our $CONF_TYPE_HASH_KEY_VALUE = 4;
130             our $CONF_TYPE_ADDRLIST = 5;
131             our $CONF_TYPE_TEMPLATE = 6;
132             our $CONF_TYPE_NOARGS = 7;
133             our $CONF_TYPE_STRINGLIST = 8;
134             our $CONF_TYPE_IPADDRLIST = 9;
135             our $CONF_TYPE_DURATION = 10;
136             our $MISSING_REQUIRED_VALUE = '-99999999999999'; # string expected by parser
137             our $INVALID_VALUE = '-99999999999998';
138             our $INVALID_HEADER_FIELD_NAME = '-99999999999997';
139              
140             # set to "1" by the test suite code, to record regression tests
141             # $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;
142              
143             # search for "sub new {" to find the start of the code
144             ###########################################################################
145              
146             sub set_default_commands {
147 91     91 0 325 my($self) = @_;
148              
149             # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
150             # push each config item like this, to avoid a POD bug; it can't just accept
151             # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
152 91         244 my @cmds;
153              
154             =head2 SCORING OPTIONS
155              
156             =over 4
157              
158             =item required_score n.nn (default: 5)
159              
160             Set the score required before a mail is considered spam. C<n.nn> can
161             be an integer or a real number. 5.0 is the default setting, and is
162             quite aggressive; it would be suitable for a single-user setup, but if
163             you're an ISP installing SpamAssassin, you should probably set the
164             default to be more conservative, like 8.0 or 10.0. It is not
165             recommended to automatically delete or discard messages marked as
166             spam, as your users B<will> complain, but if you choose to do so, only
167             delete messages with an exceptionally high score such as 15.0 or
168             higher. This option was previously known as C<required_hits> and that
169             name is still accepted, but is deprecated.
170              
171             =cut
172              
173 91         1669 push (@cmds, {
174             setting => 'required_score',
175             aliases => ['required_hits'], # backward compatible
176             default => 5,
177             type => $CONF_TYPE_NUMERIC,
178             });
179              
180             =item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]
181              
182             Assign scores (the number of points for a hit) to a given test.
183             Scores can be positive or negative real numbers or integers.
184             C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
185             that test; for example, 'FROM_ENDS_IN_NUMS'.
186              
187             If only one valid score is listed, then that score is always used
188             for a test.
189              
190             If four valid scores are listed, then the score that is used depends
191             on how SpamAssassin is being used. The first score is used when
192             both Bayes and network tests are disabled (score set 0). The second
193             score is used when Bayes is disabled, but network tests are enabled
194             (score set 1). The third score is used when Bayes is enabled and
195             network tests are disabled (score set 2). The fourth score is used
196             when Bayes is enabled and network tests are enabled (score set 3).
197              
198             Setting a rule's score to 0 will disable that rule from running.
199              
200             If any of the score values are surrounded by parentheses '()', then
201             all of the scores in the line are considered to be relative to the
202             already set score. i.e.: '(3)' means increase the score for this
203             rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase
204             the score for this rule by 3 in score sets 0 and 2 only.
205              
206             If no score is given for a test by the end of the configuration,
207             a default score is assigned: a score of 1.0 is used for all tests,
208             except those whose names begin with 'T_' (this is used to indicate a
209             rule in testing) which receive 0.01.
210              
211             Note that test names which begin with '__' are indirect rules used
212             to compose meta-match rules and can also act as prerequisites to
213             other rules. They are not scored or listed in the 'tests hit'
214             reports, but assigning a score of 0 to an indirect rule will disable
215             it from running.
216              
217             =cut
218              
219             push (@cmds, {
220             setting => 'score',
221             is_frequent => 1,
222             code => sub {
223 1225     1225   4304 my ($self, $key, $value, $line) = @_;
224 1225         7925 my($rule, @scores) = split(/\s+/, $value);
225 1225 50 33     11575 unless (defined $value && $value !~ /^$/ &&
      66        
      66        
226             (scalar @scores == 1 || scalar @scores == 4)) {
227 0         0 info("config: score: requires a symbolic rule name and 1 or 4 scores");
228 0         0 return $MISSING_REQUIRED_VALUE;
229             }
230              
231             # Figure out if we're doing relative scores, remove the parens if we are
232 1225         2523 my $relative = 0;
233 1225         2821 foreach (@scores) {
234 1783         3792 local ($1);
235 1783 100       4178 if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
236 9         15 $relative = 1;
237             }
238 1783 50       9239 unless (/^-?\d+(?:\.\d+)?$/) {
239 0         0 info("config: score: the non-numeric score ($_) is not valid, " .
240             "a numeric score is required");
241 0         0 return $INVALID_VALUE;
242             }
243             }
244              
245 1225 50 66     3177 if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
246 0         0 info("config: score: relative score without previous setting in " .
247             "configuration");
248 0         0 return $INVALID_VALUE;
249             }
250              
251             # If we're only passed 1 score, copy it to the other scoresets
252 1225 50       3243 if (@scores) {
253 1225 100       2701 if (@scores != 4) {
254 1039         4240 @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
255             }
256              
257             # Set the actual scoreset values appropriately
258 1225         2828 for my $index (0..3) {
259             my $score = $relative ?
260 4900 100       10913 $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
261             $scores[$index];
262              
263 4900         24148 $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
264             }
265             }
266             }
267 91         2296 });
268              
269             =back
270              
271             =head2 WHITELIST AND BLACKLIST OPTIONS
272              
273             =over 4
274              
275             =item whitelist_from user@example.com
276              
277             Used to whitelist sender addresses which send mail that is often tagged
278             (incorrectly) as spam.
279              
280             Use of this setting is not recommended, since it blindly trusts the message,
281             which is routinely and easily forged by spammers and phish senders. The
282             recommended solution is to instead use C<whitelist_auth> or other authenticated
283             whitelisting methods, or C<whitelist_from_rcvd>.
284              
285             Whitelist and blacklist addresses are now file-glob-style patterns, so
286             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
287             Specifically, C<*> and C<?> are allowed, but all other metacharacters
288             are not. Regular expressions are not used for security reasons.
289             Matching is case-insensitive.
290              
291             Multiple addresses per line, separated by spaces, is OK. Multiple
292             C<whitelist_from> lines are also OK.
293              
294             The headers checked for whitelist addresses are as follows: if C<Resent-From>
295             is set, use that; otherwise check all addresses taken from the following
296             set of headers:
297              
298             Envelope-Sender
299             Resent-Sender
300             X-Envelope-From
301             From
302              
303             In addition, the "envelope sender" data, taken from the SMTP envelope data
304             where this is available, is looked up. See C<envelope_sender_header>.
305              
306             e.g.
307              
308             whitelist_from joe@example.com fred@example.com
309             whitelist_from *@example.com
310              
311             =cut
312              
313 91         705 push (@cmds, {
314             setting => 'whitelist_from',
315             type => $CONF_TYPE_ADDRLIST,
316             });
317              
318             =item unwhitelist_from user@example.com
319              
320             Used to override a default whitelist_from entry, so for example a distribution
321             whitelist_from can be overridden in a local.cf file, or an individual user can
322             override a whitelist_from entry in their own C<user_prefs> file.
323             The specified email address has to match exactly (although case-insensitively)
324             the address previously used in a whitelist_from line, which implies that a
325             wildcard only matches literally the same wildcard (not 'any' address).
326              
327             e.g.
328              
329             unwhitelist_from joe@example.com fred@example.com
330             unwhitelist_from *@example.com
331              
332             =cut
333              
334 91         1061 push (@cmds, {
335             command => 'unwhitelist_from',
336             setting => 'whitelist_from',
337             type => $CONF_TYPE_ADDRLIST,
338             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
339             });
340              
341             =item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
342              
343             Works similarly to whitelist_from, except that in addition to matching
344             a sender address, a relay's rDNS name or its IP address must match too
345             for the whitelisting rule to fire. The first parameter is a sender's e-mail
346             address to whitelist, and the second is a string to match the relay's rDNS,
347             or its IP address. Matching is case-insensitive.
348              
349             This second parameter is matched against a TCP-info information field as
350             provided in a FROM clause of a trace information (i.e. in a Received header
351             field, see RFC 5321). Only the Received header fields inserted by trusted
352             hosts are considered. This parameter can either be a full hostname, or a
353             domain component of that hostname, or an IP address (optionally followed
354             by a slash and a prefix length) in square brackets. The address prefix
355             (mask) length with a slash may stand within brackets along with an address,
356             or may follow the bracketed address. Reverse DNS lookup is done by an MTA,
357             not by SpamAssassin.
358              
359             For backward compatibility as an alternative to a CIDR notation, an IPv4
360             address in brackets may be truncated on classful boundaries to cover whole
361             subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>.
362              
363             In other words, if the host that connected to your MX had an IP address
364             192.0.2.123 that mapped to 'sendinghost.example.org', you should specify
365             C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or
366             C<[192.0.2.0/24]>, or C<[192.0.2]> here.
367              
368             Note that this requires that C<internal_networks> be correct. For simple
369             cases, it will be, but for a complex network you may get better results
370             by setting that parameter.
371              
372             It also requires that your mail exchangers be configured to perform DNS
373             reverse lookups on the connecting host's IP address, and to record the
374             result in the generated Received header field according to RFC 5321.
375              
376             e.g.
377              
378             whitelist_from_rcvd joe@example.com example.com
379             whitelist_from_rcvd *@* mail.example.org
380             whitelist_from_rcvd *@axkit.org [192.0.2.123]
381             whitelist_from_rcvd *@axkit.org [192.0.2.0/24]
382             whitelist_from_rcvd *@axkit.org [192.0.2.0]/24
383             whitelist_from_rcvd *@axkit.org [2001:db8:1234::/48]
384             whitelist_from_rcvd *@axkit.org [2001:db8:1234::]/48
385              
386             =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
387              
388             Same as C<whitelist_from_rcvd>, but used for the default whitelist entries
389             in the SpamAssassin distribution. The whitelist score is lower, because
390             these are often targets for spammer spoofing.
391              
392             =cut
393              
394             push (@cmds, {
395             setting => 'whitelist_from_rcvd',
396             type => $CONF_TYPE_ADDRLIST,
397             code => sub {
398 0     0   0 my ($self, $key, $value, $line) = @_;
399 0 0 0     0 unless (defined $value && $value !~ /^$/) {
400 0         0 return $MISSING_REQUIRED_VALUE;
401             }
402 0 0       0 unless ($value =~ /^\S+\s+\S+$/) {
403 0         0 return $INVALID_VALUE;
404             }
405 0         0 $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd',
406             split(/\s+/, $value));
407             }
408 91         1468 });
409              
410             push (@cmds, {
411             setting => 'def_whitelist_from_rcvd',
412             type => $CONF_TYPE_ADDRLIST,
413             code => sub {
414 0     0   0 my ($self, $key, $value, $line) = @_;
415 0 0 0     0 unless (defined $value && $value !~ /^$/) {
416 0         0 return $MISSING_REQUIRED_VALUE;
417             }
418 0 0       0 unless ($value =~ /^\S+\s+\S+$/) {
419 0         0 return $INVALID_VALUE;
420             }
421 0         0 $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd',
422             split(/\s+/, $value));
423             }
424 91         1301 });
425              
426             =item whitelist_allows_relays user@example.com
427              
428             Specify addresses which are in C<whitelist_from_rcvd> that sometimes
429             send through a mail relay other than the listed ones. By default mail
430             with a From address that is in C<whitelist_from_rcvd> that does not match
431             the relay will trigger a forgery rule. Including the address in
432             C<whitelist_allows_relays> prevents that.
433              
434             Whitelist and blacklist addresses are now file-glob-style patterns, so
435             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
436             Specifically, C<*> and C<?> are allowed, but all other metacharacters
437             are not. Regular expressions are not used for security reasons.
438             Matching is case-insensitive.
439              
440             Multiple addresses per line, separated by spaces, is OK. Multiple
441             C<whitelist_allows_relays> lines are also OK.
442              
443             The specified email address does not have to match exactly the address
444             previously used in a whitelist_from_rcvd line as it is compared to the
445             address in the header.
446              
447             e.g.
448              
449             whitelist_allows_relays joe@example.com fred@example.com
450             whitelist_allows_relays *@example.com
451              
452             =cut
453              
454 91         701 push (@cmds, {
455             setting => 'whitelist_allows_relays',
456             type => $CONF_TYPE_ADDRLIST,
457             });
458              
459             =item unwhitelist_from_rcvd user@example.com
460              
461             Used to override a default whitelist_from_rcvd entry, so for example a
462             distribution whitelist_from_rcvd can be overridden in a local.cf file,
463             or an individual user can override a whitelist_from_rcvd entry in
464             their own C<user_prefs> file.
465              
466             The specified email address has to match exactly the address previously
467             used in a whitelist_from_rcvd line.
468              
469             e.g.
470              
471             unwhitelist_from_rcvd joe@example.com fred@example.com
472             unwhitelist_from_rcvd *@axkit.org
473              
474             =cut
475              
476             push (@cmds, {
477             setting => 'unwhitelist_from_rcvd',
478             type => $CONF_TYPE_ADDRLIST,
479             code => sub {
480 0     0   0 my ($self, $key, $value, $line) = @_;
481 0 0 0     0 unless (defined $value && $value !~ /^$/) {
482 0         0 return $MISSING_REQUIRED_VALUE;
483             }
484 0 0       0 unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
485 0         0 return $INVALID_VALUE;
486             }
487 0         0 $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd',
488             split (/\s+/, $value));
489 0         0 $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd',
490             split (/\s+/, $value));
491             }
492 91         1353 });
493              
494             =item blacklist_from user@example.com
495              
496             Used to specify addresses which send mail that is often tagged (incorrectly) as
497             non-spam, but which the user doesn't want. Same format as C<whitelist_from>.
498              
499             =cut
500              
501 91         685 push (@cmds, {
502             setting => 'blacklist_from',
503             type => $CONF_TYPE_ADDRLIST,
504             });
505              
506             =item unblacklist_from user@example.com
507              
508             Used to override a default blacklist_from entry, so for example a
509             distribution blacklist_from can be overridden in a local.cf file, or
510             an individual user can override a blacklist_from entry in their own
511             C<user_prefs> file. The specified email address has to match exactly
512             the address previously used in a blacklist_from line.
513              
514              
515             e.g.
516              
517             unblacklist_from joe@example.com fred@example.com
518             unblacklist_from *@spammer.com
519              
520             =cut
521              
522              
523 91         527 push (@cmds, {
524             command => 'unblacklist_from',
525             setting => 'blacklist_from',
526             type => $CONF_TYPE_ADDRLIST,
527             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
528             });
529              
530              
531             =item whitelist_to user@example.com
532              
533             If the given address appears as a recipient in the message headers
534             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
535             be whitelisted. Useful if you're deploying SpamAssassin system-wide,
536             and don't want some users to have their mail filtered. Same format
537             as C<whitelist_from>.
538              
539             There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to>
540             and C<all_spam_to>. Users in the first level may still get some spammish
541             mails blocked, but users in C<all_spam_to> should never get mail blocked.
542              
543             The headers checked for whitelist addresses are as follows: if C<Resent-To> or
544             C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
545             following set of headers:
546              
547             To
548             Cc
549             Apparently-To
550             Delivered-To
551             Envelope-Recipients
552             Apparently-Resent-To
553             X-Envelope-To
554             Envelope-To
555             X-Delivered-To
556             X-Original-To
557             X-Rcpt-To
558             X-Real-To
559              
560             =item more_spam_to user@example.com
561              
562             See above.
563              
564             =item all_spam_to user@example.com
565              
566             See above.
567              
568             =cut
569              
570 91         4603 push (@cmds, {
571             setting => 'whitelist_to',
572             type => $CONF_TYPE_ADDRLIST,
573             });
574 91         542 push (@cmds, {
575             setting => 'more_spam_to',
576             type => $CONF_TYPE_ADDRLIST,
577             });
578 91         563 push (@cmds, {
579             setting => 'all_spam_to',
580             type => $CONF_TYPE_ADDRLIST,
581             });
582              
583             =item blacklist_to user@example.com
584              
585             If the given address appears as a recipient in the message headers
586             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
587             be blacklisted. Same format as C<blacklist_from>.
588              
589             =cut
590              
591 91         2637 push (@cmds, {
592             setting => 'blacklist_to',
593             type => $CONF_TYPE_ADDRLIST,
594             });
595              
596             =item whitelist_auth user@example.com
597              
598             Used to specify addresses which send mail that is often tagged (incorrectly) as
599             spam. This is different from C<whitelist_from> and C<whitelist_from_rcvd> in
600             that it first verifies that the message was sent by an authorized sender for
601             the address, before whitelisting.
602              
603             Authorization is performed using one of the installed sender-authorization
604             schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using
605             C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active,
606             and working, for this to operate.
607              
608             Using C<whitelist_auth> is roughly equivalent to specifying duplicate
609             C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines
610             for each of the addresses specified.
611              
612             e.g.
613              
614             whitelist_auth joe@example.com fred@example.com
615             whitelist_auth *@example.com
616              
617             =item def_whitelist_auth user@example.com
618              
619             Same as C<whitelist_auth>, but used for the default whitelist entries
620             in the SpamAssassin distribution. The whitelist score is lower, because
621             these are often targets for spammer spoofing.
622              
623             =cut
624              
625 91         692 push (@cmds, {
626             setting => 'whitelist_auth',
627             type => $CONF_TYPE_ADDRLIST,
628             });
629              
630 91         643 push (@cmds, {
631             setting => 'def_whitelist_auth',
632             type => $CONF_TYPE_ADDRLIST,
633             });
634              
635             =item unwhitelist_auth user@example.com
636              
637             Used to override a C<whitelist_auth> entry. The specified email address has to
638             match exactly the address previously used in a C<whitelist_auth> line.
639              
640             e.g.
641              
642             unwhitelist_auth joe@example.com fred@example.com
643             unwhitelist_auth *@example.com
644              
645             =cut
646              
647 91         795 push (@cmds, {
648             command => 'unwhitelist_auth',
649             setting => 'whitelist_auth',
650             type => $CONF_TYPE_ADDRLIST,
651             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
652             });
653              
654              
655             =item enlist_uri_host (listname) host ...
656              
657             Adds one or more host names or domain names to a named list of URI domains.
658             The named list can then be consulted through a check_uri_host_listed()
659             eval rule implemented by the WLBLEval plugin, which takes the list name as
660             an argument. Parentheses around a list name are literal - a required syntax.
661              
662             Host names may optionally be prefixed by an exclamation mark '!', which
663             produces false as a result if this entry matches. This makes it easier
664             to exclude some subdomains when their superdomain is listed, for example:
665              
666             enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com
667              
668             No wildcards are supported, but subdomains do match implicitly. Lists
669             are independent. Search for each named list starts by looking up the
670             full hostname first, then leading fields are progressively stripped off
671             (e.g.: sub.example.com, example.com, com) until a match is found or we run
672             out of fields. The first matching entry (the most specific) determines if a
673             lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.
674              
675             If a URL found in a message contains an IP address in place of a host name,
676             the given list must specify the exact same IP address (instead of a host name)
677             in order to match.
678              
679             Use the delist_uri_host directive to neutralize previous enlist_uri_host
680             settings.
681              
682             Enlisting to the lists named 'BLACK' and 'WHITE' has shorthand directives,
683             blacklist_uri_host and whitelist_uri_host, and corresponding default rules,
684             but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.
685              
686             =cut
687              
688             push (@cmds, {
689             command => 'enlist_uri_host',
690             setting => 'uri_host_lists',
691             type => $CONF_TYPE_ADDRLIST,
692             code => sub {
693 0     0   0 my($conf, $key, $value, $line) = @_;
694 0         0 local($1,$2);
695 0 0       0 if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
696 0         0 return $MISSING_REQUIRED_VALUE;
697             }
698 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
699             # note: must not factor out dereferencing, as otherwise
700             # subhashes would spring up in a copy and be lost
701 0         0 foreach my $host ( split(/\s+/, lc $2) ) {
702 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
703 0         0 $conf->{uri_host_lists}{$listname}{$host} = $v;
704             }
705             }
706 91         1314 });
707              
708             =item delist_uri_host [ (listname) ] host ...
709              
710             Removes one or more specified host names from a named list of URI domains.
711             Removing an unlisted name is ignored (is not an error). Listname is optional,
712             if specified then just the named list is affected, otherwise hosts are
713             removed from all URI host lists created so far. Parentheses around a list
714             name are a required syntax.
715              
716             Note that directives in configuration files are processed in sequence,
717             the delist_uri_host only applies to previously listed entries and has
718             no effect on enlisted entries in yet-to-be-processed directives.
719              
720             For convenience (similarity to the enlist_uri_host directive) hostnames
721             may be prefixed by an exclamation mark, which is stripped off from each
722             name and has no meaning here.
723              
724             =cut
725              
726             push (@cmds, {
727             command => 'delist_uri_host',
728             setting => 'uri_host_lists',
729             type => $CONF_TYPE_ADDRLIST,
730             code => sub {
731 0     0   0 my($conf, $key, $value, $line) = @_;
732 0         0 local($1,$2);
733 0 0       0 if ($value !~ /^ (?: \( (.+?) \) \s+ )? (.+) \z/sx) {
734 0         0 return $MISSING_REQUIRED_VALUE;
735             }
736 0 0       0 my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
  0         0  
737 0         0 my @args = split(/\s+/, lc $2);
738 0         0 foreach my $listname (@listnames) {
739 0         0 foreach my $host (@args) {
740 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
741 0         0 delete $conf->{uri_host_lists}{$listname}{$host};
742             }
743             }
744             }
745 91         1459 });
746              
747             =item enlist_addrlist (listname) user@example.com
748              
749             Adds one or more addresses to a named list of addresses.
750             The named list can then be consulted through a check_from_in_list() or a
751             check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
752             the list name as an argument. Parentheses around a list name are literal - a
753             required syntax.
754              
755             Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
756             C<*@isp.com>, or C<*.domain.net> will all work.
757             Specifically, C<*> and C<?> are allowed, but all other metacharacters
758             are not. Regular expressions are not used for security reasons.
759             Matching is case-insensitive.
760              
761             Multiple addresses per line, separated by spaces, are OK. Multiple
762             C<enlist_addrlist> lines are also OK.
763              
764             Enlisting an address to the list named blacklist_to is synonymous to using the
765             directive blacklist_to
766              
767             Enlisting an address to the list named blacklist_from is synonymous to using the
768             directive blacklist_from
769              
770             Enlisting an address to the list named whitelist_to is synonymous to using the
771             directive whitelist_to
772              
773             Enlisting an address to the list named whitelist_from is synonymous to using the
774             directive whitelist_from
775              
776             e.g.
777              
778             enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
779             enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk
780              
781             =cut
782              
783             push (@cmds, {
784             setting => 'enlist_addrlist',
785             type => $CONF_TYPE_ADDRLIST,
786             code => sub {
787 0     0   0 my($conf, $key, $value, $line) = @_;
788 0         0 local($1,$2);
789 0 0       0 if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
790 0         0 return $MISSING_REQUIRED_VALUE;
791             }
792 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
793             # note: must not factor out dereferencing, as otherwise
794             # subhashes would spring up in a copy and be lost
795 0         0 $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $value));
796             }
797 91         1385 });
798              
799             =item blacklist_uri_host host-or-domain ...
800              
801             Is a shorthand for a directive: enlist_uri_host (BLACK) host ...
802              
803             Please see directives enlist_uri_host and delist_uri_host for details.
804              
805             =cut
806              
807             push (@cmds, {
808             command => 'blacklist_uri_host',
809             setting => 'uri_host_lists',
810             type => $CONF_TYPE_ADDRLIST,
811             code => sub {
812 0     0   0 my($conf, $key, $value, $line) = @_;
813 0         0 foreach my $host ( split(/\s+/, lc $value) ) {
814 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
815 0         0 $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
816             }
817             }
818 91         1354 });
819              
820             =item whitelist_uri_host host-or-domain ...
821              
822             Is a shorthand for a directive: enlist_uri_host (WHITE) host ...
823              
824             Please see directives enlist_uri_host and delist_uri_host for details.
825              
826             =cut
827              
828             push (@cmds, {
829             command => 'whitelist_uri_host',
830             setting => 'uri_host_lists',
831             type => $CONF_TYPE_ADDRLIST,
832             code => sub {
833 0     0   0 my($conf, $key, $value, $line) = @_;
834 0         0 foreach my $host ( split(/\s+/, lc $value) ) {
835 0 0       0 my $v = $host =~ s/^!// ? 0 : 1;
836 0         0 $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
837             }
838             }
839 91         1215 });
840              
841             =back
842              
843             =head2 BASIC MESSAGE TAGGING OPTIONS
844              
845             =over 4
846              
847             =item rewrite_header { subject | from | to } STRING
848              
849             By default, suspected spam messages will not have the C<Subject>,
850             C<From> or C<To> lines tagged to indicate spam. By setting this option,
851             the header will be tagged with C<STRING> to indicate that a message is
852             spam. For the From or To headers, this will take the form of an RFC 2822
853             comment following the address in parentheses. For the Subject header,
854             this will be prepended to the original subject. Note that you should
855             only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
856             if C<report_safe> is 0. Otherwise, you may not be able to remove
857             the SpamAssassin markup via the normal methods. More information
858             about tags is explained below in the B<TEMPLATE TAGS> section.
859              
860             Parentheses are not permitted in STRING if rewriting the From or To headers.
861             (They will be converted to square brackets.)
862              
863             If C<rewrite_header subject> is used, but the message being rewritten
864             does not already contain a C<Subject> header, one will be created.
865              
866             A null value for C<STRING> will remove any existing rewrite for the specified
867             header.
868              
869             =cut
870              
871             push (@cmds, {
872             setting => 'rewrite_header',
873             type => $CONF_TYPE_HASH_KEY_VALUE,
874             code => sub {
875 0     0   0 my ($self, $key, $value, $line) = @_;
876 0         0 my($hdr, $string) = split(/\s+/, $value, 2);
877 0         0 $hdr = ucfirst(lc($hdr));
878              
879 0 0       0 if ($hdr =~ /^$/) {
    0          
880 0         0 return $MISSING_REQUIRED_VALUE;
881             }
882             # We only deal with From, Subject, and To ...
883             elsif ($hdr =~ /^(?:From|Subject|To)$/) {
884 0 0 0     0 unless (defined $string && $string =~ /\S/) {
885 0         0 delete $self->{rewrite_header}->{$hdr};
886 0         0 return;
887             }
888              
889 0 0       0 if ($hdr ne 'Subject') {
890 0         0 $string =~ tr/()/[]/;
891             }
892 0         0 $self->{rewrite_header}->{$hdr} = $string;
893 0         0 return;
894             }
895             else {
896             # if we get here, note the issue, then we'll fail through for an error.
897 0         0 info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
898 0         0 return $INVALID_VALUE;
899             }
900             }
901 91         1307 });
902              
903             =item subjprefix
904              
905             Add a prefix to the email's Subject header if a rule is matched.
906             To enable this option, the "rewrite_header Subject" config
907             option must be enabled as well.
908              
909             The check C<if can(Mail::SpamAssassin::Conf::feature_subjprefix)>
910             should be used to silence warnings in previous
911             SpamAssassin versions.
912              
913             To be able to use this feature a C<add_header all Subjprefix _SUBJPREFIX_>
914             configuration line could be needed on some setups.
915              
916             =cut
917              
918 91         733 push (@cmds, {
919             command => 'subjprefix',
920             setting => 'subjprefix',
921             is_frequent => 1,
922             type => $CONF_TYPE_HASH_KEY_VALUE,
923             });
924              
925             =item add_header { spam | ham | all } header_name string
926              
927             Customized headers can be added to the specified type of messages (spam,
928             ham, or "all" to add to either). All headers begin with C<X-Spam->
929             (so a C<header_name> Foo will generate a header called X-Spam-Foo).
930             header_name is restricted to the character set [A-Za-z0-9_-].
931              
932             The order of C<add_header> configuration options is preserved, inserted
933             headers will follow this order of declarations. When combining C<add_header>
934             with C<clear_headers> and C<remove_header>, keep in mind that C<add_header>
935             appends a new header to the current list, after first removing any existing
936             header fields of the same name. Note also that C<add_header>, C<clear_headers>
937             and C<remove_header> may appear in multiple .cf files, which are interpreted
938             in alphabetic order.
939              
940             C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section.
941             You can also use C<\n> and C<\t> in the header to add newlines and tabulators
942             as desired. A backslash has to be written as \\, any other escaped chars will
943             be silently removed.
944              
945             All headers will be folded if fold_headers is set to C<1>. Note: Manually
946             adding newlines via C<\n> disables any further automatic wrapping (ie:
947             long header lines are possible). The lines will still be properly folded
948             (marked as continuing) though.
949              
950             You can customize existing headers with B<add_header> (only the specified
951             subset of messages will be changed).
952              
953             See also C<clear_headers> and C<remove_header> for removing headers.
954              
955             Here are some examples (these are the defaults, note that Checker-Version can
956             not be changed or removed):
957              
958             add_header spam Flag _YESNOCAPS_
959             add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
960             add_header all Level _STARS(*)_
961             add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
962              
963             =cut
964              
965             push (@cmds, {
966             setting => 'add_header',
967             code => sub {
968 308     308   1186 my ($self, $key, $value, $line) = @_;
969 308         1215 local ($1,$2,$3);
970 308 50       3217 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
971 0         0 return $INVALID_VALUE;
972             }
973              
974 308         1448 my ($type, $name, $hline) = ($1, $2, $3);
975 308 100       1339 if ($hline =~ /^"(.*)"$/) {
976 61         253 $hline = $1;
977             }
978 308         1542 my @line = split(
979             /\\\\/, # split at double backslashes,
980             $hline."\n" # newline needed to make trailing backslashes work
981             );
982 308         840 foreach (@line) {
983 308         585 s/\\t/\t/g; # expand tabs
984 308         537 s/\\n/\n/g; # expand newlines
985 308         826 s/\\.//g; # purge all other escapes
986             };
987 308         1173 $hline = join("\\", @line);
988 308         851 chop($hline); # remove dummy newline again
989 308 100 66     2265 if (($type eq "ham") || ($type eq "all")) {
990             $self->{headers_ham} =
991 247         533 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_ham}} ];
  468         2001  
  247         686  
992 247         500 push(@{$self->{headers_ham}}, [$name, $hline]);
  247         1121  
993             }
994 308 50 66     1921 if (($type eq "spam") || ($type eq "all")) {
995             $self->{headers_spam} =
996 308         582 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_spam}} ];
  715         2342  
  308         675  
997 308         642 push(@{$self->{headers_spam}}, [$name, $hline]);
  308         3054  
998             }
999             }
1000 91         1234 });
1001              
1002             =item remove_header { spam | ham | all } header_name
1003              
1004             Headers can be removed from the specified type of messages (spam, ham,
1005             or "all" to remove from either). All headers begin with C<X-Spam->
1006             (so C<header_name> will be appended to C<X-Spam->).
1007              
1008             See also C<clear_headers> for removing all the headers at once.
1009              
1010             Note that B<X-Spam-Checker-Version> is not removable because the version
1011             information is needed by mail administrators and developers to debug
1012             problems. Without at least one header, it might not even be possible to
1013             determine that SpamAssassin is running.
1014              
1015             =cut
1016              
1017             push (@cmds, {
1018             setting => 'remove_header',
1019             code => sub {
1020 0     0   0 my ($self, $key, $value, $line) = @_;
1021 0         0 local ($1,$2);
1022 0 0       0 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
1023 0         0 return $INVALID_VALUE;
1024             }
1025              
1026 0         0 my ($type, $name) = ($1, $2);
1027 0 0       0 return if ( $name eq "Checker-Version" );
1028              
1029 0         0 $name = lc($name);
1030 0 0 0     0 if (($type eq "ham") || ($type eq "all")) {
1031             $self->{headers_ham} =
1032 0         0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
  0         0  
  0         0  
1033             }
1034 0 0 0     0 if (($type eq "spam") || ($type eq "all")) {
1035             $self->{headers_spam} =
1036 0         0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
  0         0  
  0         0  
1037             }
1038             }
1039 91         1182 });
1040              
1041             =item clear_headers
1042              
1043             Clear the list of headers to be added to messages. You may use this
1044             before any B<add_header> options to prevent the default headers from being
1045             added to the message.
1046              
1047             C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
1048             .cf files, which are interpreted in alphabetic order, so C<clear_headers>
1049             in a later file will remove all added headers from previously interpreted
1050             configuration files, which may or may not be desired.
1051              
1052             Note that B<X-Spam-Checker-Version> is not removable because the version
1053             information is needed by mail administrators and developers to debug
1054             problems. Without at least one header, it might not even be possible to
1055             determine that SpamAssassin is running.
1056              
1057             =cut
1058              
1059             push (@cmds, {
1060             setting => 'clear_headers',
1061             type => $CONF_TYPE_NOARGS,
1062             code => sub {
1063 61     61   350 my ($self, $key, $value, $line) = @_;
1064 61 50 33     585 unless (!defined $value || $value eq '') {
1065 0         0 return $INVALID_VALUE;
1066             }
1067 61         430 my @h = grep { lc($_->[0]) eq "checker-version" }
1068 61         151 @{$self->{headers_ham}};
  61         293  
1069 61 50       506 $self->{headers_ham} = !@h ? [] : [ $h[0] ];
1070 61 50       480 $self->{headers_spam} = !@h ? [] : [ $h[0] ];
1071             }
1072 91         1270 });
1073              
1074             =item report_safe ( 0 | 1 | 2 ) (default: 1)
1075              
1076             If this option is set to 1 and an incoming message is tagged as spam,
1077             instead of modifying the original message, SpamAssassin will create a
1078             new report message and attach the original message as a message/rfc822
1079             MIME part (ensuring the original message is completely preserved, not
1080             easily opened, and easier to recover).
1081              
1082             If this option is set to 2, then original messages will be attached with
1083             a content type of text/plain instead of message/rfc822. This setting
1084             may be required for safety reasons on certain broken mail clients that
1085             automatically load attachments without any action by the user. This
1086             setting may also make it somewhat more difficult to extract or view the
1087             original message.
1088              
1089             If this option is set to 0, incoming spam is only modified by adding
1090             some C<X-Spam-> headers and no changes will be made to the body. In
1091             addition, a header named B<X-Spam-Report> will be added to spam. You
1092             can use the B<remove_header> option to remove that header after setting
1093             B<report_safe> to 0.
1094              
1095             See B<report_safe_copy_headers> if you want to copy headers from
1096             the original mail into tagged messages.
1097              
1098             =cut
1099              
1100             push (@cmds, {
1101             setting => 'report_safe',
1102             default => 1,
1103             type => $CONF_TYPE_NUMERIC,
1104             code => sub {
1105 61     61   354 my ($self, $key, $value, $line) = @_;
1106 61 50       553 if ($value eq '') {
    50          
1107 0         0 return $MISSING_REQUIRED_VALUE;
1108             }
1109             elsif ($value !~ /^[012]$/) {
1110 0         0 return $INVALID_VALUE;
1111             }
1112              
1113 61         305 $self->{report_safe} = $value+0;
1114 61 50 33     573 if (! $self->{report_safe} &&
1115             ! (grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}}) ) {
1116 0         0 push(@{$self->{headers_spam}}, ["Report", "_REPORT_"]);
  0         0  
1117             }
1118             }
1119 91         1367 });
1120              
1121             =item report_wrap_width (default: 70)
1122              
1123             This option sets the wrap width for description lines in the X-Spam-Report
1124             header, not accounting for tab width.
1125              
1126             =cut
1127              
1128 91         604 push (@cmds, {
1129             setting => 'report_wrap_width',
1130             default => '70',
1131             type => $CONF_TYPE_NUMERIC,
1132             });
1133              
1134             =back
1135              
1136             =head2 LANGUAGE OPTIONS
1137              
1138             =over 4
1139              
1140             =item ok_locales xx [ yy zz ... ] (default: all)
1141              
1142             This option is used to specify which locales are considered OK for
1143             incoming mail. Mail using the B<character sets> that are allowed by
1144             this option will not be marked as possibly being spam in a foreign
1145             language.
1146              
1147             If you receive lots of spam in foreign languages, and never get any non-spam in
1148             these languages, this may help. Note that all ISO-8859-* character sets, and
1149             Windows code page character sets, are always permitted by default.
1150              
1151             Set this to C<all> to allow all character sets. This is the default.
1152              
1153             The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
1154             C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.
1155              
1156             Examples:
1157              
1158             ok_locales all (allow all locales)
1159             ok_locales en (only allow English)
1160             ok_locales en ja zh (allow English, Japanese, and Chinese)
1161              
1162             Note: if there are multiple ok_locales lines, only the last one is used.
1163              
1164             Select the locales to allow from the list below:
1165              
1166             =over 4
1167              
1168             =item en - Western character sets in general
1169              
1170             =item ja - Japanese character sets
1171              
1172             =item ko - Korean character sets
1173              
1174             =item ru - Cyrillic character sets
1175              
1176             =item th - Thai character sets
1177              
1178             =item zh - Chinese (both simplified and traditional) character sets
1179              
1180             =back
1181              
1182             =cut
1183              
1184 91         565 push (@cmds, {
1185             setting => 'ok_locales',
1186             default => 'all',
1187             type => $CONF_TYPE_STRING,
1188             });
1189              
1190             =item normalize_charset ( 0 | 1) (default: 0)
1191              
1192             Whether to decode non-UTF-8 and non-ASCII textual parts and recode them
1193             to UTF-8 before the text is given over to rules processing. The character
1194             set used for attempted decoding is primarily based on a declared character
1195             set in a Content-Type header, but if the decoding attempt fails a module
1196             Encode::Detect::Detector is consulted (if available) to provide a guess
1197             based on the actual text, and decoding is re-attempted. Even if the option
1198             is enabled no unnecessary decoding and re-encoding work is done when
1199             possible (like with an all-ASCII text with a US-ASCII or extended ASCII
1200             character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).
1201              
1202             Unicode support in old versions of perl or in a core module Encode is likely
1203             to be buggy in places, so if the normalize_charset function is enabled
1204             it is advised to stick to more recent versions of perl (preferably 5.12
1205             or later). The module Encode::Detect::Detector is optional, when necessary
1206             it will be used if it is available.
1207              
1208             =cut
1209              
1210             push (@cmds, {
1211             setting => 'normalize_charset',
1212             default => 0,
1213             type => $CONF_TYPE_BOOL,
1214             code => sub {
1215 0     0   0 my ($self, $key, $value, $line) = @_;
1216 0 0 0     0 unless (defined $value && $value !~ /^$/) {
1217 0         0 return $MISSING_REQUIRED_VALUE;
1218             }
1219 0 0 0     0 if (lc $value eq 'yes' || $value eq '1') { $value = 1 }
  0 0 0     0  
1220 0         0 elsif (lc $value eq 'no' || $value eq '0') { $value = 0 }
1221 0         0 else { return $INVALID_VALUE }
1222              
1223 0         0 $self->{normalize_charset} = $value;
1224              
1225 0 0       0 unless ($] > 5.008004) {
1226 0         0 $self->{parser}->lint_warn("config: normalize_charset requires Perl 5.8.5 or later");
1227 0         0 $self->{normalize_charset} = 0;
1228 0         0 return $INVALID_VALUE;
1229             }
1230 0         0 require HTML::Parser;
1231             #changed to eval to use VERSION so that this version was not incorrectly parsed for CPAN
1232 0 0       0 unless ( eval { HTML::Parser->VERSION(3.46) } ) {
  0         0  
1233 0         0 $self->{parser}->lint_warn("config: normalize_charset requires HTML::Parser 3.46 or later");
1234 0         0 $self->{normalize_charset} = 0;
1235 0         0 return $INVALID_VALUE;
1236             }
1237 0 0       0 unless (eval 'require Encode') {
1238 0         0 $self->{parser}->lint_warn("config: normalize_charset requires Encode");
1239 0         0 $self->{normalize_charset} = 0;
1240 0         0 return $INVALID_VALUE;
1241             }
1242             }
1243 91         1153 });
1244              
1245              
1246             =item body_part_scan_size (default: 50000)
1247              
1248             Per mime-part scan size limit in bytes for "body" type rules.
1249             The decoded/stripped mime-part is truncated approximately to this size.
1250             Helps scanning large messages safely, so it's not necessary to
1251             skip them completely. Disabled with 0.
1252              
1253             =cut
1254              
1255 91         684 push (@cmds, {
1256             setting => 'body_part_scan_size',
1257             default => 50000,
1258             type => $CONF_TYPE_NUMERIC,
1259             });
1260              
1261              
1262             =item rawbody_part_scan_size (default: 500000)
1263              
1264             Like body_part_scan_size, for "rawbody" type rules.
1265              
1266             =cut
1267              
1268 91         631 push (@cmds, {
1269             setting => 'rawbody_part_scan_size',
1270             default => 500000,
1271             type => $CONF_TYPE_NUMERIC,
1272             });
1273              
1274              
1275             =back
1276              
1277             =head2 NETWORK TEST OPTIONS
1278              
1279             =over 4
1280              
1281             =item trusted_networks IPaddress[/masklen] ... (default: none)
1282              
1283             What networks or hosts are 'trusted' in your setup. B<Trusted> in this case
1284             means that relay hosts on these networks are considered to not be potentially
1285             operated by spammers, open relays, or open proxies. A trusted host could
1286             conceivably relay spam, but will not originate it, and will not forge header
1287             data. DNS blacklist checks will never query for hosts on these networks.
1288              
1289             See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.
1290              
1291             MXes for your domain(s) and internal relays should B<also> be specified using
1292             the C<internal_networks> setting. When there are 'trusted' hosts that
1293             are not MXes or internal relays for your domain(s) they should B<only> be
1294             specified in C<trusted_networks>.
1295              
1296             The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
1297             address optionally enclosed in square brackets. Scoped link-local IPv6
1298             addresses are syntactically recognized but the interface scope is currently
1299             ignored (e.g. [fe80::1234%eth0] ) and should be avoided.
1300              
1301             If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
1302             specified in bits. If it is not specified, but less than 4 octets of an IPv4
1303             address are specified with a trailing dot, an implied netmask length covers
1304             all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
1305             If masklen is not specified, and there is no trailing dot, then just a single
1306             IP address specified is used, as if the masklen were C</32> with an IPv4
1307             address, or C</128> in case of an IPv6 address.
1308              
1309             If a network or host address is prefaced by a C<!> the matching network or
1310             host will be excluded from the list even if a less specific (shorter netmask
1311             length) subnet is later specified in the list. This allows a subset of
1312             a wider network to be exempt. In case of specifying overlapping subnets,
1313             specify more specific subnets first (tighter matching, i.e. with a longer
1314             netmask length), followed by less specific (shorter netmask length) subnets
1315             to get predictable results regardless of the search algorithm used - when
1316             Net::Patricia module is installed the search finds the tightest matching
1317             entry in the list, while a sequential search as used in absence of the
1318             module Net::Patricia will find the first matching entry in the list.
1319              
1320             Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
1321             of your config.
1322              
1323             Examples:
1324              
1325             trusted_networks 192.168.0.0/16 # all in 192.168.*.*
1326             trusted_networks 192.168. # all in 192.168.*.*
1327             trusted_networks 212.17.35.15 # just that host
1328             trusted_networks !10.0.1.5 10.0.1/24 # all in 10.0.1.* but not 10.0.1.5
1329             trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
1330             # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64
1331              
1332             This operates additively, so a C<trusted_networks> line after another one
1333             will append new entries to the list of trusted networks. To clear out the
1334             existing entries, use C<clear_trusted_networks>.
1335              
1336             If C<trusted_networks> is not set and C<internal_networks> is, the value
1337             of C<internal_networks> will be used for this parameter.
1338              
1339             If neither C<trusted_networks> nor C<internal_networks> is set, a basic
1340             inference algorithm is applied. This works as follows:
1341              
1342             =over 4
1343              
1344             =item *
1345              
1346             If the 'from' host has an IP address in a private (RFC 1918) network range,
1347             then it's trusted
1348              
1349             =item *
1350              
1351             If there are authentication tokens in the received header, and
1352             the previous host was trusted, then this host is also trusted
1353              
1354             =item *
1355              
1356             Otherwise this host, and all further hosts, are considered untrusted.
1357              
1358             =back
1359              
1360             =cut
1361              
1362 91         637 push (@cmds, {
1363             setting => 'trusted_networks',
1364             type => $CONF_TYPE_IPADDRLIST,
1365             });
1366              
1367             =item clear_trusted_networks
1368              
1369             Empty the list of trusted networks.
1370              
1371             =cut
1372              
1373             push (@cmds, {
1374             setting => 'clear_trusted_networks',
1375             type => $CONF_TYPE_NOARGS,
1376             code => sub {
1377 32     32   186 my ($self, $key, $value, $line) = @_;
1378 32 50 33     208 unless (!defined $value || $value eq '') {
1379 0         0 return $INVALID_VALUE;
1380             }
1381 32         201 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
1382 32         172 $self->{trusted_networks_configured} = 0;
1383             }
1384 91         1055 });
1385              
1386             =item internal_networks IPaddress[/masklen] ... (default: none)
1387              
1388             What networks or hosts are 'internal' in your setup. B<Internal> means
1389             that relay hosts on these networks are considered to be MXes for your
1390             domain(s), or internal relays. This uses the same syntax as
1391             C<trusted_networks>, above - see there for details.
1392              
1393             This value is used when checking 'dial-up' or dynamic IP address
1394             blocklists, in order to detect direct-to-MX spamming.
1395              
1396             Trusted relays that accept mail directly from dial-up connections
1397             (i.e. are also performing a role of mail submission agents - MSA)
1398             should not be listed in C<internal_networks>. List them only in
1399             C<trusted_networks>.
1400              
1401             If C<trusted_networks> is set and C<internal_networks> is not, the value
1402             of C<trusted_networks> will be used for this parameter.
1403              
1404             If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
1405             will be considered local; in other words, any relays past the machine where
1406             SpamAssassin is running will be considered external.
1407              
1408             Every entry in C<internal_networks> must appear in C<trusted_networks>; in
1409             other words, C<internal_networks> is always a subset of the trusted set.
1410              
1411             Note: 127/8 and ::1 are always included in internal_networks, regardless of
1412             your config.
1413              
1414             =cut
1415              
1416 91         505 push (@cmds, {
1417             setting => 'internal_networks',
1418             type => $CONF_TYPE_IPADDRLIST,
1419             });
1420              
1421             =item clear_internal_networks
1422              
1423             Empty the list of internal networks.
1424              
1425             =cut
1426              
1427             push (@cmds, {
1428             setting => 'clear_internal_networks',
1429             type => $CONF_TYPE_NOARGS,
1430             code => sub {
1431 32     32   186 my ($self, $key, $value, $line) = @_;
1432 32 50 33     252 unless (!defined $value || $value eq '') {
1433 0         0 return $INVALID_VALUE;
1434             }
1435 32         141 $self->{internal_networks} = $self->new_netset('internal_networks',1);
1436 32         193 $self->{internal_networks_configured} = 0;
1437             }
1438 91         996 });
1439              
1440             =item msa_networks IPaddress[/masklen] ... (default: none)
1441              
1442             The networks or hosts which are acting as MSAs in your setup (but not also
1443             as MX relays). This uses the same syntax as C<trusted_networks>, above - see
1444             there for details.
1445              
1446             B<MSA> means that the relay hosts on these networks accept mail from your
1447             own users and authenticate them appropriately. These relays will never
1448             accept mail from hosts that aren't authenticated in some way. Examples of
1449             authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.
1450              
1451             All relays found in the message headers after the MSA relay will take
1452             on the same trusted and internal classifications as the MSA relay itself,
1453             as defined by your I<trusted_networks> and I<internal_networks> configuration.
1454              
1455             For example, if the MSA relay is trusted and internal so will all of the
1456             relays that precede it.
1457              
1458             When using msa_networks to identify an MSA it is recommended that you treat
1459             that MSA as both trusted and internal. When an MSA is not included in
1460             msa_networks you should treat the MSA as trusted but not internal, however
1461             if the MSA is also acting as an MX or intermediate relay you must always
1462             treat it as both trusted and internal and ensure that the MSA includes
1463             visible auth tokens in its Received header to identify submission clients.
1464              
1465             B<Warning:> Never include an MSA that also acts as an MX (or is also an
1466             intermediate relay for an MX) or otherwise accepts mail from
1467             non-authenticated users in msa_networks. Doing so will result in unknown
1468             external relays being trusted.
1469              
1470             =cut
1471              
1472 91         500 push (@cmds, {
1473             setting => 'msa_networks',
1474             type => $CONF_TYPE_IPADDRLIST,
1475             });
1476              
1477             =item clear_msa_networks
1478              
1479             Empty the list of msa networks.
1480              
1481             =cut
1482              
1483             push (@cmds, {
1484             setting => 'clear_msa_networks',
1485             type => $CONF_TYPE_NOARGS,
1486             code => sub {
1487 32     32   194 my ($self, $key, $value, $line) = @_;
1488 32 50 33     220 unless (!defined $value || $value eq '') {
1489 0         0 return $INVALID_VALUE;
1490             }
1491             $self->{msa_networks} =
1492 32         116 $self->new_netset('msa_networks',0); # no loopback IP
1493 32         169 $self->{msa_networks_configured} = 0;
1494             }
1495 91         969 });
1496              
1497             =item originating_ip_headers header ... (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)
1498              
1499             A list of header field names from which an originating IP address can
1500             be obtained. For example, webmail servers may record a client IP address
1501             in X-Originating-IP.
1502              
1503             These IP addresses are virtually appended into the Received: chain, so they
1504             are used in RBL checks where appropriate.
1505              
1506             Currently the IP addresses are not added into X-Spam-Relays-* header fields,
1507             but they may be in the future.
1508              
1509             =cut
1510              
1511             push (@cmds, {
1512             setting => 'originating_ip_headers',
1513             default => [],
1514             type => $CONF_TYPE_STRINGLIST,
1515             code => sub {
1516 122     122   536 my ($self, $key, $value, $line) = @_;
1517 122 50 33     978 unless (defined $value && $value !~ /^$/) {
1518 0         0 return $MISSING_REQUIRED_VALUE;
1519             }
1520 122         666 foreach my $hfname (split(/\s+/, $value)) {
1521             # avoid duplicates, consider header field names case-insensitive
1522 244         1348 push(@{$self->{originating_ip_headers}}, $hfname)
1523 244 50       389 if !grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
  244         1233  
1524             }
1525             }
1526 91         1209 });
1527              
1528             =item clear_originating_ip_headers
1529              
1530             Empty the list of 'originating IP address' header field names.
1531              
1532             =cut
1533              
1534             push (@cmds, {
1535             setting => 'clear_originating_ip_headers',
1536             type => $CONF_TYPE_NOARGS,
1537             code => sub {
1538 61     61   325 my ($self, $key, $value, $line) = @_;
1539 61 50 33     553 unless (!defined $value || $value eq '') {
1540 0         0 return $INVALID_VALUE;
1541             }
1542 61         328 $self->{originating_ip_headers} = [];
1543             }
1544 91         1256 });
1545              
1546             =item always_trust_envelope_sender ( 0 | 1 ) (default: 0)
1547              
1548             Trust the envelope sender even if the message has been passed through one or
1549             more trusted relays. See also C<envelope_sender_header>.
1550              
1551             =cut
1552              
1553 91         543 push (@cmds, {
1554             setting => 'always_trust_envelope_sender',
1555             default => 0,
1556             type => $CONF_TYPE_BOOL,
1557             });
1558              
1559             =item skip_rbl_checks ( 0 | 1 ) (default: 0)
1560              
1561             Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
1562             which implements Real-time Block List (or: Blackhole List) (RBL) lookups.
1563              
1564             By default, SpamAssassin will run RBL checks. Individual blocklists may
1565             be disabled selectively by setting a score of a corresponding rule to 0.
1566              
1567             See also a related configuration parameter skip_uribl_checks,
1568             which controls the URIDNSBL plugin (documented in the URIDNSBL man page).
1569              
1570             =cut
1571              
1572 91         411 push (@cmds, {
1573             setting => 'skip_rbl_checks',
1574             default => 0,
1575             type => $CONF_TYPE_BOOL,
1576             });
1577              
1578             =item dns_available { yes | no | test[: domain1 domain2...] } (default: yes)
1579              
1580             Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
1581             indicates DNS resolving is available, a value I<no> indicates DNS resolving
1582             is not available - both of these values apply unconditionally and skip initial
1583             DNS tests, which can be slow or unreliable.
1584              
1585             When the option value is a I<test> (with or without arguments), SpamAssassin
1586             will query some domain names on the internet during initialization, attempting
1587             to determine if DNS resolving is working or not. A space-separated list
1588             of domain names may be specified explicitly, or left to a built-in default
1589             of a dozen or so domain names. From an explicit or a default list a subset
1590             of three domain names is picked randomly for checking. The test queries for
1591             NS records of these domains: if at least one query returns a success then
1592             SpamAssassin considers DNS resolving as available, otherwise not.
1593              
1594             The problem is that the test can introduce some startup delay if a network
1595             connection is down, and in some cases it can wrongly guess that DNS is
1596             unavailable because a test connection failed, which in turn disables several
1597             DNS-dependent tests.
1598              
1599             Please note, the DNS test queries for NS records, so specify domain names,
1600             not host names.
1601              
1602             Since version 3.4.0 of SpamAssassin a default setting for option
1603             I<dns_available> is I<yes>. A default in older versions was I<test>.
1604              
1605             =cut
1606              
1607             push (@cmds, {
1608             setting => 'dns_available',
1609             default => 'yes',
1610             type => $CONF_TYPE_STRING,
1611             code => sub {
1612 13     13   42 my ($self, $key, $value, $line) = @_;
1613 13 50       90 if ($value =~ /^test(?::\s*\S.*)?$/) {
    100          
    50          
1614 0         0 $self->{dns_available} = $value;
1615             }
1616             elsif ($value =~ /^(?:yes|1)$/) {
1617 1         7 $self->{dns_available} = 'yes';
1618             }
1619             elsif ($value =~ /^(?:no|0)$/) {
1620 12         42 $self->{dns_available} = 'no';
1621             }
1622             else {
1623 0         0 return $INVALID_VALUE;
1624             }
1625             }
1626 91         936 });
1627              
1628             =item dns_server ip-addr-port (default: entries provided by Net::DNS)
1629              
1630             Specifies an IP address of a DNS server, and optionally its port number.
1631             The I<dns_server> directive may be specified multiple times, each entry
1632             adding to a list of available resolving name servers. The I<ip-addr-port>
1633             argument can either be an IPv4 or IPv6 address, optionally enclosed in
1634             brackets, and optionally followed by a colon and a port number. In absence
1635             of a port number a standard port number 53 is assumed. When an IPv6 address
1636             is specified along with a port number, the address B<must> be enclosed in
1637             brackets to avoid parsing ambiguity regarding a colon separator. A scoped
1638             link-local IP address is allowed (assuming underlying modules allow it).
1639              
1640             Examples :
1641             dns_server 127.0.0.1
1642             dns_server 127.0.0.1:53
1643             dns_server [127.0.0.1]:53
1644             dns_server [::1]:53
1645             dns_server fe80::1%lo0
1646             dns_server [fe80::1%lo0]:53
1647              
1648             In absence of I<dns_server> directives, the list of name servers is provided
1649             by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
1650             but this may be platform dependent. Please consult the Net::DNS::Resolver
1651             documentation for details.
1652              
1653             =cut
1654              
1655             push (@cmds, {
1656             setting => 'dns_server',
1657             type => $CONF_TYPE_STRING,
1658             code => sub {
1659 1     1   5 my ($self, $key, $value, $line) = @_;
1660 1         3 my($address,$port); local($1,$2,$3);
  1         4  
1661 1 50       14 if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
    0          
1662 1 50       6 $address = defined $1 ? $1 : $2; $port = $3;
  1         4  
1663             } elsif ($value =~ /^(?: \[ ([^\]]*) \] |
1664             ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
1665 0 0       0 $address = defined $1 ? $1 : $2; $port = '53';
  0         0  
1666             } else {
1667 0         0 return $INVALID_VALUE;
1668             }
1669 1         4 my $scope = ''; # scoped IP address?
1670 1 50       24 $scope = $1 if $address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi;
1671 1         5 my $IP_ADDRESS = IP_ADDRESS; # IP_ADDRESS regexp does not handle scope
1672 1 50 33     32 if ($address =~ /$IP_ADDRESS/ && $port >= 1 && $port <= 65535) {
      33        
1673 1 50       7 $self->{dns_servers} = [] if !$self->{dns_servers};
1674             # checked, untainted, stored in a normalized form
1675 1         4 push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
  1         12  
1676             } else {
1677 0         0 return $INVALID_VALUE;
1678             }
1679             }
1680 91         1247 });
1681              
1682             =item clear_dns_servers
1683              
1684             Empty the list of explicitly configured DNS servers through a I<dns_server>
1685             directive, falling back to Net::DNS -supplied defaults.
1686              
1687             =cut
1688              
1689             push (@cmds, {
1690             setting => 'clear_dns_servers',
1691             type => $CONF_TYPE_NOARGS,
1692             code => sub {
1693 1     1   6 my ($self, $key, $value, $line) = @_;
1694 1 50 33     10 unless (!defined $value || $value eq '') {
1695 0         0 return $INVALID_VALUE;
1696             }
1697 1         20 undef $self->{dns_servers};
1698             }
1699 91         1220 });
1700              
1701             =item dns_local_ports_permit ranges...
1702              
1703             Add the specified ports or port ranges to the set of allowed port numbers
1704             that can be used as local port numbers when sending DNS queries to a resolver.
1705              
1706             The argument is a whitespace-separated or a comma-separated list of
1707             single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
1708             representing a range. Allowed port numbers are between 1 and 65535.
1709              
1710             Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
1711             in order in which they appear in configuration files. Each directive adds
1712             (or subtracts) its subsets of ports to a current set of available ports.
1713             Whatever is left in the set by the end of configuration processing
1714             is made available to a DNS resolving client code.
1715              
1716             If the resulting set of port numbers is empty (see also the directive
1717             I<dns_local_ports_none>), then SpamAssassin does not apply its ports
1718             randomization logic, but instead leaves the operating system to choose
1719             a suitable free local port number.
1720              
1721             The initial set consists of all port numbers in the range 1024-65535.
1722             Note that system config files already modify the set and remove all the
1723             IANA registered port numbers and some other ranges, so there is rarely
1724             a need to adjust the ranges by site-specific directives.
1725              
1726             See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.
1727              
1728             =cut
1729              
1730             push (@cmds, {
1731             setting => 'dns_local_ports_permit',
1732             type => $CONF_TYPE_STRING,
1733             is_admin => 1,
1734             code => sub {
1735 0     0   0 my($self, $key, $value, $line) = @_;
1736 0         0 my(@port_ranges); local($1,$2);
  0         0  
1737 0         0 foreach my $range (split(/[ \t,]+/, $value)) {
1738 0 0       0 if ($range =~ /^(\d{1,5})\z/) {
    0          
1739             # don't allow adding a port number 0
1740 0 0 0     0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
  0         0  
1741 0         0 push(@port_ranges, [$1,$1]);
1742             } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1743 0 0 0     0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
  0         0  
1744 0 0 0     0 if ($2 < 1 || $2 > 65535) { return $INVALID_VALUE }
  0         0  
1745 0         0 push(@port_ranges, [$1,$2]);
1746             } else {
1747 0         0 return $INVALID_VALUE;
1748             }
1749             }
1750 0         0 foreach my $p (@port_ranges) {
1751 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1752             set_ports_range(\$self->{dns_available_ports_bitset},
1753 0         0 $p->[0], $p->[1], 1);
1754             }
1755             }
1756 91         1248 });
1757              
1758             =item dns_local_ports_avoid ranges...
1759              
1760             Remove specified ports or port ranges from the set of allowed port numbers
1761             that can be used as local port numbers when sending DNS queries to a resolver.
1762              
1763             Please see directive I<dns_local_ports_permit> for details.
1764              
1765             =cut
1766              
1767             push (@cmds, {
1768             setting => 'dns_local_ports_avoid',
1769             type => $CONF_TYPE_STRING,
1770             is_admin => 1,
1771             code => sub {
1772 0     0   0 my($self, $key, $value, $line) = @_;
1773 0         0 my(@port_ranges); local($1,$2);
  0         0  
1774 0         0 foreach my $range (split(/[ \t,]+/, $value)) {
1775 0 0       0 if ($range =~ /^(\d{1,5})\z/) {
    0          
1776 0 0       0 if ($1 > 65535) { return $INVALID_VALUE }
  0         0  
1777             # don't mind clearing also the port number 0
1778 0         0 push(@port_ranges, [$1,$1]);
1779             } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1780 0 0 0     0 if ($1 > 65535 || $2 > 65535) { return $INVALID_VALUE }
  0         0  
1781 0         0 push(@port_ranges, [$1,$2]);
1782             } else {
1783 0         0 return $INVALID_VALUE;
1784             }
1785             }
1786 0         0 foreach my $p (@port_ranges) {
1787 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1788             set_ports_range(\$self->{dns_available_ports_bitset},
1789 0         0 $p->[0], $p->[1], 0);
1790             }
1791             }
1792 91         1226 });
1793              
1794             =item dns_local_ports_none
1795              
1796             Is a fast shorthand for:
1797              
1798             dns_local_ports_avoid 1-65535
1799              
1800             leaving the set of available DNS query local port numbers empty. In all
1801             respects (apart from speed) it is equivalent to the shown directive, and can
1802             be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1803              
1804             If the resulting set of port numbers is empty, then SpamAssassin does not
1805             apply its ports randomization logic, but instead leaves the operating system
1806             to choose a suitable free local port number.
1807              
1808             See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1809              
1810             =cut
1811              
1812             push (@cmds, {
1813             setting => 'dns_local_ports_none',
1814             type => $CONF_TYPE_NOARGS,
1815             is_admin => 1,
1816             code => sub {
1817 0     0   0 my ($self, $key, $value, $line) = @_;
1818 0 0 0     0 unless (!defined $value || $value eq '') {
1819 0         0 return $INVALID_VALUE;
1820             }
1821 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1822 0         0 wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
1823             }
1824 91         1348 });
1825              
1826             =item dns_test_interval n (default: 600 seconds)
1827              
1828             If dns_available is set to I<test>, the dns_test_interval time in number
1829             of seconds will tell SpamAssassin how often to retest for working DNS.
1830             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
1831             indicating seconds (default), minutes, hours, days, weeks).
1832              
1833             =cut
1834              
1835 91         573 push (@cmds, {
1836             setting => 'dns_test_interval',
1837             default => 600,
1838             type => $CONF_TYPE_DURATION,
1839             });
1840              
1841             =item dns_options opts (default: norotate, nodns0x20, edns=4096)
1842              
1843             Provides a (whitespace- or comma-separated) list of options applying
1844             to DNS resolving. Available options are: I<rotate>, I<dns0x20> and
1845             I<edns> (or I<edns0>). Option name may be negated by prepending a I<no>
1846             (e.g. I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
1847             Option names are not case-sensitive. The I<dns_options> directive may
1848             appear in configuration files multiple times, the last setting prevails.
1849              
1850             Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
1851             acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
1852             ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
1853             a traditional implied UDP payload size is 512 bytes, which is also a minimum
1854             allowed value for this option. When the option is specified but a value
1855             is not provided, a conservative default of 1220 bytes is implied. It is
1856             recommended to keep I<edns> enabled when using a local recursive DNS server
1857             which supports EDNS0 (like most modern DNS servers do), a suitable setting
1858             in this case is I<edns=4096>, which is also a default. Allowing UDP payload
1859             size larger than 512 bytes can avoid truncation of resource records in large
1860             DNS responses (like in TXT records of some SPF and DKIM responses, or when
1861             an unreasonable number of A records is published by some domain). The option
1862             should be disabled when a recursive DNS server is only reachable through
1863             non-RFC 6891 compliant middleboxes (such as some old-fashioned firewalls)
1864             which ban DNS UDP payload sizes larger than 512 bytes. A suitable value
1865             when a non-local recursive DNS server is used and a middlebox B<does> allow
1866             EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
1867             DNS UDP packet to fit within a single IP packet in most cases (a slightly
1868             less conservative range would be 1280-1410 bytes).
1869              
1870             Option I<rotate> causes SpamAssassin to choose a DNS server at random
1871             from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
1872             seconds, effectively spreading the load over all currently available DNS
1873             servers when there are many spamd workers.
1874              
1875             Option I<dns0x20> enables randomization of letters in a DNS query label
1876             according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
1877             of responses (by chance or by a malicious intent) by increasing spread
1878             as provided by a 16-bit query ID and up to 16 bits of a port number,
1879             with additional bits as encoded by flipping case (upper/lower) of letters
1880             in a query. The number of additional random bits corresponds to the number
1881             of letters in a query label. Should work reliably with all mainstream
1882             DNS servers - do not turn on if you see frequent info messages
1883             "dns: no callback for id:" in the log, or if RBL or URIDNS lookups
1884             do not work for no apparent reason.
1885              
1886             =cut
1887              
1888             push (@cmds, {
1889             setting => 'dns_options',
1890             type => $CONF_TYPE_HASH_KEY_VALUE,
1891             code => sub {
1892 0     0   0 my ($self, $key, $value, $line) = @_;
1893 0         0 foreach my $option (split (/[\s,]+/, lc $value)) {
1894 0         0 local($1,$2);
1895 0 0       0 if ($option =~ /^no(rotate|dns0x20)\z/) {
    0          
    0          
    0          
1896 0         0 $self->{dns_options}->{$1} = 0;
1897             } elsif ($option =~ /^no(edns)0?\z/) {
1898 0         0 $self->{dns_options}->{$1} = 0;
1899             } elsif ($option =~ /^(rotate|dns0x20)\z/) {
1900 0         0 $self->{dns_options}->{$1} = 1;
1901             } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
1902             # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
1903             # size, defaults to some UDP packet size likely to fit into a single
1904             # IP packet which is more likely to pass firewalls which choke on IP
1905             # fragments. RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
1906             # basic header, yielding 1240. RFC 3226 prescribes a min of 1220 for
1907             # RFC 2535 compliant servers. RFC 6891: choosing between 1280 and
1908             # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
1909             #
1910 0   0     0 $self->{dns_options}->{$1} = $2 || 1220;
1911 0 0       0 return $INVALID_VALUE if $self->{dns_options}->{$1} < 512;
1912             } else {
1913 0         0 return $INVALID_VALUE;
1914             }
1915             }
1916             }
1917 91         1155 });
1918              
1919             =item dns_query_restriction (allow|deny) domain1 domain2 ...
1920              
1921             Option allows disabling of rules which would result in a DNS query to one of
1922             the listed domains. The first argument must be a literal C<allow> or C<deny>,
1923             remaining arguments are domain names.
1924              
1925             Most DNS queries (with some exceptions) are subject to dns_query_restriction.
1926             A domain to be queried is successively stripped of its leading labels
1927             (thus yielding a series of its parent domains), and on each iteration a
1928             check is made against an associative array generated by dns_query_restriction
1929             options. Search stops at the first match (i.e. the tightest match), and the
1930             matching entry with its C<allow> or C<deny> value then controls whether a
1931             DNS query is allowed to be launched.
1932              
1933             If no match is found an implicit default is to allow a query. The purpose of
1934             an explicit C<allow> entry is to be able to override a previously configured
1935             C<deny> on the same domain or to override an entry (possibly yet to be
1936             configured in subsequent config directives) on one of its parent domains.
1937             Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
1938             DNS queries on a specific DNS BL zone but deny queries to other zones under
1939             the same parent domain.
1940              
1941             Domains are matched case-insensitively, no wildcards are recognized,
1942             there should be no leading or trailing dot.
1943              
1944             Specifying a block on querying a domain name has a similar effect as setting
1945             a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
1946             alternative to hunting for such rules when a site policy does not allow
1947             certain DNS block lists to be queried.
1948              
1949             Example:
1950             dns_query_restriction deny dnswl.org surbl.org
1951             dns_query_restriction allow zen.spamhaus.org
1952             dns_query_restriction deny spamhaus.org mailspike.net spamcop.net
1953              
1954             =cut
1955              
1956             push (@cmds, {
1957             setting => 'dns_query_restriction',
1958             type => $CONF_TYPE_STRING,
1959             code => sub {
1960 0     0   0 my ($self, $key, $value, $line) = @_;
1961 0 0 0     0 defined $value && $value =~ s/^(allow|deny)\s+//i
1962             or return $INVALID_VALUE;
1963 0 0       0 my $blocked = lc($1) eq 'deny' ? 1 : 0;
1964 0         0 foreach my $domain (split(/\s+/, $value)) {
1965 0         0 $domain =~ s/^\.//; $domain =~ s/\.\z//; # strip dots
  0         0  
1966 0         0 $self->{dns_query_blocked}{lc $domain} = $blocked;
1967             }
1968             }
1969 91         1242 });
1970              
1971             =item clear_dns_query_restriction
1972              
1973             The option removes any entries entered by previous 'dns_query_restriction'
1974             options, leaving the list empty, i.e. allowing DNS queries for any domain
1975             (including any DNS BL zone).
1976              
1977             =cut
1978              
1979             push (@cmds, {
1980             setting => 'clear_dns_query_restriction',
1981             aliases => ['clear_dns_query_restrictions'],
1982             type => $CONF_TYPE_NOARGS,
1983             code => sub {
1984 0     0   0 my ($self, $key, $value, $line) = @_;
1985 0 0 0     0 return $INVALID_VALUE if defined $value && $value ne '';
1986 0         0 delete $self->{dns_query_blocked};
1987             }
1988 91         1207 });
1989              
1990             =back
1991              
1992             =head2 LEARNING OPTIONS
1993              
1994             =over 4
1995              
1996             =item use_learner ( 0 | 1 ) (default: 1)
1997              
1998             Whether to use any machine-learning classifiers with SpamAssassin, such as the
1999             default 'BAYES_*' rules. Setting this to 0 will disable use of any and all
2000             human-trained classifiers.
2001              
2002             =cut
2003              
2004 91         450 push (@cmds, {
2005             setting => 'use_learner',
2006             default => 1,
2007             type => $CONF_TYPE_BOOL,
2008             });
2009              
2010             =item use_bayes ( 0 | 1 ) (default: 1)
2011              
2012             Whether to use the naive-Bayesian-style classifier built into
2013             SpamAssassin. This is a master on/off switch for all Bayes-related
2014             operations.
2015              
2016             =cut
2017              
2018 91         564 push (@cmds, {
2019             setting => 'use_bayes',
2020             default => 1,
2021             type => $CONF_TYPE_BOOL,
2022             });
2023              
2024             =item use_bayes_rules ( 0 | 1 ) (default: 1)
2025              
2026             Whether to use rules using the naive-Bayesian-style classifier built
2027             into SpamAssassin. This allows you to disable the rules while leaving
2028             auto and manual learning enabled.
2029              
2030             =cut
2031              
2032 91         371 push (@cmds, {
2033             setting => 'use_bayes_rules',
2034             default => 1,
2035             type => $CONF_TYPE_BOOL,
2036             });
2037              
2038             =item bayes_auto_learn ( 0 | 1 ) (default: 1)
2039              
2040             Whether SpamAssassin should automatically feed high-scoring mails (or
2041             low-scoring mails, for non-spam) into its learning systems. The only
2042             learning system supported currently is a naive-Bayesian-style classifier.
2043              
2044             See the documentation for the
2045             C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
2046             for details on how Bayes auto-learning is implemented by default.
2047              
2048             =cut
2049              
2050 91         370 push (@cmds, {
2051             setting => 'bayes_auto_learn',
2052             default => 1,
2053             type => $CONF_TYPE_BOOL,
2054             });
2055              
2056             =item bayes_token_sources (default: header visible invisible uri)
2057              
2058             Controls which sources in a mail message can contribute tokens (e.g. words,
2059             phrases, etc.) to a Bayes classifier. The argument is a space-separated list
2060             of keywords (I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>), each
2061             of which may be prefixed by a I<no> to indicate its exclusion. Additionally
2062             two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
2063             of keywords is processed sequentially: a keyword I<all> adds all available
2064             keywords to a set being built, a I<none> or I<noall> clears the set, other
2065             non-negated keywords are added to the set, and negated keywords are removed
2066             from the set. Keywords are case-insensitive.
2067              
2068             The default set is: I<header> I<visible> I<invisible> I<uri>, which is
2069             equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart>
2070             is not currently in a default set is that it is a newer source (introduced
2071             with SpamAssassin version 3.4.1) and not much experience has yet been gathered
2072             regarding its usefulness.
2073              
2074             See also option C<bayes_ignore_header> for a fine-grained control on individual
2075             header fields under the umbrella of a more general keyword I<header> here.
2076              
2077             Keywords imply the following data sources:
2078              
2079             =over 4
2080              
2081             =item I<header> - tokens collected from a message header section
2082              
2083             =item I<visible> - words from visible text (plain or HTML) in a message body
2084              
2085             =item I<invisible> - hidden/invisible text in HTML parts of a message body
2086              
2087             =item I<uri> - URIs collected from a message body
2088              
2089             =item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type
2090              
2091             =item I<all> - adds all the above keywords to the set being assembled
2092              
2093             =item I<none> or I<noall> - removes all keywords from the set
2094              
2095             =back
2096              
2097             The C<bayes_token_sources> directive may appear multiple times, its keywords
2098             are interpreted sequentially, adding or removing items from the final set
2099             as they appear in their order in C<bayes_token_sources> directive(s).
2100              
2101             =cut
2102              
2103             push (@cmds, {
2104             setting => 'bayes_token_sources',
2105             default => { map(($_,1), qw(header visible invisible uri)) }, # mimepart
2106             type => $CONF_TYPE_HASH_KEY_VALUE,
2107             code => sub {
2108 0     0   0 my ($self, $key, $value, $line) = @_;
2109 0 0       0 return $MISSING_REQUIRED_VALUE if $value eq '';
2110 0   0     0 my $h = ($self->{bayes_token_sources} ||= {});
2111 0         0 my %all_kw = map(($_,1), qw(header visible invisible uri mimepart));
2112 0         0 foreach (split(/\s+/, lc $value)) {
2113 0 0 0     0 if (/^(none|noall)\z/) {
    0          
    0          
2114 0         0 %$h = ();
2115             } elsif ($_ eq 'all') {
2116 0         0 %$h = %all_kw;
2117             } elsif (/^(no)?(.+)\z/s && exists $all_kw{$2}) {
2118 0 0       0 $h->{$2} = defined $1 ? 0 : 1;
2119             } else {
2120 0         0 return $INVALID_VALUE;
2121             }
2122             }
2123             }
2124 91         2197 });
2125              
2126             =item bayes_ignore_header header_name
2127              
2128             If you receive mail filtered by upstream mail systems, like
2129             a spam-filtering ISP or mailing list, and that service adds
2130             new headers (as most of them do), these headers may provide
2131             inappropriate cues to the Bayesian classifier, allowing it
2132             to take a "short cut". To avoid this, list the headers using this
2133             setting. Example:
2134              
2135             bayes_ignore_header X-Upstream-Spamfilter
2136             bayes_ignore_header X-Upstream-SomethingElse
2137              
2138             =cut
2139              
2140             push (@cmds, {
2141             setting => 'bayes_ignore_header',
2142             default => [],
2143             type => $CONF_TYPE_STRINGLIST,
2144             code => sub {
2145 0     0   0 my ($self, $key, $value, $line) = @_;
2146 0 0       0 if ($value eq '') {
2147 0         0 return $MISSING_REQUIRED_VALUE;
2148             }
2149 0         0 push (@{$self->{bayes_ignore_headers}}, split(/\s+/, $value));
  0         0  
2150             }
2151 91         1487 });
2152              
2153             =item bayes_ignore_from user@example.com
2154              
2155             Bayesian classification and autolearning will not be performed on mail
2156             from the listed addresses. Program C<sa-learn> will also ignore the
2157             listed addresses if it is invoked using the C<--use-ignores> option.
2158             One or more addresses can be listed, see C<whitelist_from>.
2159              
2160             Spam messages from certain senders may contain many words that
2161             frequently occur in ham. For example, one might read messages from a
2162             preferred bookstore but also get unwanted spam messages from other
2163             bookstores. If the unwanted messages are learned as spam then any
2164             messages discussing books, including the preferred bookstore and
2165             antiquarian messages would be in danger of being marked as spam. The
2166             addresses of the annoying bookstores would be listed. (Assuming they
2167             were halfway legitimate and didn't send you mail through myriad
2168             affiliates.)
2169              
2170             Those who have pieces of spam in legitimate messages or otherwise
2171             receive ham messages containing potentially spammy words might fear
2172             that some spam messages might be in danger of being marked as ham.
2173             The addresses of the spam mailing lists, correspondents, etc. would
2174             be listed.
2175              
2176             =cut
2177              
2178 91         479 push (@cmds, {
2179             setting => 'bayes_ignore_from',
2180             type => $CONF_TYPE_ADDRLIST,
2181             });
2182              
2183             =item bayes_ignore_to user@example.com
2184              
2185             Bayesian classification and autolearning will not be performed on mail
2186             to the listed addresses. See C<bayes_ignore_from> for details.
2187              
2188             =cut
2189              
2190 91         406 push (@cmds, {
2191             setting => 'bayes_ignore_to',
2192             type => $CONF_TYPE_ADDRLIST,
2193             });
2194              
2195             =item bayes_min_ham_num (Default: 200)
2196              
2197             =item bayes_min_spam_num (Default: 200)
2198              
2199             To be accurate, the Bayes system does not activate until a certain number of
2200             ham (non-spam) and spam have been learned. The default is 200 each of ham and
2201             spam, but you can tune these up or down with these two settings.
2202              
2203             =cut
2204              
2205 91         465 push (@cmds, {
2206             setting => 'bayes_min_ham_num',
2207             default => 200,
2208             type => $CONF_TYPE_NUMERIC,
2209             });
2210 91         414 push (@cmds, {
2211             setting => 'bayes_min_spam_num',
2212             default => 200,
2213             type => $CONF_TYPE_NUMERIC,
2214             });
2215              
2216             =item bayes_learn_during_report (Default: 1)
2217              
2218             The Bayes system will, by default, learn any reported messages
2219             (C<spamassassin -r>) as spam. If you do not want this to happen, set
2220             this option to 0.
2221              
2222             =cut
2223              
2224 91         389 push (@cmds, {
2225             setting => 'bayes_learn_during_report',
2226             default => 1,
2227             type => $CONF_TYPE_BOOL,
2228             });
2229              
2230             =item bayes_sql_override_username
2231              
2232             Used by BayesStore::SQL storage implementation.
2233              
2234             If this option is set the BayesStore::SQL module will override the set
2235             username with the value given. This could be useful for implementing global or
2236             group bayes databases.
2237              
2238             =cut
2239              
2240 91         396 push (@cmds, {
2241             setting => 'bayes_sql_override_username',
2242             default => '',
2243             type => $CONF_TYPE_STRING,
2244             });
2245              
2246             =item bayes_use_hapaxes (default: 1)
2247              
2248             Should the Bayesian classifier use hapaxes (words/tokens that occur only
2249             once) when classifying? This produces significantly better hit-rates.
2250              
2251             =cut
2252              
2253 91         408 push (@cmds, {
2254             setting => 'bayes_use_hapaxes',
2255             default => 1,
2256             type => $CONF_TYPE_BOOL,
2257             });
2258              
2259             =item bayes_journal_max_size (default: 102400)
2260              
2261             SpamAssassin will opportunistically sync the journal and the database.
2262             It will do so once a day, but will sync more often if the journal file
2263             size goes above this setting, in bytes. If set to 0, opportunistic
2264             syncing will not occur.
2265              
2266             =cut
2267              
2268 91         340 push (@cmds, {
2269             setting => 'bayes_journal_max_size',
2270             default => 102400,
2271             type => $CONF_TYPE_NUMERIC,
2272             });
2273              
2274             =item bayes_expiry_max_db_size (default: 150000)
2275              
2276             What should be the maximum size of the Bayes tokens database? When expiry
2277             occurs, the Bayes system will keep either 75% of the maximum value, or
2278             100,000 tokens, whichever has a larger value. 150,000 tokens is roughly
2279             equivalent to an 8Mb database file.
2280              
2281             =cut
2282              
2283 91         560 push (@cmds, {
2284             setting => 'bayes_expiry_max_db_size',
2285             default => 150000,
2286             type => $CONF_TYPE_NUMERIC,
2287             });
2288              
2289             =item bayes_auto_expire (default: 1)
2290              
2291             If enabled, the Bayes system will try to automatically expire old tokens
2292             from the database. Auto-expiry occurs when the number of tokens in the
2293             database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
2294             backend does not implement individual key/value expirations, the setting
2295             is silently ignored.
2296              
2297             =cut
2298              
2299 91         432 push (@cmds, {
2300             setting => 'bayes_auto_expire',
2301             default => 1,
2302             type => $CONF_TYPE_BOOL,
2303             });
2304              
2305             =item bayes_token_ttl (default: 3w, i.e. 3 weeks)
2306              
2307             Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
2308             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2309             indicating seconds (default), minutes, hours, days, weeks).
2310              
2311             If bayes_auto_expire is true and a Bayes datastore backend supports it
2312             (currently only Redis), this setting controls deletion of expired tokens
2313             from a bayes database. The value is observed on a best-effort basis, exact
2314             timing promises are not necessarily kept. If a bayes datastore backend
2315             does not implement individual key/value expirations, the setting is silently
2316             ignored.
2317              
2318             =cut
2319              
2320 91         392 push (@cmds, {
2321             setting => 'bayes_token_ttl',
2322             default => 3*7*24*60*60, # seconds (3 weeks)
2323             type => $CONF_TYPE_DURATION,
2324             });
2325              
2326             =item bayes_seen_ttl (default: 8d, i.e. 8 days)
2327              
2328             Time-to-live / expiration time in seconds for 'seen' entries
2329             (i.e. mail message digests with their status) kept in a Bayes database.
2330             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2331             indicating seconds (default), minutes, hours, days, weeks).
2332              
2333             If bayes_auto_expire is true and a Bayes datastore backend supports it
2334             (currently only Redis), this setting controls deletion of expired 'seen'
2335             entries from a bayes database. The value is observed on a best-effort basis,
2336             exact timing promises are not necessarily kept. If a bayes datastore backend
2337             does not implement individual key/value expirations, the setting is silently
2338             ignored.
2339              
2340             =cut
2341              
2342 91         510 push (@cmds, {
2343             setting => 'bayes_seen_ttl',
2344             default => 8*24*60*60, # seconds (8 days)
2345             type => $CONF_TYPE_DURATION,
2346             });
2347              
2348             =item bayes_learn_to_journal (default: 0)
2349              
2350             If this option is set, whenever SpamAssassin does Bayes learning, it
2351             will put the information into the journal instead of directly into the
2352             database. This lowers contention for locking the database to execute
2353             an update, but will also cause more access to the journal and cause a
2354             delay before the updates are actually committed to the Bayes database.
2355              
2356             =cut
2357              
2358 91         416 push (@cmds, {
2359             setting => 'bayes_learn_to_journal',
2360             default => 0,
2361             type => $CONF_TYPE_BOOL,
2362             });
2363              
2364             =back
2365              
2366             =head2 MISCELLANEOUS OPTIONS
2367              
2368             =over 4
2369              
2370             =item time_limit n (default: 300)
2371              
2372             Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
2373             to spend before providing a result. The value may be fractional and must
2374             not be negative, zero is interpreted as unlimited. The default is 300
2375             seconds for consistency with the spamd default setting of --timeout-child .
2376              
2377             This is a best-effort advisory setting, processing will not be abruptly
2378             aborted at an arbitrary point in processing when the time limit is exceeded,
2379             but only on reaching one of the locations in the program flow equipped with a
2380             time test. Currently equipped with the test are the main checking loop,
2381             asynchronous DNS lookups, plugins which are calling external programs.
2382             Rule evaluation is guarded by starting a timer (alarm) on each set of
2383             compiled rules.
2384              
2385             When a message is passed to Mail::SpamAssassin::parse, a deadline time
2386             is established as a sum of current time and the C<time_limit> setting.
2387              
2388             This deadline may also be specified by a caller through an option
2389             'master_deadline' in $suppl_attrib on a call to parse(), possibly providing
2390             a more accurate deadline taking into account past and expected future
2391             processing of a message in a mail filtering setup. If both the config
2392             option as well as a 'master_deadline' option in a call are provided,
2393             the shorter time limit of the two is used (since version 3.3.2).
2394             Note that spamd (and possibly third-party callers of SpamAssassin) will
2395             supply the 'master_deadline' option in a call based on its --timeout-child
2396             option (or equivalent), unlike the command line C<spamassassin>, which has
2397             no such command line option.
2398              
2399             When a time limit is exceeded, most of the remaining tests will be skipped,
2400             as well as auto-learning. Whatever tests fired so far will determine the
2401             final score. The behaviour is similar to short-circuiting with attribute 'on',
2402             as implemented by a Shortcircuit plugin. A synthetic hit on a rule named
2403             TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that
2404             the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may
2405             be provided explicitly in a configuration file, for example to achieve
2406             whitelisting or blacklisting effect for messages with long processing times.
2407              
2408             The C<time_limit> option is a useful protection against excessive processing
2409             time on certain degenerate or unusually long or complex mail messages, as well
2410             as against some DoS attacks. It is also needed in time-critical pre-queue
2411             filtering setups (e.g. milter, proxy, integration with MTA), where message
2412             processing must finish before a SMTP client times out. RFC 5321 prescribes
2413             in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
2414             although it is not unusual to see some SMTP clients abort sooner on waiting
2415             for a response. A sensible C<time_limit> for a pre-queue filtering setup is
2416             maybe 50 seconds, assuming that clients are willing to wait at least a minute.
2417              
2418             =cut
2419              
2420 91         374 push (@cmds, {
2421             setting => 'time_limit',
2422             default => 300,
2423             type => $CONF_TYPE_DURATION,
2424             });
2425              
2426             =item lock_method type
2427              
2428             Select the file-locking method used to protect database files on-disk. By
2429             default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
2430             are sure that the database files you'll be using for Bayes and AWL storage will
2431             never be accessed over NFS, a non-NFS-safe locking system can be selected.
2432              
2433             This will be quite a bit faster, but may risk file corruption if the files are
2434             ever accessed by multiple clients at once, and one or more of them is accessing
2435             them through an NFS filesystem.
2436              
2437             Note that different platforms require different locking systems.
2438              
2439             The supported locking systems for C<type> are as follows:
2440              
2441             =over 4
2442              
2443             =item I<nfssafe> - an NFS-safe locking system
2444              
2445             =item I<flock> - simple UNIX C<flock()> locking
2446              
2447             =item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.
2448              
2449             =back
2450              
2451             nfssafe and flock are only available on UNIX, and win32 is only available
2452             on Windows. By default, SpamAssassin will choose either nfssafe or
2453             win32 depending on the platform in use.
2454              
2455             =cut
2456              
2457             push (@cmds, {
2458             setting => 'lock_method',
2459             default => '',
2460             type => $CONF_TYPE_STRING,
2461             code => sub {
2462 0     0   0 my ($self, $key, $value, $line) = @_;
2463 0 0       0 if ($value !~ /^(nfssafe|flock|win32)$/) {
2464 0         0 return $INVALID_VALUE;
2465             }
2466            
2467 0         0 $self->{lock_method} = $value;
2468             # recreate the locker
2469 0         0 $self->{main}->create_locker();
2470             }
2471 91         935 });
2472              
2473             =item fold_headers ( 0 | 1 ) (default: 1)
2474              
2475             By default, headers added by SpamAssassin will be whitespace folded.
2476             In other words, they will be broken up into multiple lines instead of
2477             one very long one and each continuation line will have a tabulator
2478             prepended to mark it as a continuation of the preceding one.
2479              
2480             The automatic wrapping can be disabled here. Note that this can generate very
2481             long lines. RFC 2822 requires that header lines not exceed 998 characters
2482             (not counting the final CRLF).
2483              
2484             =cut
2485              
2486 91         570 push (@cmds, {
2487             setting => 'fold_headers',
2488             default => 1,
2489             type => $CONF_TYPE_BOOL,
2490             });
2491              
2492             =item report_safe_copy_headers header_name ...
2493              
2494             If using C<report_safe>, a few of the headers from the original message
2495             are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
2496             If you want to have other headers copied as well, you can add them
2497             using this option. You can specify multiple headers on the same line,
2498             separated by spaces, or you can just use multiple lines.
2499              
2500             =cut
2501              
2502             push (@cmds, {
2503             setting => 'report_safe_copy_headers',
2504             default => [],
2505             type => $CONF_TYPE_STRINGLIST,
2506             code => sub {
2507 0     0   0 my ($self, $key, $value, $line) = @_;
2508 0 0       0 if ($value eq '') {
2509 0         0 return $MISSING_REQUIRED_VALUE;
2510             }
2511 0         0 push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
  0         0  
2512             }
2513 91         984 });
2514              
2515             =item envelope_sender_header Name-Of-Header
2516              
2517             SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
2518             phase of the SMTP transaction that delivered this message, if this data has
2519             been made available by the SMTP server. This is used in the C<EnvelopeFrom>
2520             pseudo-header, and for various rules such as SPF checking.
2521              
2522             By default, various MTAs will use different headers, such as the following:
2523              
2524             X-Envelope-From
2525             Envelope-Sender
2526             X-Sender
2527             Return-Path
2528              
2529             SpamAssassin will attempt to use these, if some heuristics (such as the header
2530             placement in the message, or the absence of fetchmail signatures) appear to
2531             indicate that they are safe to use. However, it may choose the wrong headers
2532             in some mailserver configurations. (More discussion of this can be found
2533             in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)
2534              
2535             To avoid this heuristic failure, the C<envelope_sender_header> setting may be
2536             helpful. Name the header that your MTA or MDA adds to messages containing the
2537             address used at the MAIL FROM step of the SMTP transaction.
2538              
2539             If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
2540             and end of the email address in the right-hand side, as in the SMTP
2541             transaction, these will be stripped.
2542              
2543             If the header is not found in a message, or if its value does not contain an
2544             C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
2545             default heuristics.
2546              
2547             (Note for MTA developers: we would prefer if the use of a single header be
2548             avoided in future, since that precludes 'downstream' spam scanning.
2549             C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
2550             better proposal, storing the envelope sender at each hop in the C<Received>
2551             header.)
2552              
2553             example:
2554              
2555             envelope_sender_header X-SA-Exim-Mail-From
2556              
2557             =cut
2558              
2559 91         410 push (@cmds, {
2560             setting => 'envelope_sender_header',
2561             default => undef,
2562             type => $CONF_TYPE_STRING,
2563             });
2564              
2565             =item describe SYMBOLIC_TEST_NAME description ...
2566              
2567             Used to describe a test. This text is shown to users in the detailed report.
2568              
2569             Note that test names which begin with '__' are reserved for meta-match
2570             sub-rules, and are not scored or listed in the 'tests hit' reports.
2571              
2572             Also note that by convention, rule descriptions should be limited in
2573             length to no more than 50 characters.
2574              
2575             =cut
2576              
2577 91         564 push (@cmds, {
2578             command => 'describe',
2579             setting => 'descriptions',
2580             is_frequent => 1,
2581             type => $CONF_TYPE_HASH_KEY_VALUE,
2582             });
2583              
2584             =item report_charset CHARSET (default: unset)
2585              
2586             Set the MIME Content-Type charset used for the text/plain report which
2587             is attached to spam mail messages.
2588              
2589             =cut
2590              
2591 91         554 push (@cmds, {
2592             setting => 'report_charset',
2593             default => '',
2594             type => $CONF_TYPE_STRING,
2595             });
2596              
2597             =item report ...some text for a report...
2598              
2599             Set the report template which is attached to spam mail messages. See the
2600             C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
2601             example.
2602              
2603             If you change this, try to keep it under 78 columns. Each C<report>
2604             line appends to the existing template, so use C<clear_report_template>
2605             to restart.
2606              
2607             Tags can be included as explained above.
2608              
2609             =cut
2610              
2611 91         588 push (@cmds, {
2612             command => 'report',
2613             setting => 'report_template',
2614             default => '',
2615             type => $CONF_TYPE_TEMPLATE,
2616             });
2617              
2618             =item clear_report_template
2619              
2620             Clear the report template.
2621              
2622             =cut
2623              
2624 91         622 push (@cmds, {
2625             command => 'clear_report_template',
2626             setting => 'report_template',
2627             type => $CONF_TYPE_NOARGS,
2628             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2629             });
2630              
2631             =item report_contact ...text of contact address...
2632              
2633             Set what _CONTACTADDRESS_ is replaced with in the above report text.
2634             By default, this is 'the administrator of that system', since the hostname
2635             of the system the scanner is running on is also included.
2636              
2637             =cut
2638              
2639 91         432 push (@cmds, {
2640             setting => 'report_contact',
2641             default => 'the administrator of that system',
2642             type => $CONF_TYPE_STRING,
2643             });
2644              
2645             =item report_hostname ...hostname to use...
2646              
2647             Set what _HOSTNAME_ is replaced with in the above report text.
2648             By default, this is determined dynamically as whatever the host running
2649             SpamAssassin calls itself.
2650              
2651             =cut
2652              
2653 91         441 push (@cmds, {
2654             setting => 'report_hostname',
2655             default => '',
2656             type => $CONF_TYPE_STRING,
2657             });
2658              
2659             =item unsafe_report ...some text for a report...
2660              
2661             Set the report template which is attached to spam mail messages which contain a
2662             non-text/plain part. See the C<10_default_prefs.cf> configuration file in
2663             C</usr/share/spamassassin> for an example.
2664              
2665             Each C<unsafe_report> line appends to the existing template, so use
2666             C<clear_unsafe_report_template> to restart.
2667              
2668             Tags can be used in this template (see above for details).
2669              
2670             =cut
2671              
2672 91         435 push (@cmds, {
2673             command => 'unsafe_report',
2674             setting => 'unsafe_report_template',
2675             default => '',
2676             type => $CONF_TYPE_TEMPLATE,
2677             });
2678              
2679             =item clear_unsafe_report_template
2680              
2681             Clear the unsafe_report template.
2682              
2683             =cut
2684              
2685 91         835 push (@cmds, {
2686             command => 'clear_unsafe_report_template',
2687             setting => 'unsafe_report_template',
2688             type => $CONF_TYPE_NOARGS,
2689             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2690             });
2691              
2692             =item mbox_format_from_regex
2693              
2694             Set a specific regular expression to be used for mbox file From separators.
2695              
2696             For example, this setting will allow sa-learn to process emails stored in
2697             a kmail 2 mbox:
2698              
2699             mbox_format_from_regex /^From \S+ ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/
2700              
2701              
2702             =cut
2703              
2704 91         422 push (@cmds, {
2705             setting => 'mbox_format_from_regex',
2706             type => $CONF_TYPE_STRING
2707             });
2708              
2709              
2710             =item parse_dkim_uris ( 0 | 1 ) (default: 1)
2711              
2712             If this option is set to 1 and the message contains DKIM headers, the headers will be parsed for URIs to process alongside URIs found in the body with some rules and modules (e.g. URIDNSBL).
2713              
2714             =cut
2715              
2716 91         381 push (@cmds, {
2717             setting => 'parse_dkim_uris',
2718             default => 1,
2719             type => $CONF_TYPE_BOOL,
2720             });
2721              
2722             =back
2723              
2724             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
2725              
2726             These settings differ from the ones above, in that they are considered
2727             'privileged'. Only users running C<spamassassin> from their procmailrc's or
2728             forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
2729             use them. C<spamd> users cannot use them in their C<user_prefs> files, for
2730             security and efficiency reasons, unless C<allow_user_rules> is enabled (and
2731             then, they may only add rules from below).
2732              
2733             =over 4
2734              
2735             =item allow_user_rules ( 0 | 1 ) (default: 0)
2736              
2737             This setting allows users to create rules (and only rules) in their
2738             C<user_prefs> files for use with C<spamd>. It defaults to off, because
2739             this could be a severe security hole. It may be possible for users to
2740             gain root level access if C<spamd> is run as root. It is NOT a good
2741             idea, unless you have some other way of ensuring that users' tests are
2742             safe. Don't use this unless you are certain you know what you are
2743             doing. Furthermore, this option causes spamassassin to recompile all
2744             the tests each time it processes a message for a user with a rule in
2745             his/her C<user_prefs> file, which could have a significant effect on
2746             server load. It is not recommended.
2747              
2748             Note that it is not currently possible to use C<allow_user_rules> to modify an
2749             existing system rule from a C<user_prefs> file with C<spamd>.
2750              
2751             =cut
2752              
2753             push (@cmds, {
2754             setting => 'allow_user_rules',
2755             is_priv => 1,
2756             default => 0,
2757             type => $CONF_TYPE_BOOL,
2758             code => sub {
2759 0     0   0 my ($self, $key, $value, $line) = @_;
2760 0 0       0 if ($value eq '') {
    0          
2761 0         0 return $MISSING_REQUIRED_VALUE;
2762             }
2763             elsif ($value !~ /^[01]$/) {
2764 0         0 return $INVALID_VALUE;
2765             }
2766              
2767 0         0 $self->{allow_user_rules} = $value+0;
2768 0 0       0 dbg("config: " . ($self->{allow_user_rules} ? "allowing":"not allowing") . " user rules!");
2769             }
2770 91         1069 });
2771              
2772             =item redirector_pattern /pattern/modifiers
2773              
2774             A regex pattern that matches both the redirector site portion, and
2775             the target site portion of a URI.
2776              
2777             Note: The target URI portion must be surrounded in parentheses and
2778             no other part of the pattern may create a backreference.
2779              
2780             Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude
2781              
2782             redirector_pattern /^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
2783              
2784             =cut
2785              
2786             push (@cmds, {
2787             setting => 'redirector_pattern',
2788             is_priv => 1,
2789             default => [],
2790             type => $CONF_TYPE_STRINGLIST,
2791             code => sub {
2792 427     427   1492 my ($self, $key, $value, $line) = @_;
2793              
2794 427         1274 $value =~ s/^\s+//;
2795 427 50       1196 if ($value eq '') {
2796 0         0 return $MISSING_REQUIRED_VALUE;
2797             }
2798              
2799 427         1354 my ($rec, $err) = compile_regexp($value, 1);
2800 427 50       1159 if (!$rec) {
2801 0         0 dbg("config: invalid redirector_pattern '$value': $err");
2802 0         0 return $INVALID_VALUE;
2803             }
2804              
2805 427         598 push @{$self->{main}->{conf}->{redirector_patterns}}, $rec;
  427         2285  
2806             }
2807 91         1174 });
2808              
2809             =item header SYMBOLIC_TEST_NAME header op /pattern/modifiers [if-unset: STRING]
2810              
2811             Define a test. C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
2812             'FROM_ENDS_IN_NUMS'. C<header> is the name of a mail header field,
2813             such as 'Subject', 'To', 'From', etc. Header field names are matched
2814             case-insensitively (conforming to RFC 5322 section 1.2.2), except for
2815             all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED.
2816              
2817             Appending a modifier C<:raw> to a header field name will inhibit decoding of
2818             quoted-printable or base-64 encoded strings, and will preserve all whitespace
2819             inside the header string. The C<:raw> may also be applied to pseudo-headers
2820             e.g. C<ALL:raw> will return a pristine (unmodified) header section.
2821              
2822             Appending a modifier C<:addr> to a header field name will cause everything
2823             except the first email address to be removed from the header field. It is
2824             mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
2825             their 'Resent-*' counterparts, and the 'Return-Path'.
2826              
2827             Appending a modifier C<:name> to a header field name will cause everything
2828             except the first display name to be removed from the header field. It is
2829             mainly applicable to header fields containing a single mail address: 'From',
2830             'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
2831              
2832             It is syntactically permitted to append more than one modifier to a header
2833             field name, although currently most combinations achieve no additional effect,
2834             for example C<From:addr:raw> or C<From:raw:addr> is currently the same as
2835             C<From:addr>.
2836              
2837             For example, appending C<:addr> to a header name will result in example@foo
2838             in all of the following cases:
2839              
2840             =over 4
2841              
2842             =item example@foo
2843              
2844             =item example@foo (Foo Blah)
2845              
2846             =item example@foo, example@bar
2847              
2848             =item display: example@foo (Foo Blah), example@bar ;
2849              
2850             =item Foo Blah <example@foo>
2851              
2852             =item "Foo Blah" <example@foo>
2853              
2854             =item "'Foo Blah'" <example@foo>
2855              
2856             =back
2857              
2858             For example, appending C<:name> to a header name will result in "Foo Blah"
2859             (without quotes) in all of the following cases:
2860              
2861             =over 4
2862              
2863             =item example@foo (Foo Blah)
2864              
2865             =item example@foo (Foo Blah), example@bar
2866              
2867             =item display: example@foo (Foo Blah), example@bar ;
2868              
2869             =item Foo Blah <example@foo>
2870              
2871             =item "Foo Blah" <example@foo>
2872              
2873             =item "'Foo Blah'" <example@foo>
2874              
2875             =back
2876              
2877             There are several special pseudo-headers that can be specified:
2878              
2879             =over 4
2880              
2881             =item C<ALL> can be used to mean the text of all the message's headers.
2882             Note that all whitespace inside the headers, at line folds, is currently
2883             compressed into a single space (' ') character. To obtain a pristine
2884             (unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
2885             above. Similar pseudo-headers return the headers added by specific relays:
2886             ALL-TRUSTED, ALL-INTERNAL, ALL-UNTRUSTED, ALL-EXTERNAL.
2887              
2888             =item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
2889             headers.
2890              
2891             =item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
2892             transaction that delivered this message, if this data has been made available
2893             by the SMTP server. See C<envelope_sender_header> for more information
2894             on how to set this.
2895              
2896             =item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;
2897             some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
2898             or to 'X-Message-Id', then uses its own one in the 'Message-Id' header.
2899             The value returned for this symbol is the text from all 3 headers, separated
2900             by newlines.
2901              
2902             =item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>,
2903             C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable,
2904             pre-parsed representation of the message's network path, as recorded in the
2905             Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs
2906             'external' sets. See C<http://wiki.apache.org/spamassassin/TrustedRelays> for
2907             more details.
2908              
2909             =back
2910              
2911             C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain
2912             regular expression), and C<pattern> is a valid Perl regular expression, with
2913             C<modifiers> as regexp modifiers in the usual style. Note that multi-line
2914             rules are not supported, even if you use C<x> as a modifier. Also note that
2915             the C<#> character must be escaped (C<\#>) or else it will be considered to be
2916             the start of a comment and not part of the regexp.
2917              
2918             If the header specified matches multiple headers, their text will be
2919             concatenated with embedded \n's. Therefore you may wish to use C</m> if you
2920             use C<^> or C<$> in your regular expression.
2921              
2922             If the C<[if-unset: STRING]> tag is present, then C<STRING> will
2923             be used if the header is not found in the mail message.
2924              
2925             Test names must not start with a number, and must contain only
2926             alphanumerics and underscores. It is suggested that lower-case characters
2927             not be used, and names have a length of no more than 22 characters,
2928             as an informal convention. Dashes are not allowed.
2929              
2930             Note that test names which begin with '__' are reserved for meta-match
2931             sub-rules, and are not scored or listed in the 'tests hit' reports.
2932             Test names which begin with 'T_' are reserved for tests which are
2933             undergoing QA, and these are given a very low score.
2934              
2935             If you add or modify a test, please be sure to run a sanity check afterwards
2936             by running C<spamassassin --lint>. This will avoid confusing error
2937             messages, or other tests being skipped as a side-effect.
2938              
2939             =item header SYMBOLIC_TEST_NAME exists:header_field_name
2940              
2941             Define a header field existence test. C<header_field_name> is the name
2942             of a header field to test for existence. Not to be confused with a
2943             test for a nonempty header field body, which can be implemented by a
2944             C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above.
2945              
2946             =item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments])
2947              
2948             Define a header eval test. C<name_of_eval_method> is the name of
2949             a method registered by a C<Mail::SpamAssassin::Plugin> object.
2950             C<arguments> are optional arguments to the function call.
2951              
2952             =item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test'])
2953              
2954             Check a DNSBL (a DNS blacklist or whitelist). This will retrieve Received:
2955             headers from the message, extract the IP addresses, select which ones are
2956             'untrusted' based on the C<trusted_networks> logic, and query that DNSBL
2957             zone. There are a few things to note:
2958              
2959             =over 4
2960              
2961             =item duplicated or private IPs
2962              
2963             Duplicated IPs are only queried once and reserved IPs are not queried.
2964             Private IPs are those listed in
2965             <https://www.iana.org/assignments/ipv4-address-space>,
2966             <http://duxcw.com/faq/network/privip.htm>,
2967             <http://duxcw.com/faq/network/autoip.htm>, or
2968             <https://tools.ietf.org/html/rfc5735> as private.
2969              
2970             =item the 'set' argument
2971              
2972             This is used as a 'zone ID'. If you want to look up a multiple-meaning zone
2973             like SORBS, you can then query the results from that zone using it;
2974             but all check_rbl_sub() calls must use that zone ID.
2975              
2976             Also, if more than one IP address gets a DNSBL hit for a particular rule, it
2977             does not affect the score because rules only trigger once per message.
2978              
2979             =item the 'zone' argument
2980              
2981             This is the root zone of the DNSBL.
2982              
2983             The domain name is considered to be a fully qualified domain name
2984             (i.e. not subject to DNS resolver's search or default domain options).
2985             No trailing period is needed, and will be removed if specified.
2986              
2987             =item the 'sub-test' argument
2988              
2989             This optional argument behaves the same as the sub-test argument in
2990             C<check_rbl_sub()> below.
2991              
2992             =item selecting all IPs except for the originating one
2993              
2994             This is accomplished by placing '-notfirsthop' at the end of the set name.
2995             This is useful for querying against DNS lists which list dialup IP
2996             addresses; the first hop may be a dialup, but as long as there is at least
2997             one more hop, via their outgoing SMTP server, that's legitimate, and so
2998             should not gain points. If there is only one hop, that will be queried
2999             anyway, as it should be relaying via its outgoing SMTP server instead of
3000             sending directly to your MX (mail exchange).
3001              
3002             =item selecting IPs by whether they are trusted
3003              
3004             When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP
3005             addresses in Received headers that were not added by trusted relays. To
3006             test the first IP address that can be trusted, place '-firsttrusted' at the
3007             end of the set name. That should test the IP address of the relay that
3008             connected to the most remote trusted relay.
3009              
3010             Note that this requires that SpamAssassin know which relays are trusted. For
3011             simple cases, SpamAssassin can make a good estimate. For complex cases, you
3012             may get better results by setting C<trusted_networks> manually.
3013              
3014             In addition, you can test all untrusted IP addresses by placing '-untrusted'
3015             at the end of the set name. Important note -- this does NOT include the
3016             IP address from the most recent 'untrusted line', as used in '-firsttrusted'
3017             above. That's because we're talking about the trustworthiness of the
3018             IP address data, not the source header line, here; and in the case of
3019             the most recent header (the 'firsttrusted'), that data can be trusted.
3020             See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays>
3021             for more information on this.
3022              
3023             =item Selecting just the last external IP
3024              
3025             By using '-lastexternal' at the end of the set name, you can select only
3026             the external host that connected to your internal network, or at least
3027             the last external host with a public IP.
3028              
3029             =back
3030              
3031             =item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone')
3032              
3033             Same as check_rbl(), except querying using IN TXT instead of IN A records.
3034             If the zone supports it, it will result in a line of text describing
3035             why the IP is listed, typically a hyperlink to a database entry.
3036              
3037             =item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test')
3038              
3039             Create a sub-test for 'set'. If you want to look up a multi-meaning zone
3040             like relays.osirusoft.com, you can then query the results from that zone
3041             using the zone ID from the original query. The sub-test may either be an
3042             IPv4 dotted address for RBLs that return multiple A records, or a
3043             non-negative decimal number to specify a bitmask for RBLs that return a
3044             single A record containing a bitmask of results, or a regular expression.
3045              
3046             Note: the set name must be exactly the same as for the main query rule,
3047             including selections like '-notfirsthop' appearing at the end of the set
3048             name.
3049              
3050             =cut
3051              
3052             push (@cmds, {
3053             setting => 'header',
3054             is_frequent => 1,
3055             is_priv => 1,
3056             code => sub {
3057 2835     2835   8828 my ($self, $key, $value, $line) = @_;
3058 2835         6818 local($1);
3059 2835 50       14335 if ($value !~ s/^(\S+)\s+//) {
3060 0         0 return $INVALID_VALUE;
3061             }
3062 2835         7125 my $rulename = $1;
3063 2835 50       7134 if ($value eq '') {
3064 0         0 return $MISSING_REQUIRED_VALUE;
3065             }
3066 2835 100       12488 if ($value =~ /^(?:rbl)?eval:(.*)$/) {
3067 2286         5840 my $fn = $1;
3068 2286 50       8583 if ($fn !~ /^\w+\(.*\)$/) {
3069 0         0 return $INVALID_VALUE;
3070             }
3071 2286 100       6465 if ($fn =~ /^check_(?:rbl|dns)/) {
3072 1         6 $self->{parser}->add_test ($rulename, $fn, $TYPE_RBL_EVALS);
3073             }
3074             else {
3075 2285         7062 $self->{parser}->add_test ($rulename, $fn, $TYPE_HEAD_EVALS);
3076             }
3077             }
3078             else {
3079             # Detailed parsing in add_test
3080 549         2405 $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
3081             }
3082             }
3083 91         1221 });
3084              
3085             =item body SYMBOLIC_TEST_NAME /pattern/modifiers
3086              
3087             Define a body pattern test. C<pattern> is a Perl regular expression. Note:
3088             as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3089             the beginning of a comment.
3090              
3091             The 'body' in this case is the textual parts of the message body; any
3092             non-text MIME parts are stripped, and the message decoded from
3093             Quoted-Printable or Base-64-encoded format if necessary. Parts declared as
3094             text/html will be rendered from HTML to text.
3095              
3096             All body paragraphs (double-newline-separated blocks of text) are turned into
3097             a single line, with line breaks removed and whitespace normalized. Any lines
3098             longer than 2kB are split into shorter separate lines (from a boundary when
3099             possible); this may unexpectedly prevent a pattern from matching. Patterns
3100             are matched independently against each of these lines.
3101              
3102             Note that by default the message Subject header is considered part of the
3103             body and becomes the first line when running the rules. If you don't want
3104             to match Subject along with body text, use "tflags RULENAME nosubject".
3105              
3106             =item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3107              
3108             Define a body eval test. See above.
3109              
3110             =cut
3111              
3112             push (@cmds, {
3113             setting => 'body',
3114             is_frequent => 1,
3115             is_priv => 1,
3116             code => sub {
3117 312     312   1325 my ($self, $key, $value, $line) = @_;
3118 312         806 local($1);
3119 312 50       1883 if ($value !~ s/^(\S+)\s+//) {
3120 0         0 return $INVALID_VALUE;
3121             }
3122 312         901 my $rulename = $1;
3123 312 50       1032 if ($value eq '') {
3124 0         0 return $MISSING_REQUIRED_VALUE;
3125             }
3126 312 100       1468 if ($value =~ /^eval:(.*)$/) {
3127 83         397 my $fn = $1;
3128 83 50       596 if ($fn !~ /^\w+\(.*\)$/) {
3129 0         0 return $INVALID_VALUE;
3130             }
3131 83         464 $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
3132             } else {
3133 229         1093 $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS);
3134             }
3135             }
3136 91         1348 });
3137              
3138             =item uri SYMBOLIC_TEST_NAME /pattern/modifiers
3139              
3140             Define a uri pattern test. C<pattern> is a Perl regular expression. Note: as
3141             per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3142             the beginning of a comment.
3143              
3144             The 'uri' in this case is a list of all the URIs in the body of the email,
3145             and the test will be run on each and every one of those URIs, adjusting the
3146             score if a match is found. Use this test instead of one of the body tests
3147             when you need to match a URI, as it is more accurately bound to the start/end
3148             points of the URI, and will also be faster.
3149              
3150             =cut
3151              
3152             # we don't do URI evals yet - maybe later
3153             # if (/^uri\s+(\S+)\s+eval:(.*)$/) {
3154             # $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
3155             # next;
3156             # }
3157             push (@cmds, {
3158             setting => 'uri',
3159             is_priv => 1,
3160             code => sub {
3161 61     61   328 my ($self, $key, $value, $line) = @_;
3162 61         204 local($1);
3163 61 50       569 if ($value !~ s/^(\S+)\s+//) {
3164 0         0 return $INVALID_VALUE;
3165             }
3166 61         269 my $rulename = $1;
3167 61 50       328 if ($value eq '') {
3168 0         0 return $MISSING_REQUIRED_VALUE;
3169             }
3170 61         435 $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
3171             }
3172 91         1273 });
3173              
3174             =item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers
3175              
3176             Define a raw-body pattern test. C<pattern> is a Perl regular expression.
3177             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3178             considered the beginning of a comment.
3179              
3180             The 'raw body' of a message is the raw data inside all textual parts. The
3181             text will be decoded from base64 or quoted-printable encoding, but HTML
3182             tags and line breaks will still be present. Multiline expressions will
3183             need to be used to match strings that are broken by line breaks.
3184              
3185             Note that the text is split into 2-4kB chunks (from a word boundary when
3186             possible); this may unexpectedly prevent a pattern from matching. Patterns
3187             are matched independently against each of these chunks.
3188              
3189             =item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3190              
3191             Define a raw-body eval test. See above.
3192              
3193             =cut
3194              
3195             push (@cmds, {
3196             setting => 'rawbody',
3197             is_frequent => 1,
3198             is_priv => 1,
3199             code => sub {
3200 0     0   0 my ($self, $key, $value, $line) = @_;
3201 0         0 local($1);
3202 0 0       0 if ($value !~ s/^(\S+)\s+//) {
3203 0         0 return $INVALID_VALUE;
3204             }
3205 0         0 my $rulename = $1;
3206 0 0       0 if ($value eq '') {
3207 0         0 return $MISSING_REQUIRED_VALUE;
3208             }
3209 0 0       0 if ($value =~ /^eval:(.*)$/) {
3210 0         0 my $fn = $1;
3211 0 0       0 if ($fn !~ /^\w+\(.*\)$/) {
3212 0         0 return $INVALID_VALUE;
3213             }
3214 0         0 $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
3215             } else {
3216 0         0 $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS);
3217             }
3218             }
3219 91         1348 });
3220              
3221             =item full SYMBOLIC_TEST_NAME /pattern/modifiers
3222              
3223             Define a full message pattern test. C<pattern> is a Perl regular expression.
3224             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3225             considered the beginning of a comment.
3226              
3227             The full message is the pristine message headers plus the pristine message
3228             body, including all MIME data such as images, other attachments, MIME
3229             boundaries, etc.
3230              
3231             =item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3232              
3233             Define a full message eval test. See above.
3234              
3235             =cut
3236              
3237             push (@cmds, {
3238             setting => 'full',
3239             is_priv => 1,
3240             code => sub {
3241 300     300   1128 my ($self, $key, $value, $line) = @_;
3242 300         773 local($1);
3243 300 50       1625 if ($value !~ s/^(\S+)\s+//) {
3244 0         0 return $INVALID_VALUE;
3245             }
3246 300         857 my $rulename = $1;
3247 300 50       1005 if ($value eq '') {
3248 0         0 return $MISSING_REQUIRED_VALUE;
3249             }
3250 300 50       1416 if ($value =~ /^eval:(.*)$/) {
3251 300         847 my $fn = $1;
3252 300 50       1436 if ($fn !~ /^\w+\(.*\)$/) {
3253 0         0 return $INVALID_VALUE;
3254             }
3255 300         1169 $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
3256             } else {
3257 0         0 $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS);
3258             }
3259             }
3260 91         1155 });
3261              
3262             =item meta SYMBOLIC_TEST_NAME boolean expression
3263              
3264             Define a boolean expression test in terms of other tests that have
3265             been hit or not hit. For example:
3266              
3267             meta META1 TEST1 && !(TEST2 || TEST3)
3268              
3269             Note that English language operators ("and", "or") will be treated as
3270             rule names, and that there is no C<XOR> operator.
3271              
3272             =item meta SYMBOLIC_TEST_NAME boolean arithmetic expression
3273              
3274             Can also define an arithmetic expression in terms of other tests,
3275             with an unhit test having the value "0" and a hit test having a
3276             nonzero value. The value of a hit meta test is that of its arithmetic
3277             expression. The value of a hit eval test is that returned by its
3278             method. The value of a hit header, body, rawbody, uri, or full test
3279             which has the "multiple" tflag is the number of times the test hit.
3280             The value of any other type of hit test is "1".
3281              
3282             For example:
3283              
3284             meta META2 (3 * TEST1 - 2 * TEST2) > 0
3285              
3286             Note that Perl builtins and functions, like C<abs()>, B<can't> be
3287             used, and will be treated as rule names.
3288              
3289             If you want to define a meta-rule, but do not want its individual sub-rules to
3290             count towards the final score unless the entire meta-rule matches, give the
3291             sub-rules names that start with '__' (two underscores). SpamAssassin will
3292             ignore these for scoring.
3293              
3294             =item meta SYMBOLIC_TEST_NAME ... rules_matching(RULEGLOB) ...
3295              
3296             Special function that will expand to a list of matching rulenames. Can be
3297             used anywhere in expressions. The argument supports glob-style rulename
3298             matching (* = anything, ? = one character). Matching is case-sensitive.
3299              
3300             For example, this will hit if at least two __FOO_* rules hit:
3301              
3302             body __FOO_1 /xxx/
3303             body __FOO_2 /yyy/
3304             body __FOO_3 /zzz/
3305             meta FOO_META rules_matching(__FOO_*) >= 2
3306              
3307             Which would be the same as:
3308              
3309             meta FOO_META (__FOO_1 + __FOO_2 + __FOO_3) >= 2
3310              
3311              
3312             =cut
3313              
3314             push (@cmds, {
3315             setting => 'meta',
3316             is_frequent => 1,
3317             is_priv => 1,
3318             code => sub {
3319 130     130   594 my ($self, $key, $value, $line) = @_;
3320 130         402 local($1);
3321 130 50       970 if ($value !~ s/^(\S+)\s+//) {
3322 0         0 return $INVALID_VALUE;
3323             }
3324 130         444 my $rulename = $1;
3325 130 50       530 if ($value eq '') {
3326 0         0 return $MISSING_REQUIRED_VALUE;
3327             }
3328 130 50       558 if ($value =~ /\*\s*\*/) {
3329 0         0 info("config: found invalid '**' or '* *' operator in meta command");
3330 0         0 return $INVALID_VALUE;
3331             }
3332 130         582 $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
3333             }
3334 91         1204 });
3335              
3336             =item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]
3337              
3338             Defines the name of a test that should be "reused" during the scoring
3339             process. If a message has an X-Spam-Status header that shows a hit for
3340             this rule or any of the old rule names given, a hit will be added for
3341             this rule when B<mass-check --reuse> is used. Examples:
3342              
3343             C<reuse SPF_PASS>
3344              
3345             C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>
3346              
3347             The actual logic for reuse tests is done by
3348             B<Mail::SpamAssassin::Plugin::Reuse>.
3349              
3350             =cut
3351              
3352             push (@cmds, {
3353             setting => 'reuse',
3354             is_priv => 1,
3355             code => sub {
3356 60     60   340 my ($self, $key, $value, $line) = @_;
3357 60 50       551 if ($value !~ /\s*(\w+)(?:\s+(?:\w+(?:\s+\w+)*))?\s*$/) {
3358 0         0 return $INVALID_VALUE;
3359             }
3360 60         279 my $rule_name = $1;
3361             # don't overwrite tests, just define them so scores, priorities work
3362 60 50       509 if (!exists $self->{tests}->{$rule_name}) {
3363 0         0 $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
3364             }
3365             }
3366 91         1141 });
3367              
3368             =item tflags SYMBOLIC_TEST_NAME flags
3369              
3370             Used to set flags on a test. Parameter is a space-separated list of flag
3371             names or flag name = value pairs.
3372             These flags are used in the score-determination back end system for details
3373             of the test's behaviour. Please see C<bayes_auto_learn> for more information
3374             about tflag interaction with those systems. The following flags can be set:
3375              
3376             =over 4
3377              
3378             =item net
3379              
3380             The test is a network test, and will not be run in the mass checking system
3381             or if B<-L> is used, therefore its score should not be modified.
3382              
3383             =item nice
3384              
3385             The test is intended to compensate for common false positives, and should be
3386             assigned a negative score.
3387              
3388             =item userconf
3389              
3390             The test requires user configuration before it can be used (like
3391             language-specific tests).
3392              
3393             =item learn
3394              
3395             The test requires training before it can be used.
3396              
3397             =item noautolearn
3398              
3399             The test will explicitly be ignored when calculating the score for
3400             learning systems.
3401              
3402             =item autolearn_force
3403              
3404             The test will be subject to less stringent autolearn thresholds.
3405              
3406             Normally, SpamAssassin will require 3 points from the header and 3
3407             points from the body to be auto-learned as spam. This option keeps
3408             the threshold at 6 points total but changes it to have no regard to the
3409             source of the points.
3410              
3411             =item noawl
3412              
3413             This flag is specific to the AWL plugin.
3414              
3415             Normally, the AWL plugin normalizes scores via the auto-whitelist. In some
3416             scenarios this works against the system administrator when trying to add
3417             rules to correct misclassified email. When the AWL plugin checks the email
3418             and finds the noawl flag, it will exit without normalizing the score or
3419             storing the value in the database.
3420              
3421             =item multiple
3422              
3423             The test will be evaluated multiple times, for use with meta rules.
3424             Only affects header, body, rawbody, uri, and full tests.
3425              
3426             =item maxhits=N
3427              
3428             If B<multiple> is specified, limit the number of hits found to N.
3429             If the rule is used in a meta that counts the hits (e.g. __RULENAME > 5),
3430             this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").
3431              
3432             For example:
3433              
3434             uri __KAM_COUNT_URIS /^./
3435             tflags __KAM_COUNT_URIS multiple maxhits=16
3436             describe __KAM_COUNT_URIS A multiple match used to count URIs in a message
3437              
3438             meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
3439             meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
3440             meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
3441             meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
3442             meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
3443             meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
3444             meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
3445             meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)
3446              
3447             =item nosubject
3448              
3449             Used only for B<body> rules. If specified, Subject header will not be a
3450             part of the matched body text. See I<body> for more info.
3451              
3452             =item ips_only
3453              
3454             This flag is specific to rules invoking an URIDNSBL plugin,
3455             it is documented there.
3456              
3457             =item domains_only
3458              
3459             This flag is specific to rules invoking an URIDNSBL plugin,
3460             it is documented there.
3461              
3462             =item ns
3463              
3464             This flag is specific to rules invoking an URIDNSBL plugin,
3465             it is documented there.
3466              
3467             =item a
3468              
3469             This flag is specific to rules invoking an URIDNSBL plugin,
3470             it is documented there.
3471              
3472             =back
3473              
3474             =cut
3475              
3476 91         548 push (@cmds, {
3477             setting => 'tflags',
3478             is_frequent => 1,
3479             is_priv => 1,
3480             type => $CONF_TYPE_HASH_KEY_VALUE,
3481             });
3482              
3483             =item priority SYMBOLIC_TEST_NAME n
3484              
3485             Assign a specific priority to a test. All tests, except for DNS and Meta
3486             tests, are run in increasing priority value order (negative priority values
3487             are run before positive priority values). The default test priority is 0
3488             (zero).
3489              
3490             The values C<-99999999999999> and C<-99999999999998> have a special meaning
3491             internally, and should not be used.
3492              
3493             =cut
3494              
3495 91         638 push (@cmds, {
3496             setting => 'priority',
3497             is_priv => 1,
3498             type => $CONF_TYPE_HASH_KEY_VALUE,
3499             });
3500              
3501             =back
3502              
3503             =head1 ADMINISTRATOR SETTINGS
3504              
3505             These settings differ from the ones above, in that they are considered 'more
3506             privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
3507             No matter what C<allow_user_rules> is set to, these can never be set from a
3508             user's C<user_prefs> file when spamc/spamd is being used. However, all
3509             settings can be used by local programs run directly by the user.
3510              
3511             =over 4
3512              
3513             =item version_tag string
3514              
3515             This tag is appended to the SA version in the X-Spam-Status header. You should
3516             include it when you modify your ruleset, especially if you plan to distribute it.
3517             A good choice for I<string> is your last name or your initials followed by a
3518             number which you increase with each change.
3519              
3520             The version_tag will be lowercased, and any character other than a letter,
3521             digit or period will be replaced by an underscore.
3522              
3523             e.g.
3524              
3525             version_tag myrules1 # version=2.41-myrules1
3526              
3527             =cut
3528              
3529             push (@cmds, {
3530             setting => 'version_tag',
3531             is_admin => 1,
3532             code => sub {
3533 0     0   0 my ($self, $key, $value, $line) = @_;
3534 0 0       0 if ($value eq '') {
3535 0         0 return $MISSING_REQUIRED_VALUE;
3536             }
3537 0         0 my $tag = lc($value);
3538 0         0 $tag =~ tr/a-z0-9./_/c;
3539 0         0 foreach (@Mail::SpamAssassin::EXTRA_VERSION) {
3540 0 0       0 if($_ eq $tag) { $tag = undef; last; }
  0         0  
  0         0  
3541             }
3542 0 0       0 push(@Mail::SpamAssassin::EXTRA_VERSION, $tag) if($tag);
3543             }
3544 91         929 });
3545              
3546             =item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against
3547              
3548             Define a regression testing string. You can have more than one regression test
3549             string per symbolic test name. Simply specify a string that you wish the test
3550             to match.
3551              
3552             These tests are only run as part of the test suite - they should not affect the
3553             general running of SpamAssassin.
3554              
3555             =cut
3556              
3557             push (@cmds, {
3558             setting => 'test',
3559             is_admin => 1,
3560             code => sub {
3561 1827 50   1827   4145 return unless defined $COLLECT_REGRESSION_TESTS;
3562 0         0 my ($self, $key, $value, $line) = @_;
3563 0         0 local ($1,$2,$3);
3564 0 0       0 if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
  0         0  
3565 0         0 $self->{parser}->add_regression_test($1, $2, $3);
3566             }
3567 91         997 });
3568              
3569             =item rbl_timeout t [t_min] [zone] (default: 15 3)
3570              
3571             All DNS queries are made at the beginning of a check and we try to read
3572             the results at the end. This value specifies the maximum period of time
3573             (in seconds) to wait for a DNS query. If most of the DNS queries have
3574             succeeded for a particular message, then SpamAssassin will not wait for
3575             the full period to avoid wasting time on unresponsive server(s), but will
3576             shrink the timeout according to a percentage of queries already completed.
3577             As the number of queries remaining approaches 0, the timeout value will
3578             gradually approach a t_min value, which is an optional second parameter
3579             and defaults to 0.2 * t. If t is smaller than t_min, the initial timeout
3580             is set to t_min. Here is a chart of queries remaining versus the timeout
3581             in seconds, for the default 15 second / 3 second timeout setting:
3582              
3583             queries left 100% 90% 80% 70% 60% 50% 40% 30% 20% 10% 0%
3584             timeout 15 14.9 14.5 13.9 13.1 12.0 10.7 9.1 7.3 5.3 3
3585              
3586             For example, if 20 queries are made at the beginning of a message check
3587             and 16 queries have returned (leaving 20%), the remaining 4 queries should
3588             finish within 7.3 seconds since their query started or they will be timed out.
3589             Note that timed out queries are only aborted when there is nothing else left
3590             for SpamAssassin to do - long evaluation of other rules may grant queries
3591             additional time.
3592              
3593             If a parameter 'zone' is specified (it must end with a letter, which
3594             distinguishes it from the numeric parameters), then the setting only
3595             applies to DNS queries against the specified DNS domain (host, domain or
3596             RBL (sub)zone). Matching is case-insensitive, the actual domain may be a
3597             subdomain of the specified zone.
3598              
3599             =cut
3600              
3601             push (@cmds, {
3602             setting => 'rbl_timeout',
3603             is_admin => 1,
3604             default => 15,
3605             code => sub {
3606 1     1   11 my ($self, $key, $value, $line) = @_;
3607 1 50 33     29 unless (defined $value && $value !~ /^$/) {
3608 0         0 return $MISSING_REQUIRED_VALUE;
3609             }
3610 1         11 local ($1,$2,$3);
3611 1 50       11 unless ($value =~ /^ ( \+? \d+ (?: \. \d*)? [smhdw]? )
3612             (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
3613             (?: \s+ (\S* [a-zA-Z]) )? $/xsi) {
3614 0         0 return $INVALID_VALUE;
3615             }
3616 1         7 my($timeout, $timeout_min, $zone) = ($1, $2, $3);
3617 1         3 foreach ($timeout, $timeout_min) {
3618 2 50 66     25 if (defined $_ && s/\s*([smhdw])\z//i) {
3619             $_ *= { s => 1, m => 60, h => 3600,
3620 0         0 d => 24*3600, w => 7*24*3600 }->{lc $1};
3621             }
3622             }
3623 1 50       5 if (!defined $zone) { # a global setting
3624 1         9 $self->{rbl_timeout} = 0 + $timeout;
3625 1 50       11 $self->{rbl_timeout_min} = 0 + $timeout_min if defined $timeout_min;
3626             }
3627             else { # per-zone settings
3628 0         0 $zone =~ s/^\.//; $zone =~ s/\.\z//; # strip leading and trailing dot
  0         0  
3629 0         0 $zone = lc $zone;
3630 0         0 $self->{by_zone}{$zone}{rbl_timeout} = 0 + $timeout;
3631             $self->{by_zone}{$zone}{rbl_timeout_min} =
3632 0 0       0 0 + $timeout_min if defined $timeout_min;
3633             }
3634             },
3635 91         1216 type => $CONF_TYPE_DURATION,
3636             });
3637              
3638             =item util_rb_tld tld1 tld2 ...
3639              
3640             This option maintains the list of valid TLDs in the RegistryBoundaries code.
3641             TLDs include things like com, net, org, etc.
3642              
3643             =cut
3644              
3645             push (@cmds, {
3646             setting => 'util_rb_tld',
3647             is_admin => 1,
3648             code => sub {
3649 10620     10620   29358 my ($self, $key, $value, $line) = @_;
3650 10620 50 33     53568 unless (defined $value && $value !~ /^$/) {
3651 0         0 return $MISSING_REQUIRED_VALUE;
3652             }
3653 10620 50       87900 unless ($value =~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
3654 0         0 return $INVALID_VALUE;
3655             }
3656 10620         92410 foreach (split(/\s+/, $value)) {
3657 120132         336769 $self->{valid_tlds}{lc $_} = 1;
3658             }
3659             }
3660 91         1301 });
3661              
3662             =item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3663              
3664             This option maintains the list of valid 2nd-level TLDs in the RegistryBoundaries
3665             code. 2TLDs include things like co.uk, fed.us, etc.
3666              
3667             =cut
3668              
3669             push (@cmds, {
3670             setting => 'util_rb_2tld',
3671             is_admin => 1,
3672             code => sub {
3673 35881     35881   97465 my ($self, $key, $value, $line) = @_;
3674 35881 50 33     174751 unless (defined $value && $value !~ /^$/) {
3675 0         0 return $MISSING_REQUIRED_VALUE;
3676             }
3677 35881 50       196797 unless ($value =~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {
3678 0         0 return $INVALID_VALUE;
3679             }
3680 35881         165059 foreach (split(/\s+/, $value)) {
3681 146173         448748 $self->{two_level_domains}{lc $_} = 1;
3682             }
3683             }
3684 91         1207 });
3685              
3686             =item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3687              
3688             This option maintains the list of valid 3rd-level TLDs in the RegistryBoundaries
3689             code. 3TLDs include things like demon.co.uk, plc.co.im, etc.
3690              
3691             =cut
3692              
3693             push (@cmds, {
3694             setting => 'util_rb_3tld',
3695             is_admin => 1,
3696             code => sub {
3697 3745     3745   10114 my ($self, $key, $value, $line) = @_;
3698 3745 50 33     18480 unless (defined $value && $value !~ /^$/) {
3699 0         0 return $MISSING_REQUIRED_VALUE;
3700             }
3701 3745 50       16019 unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
3702 0         0 return $INVALID_VALUE;
3703             }
3704 3745         11345 foreach (split(/\s+/, $value)) {
3705 3745         19573 $self->{three_level_domains}{lc $_} = 1;
3706             }
3707             }
3708 91         1134 });
3709              
3710             =item clear_util_rb
3711              
3712             Empty internal list of valid TLDs (including 2nd and 3rd level) which
3713             RegistryBoundaries code uses. Only useful if you want to override the
3714             standard lists supplied by sa-update.
3715              
3716             =cut
3717              
3718             push (@cmds, {
3719             setting => 'clear_util_rb',
3720             type => $CONF_TYPE_NOARGS,
3721             code => sub {
3722 78     78   477 my ($self, $key, $value, $line) = @_;
3723 78 50 33     871 unless (!defined $value || $value eq '') {
3724 0         0 return $INVALID_VALUE;
3725             }
3726 78         318 $self->{valid_tlds} = ();
3727 78         353 $self->{two_level_domains} = ();
3728 78         308 $self->{three_level_domains} = ();
3729 78         543 dbg("config: cleared tld lists");
3730             }
3731 91         1078 });
3732              
3733             =item bayes_path /path/filename (default: ~/.spamassassin/bayes)
3734              
3735             This is the directory and filename for Bayes databases. Several databases
3736             will be created, with this as the base directory and filename, with C<_toks>,
3737             C<_seen>, etc. appended to the base. The default setting results in files
3738             called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.
3739              
3740             By default, each user has their own in their C<~/.spamassassin> directory with
3741             mode 0700/0600. For system-wide SpamAssassin use, you may want to reduce disk
3742             space usage by sharing this across all users. However, Bayes appears to be
3743             more effective with individual user databases.
3744              
3745             =cut
3746              
3747             push (@cmds, {
3748             setting => 'bayes_path',
3749             is_admin => 1,
3750             default => '__userstate__/bayes',
3751             type => $CONF_TYPE_STRING,
3752             code => sub {
3753 61     61   378 my ($self, $key, $value, $line) = @_;
3754 61 50 33     791 unless (defined $value && $value !~ /^$/) {
3755 0         0 return $MISSING_REQUIRED_VALUE;
3756             }
3757 61 50       2587 if (-d $value) {
3758 0         0 return $INVALID_VALUE;
3759             }
3760 61         604 $self->{bayes_path} = $value;
3761             }
3762 91         1145 });
3763              
3764             =item bayes_file_mode (default: 0700)
3765              
3766             The file mode bits used for the Bayesian filtering database files.
3767              
3768             Make sure you specify this using the 'x' mode bits set, as it may also be used
3769             to create directories. However, if a file is created, the resulting file will
3770             not have any execute bits set (the umask is set to 111). The argument is a
3771             string of octal digits, it is converted to a numeric value internally.
3772              
3773             =cut
3774              
3775             push (@cmds, {
3776             setting => 'bayes_file_mode',
3777             is_admin => 1,
3778             default => '0700',
3779             type => $CONF_TYPE_NUMERIC,
3780             code => sub {
3781 0     0   0 my ($self, $key, $value, $line) = @_;
3782 0 0       0 if ($value !~ /^0?[0-7]{3}$/) { return $INVALID_VALUE }
  0         0  
3783 0         0 $self->{bayes_file_mode} = untaint_var($value);
3784             }
3785 91         1353 });
3786              
3787             =item bayes_store_module Name::Of::BayesStore::Module
3788              
3789             If this option is set, the module given will be used as an alternate
3790             to the default bayes storage mechanism. It must conform to the
3791             published storage specification (see
3792             Mail::SpamAssassin::BayesStore). For example, set this to
3793             Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
3794             module.
3795              
3796             =cut
3797              
3798             push (@cmds, {
3799             setting => 'bayes_store_module',
3800             is_admin => 1,
3801             default => '',
3802             type => $CONF_TYPE_STRING,
3803             code => sub {
3804 6     6   37 my ($self, $key, $value, $line) = @_;
3805 6         20 local ($1);
3806 6 50       65 if ($value !~ /^([_A-Za-z0-9:]+)$/) { return $INVALID_VALUE; }
  0         0  
3807 6         46 $self->{bayes_store_module} = $1;
3808             }
3809 91         1140 });
3810              
3811             =item bayes_sql_dsn DBI:databasetype:databasename:hostname:port
3812              
3813             Used for BayesStore::SQL storage implementation.
3814              
3815             This option gives the connect string used to connect to the SQL based Bayes storage.
3816              
3817             =cut
3818              
3819 91         492 push (@cmds, {
3820             setting => 'bayes_sql_dsn',
3821             is_admin => 1,
3822             default => '',
3823             type => $CONF_TYPE_STRING,
3824             });
3825              
3826             =item bayes_sql_username
3827              
3828             Used by BayesStore::SQL storage implementation.
3829              
3830             This option gives the username used by the above DSN.
3831              
3832             =cut
3833              
3834 91         455 push (@cmds, {
3835             setting => 'bayes_sql_username',
3836             is_admin => 1,
3837             default => '',
3838             type => $CONF_TYPE_STRING,
3839             });
3840              
3841             =item bayes_sql_password
3842              
3843             Used by BayesStore::SQL storage implementation.
3844              
3845             This option gives the password used by the above DSN.
3846              
3847             =cut
3848              
3849 91         422 push (@cmds, {
3850             setting => 'bayes_sql_password',
3851             is_admin => 1,
3852             default => '',
3853             type => $CONF_TYPE_STRING,
3854             });
3855              
3856             =item bayes_sql_username_authorized ( 0 | 1 ) (default: 0)
3857              
3858             Whether to call the services_authorized_for_username plugin hook in BayesSQL.
3859             If the hook does not determine that the user is allowed to use bayes, or the
3860             user is invalid, then the database will not be initialized.
3861              
3862             NOTE: By default the user is considered invalid until a plugin returns
3863             a true value. If you enable this, but do not have a proper plugin
3864             loaded, all users will turn up as invalid.
3865              
3866             The username passed into the plugin can be affected by the
3867             bayes_sql_override_username config option.
3868              
3869             =cut
3870              
3871 91         393 push (@cmds, {
3872             setting => 'bayes_sql_username_authorized',
3873             is_admin => 1,
3874             default => 0,
3875             type => $CONF_TYPE_BOOL,
3876             });
3877              
3878             =item user_scores_dsn DBI:databasetype:databasename:hostname:port
3879              
3880             If you load user scores from an SQL database, this will set the DSN
3881             used to connect. Example: C<DBI:mysql:spamassassin:localhost>
3882              
3883             If you load user scores from an LDAP directory, this will set the DSN used to
3884             connect. You have to write the DSN as an LDAP URL, the components being the
3885             host and port to connect to, the base DN for the search, the scope of the
3886             search (base, one or sub), the single attribute being the multivalued attribute
3887             used to hold the configuration data (space separated pairs of key and value,
3888             just as in a file) and finally the filter being the expression used to filter
3889             out the wanted username. Note that the filter expression is being used in a
3890             sprintf statement with the username as the only parameter, thus it can hold a
3891             single __USERNAME__ expression. This will be replaced with the username.
3892              
3893             Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>
3894              
3895             =cut
3896              
3897 91         400 push (@cmds, {
3898             setting => 'user_scores_dsn',
3899             is_admin => 1,
3900             default => '',
3901             type => $CONF_TYPE_STRING,
3902             });
3903              
3904             =item user_scores_sql_username username
3905              
3906             The authorized username to connect to the above DSN.
3907              
3908             =cut
3909              
3910 91         497 push (@cmds, {
3911             setting => 'user_scores_sql_username',
3912             is_admin => 1,
3913             default => '',
3914             type => $CONF_TYPE_STRING,
3915             });
3916              
3917             =item user_scores_sql_password password
3918              
3919             The password for the database username, for the above DSN.
3920              
3921             =cut
3922              
3923 91         566 push (@cmds, {
3924             setting => 'user_scores_sql_password',
3925             is_admin => 1,
3926             default => '',
3927             type => $CONF_TYPE_STRING,
3928             });
3929              
3930             =item user_scores_sql_custom_query query
3931              
3932             This option gives you the ability to create a custom SQL query to
3933             retrieve user scores and preferences. In order to work correctly your
3934             query should return two values, the preference name and value, in that
3935             order. In addition, there are several "variables" that you can use
3936             as part of your query; these variables will be replaced with the
3937             current values right before the query is run. The current allowed
3938             variables are:
3939              
3940             =over 4
3941              
3942             =item _TABLE_
3943              
3944             The name of the table where user scores and preferences are stored. Currently
3945             hardcoded to userpref, to change this value you need to create a new custom
3946             query with the new table name.
3947              
3948             =item _USERNAME_
3949              
3950             The current user's username.
3951              
3952             =item _MAILBOX_
3953              
3954             The portion before the @ as derived from the current user's username.
3955              
3956             =item _DOMAIN_
3957              
3958             The portion after the @ as derived from the current user's username, this
3959             value may be null.
3960              
3961             =back
3962              
3963             The query must be one continuous line in order to parse correctly.
3964              
3965             Here are several example queries, please note that these are broken up
3966             for easy reading, in your config it should be one continuous line.
3967              
3968             =over 4
3969              
3970             =item Current default query:
3971              
3972             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC>
3973              
3974             =item Use global and then domain level defaults:
3975              
3976             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC>
3977              
3978             =item Maybe global prefs should override user prefs:
3979              
3980             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC>
3981              
3982             =back
3983              
3984             =cut
3985              
3986 91         470 push (@cmds, {
3987             setting => 'user_scores_sql_custom_query',
3988             is_admin => 1,
3989             default => undef,
3990             type => $CONF_TYPE_STRING,
3991             });
3992              
3993             =item user_scores_ldap_username
3994              
3995             This is the Bind DN used to connect to the LDAP server. It defaults
3996             to the empty string (""), allowing anonymous binding to work.
3997              
3998             Example: C<cn=master,dc=koehntopp,dc=de>
3999              
4000             =cut
4001              
4002 91         489 push (@cmds, {
4003             setting => 'user_scores_ldap_username',
4004             is_admin => 1,
4005             default => '',
4006             type => $CONF_TYPE_STRING,
4007             });
4008              
4009             =item user_scores_ldap_password
4010              
4011             This is the password used to connect to the LDAP server. It defaults
4012             to the empty string ("").
4013              
4014             =cut
4015              
4016 91         447 push (@cmds, {
4017             setting => 'user_scores_ldap_password',
4018             is_admin => 1,
4019             default => '',
4020             type => $CONF_TYPE_STRING,
4021             });
4022              
4023             =item user_scores_fallback_to_global (default: 1)
4024              
4025             Fall back to global scores and settings if userprefs can't be loaded
4026             from SQL or LDAP, instead of passing the message through unprocessed.
4027              
4028             =cut
4029              
4030 91         460 push (@cmds, {
4031             setting => 'user_scores_fallback_to_global',
4032             is_admin => 1,
4033             default => 1,
4034             type => $CONF_TYPE_BOOL,
4035             });
4036              
4037             =item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm]
4038              
4039             Load a SpamAssassin plugin module. The C<ModuleName> is the perl module
4040             name, used to create the plugin object itself.
4041              
4042             Module naming is strict, name must only contain alphanumeric characters or
4043             underscores. File must have .pm extension.
4044              
4045             C</path/module.pm> is the file to load, containing the module's perl code;
4046             if it's specified as a relative path, it's considered to be relative to the
4047             current configuration file. If it is omitted, the module will be loaded
4048             using perl's search path (the C<@INC> array).
4049              
4050             See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.
4051              
4052             =cut
4053              
4054             push (@cmds, {
4055             setting => 'loadplugin',
4056             is_admin => 1,
4057             code => sub {
4058 5148     5148   15787 my ($self, $key, $value, $line) = @_;
4059 5148 50       14523 if ($value eq '') {
4060 0         0 return $MISSING_REQUIRED_VALUE;
4061             }
4062 5148         8231 my ($package, $path);
4063 5148         12979 local ($1,$2);
4064 5148 50       28462 if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
4065 5148         17037 ($package, $path) = ($1, $2);
4066             } else {
4067 0         0 return $INVALID_VALUE;
4068             }
4069 5148         13348 $self->load_plugin ($package, $path);
4070             }
4071 91         1010 });
4072              
4073             =item tryplugin ModuleName [/path/module.pm]
4074              
4075             Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
4076             the filesystem.
4077              
4078             =cut
4079              
4080             push (@cmds, {
4081             setting => 'tryplugin',
4082             is_admin => 1,
4083             code => sub {
4084 0     0   0 my ($self, $key, $value, $line) = @_;
4085 0 0       0 if ($value eq '') {
4086 0         0 return $MISSING_REQUIRED_VALUE;
4087             }
4088 0         0 my ($package, $path);
4089 0         0 local ($1,$2);
4090 0 0       0 if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
4091 0         0 ($package, $path) = ($1, $2);
4092             } else {
4093 0         0 return $INVALID_VALUE;
4094             }
4095 0         0 $self->load_plugin ($package, $path, 1);
4096             }
4097 91         1034 });
4098              
4099             =item ignore_always_matching_regexps (Default: 0)
4100              
4101             Ignore any rule which contains a regexp which always matches.
4102             Currently only catches regexps which contain '||', or which begin or
4103             end with a '|'. Also ignore rules with C<some> combinatorial explosions.
4104              
4105             =cut
4106              
4107 91         614 push (@cmds, {
4108             setting => 'ignore_always_matching_regexps',
4109             is_admin => 1,
4110             default => 0,
4111             type => $CONF_TYPE_BOOL,
4112             });
4113              
4114             =back
4115              
4116             =head1 PREPROCESSING OPTIONS
4117              
4118             =over 4
4119              
4120             =item include filename
4121              
4122             Include configuration lines from C<filename>. Relative paths are considered
4123             relative to the current configuration file or user preferences file.
4124              
4125             =item if (boolean perl expression)
4126              
4127             Used to support conditional interpretation of the configuration
4128             file. Lines between this and a corresponding C<else> or C<endif> line
4129             will be ignored unless the expression evaluates as true
4130             (in the perl sense; that is, defined and non-0 and non-empty string).
4131              
4132             The conditional accepts a limited subset of perl for security -- just enough to
4133             perform basic arithmetic comparisons. The following input is accepted:
4134              
4135             =over 4
4136              
4137             =item numbers, whitespace, arithmetic operations and grouping
4138              
4139             Namely these characters and ranges:
4140              
4141             ( ) - + * / _ . , < = > ! ~ 0-9 whitespace
4142              
4143             =item version
4144              
4145             This will be replaced with the version number of the currently-running
4146             SpamAssassin engine. Note: The version used is in the internal SpamAssassin
4147             version format which is C<x.yyyzzz>, where x is major version, y is minor
4148             version, and z is maintenance version. So 3.0.0 is C<3.000000>, and 3.4.80
4149             is C<3.004080>.
4150              
4151             =item perl_version
4152              
4153             (Introduced in 3.4.1) This will be replaced with the version number of the
4154             currently-running perl engine. Note: The version used is in the $] version
4155             format which is C<x.yyyzzz>, where x is major version, y is minor version,
4156             and z is maintenance version. So 5.8.8 is C<5.008008>, and 5.10.0 is
4157             C<5.010000>. Use to protect rules that incorporate RE syntax elements
4158             introduced in later versions of perl, such as the C<++> non-backtracking
4159             match introduced in perl 5.10. For example:
4160              
4161             # Avoid lint error on older perl installs
4162             # Check SA version first to avoid warnings on checking perl_version on older SA
4163             if version > 3.004001 && perl_version >= 5.018000
4164             body INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18 /(?[ \p{Thai} & \p{Digit} ])/
4165             endif
4166              
4167             Note that the above will still generate a warning on perl older than 5.10.0;
4168             to avoid that warning do this instead:
4169              
4170             # Avoid lint error on older perl installs
4171             if can(Mail::SpamAssassin::Conf::perl_min_version_5010000)
4172             body INVALID_RE_SYNTAX_IN_PERL_5_8 /\w++/
4173             endif
4174              
4175             Warning: a can() test is only defined for perl 5.10.0!
4176              
4177              
4178             =item plugin(Name::Of::Plugin)
4179              
4180             This is a function call that returns C<1> if the plugin named
4181             C<Name::Of::Plugin> is loaded, or C<undef> otherwise.
4182              
4183             =item has(Name::Of::Package::function_name)
4184              
4185             This is a function call that returns C<1> if the perl package named
4186             C<Name::Of::Package> includes a function called C<function_name>, or C<undef>
4187             otherwise. Note that packages can be SpamAssassin plugins or built-in classes,
4188             there's no difference in this respect. Internally this invokes UNIVERSAL::can.
4189              
4190             =item can(Name::Of::Package::function_name)
4191              
4192             This is a function call that returns C<1> if the perl package named
4193             C<Name::Of::Package> includes a function called C<function_name>
4194             B<and> that function returns a true value when called with no arguments,
4195             otherwise C<undef> is returned.
4196              
4197             Is similar to C<has>, except that it also calls the named function,
4198             testing its return value (unlike the perl function UNIVERSAL::can).
4199             This makes it possible for a 'feature' function to determine its result
4200             value at run time.
4201              
4202             =back
4203              
4204             If the end of a configuration file is reached while still inside an
4205             C<if> scope, a warning will be issued, but parsing will restart on
4206             the next file.
4207              
4208             For example:
4209              
4210             if (version > 3.000000)
4211             header MY_FOO ...
4212             endif
4213              
4214             loadplugin MyPlugin plugintest.pm
4215              
4216             if plugin (MyPlugin)
4217             header MY_PLUGIN_FOO eval:check_for_foo()
4218             score MY_PLUGIN_FOO 0.1
4219             endif
4220              
4221             =item ifplugin PluginModuleName
4222              
4223             An alias for C<if plugin(PluginModuleName)>.
4224              
4225             =item else
4226              
4227             Used to support conditional interpretation of the configuration
4228             file. Lines between this and a corresponding C<endif> line,
4229             will be ignored unless the conditional expression evaluates as false
4230             (in the perl sense; that is, undefined, 0, or the empty string).
4231              
4232             =item require_version n.nnnnnn
4233              
4234             Indicates that the entire file, from this line on, requires a certain
4235             version of SpamAssassin to run. If a different (older or newer) version
4236             of SpamAssassin tries to read the configuration from this file, it will
4237             output a warning instead, and ignore it.
4238              
4239             Note: The version used is in the internal SpamAssassin version format which is
4240             C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
4241             version. So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.
4242              
4243             =cut
4244              
4245             push (@cmds, {
4246             setting => 'require_version',
4247             type => $CONF_TYPE_STRING,
4248       0     code => sub {
4249             }
4250 91         1002 });
4251              
4252             =back
4253              
4254             =head1 TEMPLATE TAGS
4255              
4256             The following C<tags> can be used as placeholders in certain options.
4257             They will be replaced by the corresponding value when they are used.
4258              
4259             Some tags can take an argument (in parentheses). The argument is
4260             optional, and the default is shown below.
4261              
4262             _YESNO_ "Yes" for spam, "No" for nonspam (=ham)
4263             _YESNO(spam_str,ham_str)_ returns the first argument ("Yes" if missing)
4264             for spam, and the second argument ("No" if missing) for ham
4265             _YESNOCAPS_ "YES" for spam, "NO" for nonspam (=ham)
4266             _YESNOCAPS(spam_str,ham_str)_ same as _YESNO(...)_, but uppercased
4267             _SCORE(PAD)_ message score, if PAD is included and is either spaces or
4268             zeroes, then pad scores with that many spaces or zeroes
4269             (default, none) ie: _SCORE(0)_ makes 2.4 become 02.4,
4270             _SCORE(00)_ is 002.4. 12.3 would be 12.3 and 012.3
4271             respectively.
4272             _REQD_ message threshold
4273             _VERSION_ version (eg. 3.0.0 or 3.1.0-r26142-foo1)
4274             _SUBVERSION_ sub-version/code revision date (eg. 2004-01-10)
4275             _RULESVERSION_ comma-separated list of rules versions, retrieved from
4276             an '# UPDATE version' comment in rules files; if there is
4277             more than one set of rules (update channels) the order
4278             is unspecified (currently sorted by names of files);
4279             _HOSTNAME_ hostname of the machine the mail was processed on
4280             _REMOTEHOSTNAME_ hostname of the machine the mail was sent from, only
4281             available with spamd
4282             _REMOTEHOSTADDR_ ip address of the machine the mail was sent from, only
4283             available with spamd
4284             _BAYES_ bayes score
4285             _TOKENSUMMARY_ number of new, neutral, spammy, and hammy tokens found
4286             _BAYESTC_ number of new tokens found
4287             _BAYESTCLEARNED_ number of seen tokens found
4288             _BAYESTCSPAMMY_ number of spammy tokens found
4289             _BAYESTCHAMMY_ number of hammy tokens found
4290             _HAMMYTOKENS(N)_ the N most significant hammy tokens (default, 5)
4291             _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5)
4292             _DATE_ rfc-2822 date of scan
4293             _STARS(*)_ one "*" (use any character) for each full score point
4294             (note: limited to 50 'stars')
4295             _SENDERDOMAIN_ a domain name of the envelope sender address, lowercased
4296             _AUTHORDOMAIN_ a domain name of the author address (the From header
4297             field), lowercased; note that RFC 5322 allows a mail
4298             message to have multiple authors - currently only the
4299             domain name of the first email address is returned
4300             _RELAYSTRUSTED_ relays used and deemed to be trusted (see the
4301             'X-Spam-Relays-Trusted' pseudo-header)
4302             _RELAYSUNTRUSTED_ relays used that can not be trusted (see the
4303             'X-Spam-Relays-Untrusted' pseudo-header)
4304             _RELAYSINTERNAL_ relays used and deemed to be internal (see the
4305             'X-Spam-Relays-Internal' pseudo-header)
4306             _RELAYSEXTERNAL_ relays used and deemed to be external (see the
4307             'X-Spam-Relays-External' pseudo-header)
4308             _LASTEXTERNALIP_ IP address of client in the external-to-internal
4309             SMTP handover
4310             _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal
4311             SMTP handover
4312             _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal
4313             SMTP handover
4314             _AUTOLEARN_ autolearn status ("ham", "no", "spam", "disabled",
4315             "failed", "unavailable")
4316             _AUTOLEARNSCORE_ portion of message score used by autolearn
4317             _TESTS(,)_ tests hit separated by "," (or other separator)
4318             _TESTSSCORES(,)_ as above, except with scores appended (eg. AWL=-3.0,...)
4319             _SUBTESTS(,)_ subtests (start with "__") hit separated by ","
4320             (or other separator)
4321             _SUBTESTSCOLLAPSED(,)_ subtests (start with "__") hit separated by ","
4322             (or other separator) with duplicated rules collapsed
4323             _DCCB_ DCC's "Brand"
4324             _DCCR_ DCC's results
4325             _PYZOR_ Pyzor results
4326             _RBL_ full results for positive RBL queries in DNS URI format
4327             _LANGUAGES_ possible languages of mail
4328             _PREVIEW_ content preview
4329             _REPORT_ terse report of tests hit (for header reports)
4330             _SUBJPREFIX_ subject prefix based on rules, to be prepended to Subject
4331             header by SpamAssassin caller
4332             _SUMMARY_ summary of tests hit for standard report (for body reports)
4333             _CONTACTADDRESS_ contents of the 'report_contact' setting
4334             _HEADER(NAME)_ includes the value of a message header. value is the same
4335             as is found for header rules (see elsewhere in this doc)
4336             _TIMING_ timing breakdown report
4337             _ADDEDHEADERHAM_ resulting header fields as requested by add_header for ham
4338             _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam
4339             _ADDEDHEADER_ same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam
4340              
4341             If a tag reference uses the name of a tag which is not in this list or defined
4342             by a loaded plugin, the reference will be left intact and not replaced by any
4343             value.
4344              
4345             Additional, plugin specific, template tags can be found in the documentation for
4346             the following plugins:
4347              
4348             L<Mail::SpamAssassin::Plugin::ASN>
4349             L<Mail::SpamAssassin::Plugin::AWL>
4350             L<Mail::SpamAssassin::Plugin::TxRep>
4351              
4352             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4353             which specifies a format. See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT>
4354             section, below, for details.
4355              
4356             =head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT
4357              
4358             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4359             which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_>
4360             The following formats are available:
4361              
4362             =over 4
4363              
4364             =item short
4365              
4366             Only the tokens themselves are listed.
4367             I<For example, preference file entry:>
4368              
4369             C<add_header all Spammy _SPAMMYTOKENS(2,short)_>
4370              
4371             I<Results in message header:>
4372              
4373             C<X-Spam-Spammy: remove.php, UD:jpg>
4374              
4375             Indicating that the top two spammy tokens found are C<remove.php>
4376             and C<UD:jpg>. (The token itself follows the last colon, the
4377             text before the colon indicates something about the token.
4378             C<UD> means the token looks like it might be part of a domain name.)
4379              
4380             =item compact
4381              
4382             The token probability, an abbreviated declassification distance (see
4383             example), and the token are listed.
4384             I<For example, preference file entry:>
4385              
4386             C<add_header all Spammy _SPAMMYTOKENS(2,compact)_>
4387              
4388             I<Results in message header:>
4389              
4390             C<0.989-6--remove.php, 0.988-+--UD:jpg>
4391              
4392             Indicating that the probabilities of the top two tokens are 0.989 and
4393             0.988, respectively. The first token has a declassification distance
4394             of 6, meaning that if the token had appeared in at least 6 more ham
4395             messages it would not be considered spammy. The C<+> for the second
4396             token indicates a declassification distance greater than 9.
4397              
4398             =item long
4399              
4400             Probability, declassification distance, number of times seen in a ham
4401             message, number of times seen in a spam message, age and the token are
4402             listed.
4403              
4404             I<For example, preference file entry:>
4405              
4406             C<add_header all Spammy _SPAMMYTOKENS(2,long)_>
4407              
4408             I<Results in message header:>
4409              
4410             C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg>
4411              
4412             In addition to the information provided by the compact option,
4413             the long option shows that the first token appeared in zero
4414             ham messages and four spam messages, and that it was last
4415             seen four days ago. The second token appeared in two ham messages,
4416             25 spam messages and was last seen one day ago.
4417             (Unlike the C<compact> option, the long option shows declassification
4418             distances that are greater than 9.)
4419              
4420             =back
4421              
4422             =cut
4423              
4424 91         887 return \@cmds;
4425             }
4426              
4427             ###########################################################################
4428              
4429             # settings that were once part of core, but are now in (possibly-optional)
4430             # bundled plugins. These will be warned about, but do not generate a fatal
4431             # error when "spamassassin --lint" is run like a normal syntax error would.
4432              
4433             our @MIGRATED_SETTINGS = qw{
4434             ok_languages
4435             };
4436              
4437             ###########################################################################
4438              
4439             sub new {
4440 91     91 0 312 my $class = shift;
4441 91   33     570 $class = ref($class) || $class;
4442 91         734 my $self = {
4443             main => shift,
4444             registered_commands => [],
4445 91         353 }; bless ($self, $class);
4446              
4447 91         1714 $self->{parser} = Mail::SpamAssassin::Conf::Parser->new($self);
4448 91         3030 $self->{parser}->register_commands($self->set_default_commands());
4449              
4450 91         737 $self->{errors} = 0;
4451 91         394 $self->{plugins_loaded} = { };
4452              
4453 91         787 $self->{tests} = { };
4454 91         295 $self->{test_types} = { };
4455 91         460 $self->{scoreset} = [ {}, {}, {}, {} ];
4456 91         253 $self->{scoreset_current} = 0;
4457 91         587 $self->set_score_set (0);
4458 91         292 $self->{tflags} = { };
4459 91         315 $self->{source_file} = { };
4460              
4461             # keep descriptions in a slow but space-efficient single-string
4462             # data structure
4463             # NOTE: Deprecated usage of TieOneStringHash as of 10/2018, it's an
4464             # absolute pig, doubling config parse time, while benchmarks indicate
4465             # no difference in resident memory size!
4466 91         310 $self->{descriptions} = { };
4467             #tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
4468             # or warn "tie failed";
4469 91         248 $self->{subjprefix} = { };
4470              
4471             # after parsing, tests are refiled into these hashes for each test type.
4472             # this allows e.g. a full-text test to be rewritten as a body test in
4473             # the user's user_prefs file.
4474 91         285 $self->{body_tests} = { };
4475 91         539 $self->{uri_tests} = { };
4476 91         313 $self->{uri_evals} = { }; # not used/implemented yet
4477 91         249 $self->{head_tests} = { };
4478 91         264 $self->{head_evals} = { };
4479 91         276 $self->{body_evals} = { };
4480 91         223 $self->{full_tests} = { };
4481 91         240 $self->{full_evals} = { };
4482 91         260 $self->{rawbody_tests} = { };
4483 91         588 $self->{rawbody_evals} = { };
4484 91         245 $self->{meta_tests} = { };
4485 91         296 $self->{eval_plugins} = { };
4486 91         433 $self->{duplicate_rules} = { };
4487              
4488             # testing stuff
4489 91         406 $self->{regression_tests} = { };
4490              
4491 91         271 $self->{rewrite_header} = { };
4492 91         244 $self->{want_rebuild_for_type} = { };
4493 91         225 $self->{user_defined_rules} = { };
4494 91         267 $self->{headers_spam} = [ ];
4495 91         330 $self->{headers_ham} = [ ];
4496              
4497 91         324 $self->{bayes_ignore_headers} = [ ];
4498 91         223 $self->{bayes_ignore_from} = { };
4499 91         233 $self->{bayes_ignore_to} = { };
4500              
4501 91         231 $self->{whitelist_auth} = { };
4502 91         210 $self->{def_whitelist_auth} = { };
4503 91         211 $self->{whitelist_from} = { };
4504 91         217 $self->{whitelist_allows_relays} = { };
4505 91         199 $self->{blacklist_from} = { };
4506 91         215 $self->{whitelist_from_rcvd} = { };
4507 91         196 $self->{def_whitelist_from_rcvd} = { };
4508              
4509 91         183 $self->{blacklist_to} = { };
4510 91         219 $self->{whitelist_to} = { };
4511 91         232 $self->{more_spam_to} = { };
4512 91         276 $self->{all_spam_to} = { };
4513              
4514 91         878 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
4515 91         371 $self->{internal_networks} = $self->new_netset('internal_networks',1);
4516 91         378 $self->{msa_networks} = $self->new_netset('msa_networks',0); # no loopback IP
4517 91         482 $self->{trusted_networks_configured} = 0;
4518 91         407 $self->{internal_networks_configured} = 0;
4519              
4520             # Make sure we add in X-Spam-Checker-Version
4521 91         185 { my $r = [ "Checker-Version",
  91         312  
4522             "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
4523 91         187 push(@{$self->{headers_spam}}, $r);
  91         299  
4524 91         175 push(@{$self->{headers_ham}}, $r);
  91         278  
4525             }
4526              
4527             # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
4528             # of 4096 octets as a starting point.
4529 91         324 $self->{dns_options}->{edns} = 4096;
4530              
4531             # these should potentially be settable by end-users
4532             # perhaps via plugin?
4533 91         348 $self->{num_check_received} = 9;
4534 91         253 $self->{bayes_expiry_pct} = 0.75;
4535 91         243 $self->{bayes_expiry_period} = 43200;
4536 91         198 $self->{bayes_expiry_max_exponent} = 9;
4537              
4538 91         233 $self->{encapsulated_content_description} = 'original message before SpamAssassin';
4539              
4540 91         520 $self;
4541             }
4542              
4543             sub mtime {
4544 0     0 0 0 my $self = shift;
4545 0 0       0 if (@_) {
4546 0         0 $self->{mtime} = shift;
4547             }
4548 0         0 return $self->{mtime};
4549             }
4550              
4551             ###########################################################################
4552              
4553             sub parse_scores_only {
4554 0     0 0 0 my ($self) = @_;
4555 0         0 $self->{parser}->parse ($_[1], 1);
4556             }
4557              
4558             sub parse_rules {
4559 90     90 0 284 my ($self) = @_;
4560 90         884 $self->{parser}->parse ($_[1], 0);
4561             }
4562              
4563             ###########################################################################
4564              
4565             sub set_score_set {
4566 183     183 0 789 my ($self, $set) = @_;
4567 183         736 $self->{scores} = $self->{scoreset}->[$set];
4568 183         588 $self->{scoreset_current} = $set;
4569 183         1094 dbg("config: score set $set chosen.");
4570             }
4571              
4572             sub get_score_set {
4573 433     433 0 975 my($self) = @_;
4574 433         1154 return $self->{scoreset_current};
4575             }
4576              
4577             sub get_rule_types {
4578 0     0 0 0 my ($self) = @_;
4579 0         0 return @rule_types;
4580             }
4581              
4582             sub get_rule_keys {
4583 0     0 0 0 my ($self, $test_type, $priority) = @_;
4584              
4585             # special case rbl_evals since they do not have a priority
4586 0 0       0 if ($test_type eq 'rbl_evals') {
4587 0         0 return keys(%{$self->{$test_type}});
  0         0  
4588             }
4589              
4590 0 0       0 if (defined($priority)) {
4591 0         0 return keys(%{$self->{$test_type}->{$priority}});
  0         0  
4592             }
4593             else {
4594 0         0 my @rules;
4595 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4596 0         0 push(@rules, keys(%{$self->{$test_type}->{$pri}}));
  0         0  
4597             }
4598 0         0 return @rules;
4599             }
4600             }
4601              
4602             sub get_rule_value {
4603 0     0 0 0 my ($self, $test_type, $rulename, $priority) = @_;
4604              
4605             # special case rbl_evals since they do not have a priority
4606 0 0       0 if ($test_type eq 'rbl_evals') {
4607 0         0 return @{$self->{$test_type}->{$rulename}};
  0         0  
4608             }
4609              
4610 0 0       0 if (defined($priority)) {
4611 0         0 return $self->{$test_type}->{$priority}->{$rulename};
4612             }
4613             else {
4614 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4615 0 0       0 if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4616 0         0 return $self->{$test_type}->{$pri}->{$rulename};
4617             }
4618             }
4619 0         0 return; # if we get here we didn't find the rule
4620             }
4621             }
4622              
4623             sub delete_rule {
4624 0     0 0 0 my ($self, $test_type, $rulename, $priority) = @_;
4625              
4626             # special case rbl_evals since they do not have a priority
4627 0 0       0 if ($test_type eq 'rbl_evals') {
4628 0         0 return delete($self->{$test_type}->{$rulename});
4629             }
4630              
4631 0 0       0 if (defined($priority)) {
4632 0         0 return delete($self->{$test_type}->{$priority}->{$rulename});
4633             }
4634             else {
4635 0         0 foreach my $pri (keys(%{$self->{priorities}})) {
  0         0  
4636 0 0       0 if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4637 0         0 return delete($self->{$test_type}->{$pri}->{$rulename});
4638             }
4639             }
4640 0         0 return; # if we get here we didn't find the rule
4641             }
4642             }
4643              
4644             # trim_rules ($regexp)
4645             #
4646             # Remove all rules that don't match the given regexp (or are sub-rules of
4647             # meta-tests that match the regexp).
4648              
4649             sub trim_rules {
4650 0     0 0 0 my ($self, $regexp) = @_;
4651              
4652 0         0 my ($rec, $err) = compile_regexp($regexp, 0);
4653 0 0       0 if (!$rec) {
4654 0         0 die "config: trim_rules: invalid regexp '$regexp': $err";
4655             }
4656              
4657 0         0 my @all_rules;
4658              
4659 0         0 foreach my $rule_type ($self->get_rule_types()) {
4660 0         0 push(@all_rules, $self->get_rule_keys($rule_type));
4661             }
4662              
4663 0         0 my @rules_to_keep = grep(/$rec/, @all_rules);
4664              
4665 0 0       0 if (@rules_to_keep == 0) {
4666 0         0 die "config: trim_rules: all rules excluded, nothing to test\n";
4667             }
4668              
4669 0         0 my @meta_tests = grep(/$rec/, $self->get_rule_keys('meta_tests'));
4670 0         0 foreach my $meta (@meta_tests) {
4671 0         0 push(@rules_to_keep, $self->add_meta_depends($meta))
4672             }
4673              
4674 0         0 my %rules_to_keep_hash;
4675              
4676 0         0 foreach my $rule (@rules_to_keep) {
4677 0         0 $rules_to_keep_hash{$rule} = 1;
4678             }
4679              
4680 0         0 foreach my $rule_type ($self->get_rule_types()) {
4681 0         0 foreach my $rulekey ($self->get_rule_keys($rule_type)) {
4682             $self->delete_rule($rule_type, $rulekey)
4683 0 0       0 if (!$rules_to_keep_hash{$rulekey});
4684             }
4685             }
4686             } # trim_rules()
4687              
4688             sub add_meta_depends {
4689 0     0 0 0 my ($self, $meta) = @_;
4690              
4691 0         0 my @rules;
4692 0         0 my @tokens = $self->get_rule_value('meta_tests', $meta) =~ m/(\w+)/g;
4693              
4694 0         0 @tokens = grep(!/^\d+$/, @tokens);
4695             # @tokens now only consists of sub-rules
4696              
4697 0         0 foreach my $token (@tokens) {
4698 0 0       0 die "config: meta test $meta depends on itself\n" if $token eq $meta;
4699 0         0 push(@rules, $token);
4700              
4701             # If the sub-rule is a meta-test, recurse
4702 0 0       0 if ($self->get_rule_value('meta_tests', $token)) {
4703 0         0 push(@rules, $self->add_meta_depends($token));
4704             }
4705             } # foreach my $token (@tokens)
4706              
4707 0         0 return @rules;
4708             } # add_meta_depends()
4709              
4710             sub is_rule_active {
4711 88     88 0 153 my ($self, $test_type, $rulename, $priority) = @_;
4712              
4713             # special case rbl_evals since they do not have a priority
4714 88 50       158 if ($test_type eq 'rbl_evals') {
4715 0 0       0 return 0 unless ($self->{$test_type}->{$rulename});
4716 0         0 return ($self->{scores}->{$rulename});
4717             }
4718              
4719             # first determine if the rule is defined
4720 88 50       142 if (defined($priority)) {
4721             # we have a specific priority
4722 0 0       0 return 0 unless ($self->{$test_type}->{$priority}->{$rulename});
4723             }
4724             else {
4725             # no specific priority so we must loop over all currently defined
4726             # priorities to see if the rule is defined
4727 88         102 my $found_p = 0;
4728 88         98 foreach my $pri (keys %{$self->{priorities}}) {
  88         245  
4729 88 50       218 if ($self->{$test_type}->{$pri}->{$rulename}) {
4730 88         104 $found_p = 1;
4731 88         120 last;
4732             }
4733             }
4734 88 50       182 return 0 unless ($found_p);
4735             }
4736              
4737 88         263 return ($self->{scores}->{$rulename});
4738             }
4739              
4740             ###########################################################################
4741              
4742             # treats a bitset argument as a bit vector of all possible port numbers (8 kB)
4743             # and sets bit values to $value (0 or 1) in the specified range of port numbers
4744             #
4745             sub set_ports_range {
4746 1     1 0 5 my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;
4747 1 50       3 $port_range_lo = 0 if $port_range_lo < 0;
4748 1 50       4 $port_range_hi = 65535 if $port_range_hi > 65535;
4749 1 50       4 if (!defined $$bitset_ref) { # provide a sensible default
    0          
4750 1         5 wipe_ports_range($bitset_ref, 1); # turn on all bits 0..65535
4751 1         624 vec($$bitset_ref,$_,1) = 0 for 0..1023; # avoid 0 and privileged ports
4752             } elsif ($$bitset_ref eq '') { # repopulate the bitset (late configuration)
4753 0         0 wipe_ports_range($bitset_ref, 0); # turn off all bits 0..65535
4754             }
4755 1 50       7 $value = !$value ? 0 : 1;
4756 1         5 for (my $j = $port_range_lo; $j <= $port_range_hi; $j++) {
4757 1         7 vec($$bitset_ref,$j,1) = $value;
4758             }
4759             }
4760              
4761             sub wipe_ports_range {
4762 1     1 0 3 my($bitset_ref, $value) = @_;
4763 1 50       11 $value = !$value ? "\000" : "\377";
4764 1         18 $$bitset_ref = $value x 8192; # quickly turn all bits 0..65535 on or off
4765             }
4766              
4767             ###########################################################################
4768              
4769             sub add_to_addrlist {
4770 33     33 0 2647 my $self = shift; $self->{parser}->add_to_addrlist(@_);
  33         86  
4771             }
4772             sub add_to_addrlist_rcvd {
4773 0     0 0 0 my $self = shift; $self->{parser}->add_to_addrlist_rcvd(@_);
  0         0  
4774             }
4775             sub remove_from_addrlist {
4776 0     0 0 0 my $self = shift; $self->{parser}->remove_from_addrlist(@_);
  0         0  
4777             }
4778             sub remove_from_addrlist_rcvd {
4779 0     0 0 0 my $self = shift; $self->{parser}->remove_from_addrlist_rcvd(@_);
  0         0  
4780             }
4781              
4782             ###########################################################################
4783              
4784             sub regression_tests {
4785 0     0 0 0 my $self = shift;
4786 0 0       0 if (@_ == 1) {
4787             # we specified a symbolic name, return the strings
4788 0         0 my $name = shift;
4789 0         0 my $tests = $self->{regression_tests}->{$name};
4790 0         0 return @$tests;
4791             }
4792             else {
4793             # no name asked for, just return the symbolic names we have tests for
4794 0         0 return keys %{$self->{regression_tests}};
  0         0  
4795             }
4796             }
4797              
4798             ###########################################################################
4799              
4800             sub finish_parsing {
4801 90     90 0 399 my ($self, $user) = @_;
4802 90         696 $self->{parser}->finish_parsing($user);
4803             }
4804              
4805             ###########################################################################
4806              
4807             sub found_any_rules {
4808 93     93 0 276 my ($self) = @_;
4809 93 100       409 if (!defined $self->{found_any_rules}) {
4810 90         191 $self->{found_any_rules} = (scalar keys %{$self->{tests}} > 0);
  90         1050  
4811             }
4812 93         301 return $self->{found_any_rules};
4813             }
4814              
4815             ###########################################################################
4816              
4817             sub get_description_for_rule {
4818 178     178 0 452 my ($self, $rule) = @_;
4819             # as silly as it looks, localized $1 here prevents an outer $1 from getting
4820             # tainted by the expression or assignment in the next line, bug 6148
4821 178         469 local($1);
4822 178         572 my $rule_descr = $self->{descriptions}->{$rule};
4823 178         664 return $rule_descr;
4824             }
4825              
4826             ###########################################################################
4827              
4828             sub maybe_header_only {
4829 13     13 0 34 my($self,$rulename) = @_;
4830 13         37 my $type = $self->{test_types}->{$rulename};
4831              
4832 13 50       65 if ($rulename =~ /AUTOLEARNTEST/i) {
4833 0         0 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4834             }
4835            
4836 13 50       48 return 0 if (!defined ($type));
4837              
4838 13 100 100     114 if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
    50          
4839 9         39 return 1;
4840              
4841             } elsif ($type == $TYPE_META_TESTS) {
4842 0         0 my $tflags = $self->{tflags}->{$rulename};
4843 0   0     0 $tflags ||= '';
4844 0 0       0 if ($tflags =~ m/\bnet\b/i) {
4845 0         0 return 0;
4846             } else {
4847 0         0 return 1;
4848             }
4849             }
4850              
4851 4         38 return 0;
4852             }
4853              
4854             sub maybe_body_only {
4855 4     4 0 14 my($self,$rulename) = @_;
4856 4         11 my $type = $self->{test_types}->{$rulename};
4857              
4858 4 50       16 if ($rulename =~ /AUTOLEARNTEST/i) {
4859 0         0 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4860             }
4861              
4862 4 50       12 return 0 if (!defined ($type));
4863              
4864 4 50 33     79 if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
    0 33        
      33        
4865             || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
4866             {
4867             # some rawbody go off of headers...
4868 4         27 return 1;
4869              
4870             } elsif ($type == $TYPE_META_TESTS) {
4871 0   0     0 my $tflags = $self->{tflags}->{$rulename}; $tflags ||= '';
  0         0  
4872 0 0       0 if ($tflags =~ m/\bnet\b/i) {
4873 0         0 return 0;
4874             } else {
4875 0         0 return 1;
4876             }
4877             }
4878              
4879 0         0 return 0;
4880             }
4881              
4882             ###########################################################################
4883              
4884             sub load_plugin {
4885 5148     5148 0 13437 my ($self, $package, $path, $silent) = @_;
4886 5148         19031 $self->{main}->{plugins}->load_plugin($package, $path, $silent);
4887             }
4888              
4889             sub load_plugin_succeeded {
4890 2362     2362 0 7083 my ($self, $plugin, $package, $path) = @_;
4891 2362         23219 $self->{plugins_loaded}->{$package} = 1;
4892             }
4893              
4894             sub register_eval_rule {
4895 11280     11280 0 17638 my ($self, $pluginobj, $nameofsub) = @_;
4896 11280         39248 $self->{eval_plugins}->{$nameofsub} = $pluginobj;
4897             }
4898              
4899             ###########################################################################
4900              
4901             sub clone {
4902 3     3 0 8 my ($self, $source, $dest) = @_;
4903              
4904 3 100       9 unless (defined $source) {
4905 1         2 $source = $self;
4906             }
4907 3 100       9 unless (defined $dest) {
4908 2         4 $dest = $self;
4909             }
4910              
4911 3         4 my %done;
4912              
4913             # keys that should not be copied in ->clone().
4914             # bug 4179: include want_rebuild_for_type, so that if a user rule
4915             # is defined, its method will be recompiled for future scans in
4916             # order to *remove* the generated method calls
4917 3         22 my @NON_COPIED_KEYS = qw(
4918             main eval_plugins plugins_loaded registered_commands sed_path_cache parser
4919             scoreset scores want_rebuild_for_type
4920             );
4921              
4922             # special cases. first, skip anything that cannot be changed
4923             # by users, and the stuff we take care of here
4924 3         11 foreach my $var (@NON_COPIED_KEYS) {
4925 27         50 $done{$var} = undef;
4926             }
4927              
4928             # keys that should can be copied using a ->clone() method, in ->clone()
4929 3         10 my @CLONABLE_KEYS = qw(
4930             internal_networks trusted_networks msa_networks
4931             );
4932              
4933 3         7 foreach my $key (@CLONABLE_KEYS) {
4934 9         35 $dest->{$key} = $source->{$key}->clone();
4935 9         20 $done{$key} = undef;
4936             }
4937              
4938             # two-level hashes
4939 3         9 foreach my $key (qw(uri_host_lists askdns)) {
4940 6         12 my $v = $source->{$key};
4941 6         14 my $dest_key_ref = $dest->{$key} = {}; # must start from scratch!
4942 6         11 while(my($k2,$v2) = each %{$v}) {
  6         32  
4943 0         0 %{$dest_key_ref->{$k2}} = %{$v2};
  0         0  
  0         0  
4944             }
4945 6         18 $done{$key} = undef;
4946             }
4947              
4948             # bug 4179: be smarter about cloning the rule-type structures;
4949             # some are like this: $self->{type}->{priority}->{name} = 'value';
4950             # which is an extra level that the below code won't deal with
4951 3         10 foreach my $t (@rule_types) {
4952 36         40 foreach my $k (keys %{$source->{$t}}) {
  36         92  
4953 12         20 my $v = $source->{$t}->{$k};
4954 12         18 my $i = ref $v;
4955 12 50       25 if ($i eq 'HASH') {
    0          
4956 12         17 %{$dest->{$t}->{$k}} = %{$v};
  12         98  
  12         56  
4957             }
4958             elsif ($i eq 'ARRAY') {
4959 0         0 @{$dest->{$t}->{$k}} = @{$v};
  0         0  
  0         0  
4960             }
4961             else {
4962 0         0 $dest->{$t}->{$k} = $v;
4963             }
4964             }
4965 36         78 $done{$t} = undef;
4966             }
4967              
4968             # and now, copy over all the rest -- the less complex cases.
4969 3         6 while(my($k,$v) = each %{$source}) {
  756         1828  
4970 753 100       1223 next if exists $done{$k}; # we handled it above
4971 692         925 $done{$k} = undef;
4972 692         844 my $i = ref($v);
4973              
4974             # Not a reference, or a scalar? Just copy the value over.
4975 692 100       1280 if ($i eq '') {
    50          
    100          
    50          
    0          
4976 536         1025 $dest->{$k} = $v;
4977             }
4978             elsif ($i eq 'SCALAR') {
4979 0         0 $dest->{$k} = $$v;
4980             }
4981             elsif ($i eq 'ARRAY') {
4982 27         35 @{$dest->{$k}} = @{$v};
  27         93  
  27         41  
4983             }
4984             elsif ($i eq 'HASH') {
4985 129         133 %{$dest->{$k}} = %{$v};
  129         3751  
  129         2071  
4986             }
4987             elsif ($i eq 'Regexp') {
4988 0         0 $dest->{$k} = $v;
4989             }
4990             else {
4991             # throw a warning for debugging -- should never happen in normal usage
4992 0         0 warn "config: dup unknown type $k, $i\n";
4993             }
4994             }
4995              
4996 3         5 foreach my $cmd (@{$self->{registered_commands}}) {
  3         10  
4997 720         1047 my $k = $cmd->{setting};
4998 720 100       1353 next if exists $done{$k}; # we handled it above
4999 76         137 $done{$k} = undef;
5000 76         176 $dest->{$k} = $source->{$k};
5001             }
5002              
5003             # scoresets
5004 3         59 delete $dest->{scoreset};
5005 3         12 for my $i (0 .. 3) {
5006 12         17 %{$dest->{scoreset}->[$i]} = %{$source->{scoreset}->[$i]};
  12         334  
  12         95  
5007             }
5008              
5009             # deal with $conf->{scores}, it needs to be a reference into the scoreset
5010             # hash array dealy. Do it at the end since scoreset_current isn't set
5011             # otherwise.
5012 3         21 $dest->{scores} = $dest->{scoreset}->[$dest->{scoreset_current}];
5013              
5014             # ensure we don't copy the path cache from the master
5015 3         6 delete $dest->{sed_path_cache};
5016              
5017 3         80 return 1;
5018             }
5019              
5020             ###########################################################################
5021              
5022             sub free_uncompiled_rule_source {
5023 2     2 0 8 my ($self) = @_;
5024              
5025 2 50 33     38 if (!$self->{main}->{keep_config_parsing_metadata} &&
5026             !$self->{allow_user_rules})
5027             {
5028 2         10 delete $self->{if_stack};
5029             #delete $self->{source_file};
5030             #delete $self->{meta_dependencies};
5031             }
5032             }
5033              
5034             sub new_netset {
5035 369     369 0 1052 my ($self, $netset_name, $add_loopback) = @_;
5036 369         2566 my $set = Mail::SpamAssassin::NetSet->new($netset_name);
5037 369 100       928 if ($add_loopback) {
5038 246         1213 $set->add_cidr('127.0.0.0/8');
5039 246         709 $set->add_cidr('::1');
5040             }
5041 369         1413 return $set;
5042             }
5043              
5044             ###########################################################################
5045              
5046             sub finish {
5047 52     52 0 161 my ($self) = @_;
5048             #untie %{$self->{descriptions}};
5049 52         98 %{$self} = ();
  52         33985  
5050             }
5051              
5052             ###########################################################################
5053              
5054 0     0 0 0 sub sa_die { Mail::SpamAssassin::sa_die(@_); }
5055              
5056             ###########################################################################
5057              
5058             # subroutines available to conditionalize rules, for example:
5059             # if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers))
5060              
5061 0     0 0 0 sub feature_originating_ip_headers { 1 }
5062 0     0 0 0 sub feature_dns_local_ports_permit_avoid { 1 }
5063 0     0 0 0 sub feature_bayes_auto_learn_on_error { 1 }
5064 0     0 0 0 sub feature_uri_host_listed { 1 }
5065 0     0 0 0 sub feature_yesno_takes_args { 1 }
5066 0     0 0 0 sub feature_bug6558_free { 1 }
5067 0     0 0 0 sub feature_edns { 1 } # supports 'dns_options edns' config option
5068 0     0 0 0 sub feature_dns_query_restriction { 1 } # supported config option
5069 156     156 0 2910 sub feature_registryboundaries { 1 } # replaces deprecated registrarboundaries
5070 0     0 0   sub feature_compile_regexp { 1 } # Util::compile_regexp
5071 0     0 0   sub feature_meta_rules_matching { 1 } # meta rules_matching() expression
5072 0     0 0   sub feature_subjprefix { 1 } # add subject prefixes rule option
5073 0     0 0   sub has_tflags_nosubject { 1 } # tflags nosubject
5074 0     0 0   sub perl_min_version_5010000 { return $] >= 5.010000 } # perl version check ("perl_version" not neatly backwards-compatible)
5075              
5076             ###########################################################################
5077              
5078             1;
5079             __END__
5080              
5081             =head1 LOCALI[SZ]ATION
5082              
5083             A line starting with the text C<lang xx> will only be interpreted
5084             if the user is in that locale, allowing test descriptions and
5085             templates to be set for that language.
5086              
5087             The locales string should specify either both the language and country, e.g.
5088             C<lang pt_BR>, or just the language, e.g. C<lang de>.
5089              
5090             =head1 SEE ALSO
5091              
5092             Mail::SpamAssassin(3)
5093             spamassassin(1)
5094             spamd(1)
5095              
5096             =cut