File Coverage

blib/lib/Mail/SpamAssassin/Conf.pm
Criterion Covered Total %
statement 554 986 56.1
branch 121 398 30.4
condition 37 149 24.8
subroutine 60 114 52.6
pod 0 48 0.0
total 772 1695 45.5


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Conf - SpamAssassin configuration file
21              
22             =head1 SYNOPSIS
23              
24             # a comment
25              
26             rewrite_header Subject *****SPAM*****
27              
28             full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i
29             describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618
30              
31             header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i
32             describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters
33              
34             score A_HREF_TO_REMOVE 2.0
35              
36             lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com
37              
38             lang pt_BR report O programa detetor de Spam ZOE [...]
39              
40             =head1 DESCRIPTION
41              
42             SpamAssassin is configured using traditional UNIX-style configuration files,
43             loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin>
44             directories.
45              
46             The following web page lists the most important configuration settings
47             used to configure SpamAssassin; novices are encouraged to read it first:
48              
49             http://wiki.apache.org/spamassassin/ImportantInitialConfigItems
50              
51             =head1 FILE FORMAT
52              
53             The C<#> character starts a comment, which continues until end of line.
54             B<NOTE:> if the C<#> character is to be used as part of a rule or
55             configuration option, it must be escaped with a backslash. i.e.: C<\#>
56              
57             Whitespace in the files is not significant, but please note that starting a
58             line with whitespace is deprecated, as we reserve its use for multi-line rule
59             definitions, at some point in the future.
60              
61             Currently, each rule or configuration setting must fit on one line; multi-line
62             settings are not supported yet.
63              
64             File and directory paths can use C<~> to refer to the user's home
65             directory, but no other shell-style path extensions such as globbing or
66             C<~user/> are supported.
67              
68             Where appropriate below, default values are listed in parentheses.
69              
70             Test names ("SYMBOLIC_TEST_NAME") can only contain alphanumerics/underscores,
71             cannot start with a digit, and must be shorter than 128 characters.
72              
73             =head1 USER PREFERENCES
74              
75             The following options can be used in both site-wide (C<local.cf>) and
76             user-specific (C<user_prefs>) configuration files to customize how
77             SpamAssassin handles incoming email messages.
78              
79             =cut
80              
81              
82             use strict;
83 41     41   238 use warnings;
  41         62  
  41         1030  
84 41     41   172 # use bytes;
  41         60  
  41         1103  
85             use re 'taint';
86 41     41   185  
  41         67  
  41         1206  
87             use Mail::SpamAssassin::NetSet;
88 41     41   10761 use Mail::SpamAssassin::Constants qw(:sa :ip);
  41         121  
  41         1559  
89 41     41   238 use Mail::SpamAssassin::Conf::Parser;
  41         65  
  41         7293  
90 41     41   13786 use Mail::SpamAssassin::Logger;
  41         114  
  41         5138  
91 41     41   315 use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
  41         71  
  41         2438  
92 41     41   234 use File::Spec;
  41         72  
  41         1583  
93 41     41   201  
  41         75  
  41         564233  
94             our @ISA = qw();
95              
96             our $COLLECT_REGRESSION_TESTS; # Used only for unit tests.
97              
98             # odd => eval test. Not constants so they can be shared with Parser
99             # TODO: move to Constants.pm?
100             our $TYPE_HEAD_TESTS = 0x0008;
101             our $TYPE_HEAD_EVALS = 0x0009;
102             our $TYPE_BODY_TESTS = 0x000a;
103             our $TYPE_BODY_EVALS = 0x000b;
104             our $TYPE_FULL_TESTS = 0x000c;
105             our $TYPE_FULL_EVALS = 0x000d;
106             our $TYPE_RAWBODY_TESTS = 0x000e;
107             our $TYPE_RAWBODY_EVALS = 0x000f;
108             our $TYPE_URI_TESTS = 0x0010;
109             our $TYPE_URI_EVALS = 0x0011;
110             our $TYPE_META_TESTS = 0x0012;
111             our $TYPE_RBL_EVALS = 0x0013;
112             our $TYPE_EMPTY_TESTS = 0x0014;
113              
114             my @rule_types = ("body_tests", "uri_tests", "uri_evals",
115             "head_tests", "head_evals", "body_evals", "full_tests",
116             "full_evals", "rawbody_tests", "rawbody_evals",
117             "rbl_evals", "meta_tests");
118              
119             #Removed $VERSION per BUG 6422
120             #$VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later
121              
122             # these are variables instead of constants so that other classes can
123             # access them; if they're constants, they'd have to go in Constants.pm
124             # TODO: move to Constants.pm?
125             our $CONF_TYPE_STRING = 1;
126             our $CONF_TYPE_BOOL = 2;
127             our $CONF_TYPE_NUMERIC = 3;
128             our $CONF_TYPE_HASH_KEY_VALUE = 4;
129             our $CONF_TYPE_ADDRLIST = 5;
130             our $CONF_TYPE_TEMPLATE = 6;
131             our $CONF_TYPE_NOARGS = 7;
132             our $CONF_TYPE_STRINGLIST = 8;
133             our $CONF_TYPE_IPADDRLIST = 9;
134             our $CONF_TYPE_DURATION = 10;
135             our $MISSING_REQUIRED_VALUE = '-99999999999999'; # string expected by parser
136             our $INVALID_VALUE = '-99999999999998';
137             our $INVALID_HEADER_FIELD_NAME = '-99999999999997';
138              
139             # set to "1" by the test suite code, to record regression tests
140             # $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;
141              
142             # search for "sub new {" to find the start of the code
143             ###########################################################################
144              
145             my($self) = @_;
146              
147 92     92 0 323 # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
148             # push each config item like this, to avoid a POD bug; it can't just accept
149             # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
150             my @cmds;
151              
152 92         257 =head2 SCORING OPTIONS
153              
154             =over 4
155              
156             =item required_score n.nn (default: 5)
157              
158             Set the score required before a mail is considered spam. C<n.nn> can
159             be an integer or a real number. 5.0 is the default setting, and is
160             quite aggressive; it would be suitable for a single-user setup, but if
161             you're an ISP installing SpamAssassin, you should probably set the
162             default to be more conservative, like 8.0 or 10.0. It is not
163             recommended to automatically delete or discard messages marked as
164             spam, as your users B<will> complain, but if you choose to do so, only
165             delete messages with an exceptionally high score such as 15.0 or
166             higher. This option was previously known as C<required_hits> and that
167             name is still accepted, but is deprecated.
168              
169             =cut
170              
171             push (@cmds, {
172             setting => 'required_score',
173 92         1617 aliases => ['required_hits'], # backward compatible
174             default => 5,
175             type => $CONF_TYPE_NUMERIC,
176             });
177              
178             =item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]
179              
180             Assign scores (the number of points for a hit) to a given test.
181             Scores can be positive or negative real numbers or integers.
182             C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
183             that test; for example, 'FROM_ENDS_IN_NUMS'.
184              
185             If only one valid score is listed, then that score is always used
186             for a test.
187              
188             If four valid scores are listed, then the score that is used depends
189             on how SpamAssassin is being used. The first score is used when
190             both Bayes and network tests are disabled (score set 0). The second
191             score is used when Bayes is disabled, but network tests are enabled
192             (score set 1). The third score is used when Bayes is enabled and
193             network tests are disabled (score set 2). The fourth score is used
194             when Bayes is enabled and network tests are enabled (score set 3).
195              
196             Setting a rule's score to 0 will disable that rule from running.
197              
198             If any of the score values are surrounded by parentheses '()', then
199             all of the scores in the line are considered to be relative to the
200             already set score. i.e.: '(3)' means increase the score for this
201             rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase
202             the score for this rule by 3 in score sets 0 and 2 only.
203              
204             If no score is given for a test by the end of the configuration,
205             a default score is assigned: a score of 1.0 is used for all tests,
206             except those whose names begin with 'T_' (this is used to indicate a
207             rule in testing) which receive 0.01.
208              
209             Note that test names which begin with '__' are indirect rules used
210             to compose meta-match rules and can also act as prerequisites to
211             other rules. They are not scored or listed in the 'tests hit'
212             reports, but assigning a score of 0 to an indirect rule will disable
213             it from running.
214              
215             =cut
216              
217             push (@cmds, {
218             setting => 'score',
219             is_frequent => 1,
220             code => sub {
221             my ($self, $key, $value, $line) = @_;
222             my($rule, @scores) = split(/\s+/, $value);
223 1245     1245   3561 unless (defined $value && $value !~ /^$/ &&
224 1245         7046 (scalar @scores == 1 || scalar @scores == 4)) {
225 1245 50 33     10199 info("config: score: requires a symbolic rule name and 1 or 4 scores");
      66        
      66        
226             return $MISSING_REQUIRED_VALUE;
227 0         0 }
228 0         0  
229             # Figure out if we're doing relative scores, remove the parens if we are
230             my $relative = 0;
231             foreach (@scores) {
232 1245         2143 local ($1);
233 1245         2265 if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
234 1812         3034 $relative = 1;
235 1812 100       3344 }
236 9         13 unless (/^-?\d+(?:\.\d+)?$/) {
237             info("config: score: the non-numeric score ($_) is not valid, " .
238 1812 50       7628 "a numeric score is required");
239 0         0 return $INVALID_VALUE;
240             }
241 0         0 }
242              
243             if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
244             info("config: score: relative score without previous setting in " .
245 1245 50 66     2752 "configuration");
246 0         0 return $INVALID_VALUE;
247             }
248 0         0  
249             # If we're only passed 1 score, copy it to the other scoresets
250             if (@scores) {
251             if (@scores != 4) {
252 1245 50       2441 @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
253 1245 100       2032 }
254 1056         3452  
255             # Set the actual scoreset values appropriately
256             for my $index (0..3) {
257             my $score = $relative ?
258 1245         2355 $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
259             $scores[$index];
260 4980 100       9490  
261             $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
262             }
263 4980         20356 }
264             }
265             });
266              
267 92         2038 =back
268              
269             =head2 WHITELIST AND BLACKLIST OPTIONS
270              
271             =over 4
272              
273             =item whitelist_from user@example.com
274              
275             Used to whitelist sender addresses which send mail that is often tagged
276             (incorrectly) as spam.
277              
278             Use of this setting is not recommended, since it blindly trusts the message,
279             which is routinely and easily forged by spammers and phish senders. The
280             recommended solution is to instead use C<whitelist_auth> or other authenticated
281             whitelisting methods, or C<whitelist_from_rcvd>.
282              
283             Whitelist and blacklist addresses are now file-glob-style patterns, so
284             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
285             Specifically, C<*> and C<?> are allowed, but all other metacharacters
286             are not. Regular expressions are not used for security reasons.
287             Matching is case-insensitive.
288              
289             Multiple addresses per line, separated by spaces, is OK. Multiple
290             C<whitelist_from> lines are also OK.
291              
292             The headers checked for whitelist addresses are as follows: if C<Resent-From>
293             is set, use that; otherwise check all addresses taken from the following
294             set of headers:
295              
296             Envelope-Sender
297             Resent-Sender
298             X-Envelope-From
299             From
300              
301             In addition, the "envelope sender" data, taken from the SMTP envelope data
302             where this is available, is looked up. See C<envelope_sender_header>.
303              
304             e.g.
305              
306             whitelist_from joe@example.com fred@example.com
307             whitelist_from *@example.com
308              
309             =cut
310              
311             push (@cmds, {
312             setting => 'whitelist_from',
313 92         632 type => $CONF_TYPE_ADDRLIST,
314             });
315              
316             =item unwhitelist_from user@example.com
317              
318             Used to remove a default whitelist_from entry, so for example a distribution
319             whitelist_from can be overridden in a local.cf file, or an individual user can
320             override a whitelist_from entry in their own C<user_prefs> file.
321             The specified email address has to match exactly (although case-insensitively)
322             the address previously used in a whitelist_from line, which implies that a
323             wildcard only matches literally the same wildcard (not 'any' address).
324              
325             e.g.
326              
327             unwhitelist_from joe@example.com fred@example.com
328             unwhitelist_from *@example.com
329              
330             =cut
331              
332             push (@cmds, {
333             command => 'unwhitelist_from',
334 92         1004 setting => 'whitelist_from',
335             type => $CONF_TYPE_ADDRLIST,
336             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
337             });
338              
339             =item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
340              
341             Works similarly to whitelist_from, except that in addition to matching
342             a sender address, a relay's rDNS name or its IP address must match too
343             for the whitelisting rule to fire. The first parameter is a sender's e-mail
344             address to whitelist, and the second is a string to match the relay's rDNS,
345             or its IP address. Matching is case-insensitive.
346              
347             This second parameter is matched against a TCP-info information field as
348             provided in a FROM clause of a trace information (i.e. in a Received header
349             field, see RFC 5321). Only the Received header fields inserted by trusted
350             hosts are considered. This parameter can either be a full hostname, or a
351             domain component of that hostname, or an IP address (optionally followed
352             by a slash and a prefix length) in square brackets. The address prefix
353             (mask) length with a slash may stand within brackets along with an address,
354             or may follow the bracketed address. Reverse DNS lookup is done by an MTA,
355             not by SpamAssassin.
356              
357             For backward compatibility as an alternative to a CIDR notation, an IPv4
358             address in brackets may be truncated on classful boundaries to cover whole
359             subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>.
360              
361             In other words, if the host that connected to your MX had an IP address
362             192.0.2.123 that mapped to 'sendinghost.example.org', you should specify
363             C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or
364             C<[192.0.2.0/24]>, or C<[192.0.2]> here.
365              
366             Note that this requires that C<internal_networks> be correct. For simple
367             cases, it will be, but for a complex network you may get better results
368             by setting that parameter.
369              
370             It also requires that your mail exchangers be configured to perform DNS
371             reverse lookups on the connecting host's IP address, and to record the
372             result in the generated Received header field according to RFC 5321.
373              
374             e.g.
375              
376             whitelist_from_rcvd joe@example.com example.com
377             whitelist_from_rcvd *@* mail.example.org
378             whitelist_from_rcvd *@axkit.org [192.0.2.123]
379             whitelist_from_rcvd *@axkit.org [192.0.2.0/24]
380             whitelist_from_rcvd *@axkit.org [192.0.2.0]/24
381             whitelist_from_rcvd *@axkit.org [2001:db8:1234::/48]
382             whitelist_from_rcvd *@axkit.org [2001:db8:1234::]/48
383              
384             =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
385              
386             Same as C<whitelist_from_rcvd>, but used for the default whitelist entries
387             in the SpamAssassin distribution. The whitelist score is lower, because
388             these are often targets for spammer spoofing.
389              
390             =cut
391              
392             push (@cmds, {
393             setting => 'whitelist_from_rcvd',
394             type => $CONF_TYPE_ADDRLIST,
395             code => sub {
396             my ($self, $key, $value, $line) = @_;
397             unless (defined $value && $value !~ /^$/) {
398 0     0   0 return $MISSING_REQUIRED_VALUE;
399 0 0 0     0 }
400 0         0 unless ($value =~ /^\S+\s+\S+$/) {
401             return $INVALID_VALUE;
402 0 0       0 }
403 0         0 $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd',
404             split(/\s+/, $value));
405 0         0 }
406             });
407              
408 92         1079 push (@cmds, {
409             setting => 'def_whitelist_from_rcvd',
410             type => $CONF_TYPE_ADDRLIST,
411             code => sub {
412             my ($self, $key, $value, $line) = @_;
413             unless (defined $value && $value !~ /^$/) {
414 0     0   0 return $MISSING_REQUIRED_VALUE;
415 0 0 0     0 }
416 0         0 unless ($value =~ /^\S+\s+\S+$/) {
417             return $INVALID_VALUE;
418 0 0       0 }
419 0         0 $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd',
420             split(/\s+/, $value));
421 0         0 }
422             });
423              
424 92         1166 =item whitelist_allows_relays user@example.com
425              
426             Specify addresses which are in C<whitelist_from_rcvd> that sometimes
427             send through a mail relay other than the listed ones. By default mail
428             with a From address that is in C<whitelist_from_rcvd> that does not match
429             the relay will trigger a forgery rule. Including the address in
430             C<whitelist_allows_relays> prevents that.
431              
432             Whitelist and blacklist addresses are now file-glob-style patterns, so
433             C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
434             Specifically, C<*> and C<?> are allowed, but all other metacharacters
435             are not. Regular expressions are not used for security reasons.
436             Matching is case-insensitive.
437              
438             Multiple addresses per line, separated by spaces, is OK. Multiple
439             C<whitelist_allows_relays> lines are also OK.
440              
441             The specified email address does not have to match exactly the address
442             previously used in a whitelist_from_rcvd line as it is compared to the
443             address in the header.
444              
445             e.g.
446              
447             whitelist_allows_relays joe@example.com fred@example.com
448             whitelist_allows_relays *@example.com
449              
450             =cut
451              
452             push (@cmds, {
453             setting => 'whitelist_allows_relays',
454 92         645 type => $CONF_TYPE_ADDRLIST,
455             });
456              
457             =item unwhitelist_from_rcvd user@example.com
458              
459             Used to remove a default whitelist_from_rcvd or def_whitelist_from_rcvd
460             entry, so for example a distribution whitelist_from_rcvd can be overridden
461             in a local.cf file, or an individual user can override a whitelist_from_rcvd
462             entry in their own C<user_prefs> file.
463              
464             The specified email address has to match exactly the address previously
465             used in a whitelist_from_rcvd line.
466              
467             e.g.
468              
469             unwhitelist_from_rcvd joe@example.com fred@example.com
470             unwhitelist_from_rcvd *@axkit.org
471              
472             =cut
473              
474             push (@cmds, {
475             setting => 'unwhitelist_from_rcvd',
476             type => $CONF_TYPE_ADDRLIST,
477             code => sub {
478             my ($self, $key, $value, $line) = @_;
479             unless (defined $value && $value !~ /^$/) {
480 0     0   0 return $MISSING_REQUIRED_VALUE;
481 0 0 0     0 }
482 0         0 unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
483             return $INVALID_VALUE;
484 0 0       0 }
485 0         0 $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd',
486             split (/\s+/, $value));
487 0         0 $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd',
488             split (/\s+/, $value));
489 0         0 }
490             });
491              
492 92         1205 =item blacklist_from user@example.com
493              
494             Used to specify addresses which send mail that is often tagged (incorrectly) as
495             non-spam, but which the user doesn't want. Same format as C<whitelist_from>.
496              
497             =cut
498              
499             push (@cmds, {
500             setting => 'blacklist_from',
501 92         598 type => $CONF_TYPE_ADDRLIST,
502             });
503              
504             =item unblacklist_from user@example.com
505              
506             Used to remove a default blacklist_from entry, so for example a
507             distribution blacklist_from can be overridden in a local.cf file, or
508             an individual user can override a blacklist_from entry in their own
509             C<user_prefs> file. The specified email address has to match exactly
510             the address previously used in a blacklist_from line.
511              
512              
513             e.g.
514              
515             unblacklist_from joe@example.com fred@example.com
516             unblacklist_from *@spammer.com
517              
518             =cut
519              
520              
521             push (@cmds, {
522             command => 'unblacklist_from',
523 92         605 setting => 'blacklist_from',
524             type => $CONF_TYPE_ADDRLIST,
525             code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
526             });
527              
528              
529             =item whitelist_to user@example.com
530              
531             If the given address appears as a recipient in the message headers
532             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
533             be whitelisted. Useful if you're deploying SpamAssassin system-wide,
534             and don't want some users to have their mail filtered. Same format
535             as C<whitelist_from>.
536              
537             There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to>
538             and C<all_spam_to>. Users in the first level may still get some spammish
539             mails blocked, but users in C<all_spam_to> should never get mail blocked.
540              
541             The headers checked for whitelist addresses are as follows: if C<Resent-To> or
542             C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
543             following set of headers:
544              
545             To
546             Cc
547             Apparently-To
548             Delivered-To
549             Envelope-Recipients
550             Apparently-Resent-To
551             X-Envelope-To
552             Envelope-To
553             X-Delivered-To
554             X-Original-To
555             X-Rcpt-To
556             X-Real-To
557              
558             =item more_spam_to user@example.com
559              
560             See above.
561              
562             =item all_spam_to user@example.com
563              
564             See above.
565              
566             =cut
567              
568             push (@cmds, {
569             setting => 'whitelist_to',
570 92         4566 type => $CONF_TYPE_ADDRLIST,
571             });
572             push (@cmds, {
573             setting => 'more_spam_to',
574 92         511 type => $CONF_TYPE_ADDRLIST,
575             });
576             push (@cmds, {
577             setting => 'all_spam_to',
578 92         587 type => $CONF_TYPE_ADDRLIST,
579             });
580              
581             =item blacklist_to user@example.com
582              
583             If the given address appears as a recipient in the message headers
584             (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
585             be blacklisted. Same format as C<blacklist_from>.
586              
587             =cut
588              
589             push (@cmds, {
590             setting => 'blacklist_to',
591 92         2251 type => $CONF_TYPE_ADDRLIST,
592             });
593              
594             =item whitelist_auth user@example.com
595              
596             Used to specify addresses which send mail that is often tagged (incorrectly) as
597             spam. This is different from C<whitelist_from> and C<whitelist_from_rcvd> in
598             that it first verifies that the message was sent by an authorized sender for
599             the address, before whitelisting.
600              
601             Authorization is performed using one of the installed sender-authorization
602             schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using
603             C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active,
604             and working, for this to operate.
605              
606             Using C<whitelist_auth> is roughly equivalent to specifying duplicate
607             C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines
608             for each of the addresses specified.
609              
610             e.g.
611              
612             whitelist_auth joe@example.com fred@example.com
613             whitelist_auth *@example.com
614              
615             =item def_whitelist_auth user@example.com
616              
617             Same as C<whitelist_auth>, but used for the default whitelist entries
618             in the SpamAssassin distribution. The whitelist score is lower, because
619             these are often targets for spammer spoofing.
620              
621             =cut
622              
623             push (@cmds, {
624             setting => 'whitelist_auth',
625 92         540 type => $CONF_TYPE_ADDRLIST,
626             });
627              
628             push (@cmds, {
629             setting => 'def_whitelist_auth',
630 92         567 type => $CONF_TYPE_ADDRLIST,
631             });
632              
633             =item unwhitelist_auth user@example.com
634              
635             Used to remove a C<whitelist_auth> or C<def_whitelist_auth> entry. The
636             specified email address has to match exactly the address previously used.
637              
638             e.g.
639              
640             unwhitelist_auth joe@example.com fred@example.com
641             unwhitelist_auth *@example.com
642              
643             =cut
644              
645             push (@cmds, {
646             setting => 'unwhitelist_auth',
647             type => $CONF_TYPE_ADDRLIST,
648             code => sub {
649             my ($self, $key, $value, $line) = @_;
650             unless (defined $value && $value !~ /^$/) {
651 0     0   0 return $MISSING_REQUIRED_VALUE;
652 0 0 0     0 }
653 0         0 unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
654             return $INVALID_VALUE;
655 0 0       0 }
656 0         0 $self->{parser}->remove_from_addrlist('whitelist_auth',
657             split (/\s+/, $value));
658 0         0 $self->{parser}->remove_from_addrlist('def_whitelist_auth',
659             split (/\s+/, $value));
660 0         0 }
661             });
662              
663 92         1240  
664             =item enlist_uri_host (listname) host ...
665              
666             Adds one or more host names or domain names to a named list of URI domains.
667             The named list can then be consulted through a check_uri_host_listed()
668             eval rule implemented by the WLBLEval plugin, which takes the list name as
669             an argument. Parentheses around a list name are literal - a required syntax.
670              
671             Host names may optionally be prefixed by an exclamation mark '!', which
672             produces false as a result if this entry matches. This makes it easier
673             to exclude some subdomains when their superdomain is listed, for example:
674              
675             enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com
676              
677             No wildcards are supported, but subdomains do match implicitly. Lists
678             are independent. Search for each named list starts by looking up the
679             full hostname first, then leading fields are progressively stripped off
680             (e.g.: sub.example.com, example.com, com) until a match is found or we run
681             out of fields. The first matching entry (the most specific) determines if a
682             lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.
683              
684             If a URL found in a message contains an IP address in place of a host name,
685             the given list must specify the exact same IP address (instead of a host name)
686             in order to match.
687              
688             Use the delist_uri_host directive to neutralize previous enlist_uri_host
689             settings.
690              
691             The lists named 'BLACK' and 'WHITE' have their own shorthand directives,
692             blacklist_uri_host and whitelist_uri_host, and corresponding default rules,
693             but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.
694              
695             =cut
696              
697             push (@cmds, {
698             command => 'enlist_uri_host',
699             setting => 'uri_host_lists',
700             type => $CONF_TYPE_ADDRLIST,
701             code => sub {
702             my($conf, $key, $value, $line) = @_;
703             local($1,$2);
704 0     0   0 if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
705 0         0 return $MISSING_REQUIRED_VALUE;
706 0 0       0 }
707 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
708             # note: must not factor out dereferencing, as otherwise
709 0         0 # subhashes would spring up in a copy and be lost
710             foreach my $host ( split(/\s+/, lc $2) ) {
711             my $v = $host =~ s/^!// ? 0 : 1;
712 0         0 $conf->{uri_host_lists}{$listname}{$host} = $v;
713 0 0       0 }
714 0         0 }
715             });
716              
717 92         1163 =item delist_uri_host [ (listname) ] host ...
718              
719             Removes one or more specified host names from a named list of URI domains.
720             Removing an unlisted name is ignored (is not an error). Listname is optional,
721             if specified then just the named list is affected, otherwise hosts are
722             removed from all URI host lists created so far. Parentheses around a list
723             name are a required syntax.
724              
725             Note that directives in configuration files are processed in sequence,
726             the delist_uri_host only applies to previously listed entries and has
727             no effect on enlisted entries in yet-to-be-processed directives.
728              
729             For convenience (similarity to the enlist_uri_host directive) hostnames
730             may be prefixed by an exclamation mark, which is stripped off from each
731             name and has no meaning here.
732              
733             =cut
734              
735             push (@cmds, {
736             command => 'delist_uri_host',
737             setting => 'uri_host_lists',
738             type => $CONF_TYPE_ADDRLIST,
739             code => sub {
740             my($conf, $key, $value, $line) = @_;
741             local($1,$2);
742 0     0   0 if ($value !~ /^ (?: \( (.+?) \) \s+ )? (.+) \z/sx) {
743 0         0 return $MISSING_REQUIRED_VALUE;
744 0 0       0 }
745 0         0 my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
746             my @args = split(/\s+/, lc $2);
747 0 0       0 foreach my $listname (@listnames) {
  0         0  
748 0         0 foreach my $host (@args) {
749 0         0 my $v = $host =~ s/^!// ? 0 : 1;
750 0         0 delete $conf->{uri_host_lists}{$listname}{$host};
751 0 0       0 }
752 0         0 }
753             }
754             });
755              
756 92         1330 =item enlist_addrlist (listname) user@example.com
757              
758             Adds one or more addresses to a named list of addresses.
759             The named list can then be consulted through a check_from_in_list() or a
760             check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
761             the list name as an argument. Parentheses around a list name are literal - a
762             required syntax.
763              
764             Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
765             C<*@isp.com>, or C<*.domain.net> will all work.
766             Specifically, C<*> and C<?> are allowed, but all other metacharacters
767             are not. Regular expressions are not used for security reasons.
768             Matching is case-insensitive.
769              
770             Multiple addresses per line, separated by spaces, is OK. Multiple
771             C<enlist_addrlist> lines are also OK.
772              
773             Enlisting an address to the list named blacklist_to is synonymous to using the
774             directive blacklist_to
775              
776             Enlisting an address to the list named blacklist_from is synonymous to using the
777             directive blacklist_from
778              
779             Enlisting an address to the list named whitelist_to is synonymous to using the
780             directive whitelist_to
781              
782             Enlisting an address to the list named whitelist_from is synonymous to using the
783             directive whitelist_from
784              
785             e.g.
786              
787             enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
788             enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk
789              
790             =cut
791              
792             push (@cmds, {
793             setting => 'enlist_addrlist',
794             type => $CONF_TYPE_ADDRLIST,
795             code => sub {
796             my($conf, $key, $value, $line) = @_;
797             local($1,$2);
798 0     0   0 if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
799 0         0 return $MISSING_REQUIRED_VALUE;
800 0 0       0 }
801 0         0 my $listname = $1; # corresponds to arg in check_uri_host_in_wblist()
802             # note: must not factor out dereferencing, as otherwise
803 0         0 # subhashes would spring up in a copy and be lost
804             $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $value));
805             }
806 0         0 });
807              
808 92         1115 =item blacklist_uri_host host-or-domain ...
809              
810             Is a shorthand for a directive: enlist_uri_host (BLACK) host ...
811              
812             Please see directives enlist_uri_host and delist_uri_host for details.
813              
814             =cut
815              
816             push (@cmds, {
817             command => 'blacklist_uri_host',
818             setting => 'uri_host_lists',
819             type => $CONF_TYPE_ADDRLIST,
820             code => sub {
821             my($conf, $key, $value, $line) = @_;
822             foreach my $host ( split(/\s+/, lc $value) ) {
823 0     0   0 my $v = $host =~ s/^!// ? 0 : 1;
824 0         0 $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
825 0 0       0 }
826 0         0 }
827             });
828              
829 92         1335 =item whitelist_uri_host host-or-domain ...
830              
831             Is a shorthand for a directive: enlist_uri_host (WHITE) host ...
832              
833             Please see directives enlist_uri_host and delist_uri_host for details.
834              
835             =cut
836              
837             push (@cmds, {
838             command => 'whitelist_uri_host',
839             setting => 'uri_host_lists',
840             type => $CONF_TYPE_ADDRLIST,
841             code => sub {
842             my($conf, $key, $value, $line) = @_;
843             foreach my $host ( split(/\s+/, lc $value) ) {
844 0     0   0 my $v = $host =~ s/^!// ? 0 : 1;
845 0         0 $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
846 0 0       0 }
847 0         0 }
848             });
849              
850 92         1123 =back
851              
852             =head2 BASIC MESSAGE TAGGING OPTIONS
853              
854             =over 4
855              
856             =item rewrite_header { subject | from | to } STRING
857              
858             By default, suspected spam messages will not have the C<Subject>,
859             C<From> or C<To> lines tagged to indicate spam. By setting this option,
860             the header will be tagged with C<STRING> to indicate that a message is
861             spam. For the From or To headers, this will take the form of an RFC 2822
862             comment following the address in parentheses. For the Subject header,
863             this will be prepended to the original subject. Note that you should
864             only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
865             if C<report_safe> is 0. Otherwise, you may not be able to remove
866             the SpamAssassin markup via the normal methods. More information
867             about tags is explained below in the B<TEMPLATE TAGS> section.
868              
869             Parentheses are not permitted in STRING if rewriting the From or To headers.
870             (They will be converted to square brackets.)
871              
872             If C<rewrite_header subject> is used, but the message being rewritten
873             does not already contain a C<Subject> header, one will be created.
874              
875             A null value for C<STRING> will remove any existing rewrite for the specified
876             header.
877              
878             =cut
879              
880             push (@cmds, {
881             setting => 'rewrite_header',
882             type => $CONF_TYPE_HASH_KEY_VALUE,
883             code => sub {
884             my ($self, $key, $value, $line) = @_;
885             my($hdr, $string) = split(/\s+/, $value, 2);
886 0     0   0 $hdr = ucfirst(lc($hdr));
887 0         0  
888 0         0 if ($hdr =~ /^$/) {
889             return $MISSING_REQUIRED_VALUE;
890 0 0       0 }
    0          
891 0         0 # We only deal with From, Subject, and To ...
892             elsif ($hdr =~ /^(?:From|Subject|To)$/) {
893             unless (defined $string && $string =~ /\S/) {
894             delete $self->{rewrite_header}->{$hdr};
895 0 0 0     0 return;
896 0         0 }
897 0         0  
898             if ($hdr ne 'Subject') {
899             $string =~ tr/()/[]/;
900 0 0       0 }
901 0         0 $self->{rewrite_header}->{$hdr} = $string;
902             return;
903 0         0 }
904 0         0 else {
905             # if we get here, note the issue, then we'll fail through for an error.
906             info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
907             return $INVALID_VALUE;
908 0         0 }
909 0         0 }
910             });
911              
912 92         1131 =item subjprefix
913              
914             Add a prefix to the email's Subject header if a rule is matched.
915             For this option to take effect, the "rewrite_header Subject" config
916             option must be enabled as well.
917              
918             The check C<if can(Mail::SpamAssassin::Conf::feature_subjprefix)>
919             should be used to silence warnings in previous
920             SpamAssassin versions.
921              
922             To be able to use this feature a C<add_header all Subjprefix _SUBJPREFIX_>
923             configuration line could be needed when the glue between the MTA and SpamAssassin
924             rewrites the email content.
925              
926             Here is an example on how to use this feature:
927              
928             rewrite_header Subject *****SPAM*****
929             add_header all Subjprefix _SUBJPREFIX_
930             body OLEMACRO_MALICE eval:check_olemacro_malice()
931             describe OLEMACRO_MALICE Dangerous Office Macro
932             score OLEMACRO_MALICE 5.0
933             if can(Mail::SpamAssassin::Conf::feature_subjprefix)
934             subjprefix OLEMACRO_MALICE [VIRUS]
935             endif
936              
937             =cut
938              
939             push (@cmds, {
940             command => 'subjprefix',
941 92         805 setting => 'subjprefix',
942             is_frequent => 1,
943             type => $CONF_TYPE_HASH_KEY_VALUE,
944             });
945              
946             =item add_header { spam | ham | all } header_name string
947              
948             Customized headers can be added to the specified type of messages (spam,
949             ham, or "all" to add to either). All headers begin with C<X-Spam->
950             (so a C<header_name> Foo will generate a header called X-Spam-Foo).
951             header_name is restricted to the character set [A-Za-z0-9_-].
952              
953             The order of C<add_header> configuration options is preserved, inserted
954             headers will follow this order of declarations. When combining C<add_header>
955             with C<clear_headers> and C<remove_header>, keep in mind that C<add_header>
956             appends a new header to the current list, after first removing any existing
957             header fields of the same name. Note also that C<add_header>, C<clear_headers>
958             and C<remove_header> may appear in multiple .cf files, which are interpreted
959             in alphabetic order.
960              
961             C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section.
962             You can also use C<\n> and C<\t> in the header to add newlines and tabulators
963             as desired. A backslash has to be written as \\, any other escaped chars will
964             be silently removed.
965              
966             All headers will be folded if fold_headers is set to C<1>. Note: Manually
967             adding newlines via C<\n> disables any further automatic wrapping (ie:
968             long header lines are possible). The lines will still be properly folded
969             (marked as continuing) though.
970              
971             You can customize existing headers with B<add_header> (only the specified
972             subset of messages will be changed).
973              
974             See also C<clear_headers> and C<remove_header> for removing headers.
975              
976             Here are some examples (these are the defaults, note that Checker-Version can
977             not be changed or removed):
978              
979             add_header spam Flag _YESNOCAPS_
980             add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
981             add_header all Level _STARS(*)_
982             add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
983              
984             =cut
985              
986             push (@cmds, {
987             setting => 'add_header',
988             code => sub {
989             my ($self, $key, $value, $line) = @_;
990             local ($1,$2,$3);
991 312     312   937 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
992 312         945 return $INVALID_VALUE;
993 312 50       2605 }
994 0         0  
995             my ($type, $name, $hline) = ($1, $2, $3);
996             if ($hline =~ /^"(.*)"$/) {
997 312         1203 $hline = $1;
998 312 100       1136 }
999 62         226 my @line = split(
1000             /\\\\/, # split at double backslashes,
1001 312         1299 $hline."\n" # newline needed to make trailing backslashes work
1002             );
1003             foreach (@line) {
1004             s/\\t/\t/g; # expand tabs
1005 312         712 s/\\n/\n/g; # expand newlines
1006 312         533 s/\\.//g; # purge all other escapes
1007 312         456 };
1008 312         601 $hline = join("\\", @line);
1009             chop($hline); # remove dummy newline again
1010 312         863 if (($type eq "ham") || ($type eq "all")) {
1011 312         688 $self->{headers_ham} =
1012 312 100 66     1674 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_ham}} ];
1013             push(@{$self->{headers_ham}}, [$name, $hline]);
1014 250         419 }
  472         1729  
  250         578  
1015 250         417 if (($type eq "spam") || ($type eq "all")) {
  250         791  
1016             $self->{headers_spam} =
1017 312 50 66     1687 [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_spam}} ];
1018             push(@{$self->{headers_spam}}, [$name, $hline]);
1019 312         501 }
  722         1912  
  312         539  
1020 312         530 }
  312         2627  
1021             });
1022              
1023 92         1311 =item remove_header { spam | ham | all } header_name
1024              
1025             Headers can be removed from the specified type of messages (spam, ham,
1026             or "all" to remove from either). All headers begin with C<X-Spam->
1027             (so C<header_name> will be appended to C<X-Spam->).
1028              
1029             See also C<clear_headers> for removing all the headers at once.
1030              
1031             Note that B<X-Spam-Checker-Version> is not removable because the version
1032             information is needed by mail administrators and developers to debug
1033             problems. Without at least one header, it might not even be possible to
1034             determine that SpamAssassin is running.
1035              
1036             =cut
1037              
1038             push (@cmds, {
1039             setting => 'remove_header',
1040             code => sub {
1041             my ($self, $key, $value, $line) = @_;
1042             local ($1,$2);
1043 0     0   0 if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
1044 0         0 return $INVALID_VALUE;
1045 0 0       0 }
1046 0         0  
1047             my ($type, $name) = ($1, $2);
1048             return if ( $name eq "Checker-Version" );
1049 0         0  
1050 0 0       0 $name = lc($name);
1051             if (($type eq "ham") || ($type eq "all")) {
1052 0         0 $self->{headers_ham} =
1053 0 0 0     0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
1054             }
1055 0         0 if (($type eq "spam") || ($type eq "all")) {
  0         0  
  0         0  
1056             $self->{headers_spam} =
1057 0 0 0     0 [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
1058             }
1059 0         0 }
  0         0  
  0         0  
1060             });
1061              
1062 92         1202 =item clear_headers
1063              
1064             Clear the list of headers to be added to messages. You may use this
1065             before any B<add_header> options to prevent the default headers from being
1066             added to the message.
1067              
1068             C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
1069             .cf files, which are interpreted in alphabetic order, so C<clear_headers>
1070             in a later file will remove all added headers from previously interpreted
1071             configuration files, which may or may not be desired.
1072              
1073             Note that B<X-Spam-Checker-Version> is not removable because the version
1074             information is needed by mail administrators and developers to debug
1075             problems. Without at least one header, it might not even be possible to
1076             determine that SpamAssassin is running.
1077              
1078             =cut
1079              
1080             push (@cmds, {
1081             setting => 'clear_headers',
1082             type => $CONF_TYPE_NOARGS,
1083             code => sub {
1084             my ($self, $key, $value, $line) = @_;
1085             unless (!defined $value || $value eq '') {
1086 62     62   294 return $INVALID_VALUE;
1087 62 50 33     472 }
1088 0         0 my @h = grep { lc($_->[0]) eq "checker-version" }
1089             @{$self->{headers_ham}};
1090 62         411 $self->{headers_ham} = !@h ? [] : [ $h[0] ];
1091 62         150 $self->{headers_spam} = !@h ? [] : [ $h[0] ];
  62         269  
1092 62 50       434 }
1093 62 50       446 });
1094              
1095 92         1223 =item report_safe ( 0 | 1 | 2 ) (default: 1)
1096              
1097             If this option is set to 1 and an incoming message is tagged as spam,
1098             instead of modifying the original message, SpamAssassin will create a
1099             new report message and attach the original message as a message/rfc822
1100             MIME part (ensuring the original message is completely preserved, not
1101             easily opened, and easier to recover).
1102              
1103             If this option is set to 2, then original messages will be attached with
1104             a content type of text/plain instead of message/rfc822. This setting
1105             may be required for safety reasons on certain broken mail clients that
1106             automatically load attachments without any action by the user. This
1107             setting may also make it somewhat more difficult to extract or view the
1108             original message.
1109              
1110             If this option is set to 0, incoming spam is only modified by adding
1111             some C<X-Spam-> headers and no changes will be made to the body. In
1112             addition, a header named B<X-Spam-Report> will be added to spam. You
1113             can use the B<remove_header> option to remove that header after setting
1114             B<report_safe> to 0.
1115              
1116             See B<report_safe_copy_headers> if you want to copy headers from
1117             the original mail into tagged messages.
1118              
1119             =cut
1120              
1121             push (@cmds, {
1122             setting => 'report_safe',
1123             default => 1,
1124             type => $CONF_TYPE_NUMERIC,
1125             code => sub {
1126             my ($self, $key, $value, $line) = @_;
1127             if ($value eq '') {
1128 62     62   303 return $MISSING_REQUIRED_VALUE;
1129 62 50       478 }
    50          
1130 0         0 elsif ($value !~ /^[012]$/) {
1131             return $INVALID_VALUE;
1132             }
1133 0         0  
1134             $self->{report_safe} = $value+0;
1135             if (! $self->{report_safe} &&
1136 62         266 ! (grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}}) ) {
1137 62 50 33     525 push(@{$self->{headers_spam}}, ["Report", "_REPORT_"]);
1138             }
1139 0         0 }
  0         0  
1140             });
1141              
1142 92         1229 =item report_wrap_width (default: 70)
1143              
1144             This option sets the wrap width for description lines in the X-Spam-Report
1145             header, not accounting for tab width.
1146              
1147             =cut
1148              
1149             push (@cmds, {
1150             setting => 'report_wrap_width',
1151 92         589 default => '70',
1152             type => $CONF_TYPE_NUMERIC,
1153             });
1154              
1155             =back
1156              
1157             =head2 LANGUAGE OPTIONS
1158              
1159             =over 4
1160              
1161             =item ok_locales xx [ yy zz ... ] (default: all)
1162              
1163             This option is used to specify which locales are considered OK for
1164             incoming mail. Mail using the B<character sets> that are allowed by
1165             this option will not be marked as possibly being spam in a foreign
1166             language.
1167              
1168             If you receive lots of spam in foreign languages, and never get any non-spam in
1169             these languages, this may help. Note that all ISO-8859-* character sets, and
1170             Windows code page character sets, are always permitted by default.
1171              
1172             Set this to C<all> to allow all character sets. This is the default.
1173              
1174             The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
1175             C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.
1176              
1177             Examples:
1178              
1179             ok_locales all (allow all locales)
1180             ok_locales en (only allow English)
1181             ok_locales en ja zh (allow English, Japanese, and Chinese)
1182              
1183             Note: if there are multiple ok_locales lines, only the last one is used.
1184              
1185             Select the locales to allow from the list below:
1186              
1187             =over 4
1188              
1189             =item en - Western character sets in general
1190              
1191             =item ja - Japanese character sets
1192              
1193             =item ko - Korean character sets
1194              
1195             =item ru - Cyrillic character sets
1196              
1197             =item th - Thai character sets
1198              
1199             =item zh - Chinese (both simplified and traditional) character sets
1200              
1201             =back
1202              
1203             =cut
1204              
1205             push (@cmds, {
1206             setting => 'ok_locales',
1207 92         624 default => 'all',
1208             type => $CONF_TYPE_STRING,
1209             });
1210              
1211             =item normalize_charset ( 0 | 1) (default: 0)
1212              
1213             Whether to decode non-UTF-8 and non-ASCII textual parts and recode them
1214             to UTF-8 before the text is given over to rules processing. The character
1215             set used for attempted decoding is primarily based on a declared character
1216             set in a Content-Type header, but if the decoding attempt fails a module
1217             Encode::Detect::Detector is consulted (if available) to provide a guess
1218             based on the actual text, and decoding is re-attempted. Even if the option
1219             is enabled no unnecessary decoding and re-encoding work is done when
1220             possible (like with an all-ASCII text with a US-ASCII or extended ASCII
1221             character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).
1222              
1223             Unicode support in old versions of perl or in a core module Encode is likely
1224             to be buggy in places, so if the normalize_charset function is enabled
1225             it is advised to stick to more recent versions of perl (preferably 5.12
1226             or later). The module Encode::Detect::Detector is optional, when necessary
1227             it will be used if it is available.
1228              
1229             =cut
1230              
1231             push (@cmds, {
1232             setting => 'normalize_charset',
1233             default => 0,
1234             type => $CONF_TYPE_BOOL,
1235             code => sub {
1236             my ($self, $key, $value, $line) = @_;
1237             unless (defined $value && $value !~ /^$/) {
1238 0     0   0 return $MISSING_REQUIRED_VALUE;
1239 0 0 0     0 }
1240 0         0 if (lc $value eq 'yes' || $value eq '1') { $value = 1 }
1241             elsif (lc $value eq 'no' || $value eq '0') { $value = 0 }
1242 0 0 0     0 else { return $INVALID_VALUE }
  0 0 0     0  
1243 0         0  
1244 0         0 $self->{normalize_charset} = $value;
1245              
1246 0         0 unless ($] > 5.008004) {
1247             $self->{parser}->lint_warn("config: normalize_charset requires Perl 5.8.5 or later");
1248 0 0       0 $self->{normalize_charset} = 0;
1249 0         0 return $INVALID_VALUE;
1250 0         0 }
1251 0         0 require HTML::Parser;
1252             #changed to eval to use VERSION so that this version was not incorrectly parsed for CPAN
1253 0         0 unless ( eval { HTML::Parser->VERSION(3.46) } ) {
1254             $self->{parser}->lint_warn("config: normalize_charset requires HTML::Parser 3.46 or later");
1255 0 0       0 $self->{normalize_charset} = 0;
  0         0  
1256 0         0 return $INVALID_VALUE;
1257 0         0 }
1258 0         0 unless (eval 'require Encode') {
1259             $self->{parser}->lint_warn("config: normalize_charset requires Encode");
1260 0 0       0 $self->{normalize_charset} = 0;
1261 0         0 return $INVALID_VALUE;
1262 0         0 }
1263 0         0 }
1264             });
1265              
1266 92         1043 =back
1267              
1268             =head2 NETWORK TEST OPTIONS
1269              
1270             =over 4
1271              
1272             =item trusted_networks IPaddress[/masklen] ... (default: none)
1273              
1274             What networks or hosts are 'trusted' in your setup. B<Trusted> in this case
1275             means that relay hosts on these networks are considered to not be potentially
1276             operated by spammers, open relays, or open proxies. A trusted host could
1277             conceivably relay spam, but will not originate it, and will not forge header
1278             data. DNS blacklist checks will never query for hosts on these networks.
1279              
1280             See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.
1281              
1282             MXes for your domain(s) and internal relays should B<also> be specified using
1283             the C<internal_networks> setting. When there are 'trusted' hosts that
1284             are not MXes or internal relays for your domain(s) they should B<only> be
1285             specified in C<trusted_networks>.
1286              
1287             The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
1288             address optionally enclosed in square brackets. Scoped link-local IPv6
1289             addresses are syntactically recognized but the interface scope is currently
1290             ignored (e.g. [fe80::1234%eth0] ) and should be avoided.
1291              
1292             If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
1293             specified in bits. If it is not specified, but less than 4 octets of an IPv4
1294             address are specified with a trailing dot, an implied netmask length covers
1295             all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
1296             If masklen is not specified, and there is no trailing dot, then just a single
1297             IP address specified is used, as if the masklen were C</32> with an IPv4
1298             address, or C</128> in case of an IPv6 address.
1299              
1300             If a network or host address is prefaced by a C<!> the matching network or
1301             host will be excluded from the list even if a less specific (shorter netmask
1302             length) subnet is later specified in the list. This allows a subset of
1303             a wider network to be exempt. In case of specifying overlapping subnets,
1304             specify more specific subnets first (tighter matching, i.e. with a longer
1305             netmask length), followed by less specific (shorter netmask length) subnets
1306             to get predictable results regardless of the search algorithm used - when
1307             Net::Patricia module is installed the search finds the tightest matching
1308             entry in the list, while a sequential search as used in absence of the
1309             module Net::Patricia will find the first matching entry in the list.
1310              
1311             Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
1312             of your config.
1313              
1314             Examples:
1315              
1316             trusted_networks 192.168.0.0/16 # all in 192.168.*.*
1317             trusted_networks 192.168. # all in 192.168.*.*
1318             trusted_networks 212.17.35.15 # just that host
1319             trusted_networks !10.0.1.5 10.0.1/24 # all in 10.0.1.* but not 10.0.1.5
1320             trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
1321             # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64
1322              
1323             This operates additively, so a C<trusted_networks> line after another one
1324             will append new entries to the list of trusted networks. To clear out the
1325             existing entries, use C<clear_trusted_networks>.
1326              
1327             If C<trusted_networks> is not set and C<internal_networks> is, the value
1328             of C<internal_networks> will be used for this parameter.
1329              
1330             If neither C<trusted_networks> nor C<internal_networks> is set, a basic
1331             inference algorithm is applied. This works as follows:
1332              
1333             =over 4
1334              
1335             =item *
1336              
1337             If the 'from' host has an IP address in a private (RFC 1918) network range,
1338             then it's trusted
1339              
1340             =item *
1341              
1342             If there are authentication tokens in the received header, and
1343             the previous host was trusted, then this host is also trusted
1344              
1345             =item *
1346              
1347             Otherwise this host, and all further hosts, are considered untrusted.
1348              
1349             =back
1350              
1351             =cut
1352              
1353             push (@cmds, {
1354             setting => 'trusted_networks',
1355 92         690 type => $CONF_TYPE_IPADDRLIST,
1356             });
1357              
1358             =item clear_trusted_networks
1359              
1360             Empty the list of trusted networks.
1361              
1362             =cut
1363              
1364             push (@cmds, {
1365             setting => 'clear_trusted_networks',
1366             type => $CONF_TYPE_NOARGS,
1367             code => sub {
1368             my ($self, $key, $value, $line) = @_;
1369             unless (!defined $value || $value eq '') {
1370 32     32   144 return $INVALID_VALUE;
1371 32 50 33     188 }
1372 0         0 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
1373             $self->{trusted_networks_configured} = 0;
1374 32         212 }
1375 32         143 });
1376              
1377 92         1071 =item internal_networks IPaddress[/masklen] ... (default: none)
1378              
1379             What networks or hosts are 'internal' in your setup. B<Internal> means
1380             that relay hosts on these networks are considered to be MXes for your
1381             domain(s), or internal relays. This uses the same syntax as
1382             C<trusted_networks>, above - see there for details.
1383              
1384             This value is used when checking 'dial-up' or dynamic IP address
1385             blocklists, in order to detect direct-to-MX spamming.
1386              
1387             Trusted relays that accept mail directly from dial-up connections
1388             (i.e. are also performing a role of mail submission agents - MSA)
1389             should not be listed in C<internal_networks>. List them only in
1390             C<trusted_networks>.
1391              
1392             If C<trusted_networks> is set and C<internal_networks> is not, the value
1393             of C<trusted_networks> will be used for this parameter.
1394              
1395             If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
1396             will be considered local; in other words, any relays past the machine where
1397             SpamAssassin is running will be considered external.
1398              
1399             Every entry in C<internal_networks> must appear in C<trusted_networks>; in
1400             other words, C<internal_networks> is always a subset of the trusted set.
1401              
1402             Note: 127/8 and ::1 are always included in internal_networks, regardless of
1403             your config.
1404              
1405             =cut
1406              
1407             push (@cmds, {
1408             setting => 'internal_networks',
1409 92         500 type => $CONF_TYPE_IPADDRLIST,
1410             });
1411              
1412             =item clear_internal_networks
1413              
1414             Empty the list of internal networks.
1415              
1416             =cut
1417              
1418             push (@cmds, {
1419             setting => 'clear_internal_networks',
1420             type => $CONF_TYPE_NOARGS,
1421             code => sub {
1422             my ($self, $key, $value, $line) = @_;
1423             unless (!defined $value || $value eq '') {
1424 32     32   155 return $INVALID_VALUE;
1425 32 50 33     187 }
1426 0         0 $self->{internal_networks} = $self->new_netset('internal_networks',1);
1427             $self->{internal_networks_configured} = 0;
1428 32         95 }
1429 32         141 });
1430              
1431 92         1092 =item msa_networks IPaddress[/masklen] ... (default: none)
1432              
1433             The networks or hosts which are acting as MSAs in your setup (but not also
1434             as MX relays). This uses the same syntax as C<trusted_networks>, above - see
1435             there for details.
1436              
1437             B<MSA> means that the relay hosts on these networks accept mail from your
1438             own users and authenticate them appropriately. These relays will never
1439             accept mail from hosts that aren't authenticated in some way. Examples of
1440             authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.
1441              
1442             All relays found in the message headers after the MSA relay will take
1443             on the same trusted and internal classifications as the MSA relay itself,
1444             as defined by your I<trusted_networks> and I<internal_networks> configuration.
1445              
1446             For example, if the MSA relay is trusted and internal so will all of the
1447             relays that precede it.
1448              
1449             When using msa_networks to identify an MSA it is recommended that you treat
1450             that MSA as both trusted and internal. When an MSA is not included in
1451             msa_networks you should treat the MSA as trusted but not internal, however
1452             if the MSA is also acting as an MX or intermediate relay you must always
1453             treat it as both trusted and internal and ensure that the MSA includes
1454             visible auth tokens in its Received header to identify submission clients.
1455              
1456             B<Warning:> Never include an MSA that also acts as an MX (or is also an
1457             intermediate relay for an MX) or otherwise accepts mail from
1458             non-authenticated users in msa_networks. Doing so will result in unknown
1459             external relays being trusted.
1460              
1461             =cut
1462              
1463             push (@cmds, {
1464             setting => 'msa_networks',
1465 92         422 type => $CONF_TYPE_IPADDRLIST,
1466             });
1467              
1468             =item clear_msa_networks
1469              
1470             Empty the list of msa networks.
1471              
1472             =cut
1473              
1474             push (@cmds, {
1475             setting => 'clear_msa_networks',
1476             type => $CONF_TYPE_NOARGS,
1477             code => sub {
1478             my ($self, $key, $value, $line) = @_;
1479             unless (!defined $value || $value eq '') {
1480 32     32   138 return $INVALID_VALUE;
1481 32 50 33     195 }
1482 0         0 $self->{msa_networks} =
1483             $self->new_netset('msa_networks',0); # no loopback IP
1484             $self->{msa_networks_configured} = 0;
1485 32         96 }
1486 32         150 });
1487              
1488 92         910 =item originating_ip_headers header ... (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)
1489              
1490             A list of header field names from which an originating IP address can
1491             be obtained. For example, webmail servers may record a client IP address
1492             in X-Originating-IP.
1493              
1494             These IP addresses are virtually appended into the Received: chain, so they
1495             are used in RBL checks where appropriate.
1496              
1497             Currently the IP addresses are not added into X-Spam-Relays-* header fields,
1498             but they may be in the future.
1499              
1500             =cut
1501              
1502             push (@cmds, {
1503             setting => 'originating_ip_headers',
1504             default => [],
1505             type => $CONF_TYPE_STRINGLIST,
1506             code => sub {
1507             my ($self, $key, $value, $line) = @_;
1508             unless (defined $value && $value !~ /^$/) {
1509 124     124   403 return $MISSING_REQUIRED_VALUE;
1510 124 50 33     833 }
1511 0         0 foreach my $hfname (split(/\s+/, $value)) {
1512             # avoid duplicates, consider header field names case-insensitive
1513 124         533 push(@{$self->{originating_ip_headers}}, $hfname)
1514             if !grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
1515 248         861 }
1516 248 50       332 }
  248         1043  
1517             });
1518              
1519 92         1024 =item clear_originating_ip_headers
1520              
1521             Empty the list of 'originating IP address' header field names.
1522              
1523             =cut
1524              
1525             push (@cmds, {
1526             setting => 'clear_originating_ip_headers',
1527             type => $CONF_TYPE_NOARGS,
1528             code => sub {
1529             my ($self, $key, $value, $line) = @_;
1530             unless (!defined $value || $value eq '') {
1531 62     62   288 return $INVALID_VALUE;
1532 62 50 33     494 }
1533 0         0 $self->{originating_ip_headers} = [];
1534             }
1535 62         267 });
1536              
1537 92         1022 =item always_trust_envelope_sender ( 0 | 1 ) (default: 0)
1538              
1539             Trust the envelope sender even if the message has been passed through one or
1540             more trusted relays. See also C<envelope_sender_header>.
1541              
1542             =cut
1543              
1544             push (@cmds, {
1545             setting => 'always_trust_envelope_sender',
1546 92         482 default => 0,
1547             type => $CONF_TYPE_BOOL,
1548             });
1549              
1550             =item skip_rbl_checks ( 0 | 1 ) (default: 0)
1551              
1552             Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
1553             which implements Real-time Block List (or: Blackhole List) (RBL) lookups.
1554              
1555             By default, SpamAssassin will run RBL checks. Individual blocklists may
1556             be disabled selectively by setting a score of a corresponding rule to 0.
1557              
1558             See also a related configuration parameter skip_uribl_checks,
1559             which controls the URIDNSBL plugin (documented in the URIDNSBL man page).
1560              
1561             =cut
1562              
1563             push (@cmds, {
1564             setting => 'skip_rbl_checks',
1565 92         517 default => 0,
1566             type => $CONF_TYPE_BOOL,
1567             });
1568              
1569             =item dns_available { yes | no | test[: domain1 domain2...] } (default: yes)
1570              
1571             Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
1572             indicates DNS resolving is available, a value I<no> indicates DNS resolving
1573             is not available - both of these values apply unconditionally and skip initial
1574             DNS tests, which can be slow or unreliable.
1575              
1576             When the option value is a I<test> (with or without arguments), SpamAssassin
1577             will query some domain names on the internet during initialization, attempting
1578             to determine if DNS resolving is working or not. A space-separated list
1579             of domain names may be specified explicitly, or left to a built-in default
1580             of a dozen or so domain names. From an explicit or a default list a subset
1581             of three domain names is picked randomly for checking. The test queries for
1582             NS records of these domains: if at least one query returns a success then
1583             SpamAssassin considers DNS resolving as available, otherwise not.
1584              
1585             The problem is that the test can introduce some startup delay if a network
1586             connection is down, and in some cases it can wrongly guess that DNS is
1587             unavailable because a test connection failed, which in turn disables several
1588             DNS-dependent tests.
1589              
1590             Please note, the DNS test queries for NS records, so specify domain names,
1591             not host names.
1592              
1593             Since version 3.4.0 of SpamAssassin a default setting for option
1594             I<dns_available> is I<yes>. A default in older versions was I<test>.
1595              
1596             =cut
1597              
1598             push (@cmds, {
1599             setting => 'dns_available',
1600             default => 'yes',
1601             type => $CONF_TYPE_STRING,
1602             code => sub {
1603             my ($self, $key, $value, $line) = @_;
1604             if ($value =~ /^test(?::\s*\S.*)?$/) {
1605 13     13   31 $self->{dns_available} = $value;
1606 13 50       70 }
    100          
    50          
1607 0         0 elsif ($value =~ /^(?:yes|1)$/) {
1608             $self->{dns_available} = 'yes';
1609             }
1610 1         15 elsif ($value =~ /^(?:no|0)$/) {
1611             $self->{dns_available} = 'no';
1612             }
1613 12         39 else {
1614             return $INVALID_VALUE;
1615             }
1616 0         0 }
1617             });
1618              
1619 92         961 =item dns_server ip-addr-port (default: entries provided by Net::DNS)
1620              
1621             Specifies an IP address of a DNS server, and optionally its port number.
1622             The I<dns_server> directive may be specified multiple times, each entry
1623             adding to a list of available resolving name servers. The I<ip-addr-port>
1624             argument can either be an IPv4 or IPv6 address, optionally enclosed in
1625             brackets, and optionally followed by a colon and a port number. In absence
1626             of a port number a standard port number 53 is assumed. When an IPv6 address
1627             is specified along with a port number, the address B<must> be enclosed in
1628             brackets to avoid parsing ambiguity regarding a colon separator. A scoped
1629             link-local IP address is allowed (assuming underlying modules allow it).
1630              
1631             Examples :
1632             dns_server 127.0.0.1
1633             dns_server 127.0.0.1:53
1634             dns_server [127.0.0.1]:53
1635             dns_server [::1]:53
1636             dns_server fe80::1%lo0
1637             dns_server [fe80::1%lo0]:53
1638              
1639             In absence of I<dns_server> directives, the list of name servers is provided
1640             by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
1641             but this may be platform dependent. Please consult the Net::DNS::Resolver
1642             documentation for details.
1643              
1644             =cut
1645              
1646             push (@cmds, {
1647             setting => 'dns_server',
1648             type => $CONF_TYPE_STRING,
1649             code => sub {
1650             my ($self, $key, $value, $line) = @_;
1651             my($address,$port); local($1,$2,$3);
1652 1     1   4 if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
1653 1         2 $address = defined $1 ? $1 : $2; $port = $3;
  1         3  
1654 1 50       7 } elsif ($value =~ /^(?: \[ ([^\]]*) \] |
    0          
1655 1 50       5 ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
  1         3  
1656             $address = defined $1 ? $1 : $2; $port = '53';
1657             } else {
1658 0 0       0 return $INVALID_VALUE;
  0         0  
1659             }
1660 0         0 my $scope = ''; # scoped IP address?
1661             $scope = $1 if $address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi;
1662 1         3 my $IP_ADDRESS = IP_ADDRESS; # IP_ADDRESS regexp does not handle scope
1663 1 50       4 if ($address =~ /$IP_ADDRESS/ && $port >= 1 && $port <= 65535) {
1664 1         2 $self->{dns_servers} = [] if !$self->{dns_servers};
1665 1 50 33     31 # checked, untainted, stored in a normalized form
      33        
1666 1 50       5 push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
1667             } else {
1668 1         2 return $INVALID_VALUE;
  1         8  
1669             }
1670 0         0 }
1671             });
1672              
1673 92         1058 =item clear_dns_servers
1674              
1675             Empty the list of explicitly configured DNS servers through a I<dns_server>
1676             directive, falling back to Net::DNS -supplied defaults.
1677              
1678             =cut
1679              
1680             push (@cmds, {
1681             setting => 'clear_dns_servers',
1682             type => $CONF_TYPE_NOARGS,
1683             code => sub {
1684             my ($self, $key, $value, $line) = @_;
1685             unless (!defined $value || $value eq '') {
1686 1     1   4 return $INVALID_VALUE;
1687 1 50 33     7 }
1688 0         0 undef $self->{dns_servers};
1689             }
1690 1         4 });
1691              
1692 92         1026 =item dns_local_ports_permit ranges...
1693              
1694             Add the specified ports or port ranges to the set of allowed port numbers
1695             that can be used as local port numbers when sending DNS queries to a resolver.
1696              
1697             The argument is a whitespace-separated or a comma-separated list of
1698             single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
1699             representing a range. Allowed port numbers are between 1 and 65535.
1700              
1701             Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
1702             in order in which they appear in configuration files. Each directive adds
1703             (or subtracts) its subsets of ports to a current set of available ports.
1704             Whatever is left in the set by the end of configuration processing
1705             is made available to a DNS resolving client code.
1706              
1707             If the resulting set of port numbers is empty (see also the directive
1708             I<dns_local_ports_none>), then SpamAssassin does not apply its ports
1709             randomization logic, but instead leaves the operating system to choose
1710             a suitable free local port number.
1711              
1712             The initial set consists of all port numbers in the range 1024-65535.
1713             Note that system config files already modify the set and remove all the
1714             IANA registered port numbers and some other ranges, so there is rarely
1715             a need to adjust the ranges by site-specific directives.
1716              
1717             See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.
1718              
1719             =cut
1720              
1721             push (@cmds, {
1722             setting => 'dns_local_ports_permit',
1723             type => $CONF_TYPE_STRING,
1724             is_admin => 1,
1725             code => sub {
1726             my($self, $key, $value, $line) = @_;
1727             my(@port_ranges); local($1,$2);
1728 0     0   0 foreach my $range (split(/[ \t,]+/, $value)) {
1729 0         0 if ($range =~ /^(\d{1,5})\z/) {
  0         0  
1730 0         0 # don't allow adding a port number 0
1731 0 0       0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
    0          
1732             push(@port_ranges, [$1,$1]);
1733 0 0 0     0 } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
  0         0  
1734 0         0 if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
1735             if ($2 < 1 || $2 > 65535) { return $INVALID_VALUE }
1736 0 0 0     0 push(@port_ranges, [$1,$2]);
  0         0  
1737 0 0 0     0 } else {
  0         0  
1738 0         0 return $INVALID_VALUE;
1739             }
1740 0         0 }
1741             foreach my $p (@port_ranges) {
1742             undef $self->{dns_available_portscount}; # invalidate derived data
1743 0         0 set_ports_range(\$self->{dns_available_ports_bitset},
1744 0         0 $p->[0], $p->[1], 1);
1745             }
1746 0         0 }
1747             });
1748              
1749 92         1230 =item dns_local_ports_avoid ranges...
1750              
1751             Remove specified ports or port ranges from the set of allowed port numbers
1752             that can be used as local port numbers when sending DNS queries to a resolver.
1753              
1754             Please see directive I<dns_local_ports_permit> for details.
1755              
1756             =cut
1757              
1758             push (@cmds, {
1759             setting => 'dns_local_ports_avoid',
1760             type => $CONF_TYPE_STRING,
1761             is_admin => 1,
1762             code => sub {
1763             my($self, $key, $value, $line) = @_;
1764             my(@port_ranges); local($1,$2);
1765 0     0   0 foreach my $range (split(/[ \t,]+/, $value)) {
1766 0         0 if ($range =~ /^(\d{1,5})\z/) {
  0         0  
1767 0         0 if ($1 > 65535) { return $INVALID_VALUE }
1768 0 0       0 # don't mind clearing also the port number 0
    0          
1769 0 0       0 push(@port_ranges, [$1,$1]);
  0         0  
1770             } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1771 0         0 if ($1 > 65535 || $2 > 65535) { return $INVALID_VALUE }
1772             push(@port_ranges, [$1,$2]);
1773 0 0 0     0 } else {
  0         0  
1774 0         0 return $INVALID_VALUE;
1775             }
1776 0         0 }
1777             foreach my $p (@port_ranges) {
1778             undef $self->{dns_available_portscount}; # invalidate derived data
1779 0         0 set_ports_range(\$self->{dns_available_ports_bitset},
1780 0         0 $p->[0], $p->[1], 0);
1781             }
1782 0         0 }
1783             });
1784              
1785 92         1168 =item dns_local_ports_none
1786              
1787             Is a fast shorthand for:
1788              
1789             dns_local_ports_avoid 1-65535
1790              
1791             leaving the set of available DNS query local port numbers empty. In all
1792             respects (apart from speed) it is equivalent to the shown directive, and can
1793             be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1794              
1795             If the resulting set of port numbers is empty, then SpamAssassin does not
1796             apply its ports randomization logic, but instead leaves the operating system
1797             to choose a suitable free local port number.
1798              
1799             See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1800              
1801             =cut
1802              
1803             push (@cmds, {
1804             setting => 'dns_local_ports_none',
1805             type => $CONF_TYPE_NOARGS,
1806             is_admin => 1,
1807             code => sub {
1808             my ($self, $key, $value, $line) = @_;
1809             unless (!defined $value || $value eq '') {
1810 0     0   0 return $INVALID_VALUE;
1811 0 0 0     0 }
1812 0         0 undef $self->{dns_available_portscount}; # invalidate derived data
1813             wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
1814 0         0 }
1815 0         0 });
1816              
1817 92         1187 =item dns_test_interval n (default: 600 seconds)
1818              
1819             If dns_available is set to I<test>, the dns_test_interval time in number
1820             of seconds will tell SpamAssassin how often to retest for working DNS.
1821             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
1822             indicating seconds (default), minutes, hours, days, weeks).
1823              
1824             =cut
1825              
1826             push (@cmds, {
1827             setting => 'dns_test_interval',
1828 92         547 default => 600,
1829             type => $CONF_TYPE_DURATION,
1830             });
1831              
1832             =item dns_options opts (default: norotate, nodns0x20, edns=4096)
1833              
1834             Provides a (whitespace or comma -separated) list of options applying
1835             to DNS resolving. Available options are: I<rotate>, I<dns0x20> and
1836             I<edns> (or I<edns0>). Option name may be negated by prepending a I<no>
1837             (e.g. I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
1838             Option names are not case-sensitive. The I<dns_options> directive may
1839             appear in configuration files multiple times, the last setting prevails.
1840              
1841             Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
1842             acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
1843             ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
1844             a traditional implied UDP payload size is 512 bytes, which is also a minimum
1845             allowed value for this option. When the option is specified but a value
1846             is not provided, a conservative default of 1220 bytes is implied. It is
1847             recommended to keep I<edns> enabled when using a local recursive DNS server
1848             which supports EDNS0 (like most modern DNS servers do), a suitable setting
1849             in this case is I<edns=4096>, which is also a default. Allowing UDP payload
1850             size larger than 512 bytes can avoid truncation of resource records in large
1851             DNS responses (like in TXT records of some SPF and DKIM responses, or when
1852             an unreasonable number of A records is published by some domain). The option
1853             should be disabled when a recursive DNS server is only reachable through
1854             non- RFC 6891 compliant middleboxes (such as some old-fashioned firewall)
1855             which bans DNS UDP payload sizes larger than 512 bytes. A suitable value
1856             when a non-local recursive DNS server is used and a middlebox B<does> allow
1857             EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
1858             DNS UDP packet to fit within a single IP packet in most cases (a slightly
1859             less conservative range would be 1280-1410 bytes).
1860              
1861             Option I<rotate> causes SpamAssassin to choose a DNS server at random
1862             from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
1863             seconds, effectively spreading the load over all currently available DNS
1864             servers when there are many spamd workers.
1865              
1866             Option I<dns0x20> enables randomization of letters in a DNS query label
1867             according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
1868             of responses (by chance or by a malicious intent) by increasing spread
1869             as provided by a 16-bit query ID and up to 16 bits of a port number,
1870             with additional bits as encoded by flipping case (upper/lower) of letters
1871             in a query. The number of additional random bits corresponds to the number
1872             of letters in a query label. Should work reliably with all mainstream
1873             DNS servers - do not turn on if you see frequent info messages
1874             "dns: no callback for id:" in the log, or if RBL or URIDNS lookups
1875             do not work for no apparent reason.
1876              
1877             =cut
1878              
1879             push (@cmds, {
1880             setting => 'dns_options',
1881             type => $CONF_TYPE_HASH_KEY_VALUE,
1882             code => sub {
1883             my ($self, $key, $value, $line) = @_;
1884             foreach my $option (split (/[\s,]+/, lc $value)) {
1885 0     0   0 local($1,$2);
1886 0         0 if ($option =~ /^no(rotate|dns0x20)\z/) {
1887 0         0 $self->{dns_options}->{$1} = 0;
1888 0 0       0 } elsif ($option =~ /^no(edns)0?\z/) {
    0          
    0          
    0          
1889 0         0 $self->{dns_options}->{$1} = 0;
1890             } elsif ($option =~ /^(rotate|dns0x20)\z/) {
1891 0         0 $self->{dns_options}->{$1} = 1;
1892             } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
1893 0         0 # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
1894             # size, defaults to some UDP packet size likely to fit into a single
1895             # IP packet which is more likely to pass firewalls which choke on IP
1896             # fragments. RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
1897             # basic header, yielding 1240. RFC 3226 prescribes a min of 1220 for
1898             # RFC 2535 compliant servers. RFC 6891: choosing between 1280 and
1899             # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
1900             #
1901             $self->{dns_options}->{$1} = $2 || 1220;
1902             return $INVALID_VALUE if $self->{dns_options}->{$1} < 512;
1903 0   0     0 } else {
1904 0 0       0 return $INVALID_VALUE;
1905             }
1906 0         0 }
1907             }
1908             });
1909              
1910 92         988 =item dns_query_restriction (allow|deny) domain1 domain2 ...
1911              
1912             Option allows disabling of rules which would result in a DNS query to one of
1913             the listed domains. The first argument must be a literal C<allow> or C<deny>,
1914             remaining arguments are domain names.
1915              
1916             Most DNS queries (with some exceptions) are subject to dns_query_restriction.
1917             A domain to be queried is successively stripped-off of its leading labels
1918             (thus yielding a series of its parent domains), and on each iteration a
1919             check is made against an associative array generated by dns_query_restriction
1920             options. Search stops at the first match (i.e. the tightest match), and the
1921             matching entry with its C<allow> or C<deny> value then controls whether a
1922             DNS query is allowed to be launched.
1923              
1924             If no match is found an implicit default is to allow a query. The purpose of
1925             an explicit C<allow> entry is to be able to override a previously configured
1926             C<deny> on the same domain or to override an entry (possibly yet to be
1927             configured in subsequent config directives) on one of its parent domains.
1928             Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
1929             DNS queries on a specific DNS BL zone but deny queries to other zones under
1930             the same parent domain.
1931              
1932             Domains are matched case-insensitively, no wildcards are recognized,
1933             there should be no leading or trailing dot.
1934              
1935             Specifying a block on querying a domain name has a similar effect as setting
1936             a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
1937             alternative to hunting for such rules when a site policy does not allow
1938             certain DNS block lists to be queried.
1939              
1940             Example:
1941             dns_query_restriction deny dnswl.org surbl.org
1942             dns_query_restriction allow zen.spamhaus.org
1943             dns_query_restriction deny spamhaus.org mailspike.net spamcop.net
1944              
1945             =cut
1946              
1947             push (@cmds, {
1948             setting => 'dns_query_restriction',
1949             type => $CONF_TYPE_STRING,
1950             code => sub {
1951             my ($self, $key, $value, $line) = @_;
1952             defined $value && $value =~ s/^(allow|deny)\s+//i
1953 0     0   0 or return $INVALID_VALUE;
1954 0 0 0     0 my $blocked = lc($1) eq 'deny' ? 1 : 0;
1955             foreach my $domain (split(/\s+/, $value)) {
1956 0 0       0 $domain =~ s/^\.//; $domain =~ s/\.\z//; # strip dots
1957 0         0 $self->{dns_query_blocked}{lc $domain} = $blocked;
1958 0         0 }
  0         0  
1959 0         0 }
1960             });
1961              
1962 92         1081 =item clear_dns_query_restriction
1963              
1964             The option removes any entries entered by previous 'dns_query_restriction'
1965             options, leaving the list empty, i.e. allowing DNS queries for any domain
1966             (including any DNS BL zone).
1967              
1968             =cut
1969              
1970             push (@cmds, {
1971             setting => 'clear_dns_query_restriction',
1972             aliases => ['clear_dns_query_restrictions'],
1973             type => $CONF_TYPE_NOARGS,
1974             code => sub {
1975             my ($self, $key, $value, $line) = @_;
1976             return $INVALID_VALUE if defined $value && $value ne '';
1977 0     0   0 delete $self->{dns_query_blocked};
1978 0 0 0     0 }
1979 0         0 });
1980              
1981 92         1143 =back
1982              
1983             =head2 LEARNING OPTIONS
1984              
1985             =over 4
1986              
1987             =item use_learner ( 0 | 1 ) (default: 1)
1988              
1989             Whether to use any machine-learning classifiers with SpamAssassin, such as the
1990             default 'BAYES_*' rules. Setting this to 0 will disable use of any and all
1991             human-trained classifiers.
1992              
1993             =cut
1994              
1995             push (@cmds, {
1996             setting => 'use_learner',
1997 92         433 default => 1,
1998             type => $CONF_TYPE_BOOL,
1999             });
2000              
2001             =item use_bayes ( 0 | 1 ) (default: 1)
2002              
2003             Whether to use the naive-Bayesian-style classifier built into
2004             SpamAssassin. This is a master on/off switch for all Bayes-related
2005             operations.
2006              
2007             =cut
2008              
2009             push (@cmds, {
2010             setting => 'use_bayes',
2011 92         445 default => 1,
2012             type => $CONF_TYPE_BOOL,
2013             });
2014              
2015             =item use_bayes_rules ( 0 | 1 ) (default: 1)
2016              
2017             Whether to use rules using the naive-Bayesian-style classifier built
2018             into SpamAssassin. This allows you to disable the rules while leaving
2019             auto and manual learning enabled.
2020              
2021             =cut
2022              
2023             push (@cmds, {
2024             setting => 'use_bayes_rules',
2025 92         423 default => 1,
2026             type => $CONF_TYPE_BOOL,
2027             });
2028              
2029             =item bayes_auto_learn ( 0 | 1 ) (default: 1)
2030              
2031             Whether SpamAssassin should automatically feed high-scoring mails (or
2032             low-scoring mails, for non-spam) into its learning systems. The only
2033             learning system supported currently is a naive-Bayesian-style classifier.
2034              
2035             See the documentation for the
2036             C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
2037             for details on how Bayes auto-learning is implemented by default.
2038              
2039             =cut
2040              
2041             push (@cmds, {
2042             setting => 'bayes_auto_learn',
2043 92         450 default => 1,
2044             type => $CONF_TYPE_BOOL,
2045             });
2046              
2047             =item bayes_token_sources (default: header visible invisible uri)
2048              
2049             Controls which sources in a mail message can contribute tokens (e.g. words,
2050             phrases, etc.) to a Bayes classifier. The argument is a space-separated list
2051             of keywords (I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>), each
2052             of which may be prefixed by a I<no> to indicate its exclusion. Additionally
2053             two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
2054             of keywords is processed sequentially: a keyword I<all> adds all available
2055             keywords to a set being built, a I<none> or I<noall> clears the set, other
2056             non-negated keywords are added to the set, and negated keywords are removed
2057             from the set. Keywords are case-insensitive.
2058              
2059             The default set is: I<header> I<visible> I<invisible> I<uri>, which is
2060             equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart>
2061             is not currently in a default set is that it is a newer source (introduced
2062             with SpamAssassin version 3.4.1) and not much experience has yet been gathered
2063             regarding its usefulness.
2064              
2065             See also option C<bayes_ignore_header> for a fine-grained control on individual
2066             header fields under the umbrella of a more general keyword I<header> here.
2067              
2068             Keywords imply the following data sources:
2069              
2070             =over 4
2071              
2072             =item I<header> - tokens collected from a message header section
2073              
2074             =item I<visible> - words from visible text (plain or HTML) in a message body
2075              
2076             =item I<invisible> - hidden/invisible text in HTML parts of a message body
2077              
2078             =item I<uri> - URIs collected from a message body
2079              
2080             =item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type
2081              
2082             =item I<all> - adds all the above keywords to the set being assembled
2083              
2084             =item I<none> or I<noall> - removes all keywords from the set
2085              
2086             =back
2087              
2088             The C<bayes_token_sources> directive may appear multiple times, its keywords
2089             are interpreted sequentially, adding or removing items from the final set
2090             as they appear in their order in C<bayes_token_sources> directive(s).
2091              
2092             =cut
2093              
2094             push (@cmds, {
2095             setting => 'bayes_token_sources',
2096             default => { map(($_,1), qw(header visible invisible uri)) }, # mimepart
2097             type => $CONF_TYPE_HASH_KEY_VALUE,
2098             code => sub {
2099             my ($self, $key, $value, $line) = @_;
2100             return $MISSING_REQUIRED_VALUE if $value eq '';
2101 0     0   0 my $h = ($self->{bayes_token_sources} ||= {});
2102 0 0       0 my %all_kw = map(($_,1), qw(header visible invisible uri mimepart));
2103 0   0     0 foreach (split(/\s+/, lc $value)) {
2104 0         0 if (/^(none|noall)\z/) {
2105 0         0 %$h = ();
2106 0 0 0     0 } elsif ($_ eq 'all') {
    0          
    0          
2107 0         0 %$h = %all_kw;
2108             } elsif (/^(no)?(.+)\z/s && exists $all_kw{$2}) {
2109 0         0 $h->{$2} = defined $1 ? 0 : 1;
2110             } else {
2111 0 0       0 return $INVALID_VALUE;
2112             }
2113 0         0 }
2114             }
2115             });
2116              
2117 92         2155 =item bayes_ignore_header header_name
2118              
2119             If you receive mail filtered by upstream mail systems, like
2120             a spam-filtering ISP or mailing list, and that service adds
2121             new headers (as most of them do), these headers may provide
2122             inappropriate cues to the Bayesian classifier, allowing it
2123             to take a "short cut". To avoid this, list the headers using this
2124             setting. Example:
2125              
2126             bayes_ignore_header X-Upstream-Spamfilter
2127             bayes_ignore_header X-Upstream-SomethingElse
2128              
2129             =cut
2130              
2131             push (@cmds, {
2132             setting => 'bayes_ignore_header',
2133             default => [],
2134             type => $CONF_TYPE_STRINGLIST,
2135             code => sub {
2136             my ($self, $key, $value, $line) = @_;
2137             if ($value eq '') {
2138 0     0   0 return $MISSING_REQUIRED_VALUE;
2139 0 0       0 }
2140 0         0 push (@{$self->{bayes_ignore_headers}}, split(/\s+/, $value));
2141             }
2142 0         0 });
  0         0  
2143              
2144 92         1293 =item bayes_ignore_from user@example.com
2145              
2146             Bayesian classification and autolearning will not be performed on mail
2147             from the listed addresses. Program C<sa-learn> will also ignore the
2148             listed addresses if it is invoked using the C<--use-ignores> option.
2149             One or more addresses can be listed, see C<whitelist_from>.
2150              
2151             Spam messages from certain senders may contain many words that
2152             frequently occur in ham. For example, one might read messages from a
2153             preferred bookstore but also get unwanted spam messages from other
2154             bookstores. If the unwanted messages are learned as spam then any
2155             messages discussing books, including the preferred bookstore and
2156             antiquarian messages would be in danger of being marked as spam. The
2157             addresses of the annoying bookstores would be listed. (Assuming they
2158             were halfway legitimate and didn't send you mail through myriad
2159             affiliates.)
2160              
2161             Those who have pieces of spam in legitimate messages or otherwise
2162             receive ham messages containing potentially spammy words might fear
2163             that some spam messages might be in danger of being marked as ham.
2164             The addresses of the spam mailing lists, correspondents, etc. would
2165             be listed.
2166              
2167             =cut
2168              
2169             push (@cmds, {
2170             setting => 'bayes_ignore_from',
2171 92         484 type => $CONF_TYPE_ADDRLIST,
2172             });
2173              
2174             =item bayes_ignore_to user@example.com
2175              
2176             Bayesian classification and autolearning will not be performed on mail
2177             to the listed addresses. See C<bayes_ignore_from> for details.
2178              
2179             =cut
2180              
2181             push (@cmds, {
2182             setting => 'bayes_ignore_to',
2183 92         485 type => $CONF_TYPE_ADDRLIST,
2184             });
2185              
2186             =item bayes_min_ham_num (Default: 200)
2187              
2188             =item bayes_min_spam_num (Default: 200)
2189              
2190             To be accurate, the Bayes system does not activate until a certain number of
2191             ham (non-spam) and spam have been learned. The default is 200 each of ham and
2192             spam, but you can tune these up or down with these two settings.
2193              
2194             =cut
2195              
2196             push (@cmds, {
2197             setting => 'bayes_min_ham_num',
2198 92         456 default => 200,
2199             type => $CONF_TYPE_NUMERIC,
2200             });
2201             push (@cmds, {
2202             setting => 'bayes_min_spam_num',
2203 92         370 default => 200,
2204             type => $CONF_TYPE_NUMERIC,
2205             });
2206              
2207             =item bayes_learn_during_report (Default: 1)
2208              
2209             The Bayes system will, by default, learn any reported messages
2210             (C<spamassassin -r>) as spam. If you do not want this to happen, set
2211             this option to 0.
2212              
2213             =cut
2214              
2215             push (@cmds, {
2216             setting => 'bayes_learn_during_report',
2217 92         322 default => 1,
2218             type => $CONF_TYPE_BOOL,
2219             });
2220              
2221             =item bayes_sql_override_username
2222              
2223             Used by BayesStore::SQL storage implementation.
2224              
2225             If this option is set the BayesStore::SQL module will override the set
2226             username with the value given. This could be useful for implementing global or
2227             group bayes databases.
2228              
2229             =cut
2230              
2231             push (@cmds, {
2232             setting => 'bayes_sql_override_username',
2233 92         338 default => '',
2234             type => $CONF_TYPE_STRING,
2235             });
2236              
2237             =item bayes_use_hapaxes (default: 1)
2238              
2239             Should the Bayesian classifier use hapaxes (words/tokens that occur only
2240             once) when classifying? This produces significantly better hit-rates.
2241              
2242             =cut
2243              
2244             push (@cmds, {
2245             setting => 'bayes_use_hapaxes',
2246 92         301 default => 1,
2247             type => $CONF_TYPE_BOOL,
2248             });
2249              
2250             =item bayes_journal_max_size (default: 102400)
2251              
2252             SpamAssassin will opportunistically sync the journal and the database.
2253             It will do so once a day, but will sync more often if the journal file
2254             size goes above this setting, in bytes. If set to 0, opportunistic
2255             syncing will not occur.
2256              
2257             =cut
2258              
2259             push (@cmds, {
2260             setting => 'bayes_journal_max_size',
2261 92         343 default => 102400,
2262             type => $CONF_TYPE_NUMERIC,
2263             });
2264              
2265             =item bayes_expiry_max_db_size (default: 150000)
2266              
2267             What should be the maximum size of the Bayes tokens database? When expiry
2268             occurs, the Bayes system will keep either 75% of the maximum value, or
2269             100,000 tokens, whichever has a larger value. 150,000 tokens is roughly
2270             equivalent to an 8 MB database file.
2271              
2272             =cut
2273              
2274             push (@cmds, {
2275             setting => 'bayes_expiry_max_db_size',
2276 92         404 default => 150000,
2277             type => $CONF_TYPE_NUMERIC,
2278             });
2279              
2280             =item bayes_auto_expire (default: 1)
2281              
2282             If enabled, the Bayes system will try to automatically expire old tokens
2283             from the database. Auto-expiry occurs when the number of tokens in the
2284             database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
2285             backend does not implement individual key/value expirations, the setting
2286             is silently ignored.
2287              
2288             =cut
2289              
2290             push (@cmds, {
2291             setting => 'bayes_auto_expire',
2292 92         325 default => 1,
2293             type => $CONF_TYPE_BOOL,
2294             });
2295              
2296             =item bayes_token_ttl (default: 3w, i.e. 3 weeks)
2297              
2298             Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
2299             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2300             indicating seconds (default), minutes, hours, days, weeks).
2301              
2302             If bayes_auto_expire is true and a Bayes datastore backend supports it
2303             (currently only Redis), this setting controls deletion of expired tokens
2304             from a bayes database. The value is observed on a best-effort basis, exact
2305             timing promises are not necessarily kept. If a bayes datastore backend
2306             does not implement individual key/value expirations, the setting is silently
2307             ignored.
2308              
2309             =cut
2310              
2311             push (@cmds, {
2312             setting => 'bayes_token_ttl',
2313 92         308 default => 3*7*24*60*60, # seconds (3 weeks)
2314             type => $CONF_TYPE_DURATION,
2315             });
2316              
2317             =item bayes_seen_ttl (default: 8d, i.e. 8 days)
2318              
2319             Time-to-live / expiration time in seconds for 'seen' entries
2320             (i.e. mail message digests with their status) kept in a Bayes database.
2321             A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2322             indicating seconds (default), minutes, hours, days, weeks).
2323              
2324             If bayes_auto_expire is true and a Bayes datastore backend supports it
2325             (currently only Redis), this setting controls deletion of expired 'seen'
2326             entries from a bayes database. The value is observed on a best-effort basis,
2327             exact timing promises are not necessarily kept. If a bayes datastore backend
2328             does not implement individual key/value expirations, the setting is silently
2329             ignored.
2330              
2331             =cut
2332              
2333             push (@cmds, {
2334             setting => 'bayes_seen_ttl',
2335 92         373 default => 8*24*60*60, # seconds (8 days)
2336             type => $CONF_TYPE_DURATION,
2337             });
2338              
2339             =item bayes_learn_to_journal (default: 0)
2340              
2341             If this option is set, whenever SpamAssassin does Bayes learning, it
2342             will put the information into the journal instead of directly into the
2343             database. This lowers contention for locking the database to execute
2344             an update, but will also cause more access to the journal and cause a
2345             delay before the updates are actually committed to the Bayes database.
2346              
2347             =cut
2348              
2349             push (@cmds, {
2350             setting => 'bayes_learn_to_journal',
2351 92         338 default => 0,
2352             type => $CONF_TYPE_BOOL,
2353             });
2354              
2355             =back
2356              
2357             =head2 MISCELLANEOUS OPTIONS
2358              
2359             =over 4
2360              
2361             =item time_limit n (default: 300)
2362              
2363             Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
2364             to spend before providing a result. The value may be fractional and must
2365             not be negative, zero is interpreted as unlimited. The default is 300
2366             seconds for consistency with the spamd default setting of --timeout-child .
2367              
2368             This is a best-effort advisory setting, processing will not be abruptly
2369             aborted at an arbitrary point in processing when the time limit is exceeded,
2370             but only on reaching one of the locations in the program flow equipped with a
2371             time test. Currently equipped with the test are the main checking loop,
2372             asynchronous DNS lookups, plugins which are calling external programs.
2373             Rule evaluation is guarded by starting a timer (alarm) on each set of
2374             compiled rules.
2375              
2376             When a message is passed to Mail::SpamAssassin::parse, a deadline time
2377             is established as a sum of current time and the C<time_limit> setting.
2378              
2379             This deadline may also be specified by a caller through an option
2380             'master_deadline' in $suppl_attrib on a call to parse(), possibly providing
2381             a more accurate deadline taking into account past and expected future
2382             processing of a message in a mail filtering setup. If both the config
2383             option as well as a 'master_deadline' option in a call are provided,
2384             the shorter time limit of the two is used (since version 3.3.2).
2385             Note that spamd (and possibly third-party callers of SpamAssassin) will
2386             supply the 'master_deadline' option in a call based on its --timeout-child
2387             option (or equivalent), unlike the command line C<spamassassin>, which has
2388             no such command line option.
2389              
2390             When a time limit is exceeded, most of the remaining tests will be skipped,
2391             as well as auto-learning. Whatever tests fired so far will determine the
2392             final score. The behaviour is similar to short-circuiting with attribute 'on',
2393             as implemented by a Shortcircuit plugin. A synthetic hit on a rule named
2394             TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that
2395             the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may
2396             be provided explicitly in a configuration file, for example to achieve
2397             whitelisting or blacklisting effect for messages with long processing times.
2398              
2399             The C<time_limit> option is a useful protection against excessive processing
2400             time on certain degenerate or unusually long or complex mail messages, as well
2401             as against some DoS attacks. It is also needed in time-critical pre-queue
2402             filtering setups (e.g. milter, proxy, integration with MTA), where message
2403             processing must finish before a SMTP client times out. RFC 5321 prescribes
2404             in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
2405             although it is not unusual to see some SMTP clients abort sooner on waiting
2406             for a response. A sensible C<time_limit> for a pre-queue filtering setup is
2407             maybe 50 seconds, assuming that clients are willing to wait at least a minute.
2408              
2409             =cut
2410              
2411             push (@cmds, {
2412             setting => 'time_limit',
2413 92         527 default => 300,
2414             type => $CONF_TYPE_DURATION,
2415             });
2416              
2417             =item lock_method type
2418              
2419             Select the file-locking method used to protect database files on-disk. By
2420             default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
2421             are sure that the database files you'll be using for Bayes and AWL storage will
2422             never be accessed over NFS, a non-NFS-safe locking system can be selected.
2423              
2424             This will be quite a bit faster, but may risk file corruption if the files are
2425             ever accessed by multiple clients at once, and one or more of them is accessing
2426             them through an NFS filesystem.
2427              
2428             Note that different platforms require different locking systems.
2429              
2430             The supported locking systems for C<type> are as follows:
2431              
2432             =over 4
2433              
2434             =item I<nfssafe> - an NFS-safe locking system
2435              
2436             =item I<flock> - simple UNIX C<flock()> locking
2437              
2438             =item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.
2439              
2440             =back
2441              
2442             nfssafe and flock are only available on UNIX, and win32 is only available
2443             on Windows. By default, SpamAssassin will choose either nfssafe or
2444             win32 depending on the platform in use.
2445              
2446             =cut
2447              
2448             push (@cmds, {
2449             setting => 'lock_method',
2450             default => '',
2451             type => $CONF_TYPE_STRING,
2452             code => sub {
2453             my ($self, $key, $value, $line) = @_;
2454             if ($value !~ /^(nfssafe|flock|win32)$/) {
2455 0     0   0 return $INVALID_VALUE;
2456 0 0       0 }
2457 0         0
2458             $self->{lock_method} = $value;
2459             # recreate the locker
2460 0         0 $self->{main}->create_locker();
2461             }
2462 0         0 });
2463              
2464 92         896 =item fold_headers ( 0 | 1 ) (default: 1)
2465              
2466             By default, headers added by SpamAssassin will be whitespace folded.
2467             In other words, they will be broken up into multiple lines instead of
2468             one very long one and each continuation line will have a tabulator
2469             prepended to mark it as a continuation of the preceding one.
2470              
2471             The automatic wrapping can be disabled here. Note that this can generate very
2472             long lines. RFC 2822 required that header lines do not exceed 998 characters
2473             (not counting the final CRLF).
2474              
2475             =cut
2476              
2477             push (@cmds, {
2478             setting => 'fold_headers',
2479 92         524 default => 1,
2480             type => $CONF_TYPE_BOOL,
2481             });
2482              
2483             =item report_safe_copy_headers header_name ...
2484              
2485             If using C<report_safe>, a few of the headers from the original message
2486             are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
2487             If you want to have other headers copied as well, you can add them
2488             using this option. You can specify multiple headers on the same line,
2489             separated by spaces, or you can just use multiple lines.
2490              
2491             =cut
2492              
2493             push (@cmds, {
2494             setting => 'report_safe_copy_headers',
2495             default => [],
2496             type => $CONF_TYPE_STRINGLIST,
2497             code => sub {
2498             my ($self, $key, $value, $line) = @_;
2499             if ($value eq '') {
2500 0     0   0 return $MISSING_REQUIRED_VALUE;
2501 0 0       0 }
2502 0         0 push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
2503             }
2504 0         0 });
  0         0  
2505              
2506 92         1030 =item envelope_sender_header Name-Of-Header
2507              
2508             SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
2509             phase of the SMTP transaction that delivered this message, if this data has
2510             been made available by the SMTP server. This is used in the C<EnvelopeFrom>
2511             pseudo-header, and for various rules such as SPF checking.
2512              
2513             By default, various MTAs will use different headers, such as the following:
2514              
2515             X-Envelope-From
2516             Envelope-Sender
2517             X-Sender
2518             Return-Path
2519              
2520             SpamAssassin will attempt to use these, if some heuristics (such as the header
2521             placement in the message, or the absence of fetchmail signatures) appear to
2522             indicate that they are safe to use. However, it may choose the wrong headers
2523             in some mailserver configurations. (More discussion of this can be found
2524             in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)
2525              
2526             To avoid this heuristic failure, the C<envelope_sender_header> setting may be
2527             helpful. Name the header that your MTA or MDA adds to messages containing the
2528             address used at the MAIL FROM step of the SMTP transaction.
2529              
2530             If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
2531             and end of the email address in the right-hand side, as in the SMTP
2532             transaction, these will be stripped.
2533              
2534             If the header is not found in a message, or if its value does not contain an
2535             C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
2536             default heuristics.
2537              
2538             (Note for MTA developers: we would prefer if the use of a single header be
2539             avoided in future, since that precludes 'downstream' spam scanning.
2540             C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
2541             better proposal, storing the envelope sender at each hop in the C<Received>
2542             header.)
2543              
2544             example:
2545              
2546             envelope_sender_header X-SA-Exim-Mail-From
2547              
2548             =cut
2549              
2550             push (@cmds, {
2551             setting => 'envelope_sender_header',
2552 92         439 default => undef,
2553             type => $CONF_TYPE_STRING,
2554             });
2555              
2556             =item describe SYMBOLIC_TEST_NAME description ...
2557              
2558             Used to describe a test. This text is shown to users in the detailed report.
2559              
2560             Note that test names which begin with '__' are reserved for meta-match
2561             sub-rules, and are not scored or listed in the 'tests hit' reports.
2562              
2563             Also note that by convention, rule descriptions should be limited in
2564             length to no more than 50 characters.
2565              
2566             =cut
2567              
2568             push (@cmds, {
2569             command => 'describe',
2570 92         503 setting => 'descriptions',
2571             is_frequent => 1,
2572             type => $CONF_TYPE_HASH_KEY_VALUE,
2573             });
2574              
2575             =item report_charset CHARSET (default: unset)
2576              
2577             Set the MIME Content-Type charset used for the text/plain report which
2578             is attached to spam mail messages.
2579              
2580             =cut
2581              
2582             push (@cmds, {
2583             setting => 'report_charset',
2584 92         405 default => '',
2585             type => $CONF_TYPE_STRING,
2586             });
2587              
2588             =item report ...some text for a report...
2589              
2590             Set the report template which is attached to spam mail messages. See the
2591             C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
2592             example.
2593              
2594             If you change this, try to keep it under 78 columns. Each C<report>
2595             line appends to the existing template, so use C<clear_report_template>
2596             to restart.
2597              
2598             Tags can be included as explained above.
2599              
2600             =cut
2601              
2602             push (@cmds, {
2603             command => 'report',
2604 92         520 setting => 'report_template',
2605             default => '',
2606             type => $CONF_TYPE_TEMPLATE,
2607             });
2608              
2609             =item clear_report_template
2610              
2611             Clear the report template.
2612              
2613             =cut
2614              
2615             push (@cmds, {
2616             command => 'clear_report_template',
2617 92         482 setting => 'report_template',
2618             type => $CONF_TYPE_NOARGS,
2619             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2620             });
2621              
2622             =item report_contact ...text of contact address...
2623              
2624             Set what _CONTACTADDRESS_ is replaced with in the above report text.
2625             By default, this is 'the administrator of that system', since the hostname
2626             of the system the scanner is running on is also included.
2627              
2628             =cut
2629              
2630             push (@cmds, {
2631             setting => 'report_contact',
2632 92         351 default => 'the administrator of that system',
2633             type => $CONF_TYPE_STRING,
2634             });
2635              
2636             =item report_hostname ...hostname to use...
2637              
2638             Set what _HOSTNAME_ is replaced with in the above report text.
2639             By default, this is determined dynamically as whatever the host running
2640             SpamAssassin calls itself.
2641              
2642             =cut
2643              
2644             push (@cmds, {
2645             setting => 'report_hostname',
2646 92         358 default => '',
2647             type => $CONF_TYPE_STRING,
2648             });
2649              
2650             =item unsafe_report ...some text for a report...
2651              
2652             Set the report template which is attached to spam mail messages which contain a
2653             non-text/plain part. See the C<10_default_prefs.cf> configuration file in
2654             C</usr/share/spamassassin> for an example.
2655              
2656             Each C<unsafe_report> line appends to the existing template, so use
2657             C<clear_unsafe_report_template> to restart.
2658              
2659             Tags can be used in this template (see above for details).
2660              
2661             =cut
2662              
2663             push (@cmds, {
2664             command => 'unsafe_report',
2665 92         503 setting => 'unsafe_report_template',
2666             default => '',
2667             type => $CONF_TYPE_TEMPLATE,
2668             });
2669              
2670             =item clear_unsafe_report_template
2671              
2672             Clear the unsafe_report template.
2673              
2674             =cut
2675              
2676             push (@cmds, {
2677             command => 'clear_unsafe_report_template',
2678 92         513 setting => 'unsafe_report_template',
2679             type => $CONF_TYPE_NOARGS,
2680             code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2681             });
2682              
2683             =item mbox_format_from_regex
2684              
2685             Set a specific regular expression to be used for mbox file From separators.
2686              
2687             For example, this setting will allow sa-learn to process emails stored in
2688             a kmail 2 mbox:
2689              
2690             mbox_format_from_regex /^From \S+ ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/
2691              
2692              
2693             =cut
2694              
2695             push (@cmds, {
2696             setting => 'mbox_format_from_regex',
2697 92         367 type => $CONF_TYPE_STRING
2698             });
2699              
2700              
2701             =item parse_dkim_uris ( 0 | 1 ) (default: 1)
2702              
2703             If this option is set to 1 and the message contains DKIM headers, the headers will be parsed for URIs, which are then processed alongside URIs found in the body by some rules and modules (e.g. URIDNSBL).
2704              
2705             =cut
2706              
2707             push (@cmds, {
2708             setting => 'parse_dkim_uris',
2709 92         482 default => 1,
2710             type => $CONF_TYPE_BOOL,
2711             });
2712              
2713             =back
2714              
2715             =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
2716              
2717             These settings differ from the ones above, in that they are considered
2718             'privileged'. Only users running C<spamassassin> from their procmailrc's or
2719             forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
2720             use them. C<spamd> users cannot use them in their C<user_prefs> files, for
2721             security and efficiency reasons, unless C<allow_user_rules> is enabled (and
2722             then, they may only add rules from below).
2723              
2724             =over 4
2725              
2726             =item allow_user_rules ( 0 | 1 ) (default: 0)
2727              
2728             This setting allows users to create rules (and only rules) in their
2729             C<user_prefs> files for use with C<spamd>. It defaults to off, because
2730             this could be a severe security hole. It may be possible for users to
2731             gain root level access if C<spamd> is run as root. It is NOT a good
2732             idea, unless you have some other way of ensuring that users' tests are
2733             safe. Don't use this unless you are certain you know what you are
2734             doing. Furthermore, this option causes spamassassin to recompile all
2735             the tests each time it processes a message for a user with a rule in
2736             his/her C<user_prefs> file, which could have a significant effect on
2737             server load. It is not recommended.
2738              
2739             Note that it is not currently possible to use C<allow_user_rules> to modify an
2740             existing system rule from a C<user_prefs> file with C<spamd>.
2741              
2742             =cut
2743              
2744             push (@cmds, {
2745             setting => 'allow_user_rules',
2746             is_priv => 1,
2747             default => 0,
2748             type => $CONF_TYPE_BOOL,
2749             code => sub {
2750             my ($self, $key, $value, $line) = @_;
2751             if ($value eq '') {
2752 0     0   0 return $MISSING_REQUIRED_VALUE;
2753 0 0       0 }
    0          
2754 0         0 elsif ($value !~ /^[01]$/) {
2755             return $INVALID_VALUE;
2756             }
2757 0         0  
2758             $self->{allow_user_rules} = $value+0;
2759             dbg("config: " . ($self->{allow_user_rules} ? "allowing":"not allowing") . " user rules!");
2760 0         0 }
2761 0 0       0 });
2762              
2763 92         1061 =item redirector_pattern /pattern/modifiers
2764              
2765             A regex pattern that matches both the redirector site portion, and
2766             the target site portion of a URI.
2767              
2768             Note: The target URI portion must be surrounded in parentheses and
2769             no other part of the pattern may create a backreference.
2770              
2771             Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude
2772              
2773             redirector_pattern /^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
2774              
2775             =cut
2776              
2777             push (@cmds, {
2778             setting => 'redirector_pattern',
2779             is_priv => 1,
2780             default => [],
2781             type => $CONF_TYPE_STRINGLIST,
2782             code => sub {
2783             my ($self, $key, $value, $line) = @_;
2784              
2785 434     434   1254 $value =~ s/^\s+//;
2786             if ($value eq '') {
2787 434         1135 return $MISSING_REQUIRED_VALUE;
2788 434 50       985 }
2789 0         0  
2790             my ($rec, $err) = compile_regexp($value, 1);
2791             if (!$rec) {
2792 434         1114 dbg("config: invalid redirector_pattern '$value': $err");
2793 434 50       957 return $INVALID_VALUE;
2794 0         0 }
2795 0         0  
2796             push @{$self->{main}->{conf}->{redirector_patterns}}, $rec;
2797             }
2798 434         486 });
  434         1894  
2799              
2800 92         1101 =item header SYMBOLIC_TEST_NAME header op /pattern/modifiers [if-unset: STRING]
2801              
2802             Define a test. C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
2803             'FROM_ENDS_IN_NUMS'. C<header> is the name of a mail header field,
2804             such as 'Subject', 'To', 'From', etc. Header field names are matched
2805             case-insensitively (conforming to RFC 5322 section 1.2.2), except for
2806             all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED.
2807              
2808             Appending a modifier C<:raw> to a header field name will inhibit decoding of
2809             quoted-printable or base-64 encoded strings, and will preserve all whitespace
2810             inside the header string. The C<:raw> may also be applied to pseudo-headers
2811             e.g. C<ALL:raw> will return a pristine (unmodified) header section.
2812              
2813             Appending a modifier C<:addr> to a header field name will cause everything
2814             except the first email address to be removed from the header field. It is
2815             mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
2816             their 'Resent-*' counterparts, and the 'Return-Path'.
2817              
2818             Appending a modifier C<:name> to a header field name will cause everything
2819             except the first display name to be removed from the header field. It is
2820             mainly applicable to header fields containing a single mail address: 'From',
2821             'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
2822              
2823             It is syntactically permitted to append more than one modifier to a header
2824             field name, although currently most combinations achieve no additional effect,
2825             for example C<From:addr:raw> or C<From:raw:addr> is currently the same as
2826             C<From:addr> .
2827              
2828             For example, appending C<:addr> to a header name will result in example@foo
2829             in all of the following cases:
2830              
2831             =over 4
2832              
2833             =item example@foo
2834              
2835             =item example@foo (Foo Blah)
2836              
2837             =item example@foo, example@bar
2838              
2839             =item display: example@foo (Foo Blah), example@bar ;
2840              
2841             =item Foo Blah E<lt>example@fooE<gt>
2842              
2843             =item "Foo Blah" E<lt>example@fooE<gt>
2844              
2845             =item "'Foo Blah'" E<lt>example@fooE<gt>
2846              
2847             =back
2848              
2849             For example, appending C<:name> to a header name will result in "Foo Blah"
2850             (without quotes) in all of the following cases:
2851              
2852             =over 4
2853              
2854             =item example@foo (Foo Blah)
2855              
2856             =item example@foo (Foo Blah), example@bar
2857              
2858             =item display: example@foo (Foo Blah), example@bar ;
2859              
2860             =item Foo Blah E<lt>example@fooE<gt>
2861              
2862             =item "Foo Blah" E<lt>example@fooE<gt>
2863              
2864             =item "'Foo Blah'" E<lt>example@fooE<gt>
2865              
2866             =back
2867              
2868             There are several special pseudo-headers that can be specified:
2869              
2870             =over 4
2871              
2872             =item C<ALL> can be used to mean the text of all the message's headers.
2873             Note that all whitespace inside the headers, at line folds, is currently
2874             compressed into a single space (' ') character. To obtain a pristine
2875             (unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
2876             above. There are also similar pseudo-headers that return the headers added by
2877             specific relays: ALL-TRUSTED, ALL-INTERNAL, ALL-UNTRUSTED, ALL-EXTERNAL.
2878              
2879             =item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
2880             headers.
2881              
2882             =item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
2883             transaction that delivered this message, if this data has been made available
2884             by the SMTP server. See C<envelope_sender_header> for more information
2885             on how to set this.
2886              
2887             =item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;
2888             some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
2889             or to 'X-Message-Id', then uses its own one in the 'Message-Id' header.
2890             The value returned for this symbol is the text from all 3 headers, separated
2891             by newlines.
2892              
2893             =item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>,
2894             C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable,
2895             pre-parsed representation of the message's network path, as recorded in the
2896             Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs
2897             'external' sets. See C<http://wiki.apache.org/spamassassin/TrustedRelays> for
2898             more details.
2899              
2900             =back
2901              
2902             C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain
2903             regular expression), and C<pattern> is a valid Perl regular expression, with
2904             C<modifiers> as regexp modifiers in the usual style. Note that multi-line
2905             rules are not supported, even if you use C<x> as a modifier. Also note that
2906             the C<#> character must be escaped (C<\#>) or else it will be considered to be
2907             the start of a comment and not part of the regexp.
2908              
2909             If the header specified matches multiple headers, their text will be
2910             concatenated with embedded \n's. Therefore you may wish to use C</m> if you
2911             use C<^> or C<$> in your regular expression.
2912              
2913             If the C<[if-unset: STRING]> tag is present, then C<STRING> will
2914             be used if the header is not found in the mail message.
2915              
2916             Test names must not start with a number, and must contain only
2917             alphanumerics and underscores. It is suggested that lower-case characters
2918             not be used, and names have a length of no more than 22 characters,
2919             as an informal convention. Dashes are not allowed.
2920              
2921             Note that test names which begin with '__' are reserved for meta-match
2922             sub-rules, and are not scored or listed in the 'tests hit' reports.
2923             Test names which begin with 'T_' are reserved for tests which are
2924             undergoing QA, and these are given a very low score.
2925              
2926             If you add or modify a test, please be sure to run a sanity check afterwards
2927             by running C<spamassassin --lint>. This will avoid confusing error
2928             messages, or other tests being skipped as a side-effect.
2929              
2930             =item header SYMBOLIC_TEST_NAME exists:header_field_name
2931              
2932             Define a header field existence test. C<header_field_name> is the name
2933             of a header field to test for existence. Not to be confused with a
2934             test for a nonempty header field body, which can be implemented by a
2935             C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above.
2936              
2937             =item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments])
2938              
2939             Define a header eval test. C<name_of_eval_method> is the name of
2940             a method registered by a C<Mail::SpamAssassin::Plugin> object.
2941             C<arguments> are optional arguments to the function call.
2942              
2943             =item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test'])
2944              
2945             Check a DNSBL (a DNS blacklist or whitelist). This will retrieve Received:
2946             headers from the message, extract the IP addresses, select which ones are
2947             'untrusted' based on the C<trusted_networks> logic, and query that DNSBL
2948             zone. There are a few things to note:
2949              
2950             =over 4
2951              
2952             =item duplicated or private IPs
2953              
2954             Duplicated IPs are only queried once and reserved IPs are not queried.
2955             Private IPs are those listed in
2956             C<https://www.iana.org/assignments/ipv4-address-space>,
2957             C<http://duxcw.com/faq/network/privip.htm>,
2958             C<http://duxcw.com/faq/network/autoip.htm>, or
2959             C<https://tools.ietf.org/html/rfc5735> as private.
2960              
2961             =item the 'set' argument
2962              
2963             This is used as a 'zone ID'. If you want to look up a multiple-meaning zone
2964             like SORBS, you can then query the results from that zone using it;
2965             but all check_rbl_sub() calls must use that zone ID.
2966              
2967             Also, if more than one IP address gets a DNSBL hit for a particular rule, it
2968             does not affect the score because rules only trigger once per message.
2969              
2970             =item the 'zone' argument
2971              
2972             This is the root zone of the DNSBL.
2973              
2974             The domain name is considered to be a fully qualified domain name
2975             (i.e. not subject to DNS resolver's search or default domain options).
2976             No trailing period is needed, and will be removed if specified.
2977              
2978             =item the 'sub-test' argument
2979              
2980             This optional argument behaves the same as the sub-test argument in
2981             C<check_rbl_sub()> below.
2982              
2983             =item selecting all IPs except for the originating one
2984              
2985             This is accomplished by placing '-notfirsthop' at the end of the set name.
2986             This is useful for querying against DNS lists which list dialup IP
2987             addresses; the first hop may be a dialup, but as long as there is at least
2988             one more hop, via their outgoing SMTP server, that's legitimate, and so
2989             should not gain points. If there is only one hop, that will be queried
2990             anyway, as it should be relaying via its outgoing SMTP server instead of
2991             sending directly to your MX (mail exchange).
2992              
2993             =item selecting IPs by whether they are trusted
2994              
2995             When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP
2996             addresses in Received headers that were not added by trusted relays. To
2997             test the first IP address that can be trusted, place '-firsttrusted' at the
2998             end of the set name. That should test the IP address of the relay that
2999             connected to the most remote trusted relay.
3000              
3001             Note that this requires that SpamAssassin know which relays are trusted. For
3002             simple cases, SpamAssassin can make a good estimate. For complex cases, you
3003             may get better results by setting C<trusted_networks> manually.
3004              
3005             In addition, you can test all untrusted IP addresses by placing '-untrusted'
3006             at the end of the set name. Important note -- this does NOT include the
3007             IP address from the most recent 'untrusted line', as used in '-firsttrusted'
3008             above. That's because we're talking about the trustworthiness of the
3009             IP address data, not the source header line, here; and in the case of
3010             the most recent header (the 'firsttrusted'), that data can be trusted.
3011             See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays>
3012             for more information on this.
3013              
3014             =item Selecting just the last external IP
3015              
3016             By using '-lastexternal' at the end of the set name, you can select only
3017             the external host that connected to your internal network, or at least
3018             the last external host with a public IP.
3019              
3020             =back
3021              
3022             =item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone')
3023              
3024             Same as check_rbl(), except querying using IN TXT instead of IN A records.
3025             If the zone supports it, it will result in a line of text describing
3026             why the IP is listed, typically a hyperlink to a database entry.
3027              
3028             =item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test')
3029              
3030             Create a sub-test for 'set'. If you want to look up a multi-meaning zone
3031             like relays.osirusoft.com, you can then query the results from that zone
3032             using the zone ID from the original query. The sub-test may either be an
3033             IPv4 dotted address for RBLs that return multiple A records, or a
3034             non-negative decimal number to specify a bitmask for RBLs that return a
3035             single A record containing a bitmask of results, or a regular expression.
3036              
3037             Note: the set name must be exactly the same as for the main query rule,
3038             including selections like '-notfirsthop' appearing at the end of the set
3039             name.
3040              
3041             =cut
3042              
3043             push (@cmds, {
3044             setting => 'header',
3045             is_frequent => 1,
3046             is_priv => 1,
3047             code => sub {
3048             my ($self, $key, $value, $line) = @_;
3049             local($1);
3050 2882     2882   7378 if ($value !~ s/^(\S+)\s+//) {
3051 2882         5898 return $INVALID_VALUE;
3052 2882 50       11546 }
3053 0         0 my $rulename = $1;
3054             if ($value eq '') {
3055 2882         5982 return $MISSING_REQUIRED_VALUE;
3056 2882 50       6032 }
3057 0         0 if ($value =~ /^(?:rbl)?eval:(.*)$/) {
3058             my $fn = $1;
3059 2882 100       10632 if ($fn !~ /^\w+\(.*\)$/) {
3060 2324         5008 return $INVALID_VALUE;
3061 2324 50       6920 }
3062 0         0 if ($fn =~ /^check_(?:rbl|dns)/) {
3063             $self->{parser}->add_test ($rulename, $fn, $TYPE_RBL_EVALS);
3064 2324 100       5284 }
3065 1         8 else {
3066             $self->{parser}->add_test ($rulename, $fn, $TYPE_HEAD_EVALS);
3067             }
3068 2323         6120 }
3069             else {
3070             # Detailed parsing in add_test
3071             $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
3072             }
3073 558         2067 }
3074             });
3075              
3076 92         1141 =item body SYMBOLIC_TEST_NAME /pattern/modifiers
3077              
3078             Define a body pattern test. C<pattern> is a Perl regular expression. Note:
3079             as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3080             the beginning of a comment.
3081              
3082             The 'body' in this case is the textual parts of the message body; any
3083             non-text MIME parts are stripped, and the message decoded from
3084             Quoted-Printable or Base-64-encoded format if necessary. Parts declared as
3085             text/html will be rendered from HTML to text.
3086              
3087             All body paragraphs (double-newline-separated blocks of text) are turned into
3088             a single line, with line breaks removed and whitespace normalized. Any lines
3089             longer than 2kB are split into shorter separate lines (from a boundary when
3090             possible); this may unexpectedly prevent a pattern from matching. Patterns
3091             are matched independently against each of these lines.
3092              
3093             Note that by default the message Subject header is considered part of the
3094             body and becomes the first line when running the rules. If you don't want
3095             to match Subject along with body text, use "tflags RULENAME nosubject".
3096              
3097             =item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3098              
3099             Define a body eval test. See above.
3100              
3101             =cut
3102              
3103             push (@cmds, {
3104             setting => 'body',
3105             is_frequent => 1,
3106             is_priv => 1,
3107             code => sub {
3108             my ($self, $key, $value, $line) = @_;
3109             local($1);
3110 316     316   1156 if ($value !~ s/^(\S+)\s+//) {
3111 316         735 return $INVALID_VALUE;
3112 316 50       1690 }
3113 0         0 my $rulename = $1;
3114             if ($value eq '') {
3115 316         836 return $MISSING_REQUIRED_VALUE;
3116 316 50       1005 }
3117 0         0 if ($value =~ /^eval:(.*)$/) {
3118             my $fn = $1;
3119 316 100       1329 if ($fn !~ /^\w+\(.*\)$/) {
3120 84         277 return $INVALID_VALUE;
3121 84 50       529 }
3122 0         0 $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
3123             } else {
3124 84         425 $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS);
3125             }
3126 232         1058 }
3127             });
3128              
3129 92         1096 =item uri SYMBOLIC_TEST_NAME /pattern/modifiers
3130              
3131             Define a uri pattern test. C<pattern> is a Perl regular expression. Note: as
3132             per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3133             the beginning of a comment.
3134              
3135             The 'uri' in this case is a list of all the URIs in the body of the email,
3136             and the test will be run on each and every one of those URIs, adjusting the
3137             score if a match is found. Use this test instead of one of the body tests
3138             when you need to match a URI, as it is more accurately bound to the start/end
3139             points of the URI, and will also be faster.
3140              
3141             =cut
3142              
3143             # we don't do URI evals yet - maybe later
3144             # if (/^uri\s+(\S+)\s+eval:(.*)$/) {
3145             # $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
3146             # next;
3147             # }
3148             push (@cmds, {
3149             setting => 'uri',
3150             is_priv => 1,
3151             code => sub {
3152             my ($self, $key, $value, $line) = @_;
3153             local($1);
3154 62     62   375 if ($value !~ s/^(\S+)\s+//) {
3155 62         218 return $INVALID_VALUE;
3156 62 50       548 }
3157 0         0 my $rulename = $1;
3158             if ($value eq '') {
3159 62         237 return $MISSING_REQUIRED_VALUE;
3160 62 50       317 }
3161 0         0 $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
3162             }
3163 62         312 });
3164              
3165 92         1106 =item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers
3166              
3167             Define a raw-body pattern test. C<pattern> is a Perl regular expression.
3168             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3169             considered the beginning of a comment.
3170              
3171             The 'raw body' of a message is the raw data inside all textual parts. The
3172             text will be decoded from base64 or quoted-printable encoding, but HTML
3173             tags and line breaks will still be present. Multiline expressions will
3174             need to be used to match strings that are broken by line breaks.
3175              
3176             Note that the text is split into 2-4kB chunks (at a word boundary when
3177             possible); this may unexpectedly prevent a pattern from matching. Patterns
3178             are matched independently against each of these chunks.
3179              
3180             =item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3181              
3182             Define a raw-body eval test. See above.
3183              
3184             =cut
3185              
3186             push (@cmds, {
3187             setting => 'rawbody',
3188             is_frequent => 1,
3189             is_priv => 1,
3190             code => sub {
3191             my ($self, $key, $value, $line) = @_;
3192             local($1);
3193 0     0   0 if ($value !~ s/^(\S+)\s+//) {
3194 0         0 return $INVALID_VALUE;
3195 0 0       0 }
3196 0         0 my $rulename = $1;
3197             if ($value eq '') {
3198 0         0 return $MISSING_REQUIRED_VALUE;
3199 0 0       0 }
3200 0         0 if ($value =~ /^eval:(.*)$/) {
3201             my $fn = $1;
3202 0 0       0 if ($fn !~ /^\w+\(.*\)$/) {
3203 0         0 return $INVALID_VALUE;
3204 0 0       0 }
3205 0         0 $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
3206             } else {
3207 0         0 $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS);
3208             }
3209 0         0 }
3210             });
3211              
3212 92         1068 =item full SYMBOLIC_TEST_NAME /pattern/modifiers
3213              
3214             Define a full message pattern test. C<pattern> is a Perl regular expression.
3215             Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3216             considered the beginning of a comment.
3217              
3218             The full message is the pristine message headers plus the pristine message
3219             body, including all MIME data such as images, other attachments, MIME
3220             boundaries, etc.
3221              
3222             =item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3223              
3224             Define a full message eval test. See above.
3225              
3226             =cut
3227              
3228             push (@cmds, {
3229             setting => 'full',
3230             is_priv => 1,
3231             code => sub {
3232             my ($self, $key, $value, $line) = @_;
3233             local($1);
3234 305     305   944 if ($value !~ s/^(\S+)\s+//) {
3235 305         664 return $INVALID_VALUE;
3236 305 50       1478 }
3237 0         0 my $rulename = $1;
3238             if ($value eq '') {
3239 305         741 return $MISSING_REQUIRED_VALUE;
3240 305 50       836 }
3241 0         0 if ($value =~ /^eval:(.*)$/) {
3242             my $fn = $1;
3243 305 50       1215 if ($fn !~ /^\w+\(.*\)$/) {
3244 305         691 return $INVALID_VALUE;
3245 305 50       1231 }
3246 0         0 $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
3247             } else {
3248 305         1029 $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS);
3249             }
3250 0         0 }
3251             });
3252              
3253 92         998 =item meta SYMBOLIC_TEST_NAME boolean expression
3254              
3255             Define a boolean expression test in terms of other tests that have
3256             been hit or not hit. For example:
3257              
3258             meta META1 TEST1 && !(TEST2 || TEST3)
3259              
3260             Note that English language operators ("and", "or") will be treated as
3261             rule names, and that there is no C<XOR> operator.
3262              
3263             =item meta SYMBOLIC_TEST_NAME boolean arithmetic expression
3264              
3265             Can also define an arithmetic expression in terms of other tests,
3266             with an unhit test having the value "0" and a hit test having a
3267             nonzero value. The value of a hit meta test is that of its arithmetic
3268             expression. The value of a hit eval test is that returned by its
3269             method. The value of a hit header, body, rawbody, uri, or full test
3270             which has the "multiple" tflag is the number of times the test hit.
3271             The value of any other type of hit test is "1".
3272              
3273             For example:
3274              
3275             meta META2 (3 * TEST1 - 2 * TEST2) E<gt> 0
3276              
3277             Note that Perl builtins and functions, like C<abs()>, B<can't> be
3278             used, and will be treated as rule names.
3279              
3280             If you want to define a meta-rule, but do not want its individual sub-rules to
3281             count towards the final score unless the entire meta-rule matches, give the
3282             sub-rules names that start with '__' (two underscores). SpamAssassin will
3283             ignore these for scoring.
3284              
3285             =item meta SYMBOLIC_TEST_NAME ... rules_matching(RULEGLOB) ...
3286              
3287             Special function that will expand to a list of matching rulenames. Can be
3288             used anywhere in expressions. The argument supports glob-style rulename
3289             matching (* = anything, ? = one character). Matching is case-sensitive.
3290              
3291             For example, this will hit if at least two __FOO_* rules hit:
3292              
3293             body __FOO_1 /xxx/
3294             body __FOO_2 /yyy/
3295             body __FOO_3 /zzz/
3296             meta FOO_META rules_matching(__FOO_*) >= 2
3297              
3298             Which would be the same as:
3299              
3300             meta FOO_META (__FOO_1 + __FOO_2 + __FOO_3) >= 2
3301              
3302              
3303             =cut
3304              
3305             push (@cmds, {
3306             setting => 'meta',
3307             is_frequent => 1,
3308             is_priv => 1,
3309             code => sub {
3310             my ($self, $key, $value, $line) = @_;
3311             local($1);
3312 132     132   508 if ($value !~ s/^(\S+)\s+//) {
3313 132         331 return $INVALID_VALUE;
3314 132 50       737 }
3315 0         0 my $rulename = $1;
3316             if ($value eq '') {
3317 132         398 return $MISSING_REQUIRED_VALUE;
3318 132 50       574 }
3319 0         0 if ($value =~ /\*\s*\*/) {
3320             info("config: found invalid '**' or '* *' operator in meta command");
3321 132 50       479 return $INVALID_VALUE;
3322 0         0 }
3323 0         0 $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
3324             }
3325 132         525 });
3326              
3327 92         1130 =item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]
3328              
3329             Defines the name of a test that should be "reused" during the scoring
3330             process. If a message has an X-Spam-Status header that shows a hit for
3331             this rule or any of the old rule names given, a hit will be added for
3332             this rule when B<mass-check --reuse> is used. Examples:
3333              
3334             C<reuse SPF_PASS>
3335              
3336             C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>
3337              
3338             The actual logic for reuse tests is done by
3339             B<Mail::SpamAssassin::Plugin::Reuse>.
3340              
3341             =cut
3342              
3343             push (@cmds, {
3344             setting => 'reuse',
3345             is_priv => 1,
3346             code => sub {
3347             my ($self, $key, $value, $line) = @_;
3348             if ($value !~ /\s*(\w+)(?:\s+(?:\w+(?:\s+\w+)*))?\s*$/) {
3349 61     61   372 return $INVALID_VALUE;
3350 61 50       569 }
3351 0         0 my $rule_name = $1;
3352             # don't overwrite tests, just define them so scores, priorities work
3353 61         248 if (!exists $self->{tests}->{$rule_name}) {
3354             $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
3355 61 50       460 }
3356 0         0 }
3357             });
3358              
3359 92         1000 =item tflags SYMBOLIC_TEST_NAME flags
3360              
3361             Used to set flags on a test. Parameter is a space-separated list of flag
3362             names or flag name = value pairs.
3363             These flags are used in the score-determination back end system for details
3364             of the test's behaviour. Please see C<bayes_auto_learn> for more information
3365             about tflag interaction with those systems. The following flags can be set:
3366              
3367             =over 4
3368              
3369             =item net
3370              
3371             The test is a network test, and will not be run in the mass checking system
3372             or if B<-L> is used, therefore its score should not be modified.
3373              
3374             =item nice
3375              
3376             The test is intended to compensate for common false positives, and should be
3377             assigned a negative score.
3378              
3379             =item userconf
3380              
3381             The test requires user configuration before it can be used (like
3382             language-specific tests).
3383              
3384             =item learn
3385              
3386             The test requires training before it can be used.
3387              
3388             =item noautolearn
3389              
3390             The test will explicitly be ignored when calculating the score for
3391             learning systems.
3392              
3393             =item autolearn_force
3394              
3395             The test will be subject to less stringent autolearn thresholds.
3396              
3397             Normally, SpamAssassin will require 3 points from the header and 3
3398             points from the body to be auto-learned as spam. This option keeps
3399             the threshold at 6 points total but changes it to have no regard to the
3400             source of the points.
3401              
3402             =item noawl
3403              
3404             This flag is specific to the AWL plugin.
3405              
3406             Normally, the AWL plugin normalizes scores via auto-whitelist. In some scenarios
3407             this works against the system administrator when trying to add rules to
3408             correct misclassified email. When the AWL plugin scans the email and finds
3409             the noawl flag, it will exit without normalizing the score or storing the
3410             value in the database.
3411              
3412             =item multiple
3413              
3414             The test will be evaluated multiple times, for use with meta rules.
3415             Only affects header, body, rawbody, uri, and full tests.
3416              
3417             =item maxhits=N
3418              
3419             If B<multiple> is specified, limit the number of hits found to N.
3420             If the rule is used in a meta that counts the hits (e.g. __RULENAME E<gt> 5),
3421             this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").
3422              
3423             For example:
3424              
3425             uri __KAM_COUNT_URIS /^./
3426             tflags __KAM_COUNT_URIS multiple maxhits=16
3427             describe __KAM_COUNT_URIS A multiple match used to count URIs in a message
3428              
3429             meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
3430             meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
3431             meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
3432             meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
3433             meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
3434             meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
3435             meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
3436             meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)
3437              
3438             =item nosubject
3439              
3440             Used only for B<body> rules. If specified, Subject header will not be a
3441             part of the matched body text. See I<body> for more info.
3442              
3443             =item ips_only
3444              
3445             This flag is specific to rules invoking the URIDNSBL plugin;
3446             it is documented there.
3447              
3448             =item domains_only
3449              
3450             This flag is specific to rules invoking the URIDNSBL plugin;
3451             it is documented there.
3452              
3453             =item ns
3454              
3455             This flag is specific to rules invoking the URIDNSBL plugin;
3456             it is documented there.
3457              
3458             =item a
3459              
3460             This flag is specific to rules invoking the URIDNSBL plugin;
3461             it is documented there.
3462              
3463             =back
3464              
3465             =cut
3466              
3467             push (@cmds, {
3468             setting => 'tflags',
3469 92         581 is_frequent => 1,
3470             is_priv => 1,
3471             type => $CONF_TYPE_HASH_KEY_VALUE,
3472             });
3473              
3474             =item priority SYMBOLIC_TEST_NAME n
3475              
3476             Assign a specific priority to a test. All tests, except for DNS and Meta
3477             tests, are run in increasing priority value order (negative priority values
3478             are run before positive priority values). The default test priority is 0
3479             (zero).
3480              
3481             The values C<-99999999999999> and C<-99999999999998> have a special meaning
3482             internally, and should not be used.
3483              
3484             =cut
3485              
3486             push (@cmds, {
3487             setting => 'priority',
3488             is_priv => 1,
3489             type => $CONF_TYPE_HASH_KEY_VALUE,
3490             code => sub {
3491             my ($self, $key, $value, $line) = @_;
3492             my ($rulename, $priority) = split(/\s+/, $value, 2);
3493 77     77   390 unless (defined $priority) {
3494 77         830 return $MISSING_REQUIRED_VALUE;
3495 77 50       367 }
3496 0         0 unless ($rulename =~ IS_RULENAME) {
3497             return $INVALID_VALUE;
3498 77 50       327 }
3499 0         0 unless ($priority =~ /^-?\d+$/) {
3500             return $INVALID_VALUE;
3501 77 50       391 }
3502 0         0 $self->{priority}->{$rulename} = $priority;
3503             }
3504 77         455 });
3505              
3506 92         1013 =back
3507              
3508             =head1 ADMINISTRATOR SETTINGS
3509              
3510             These settings differ from the ones above, in that they are considered 'more
3511             privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
3512             No matter what C<allow_user_rules> is set to, these can never be set from a
3513             user's C<user_prefs> file when spamc/spamd is being used. However, all
3514             settings can be used by local programs run directly by the user.
3515              
3516             =over 4
3517              
3518             =item version_tag string
3519              
3520             This tag is appended to the SA version in the X-Spam-Status header. You should
3521             include it when you modify your ruleset, especially if you plan to distribute it.
3522             A good choice for I<string> is your last name or your initials followed by a
3523             number which you increase with each change.
3524              
3525             The version_tag will be lowercased, and any character other than a
3526             letter, digit or period will be replaced by an underscore.
3527              
3528             e.g.
3529              
3530             version_tag myrules1 # version=2.41-myrules1
3531              
3532             =cut
3533              
3534             push (@cmds, {
3535             setting => 'version_tag',
3536             is_admin => 1,
3537             code => sub {
3538             my ($self, $key, $value, $line) = @_;
3539             if ($value eq '') {
3540 0     0   0 return $MISSING_REQUIRED_VALUE;
3541 0 0       0 }
3542 0         0 my $tag = lc($value);
3543             $tag =~ tr/a-z0-9./_/c;
3544 0         0 foreach (@Mail::SpamAssassin::EXTRA_VERSION) {
3545 0         0 if($_ eq $tag) { $tag = undef; last; }
3546 0         0 }
3547 0 0       0 push(@Mail::SpamAssassin::EXTRA_VERSION, $tag) if($tag);
  0         0  
  0         0  
3548             }
3549 0 0       0 });
3550              
3551 92         929 =item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against
3552              
3553             Define a regression testing string. You can have more than one regression test
3554             string per symbolic test name. Simply specify a string that you wish the test
3555             to match.
3556              
3557             These tests are only run as part of the test suite - they should not affect the
3558             general running of SpamAssassin.
3559              
3560             =cut
3561              
3562             push (@cmds, {
3563             setting => 'test',
3564             is_admin => 1,
3565             code => sub {
3566             return unless defined $COLLECT_REGRESSION_TESTS;
3567             my ($self, $key, $value, $line) = @_;
3568 1856 50   1856   3553 local ($1,$2,$3);
3569 0         0 if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
3570 0         0 $self->{parser}->add_regression_test($1, $2, $3);
3571 0 0       0 }
  0         0  
3572 0         0 });
3573              
3574 92         1006 =item body_part_scan_size (default: 50000)
3575              
3576             Per mime-part scan size limit in bytes for "body" type rules.
3577             The decoded/stripped mime-part is truncated to approximately this size.
3578             Helps scanning large messages safely, so it's not necessary to
3579             skip them completely. Disabled with 0.
3580              
3581             =cut
3582              
3583             push (@cmds, {
3584             setting => 'body_part_scan_size',
3585 92         444 is_admin => 1,
3586             default => 50000,
3587             type => $CONF_TYPE_NUMERIC,
3588             });
3589              
3590              
3591             =item rawbody_part_scan_size (default: 500000)
3592              
3593             Like body_part_scan_size, for "rawbody" type rules.
3594              
3595             =cut
3596              
3597             push (@cmds, {
3598             setting => 'rawbody_part_scan_size',
3599 92         555 is_admin => 1,
3600             default => 500000,
3601             type => $CONF_TYPE_NUMERIC,
3602             });
3603            
3604             =item rbl_timeout t [t_min] [zone] (default: 15 3)
3605              
3606             All DNS queries are made at the beginning of a check and we try to read
3607             the results at the end. This value specifies the maximum period of time
3608             (in seconds) to wait for a DNS query. If most of the DNS queries have
3609             succeeded for a particular message, then SpamAssassin will not wait for
3610             the full period to avoid wasting time on unresponsive server(s), but will
3611             shrink the timeout according to a percentage of queries already completed.
3612             As the number of queries remaining approaches 0, the timeout value will
3613             gradually approach a t_min value, which is an optional second parameter
3614             and defaults to 0.2 * t. If t is smaller than t_min, the initial timeout
3615             is set to t_min. Here is a chart of queries remaining versus the timeout
3616             in seconds, for the default 15 second / 3 second timeout setting:
3617              
3618             queries left 100% 90% 80% 70% 60% 50% 40% 30% 20% 10% 0%
3619             timeout 15 14.9 14.5 13.9 13.1 12.0 10.7 9.1 7.3 5.3 3
3620              
3621             For example, if 20 queries are made at the beginning of a message check
3622             and 16 queries have returned (leaving 20%), the remaining 4 queries should
3623             finish within 7.3 seconds since their query started or they will be timed out.
3624             Note that timed out queries are only aborted when there is nothing else left
3625             for SpamAssassin to do - long evaluation of other rules may grant queries
3626             additional time.
3627              
3628             If a parameter 'zone' is specified (it must end with a letter, which
3629             distinguishes it from other numeric parameters), then the setting only
3630             applies to DNS queries against the specified DNS domain (host, domain or
3631             RBL (sub)zone). Matching is case-insensitive, the actual domain may be a
3632             subdomain of the specified zone.
3633              
3634             =cut
3635              
3636             push (@cmds, {
3637             setting => 'rbl_timeout',
3638             is_admin => 1,
3639             default => 15,
3640             code => sub {
3641             my ($self, $key, $value, $line) = @_;
3642             unless (defined $value && $value !~ /^$/) {
3643 1     1   4 return $MISSING_REQUIRED_VALUE;
3644 1 50 33     8 }
3645 0         0 local ($1,$2,$3);
3646             unless ($value =~ /^ ( \+? \d+ (?: \. \d*)? [smhdw]? )
3647 1         4 (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
3648 1 50       12 (?: \s+ (\S* [a-zA-Z]) )? $/xsi) {
3649             return $INVALID_VALUE;
3650             }
3651 0         0 my($timeout, $timeout_min, $zone) = ($1, $2, $3);
3652             foreach ($timeout, $timeout_min) {
3653 1         4 if (defined $_ && s/\s*([smhdw])\z//i) {
3654 1         3 $_ *= { s => 1, m => 60, h => 3600,
3655 2 50 66     27 d => 24*3600, w => 7*24*3600 }->{lc $1};
3656             }
3657 0         0 }
3658             if (!defined $zone) { # a global setting
3659             $self->{rbl_timeout} = 0 + $timeout;
3660 1 50       5 $self->{rbl_timeout_min} = 0 + $timeout_min if defined $timeout_min;
3661 1         4 }
3662 1 50       9 else { # per-zone settings
3663             $zone =~ s/^\.//; $zone =~ s/\.\z//; # strip leading and trailing dot
3664             $zone = lc $zone;
3665 0         0 $self->{by_zone}{$zone}{rbl_timeout} = 0 + $timeout;
  0         0  
3666 0         0 $self->{by_zone}{$zone}{rbl_timeout_min} =
3667 0         0 0 + $timeout_min if defined $timeout_min;
3668             }
3669 0 0       0 },
3670             type => $CONF_TYPE_DURATION,
3671             });
3672 92         1131  
3673             =item util_rb_tld tld1 tld2 ...
3674              
3675             This option maintains the list of valid TLDs in the RegistryBoundaries code.
3676             TLDs include things like com, net, org, etc.
3677              
3678             =cut
3679              
3680             push (@cmds, {
3681             setting => 'util_rb_tld',
3682             is_admin => 1,
3683             code => sub {
3684             my ($self, $key, $value, $line) = @_;
3685             unless (defined $value && $value !~ /^$/) {
3686 10519     10519   24937 return $MISSING_REQUIRED_VALUE;
3687 10519 50 33     45325 }
3688 0         0 unless ($value =~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
3689             return $INVALID_VALUE;
3690 10519 50       76798 }
3691 0         0 foreach (split(/\s+/, $value)) {
3692             $self->{valid_tlds}{lc $_} = 1;
3693 10519         78699 }
3694 119223         289332 }
3695             });
3696              
3697 92         1233 =item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3698              
3699             This option maintains the list of valid 2nd-level TLDs in the RegistryBoundaries
3700             code. 2TLDs include things like co.uk, fed.us, etc.
3701              
3702             =cut
3703              
3704             push (@cmds, {
3705             setting => 'util_rb_2tld',
3706             is_admin => 1,
3707             code => sub {
3708             my ($self, $key, $value, $line) = @_;
3709             unless (defined $value && $value !~ /^$/) {
3710 36499     36499   83143 return $MISSING_REQUIRED_VALUE;
3711 36499 50 33     149712 }
3712 0         0 unless ($value =~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {
3713             return $INVALID_VALUE;
3714 36499 50       170033 }
3715 0         0 foreach (split(/\s+/, $value)) {
3716             $self->{two_level_domains}{lc $_} = 1;
3717 36499         146612 }
3718 148205         398925 }
3719             });
3720              
3721 92         1040 =item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3722              
3723             This option maintains the list of valid 3rd-level TLDs in the RegistryBoundaries
3724             code. 3TLDs include things like demon.co.uk, plc.co.im, etc.
3725              
3726             =cut
3727              
3728             push (@cmds, {
3729             setting => 'util_rb_3tld',
3730             is_admin => 1,
3731             code => sub {
3732             my ($self, $key, $value, $line) = @_;
3733             unless (defined $value && $value !~ /^$/) {
3734 3793     3793   8776 return $MISSING_REQUIRED_VALUE;
3735 3793 50 33     16108 }
3736 0         0 unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
3737             return $INVALID_VALUE;
3738 3793 50       13544 }
3739 0         0 foreach (split(/\s+/, $value)) {
3740             $self->{three_level_domains}{lc $_} = 1;
3741 3793         9708 }
3742 3793         18299 }
3743             });
3744              
3745 92         995 =item clear_util_rb
3746              
3747             Empties the internal list of valid TLDs (including 2nd and 3rd level) which
3748             the RegistryBoundaries code uses. Only useful if you want to override the
3749             standard lists supplied by sa-update.
3750              
3751             =cut
3752              
3753             push (@cmds, {
3754             setting => 'clear_util_rb',
3755             type => $CONF_TYPE_NOARGS,
3756             code => sub {
3757             my ($self, $key, $value, $line) = @_;
3758             unless (!defined $value || $value eq '') {
3759 79     79   410 return $INVALID_VALUE;
3760 79 50 33     697 }
3761 0         0 $self->{valid_tlds} = ();
3762             $self->{two_level_domains} = ();
3763 79         393 $self->{three_level_domains} = ();
3764 79         321 dbg("config: cleared tld lists");
3765 79         277 }
3766 79         515 });
3767              
3768 92         1041 =item bayes_path /path/filename (default: ~/.spamassassin/bayes)
3769              
3770             This is the directory and filename for Bayes databases. Several databases
3771             will be created, with this as the base directory and filename, with C<_toks>,
3772             C<_seen>, etc. appended to the base. The default setting results in files
3773             called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.
3774              
3775             By default, each user has their own in their C<~/.spamassassin> directory with
3776             mode 0700/0600. For system-wide SpamAssassin use, you may want to reduce disk
3777             space usage by sharing this across all users. However, Bayes appears to be
3778             more effective with individual user databases.
3779              
3780             =cut
3781              
3782             push (@cmds, {
3783             setting => 'bayes_path',
3784             is_admin => 1,
3785             default => '__userstate__/bayes',
3786             type => $CONF_TYPE_STRING,
3787             code => sub {
3788             my ($self, $key, $value, $line) = @_;
3789             unless (defined $value && $value !~ /^$/) {
3790 62     62   346 return $MISSING_REQUIRED_VALUE;
3791 62 50 33     723 }
3792 0         0 if (-d $value) {
3793             return $INVALID_VALUE;
3794 62 50       7906 }
3795 0         0 $self->{bayes_path} = $value;
3796             }
3797 62         580 });
3798              
3799 92         1011 =item bayes_file_mode (default: 0700)
3800              
3801             The file mode bits used for the Bayesian filtering database files.
3802              
3803             Make sure you specify this using the 'x' mode bits set, as it may also be used
3804             to create directories. However, if a file is created, the resulting file will
3805             not have any execute bits set (the umask is set to 111). The argument is a
3806             string of octal digits, it is converted to a numeric value internally.
3807              
3808             =cut
3809              
3810             push (@cmds, {
3811             setting => 'bayes_file_mode',
3812             is_admin => 1,
3813             default => '0700',
3814             type => $CONF_TYPE_NUMERIC,
3815             code => sub {
3816             my ($self, $key, $value, $line) = @_;
3817             if ($value !~ /^0?[0-7]{3}$/) { return $INVALID_VALUE }
3818 0     0   0 $self->{bayes_file_mode} = untaint_var($value);
3819 0 0       0 }
  0         0  
3820 0         0 });
3821              
3822 92         996 =item bayes_store_module Name::Of::BayesStore::Module
3823              
3824             If this option is set, the module given will be used as an alternate
3825             to the default bayes storage mechanism. It must conform to the
3826             published storage specification (see
3827             Mail::SpamAssassin::BayesStore). For example, set this to
3828             Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
3829             module.
3830              
3831             =cut
3832              
3833             push (@cmds, {
3834             setting => 'bayes_store_module',
3835             is_admin => 1,
3836             default => '',
3837             type => $CONF_TYPE_STRING,
3838             code => sub {
3839             my ($self, $key, $value, $line) = @_;
3840             local ($1);
3841 6     6   40 if ($value !~ /^([_A-Za-z0-9:]+)$/) { return $INVALID_VALUE; }
3842 6         45 $self->{bayes_store_module} = $1;
3843 6 50       62 }
  0         0  
3844 6         61 });
3845              
3846 92         960 =item bayes_sql_dsn DBI::databasetype:databasename:hostname:port
3847              
3848             Used for BayesStore::SQL storage implementation.
3849              
3850             This option gives the connect string used to connect to the SQL-based Bayes storage.
3851              
3852             =cut
3853              
3854             push (@cmds, {
3855             setting => 'bayes_sql_dsn',
3856 92         567 is_admin => 1,
3857             default => '',
3858             type => $CONF_TYPE_STRING,
3859             });
3860              
3861             =item bayes_sql_username
3862              
3863             Used by BayesStore::SQL storage implementation.
3864              
3865             This option gives the username used by the above DSN.
3866              
3867             =cut
3868              
3869             push (@cmds, {
3870             setting => 'bayes_sql_username',
3871 92         487 is_admin => 1,
3872             default => '',
3873             type => $CONF_TYPE_STRING,
3874             });
3875              
3876             =item bayes_sql_password
3877              
3878             Used by BayesStore::SQL storage implementation.
3879              
3880             This option gives the password used by the above DSN.
3881              
3882             =cut
3883              
3884             push (@cmds, {
3885             setting => 'bayes_sql_password',
3886 92         388 is_admin => 1,
3887             default => '',
3888             type => $CONF_TYPE_STRING,
3889             });
3890              
3891             =item bayes_sql_username_authorized ( 0 | 1 ) (default: 0)
3892              
3893             Whether to call the services_authorized_for_username plugin hook in BayesSQL.
3894             If the hook does not determine that the user is allowed to use bayes or is
3895             invalid then the database will not be initialized.
3896              
3897             NOTE: By default the user is considered invalid until a plugin returns
3898             a true value. If you enable this, but do not have a proper plugin
3899             loaded, all users will turn up as invalid.
3900              
3901             The username passed into the plugin can be affected by the
3902             bayes_sql_override_username config option.
3903              
3904             =cut
3905              
3906             push (@cmds, {
3907             setting => 'bayes_sql_username_authorized',
3908 92         383 is_admin => 1,
3909             default => 0,
3910             type => $CONF_TYPE_BOOL,
3911             });
3912              
3913             =item user_scores_dsn DBI:databasetype:databasename:hostname:port
3914              
3915             If you load user scores from an SQL database, this will set the DSN
3916             used to connect. Example: C<DBI:mysql:spamassassin:localhost>
3917              
3918             If you load user scores from an LDAP directory, this will set the DSN used to
3919             connect. You have to write the DSN as an LDAP URL, the components being the
3920             host and port to connect to, the base DN for the search, the scope of the
3921             search (base, one or sub), the single attribute being the multivalued attribute
3922             used to hold the configuration data (space separated pairs of key and value,
3923             just as in a file) and finally the filter being the expression used to filter
3924             out the wanted username. Note that the filter expression is being used in a
3925             sprintf statement with the username as the only parameter, thus it can hold a
3926             single __USERNAME__ expression. This will be replaced with the username.
3927              
3928             Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>
3929              
3930             =cut
3931              
3932             push (@cmds, {
3933             setting => 'user_scores_dsn',
3934 92         328 is_admin => 1,
3935             default => '',
3936             type => $CONF_TYPE_STRING,
3937             });
3938              
3939             =item user_scores_sql_username username
3940              
3941             The authorized username to connect to the above DSN.
3942              
3943             =cut
3944              
3945             push (@cmds, {
3946             setting => 'user_scores_sql_username',
3947 92         365 is_admin => 1,
3948             default => '',
3949             type => $CONF_TYPE_STRING,
3950             });
3951              
3952             =item user_scores_sql_password password
3953              
3954             The password for the database username, for the above DSN.
3955              
3956             =cut
3957              
3958             push (@cmds, {
3959             setting => 'user_scores_sql_password',
3960 92         596 is_admin => 1,
3961             default => '',
3962             type => $CONF_TYPE_STRING,
3963             });
3964              
3965             =item user_scores_sql_custom_query query
3966              
3967             This option gives you the ability to create a custom SQL query to
3968             retrieve user scores and preferences. In order to work correctly your
3969             query should return two values, the preference name and value, in that
3970             order. In addition, there are several "variables" that you can use
3971             as part of your query, these variables will be substituted for the
3972             current values right before the query is run. The current allowed
3973             variables are:
3974              
3975             =over 4
3976              
3977             =item _TABLE_
3978              
3979             The name of the table where user scores and preferences are stored. Currently
3980             hardcoded to userpref, to change this value you need to create a new custom
3981             query with the new table name.
3982              
3983             =item _USERNAME_
3984              
3985             The current user's username.
3986              
3987             =item _MAILBOX_
3988              
3989             The portion before the @ as derived from the current user's username.
3990              
3991             =item _DOMAIN_
3992              
3993             The portion after the @ as derived from the current user's username, this
3994             value may be null.
3995              
3996             =back
3997              
3998             The query must be one continuous line in order to parse correctly.
3999              
4000             Here are several example queries, please note that these are broken up
4001             for easy reading, in your config it should be one continuous line.
4002              
4003             =over 4
4004              
4005             =item Current default query:
4006              
4007             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC>
4008              
4009             =item Use global and then domain level defaults:
4010              
4011             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC>
4012              
4013             =item Maybe global prefs should override user prefs:
4014              
4015             C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC>
4016              
4017             =back
4018              
4019             =cut
4020              
4021             push (@cmds, {
4022             setting => 'user_scores_sql_custom_query',
4023 92         365 is_admin => 1,
4024             default => undef,
4025             type => $CONF_TYPE_STRING,
4026             });
4027              
4028             =item user_scores_ldap_username
4029              
4030             This is the Bind DN used to connect to the LDAP server. It defaults
4031             to the empty string (""), allowing anonymous binding to work.
4032              
4033             Example: C<cn=master,dc=koehntopp,dc=de>
4034              
4035             =cut
4036              
4037             push (@cmds, {
4038             setting => 'user_scores_ldap_username',
4039 92         347 is_admin => 1,
4040             default => '',
4041             type => $CONF_TYPE_STRING,
4042             });
4043              
4044             =item user_scores_ldap_password
4045              
4046             This is the password used to connect to the LDAP server. It defaults
4047             to the empty string ("").
4048              
4049             =cut
4050              
4051             push (@cmds, {
4052             setting => 'user_scores_ldap_password',
4053 92         410 is_admin => 1,
4054             default => '',
4055             type => $CONF_TYPE_STRING,
4056             });
4057              
4058             =item user_scores_fallback_to_global (default: 1)
4059              
4060             Fall back to global scores and settings if userprefs can't be loaded
4061             from SQL or LDAP, instead of passing the message through unprocessed.
4062              
4063             =cut
4064              
4065             push (@cmds, {
4066             setting => 'user_scores_fallback_to_global',
4067 92         349 is_admin => 1,
4068             default => 1,
4069             type => $CONF_TYPE_BOOL,
4070             });
4071              
4072             =item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm]
4073              
4074             Load a SpamAssassin plugin module. The C<ModuleName> is the perl module
4075             name, used to create the plugin object itself.
4076              
4077             Module naming is strict, name must only contain alphanumeric characters or
4078             underscores. File must have .pm extension.
4079              
4080             C</path/module.pm> is the file to load, containing the module's perl code;
4081             if it's specified as a relative path, it's considered to be relative to the
4082             current configuration file. If it is omitted, the module will be loaded
4083             using perl's search path (the C<@INC> array).
4084              
4085             See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.
4086              
4087             =cut
4088              
4089             push (@cmds, {
4090             setting => 'loadplugin',
4091             is_admin => 1,
4092             code => sub {
4093             my ($self, $key, $value, $line) = @_;
4094             if ($value eq '') {
4095 5232     5232   13698 return $MISSING_REQUIRED_VALUE;
4096 5232 50       12432 }
4097 0         0 my ($package, $path);
4098             local ($1,$2);
4099 5232         7019 if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
4100 5232         11019 ($package, $path) = ($1, $2);
4101 5232 50       24448 } else {
4102 5232         14803 return $INVALID_VALUE;
4103             }
4104 0         0 $self->load_plugin ($package, $path);
4105             }
4106 5232         11589 });
4107              
4108 92         805 =item tryplugin ModuleName [/path/module.pm]
4109              
4110             Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
4111             the filesystem.
4112              
4113             =cut
4114              
4115             push (@cmds, {
4116             setting => 'tryplugin',
4117             is_admin => 1,
4118             code => sub {
4119             my ($self, $key, $value, $line) = @_;
4120             if ($value eq '') {
4121 0     0   0 return $MISSING_REQUIRED_VALUE;
4122 0 0       0 }
4123 0         0 my ($package, $path);
4124             local ($1,$2);
4125 0         0 if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
4126 0         0 ($package, $path) = ($1, $2);
4127 0 0       0 } else {
4128 0         0 return $INVALID_VALUE;
4129             }
4130 0         0 $self->load_plugin ($package, $path, 1);
4131             }
4132 0         0 });
4133              
4134 92         910 =item ignore_always_matching_regexps (Default: 0)
4135              
4136             Ignore any rule which contains a regexp which always matches.
4137             Currently only catches regexps which contain '||', or which begin or
4138             end with a '|'. Also ignore rules with C<some> combinatorial explosions.
4139              
4140             =cut
4141              
4142             push (@cmds, {
4143             setting => 'ignore_always_matching_regexps',
4144 92         557 is_admin => 1,
4145             default => 0,
4146             type => $CONF_TYPE_BOOL,
4147             });
4148              
4149             =back
4150              
4151             =head1 PREPROCESSING OPTIONS
4152              
4153             =over 4
4154              
4155             =item include filename
4156              
4157             Include configuration lines from C<filename>. Relative paths are considered
4158             relative to the current configuration file or user preferences file.
4159              
4160             =item if (boolean perl expression)
4161              
4162             Used to support conditional interpretation of the configuration
4163             file. Lines between this and a corresponding C<else> or C<endif> line
4164             will be ignored unless the expression evaluates as true
4165             (in the perl sense; that is, defined and non-0 and non-empty string).
4166              
4167             The conditional accepts a limited subset of perl for security -- just enough to
4168             perform basic arithmetic comparisons. The following input is accepted:
4169              
4170             =over 4
4171              
4172             =item numbers, whitespace, arithmetic operations and grouping
4173              
4174             Namely these characters and ranges:
4175              
4176             ( ) - + * / _ . , < = > ! ~ 0-9 whitespace
4177              
4178             =item version
4179              
4180             This will be replaced with the version number of the currently-running
4181             SpamAssassin engine. Note: The version used is in the internal SpamAssassin
4182             version format which is C<x.yyyzzz>, where x is major version, y is minor
4183             version, and z is maintenance version. So 3.0.0 is C<3.000000>, and 3.4.80
4184             is C<3.004080>.
4185              
4186             =item perl_version
4187              
4188             (Introduced in 3.4.1) This will be replaced with the version number of the
4189             currently-running perl engine. Note: The version used is in the $] version
4190             format which is C<x.yyyzzz>, where x is major version, y is minor version,
4191             and z is maintenance version. So 5.8.8 is C<5.008008>, and 5.10.0 is
4192             C<5.010000>. Use to protect rules that incorporate RE syntax elements
4193             introduced in later versions of perl, such as the C<++> non-backtracking
4194             match introduced in perl 5.10. For example:
4195              
4196             # Avoid lint error on older perl installs
4197             # Check SA version first to avoid warnings on checking perl_version on older SA
4198             if version > 3.004001 && perl_version >= 5.018000
4199             body INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18 /(?[ \p{Thai} & \p{Digit} ])/
4200             endif
4201              
4202             Note that the above will still generate a warning on perl older than 5.10.0;
4203             to avoid that warning do this instead:
4204              
4205             # Avoid lint error on older perl installs
4206             if can(Mail::SpamAssassin::Conf::perl_min_version_5010000)
4207             body INVALID_RE_SYNTAX_IN_PERL_5_8 /\w++/
4208             endif
4209              
4210             Warning: a can() test is only defined for perl 5.10.0!
4211              
4212              
4213             =item plugin(Name::Of::Plugin)
4214              
4215             This is a function call that returns C<1> if the plugin named
4216             C<Name::Of::Plugin> is loaded, or C<undef> otherwise.
4217              
4218             =item has(Name::Of::Package::function_name)
4219              
4220             This is a function call that returns C<1> if the perl package named
4221             C<Name::Of::Package> includes a function called C<function_name>, or C<undef>
4222             otherwise. Note that packages can be SpamAssassin plugins or built-in classes,
4223             there's no difference in this respect. Internally this invokes UNIVERSAL::can.
4224              
4225             =item can(Name::Of::Package::function_name)
4226              
4227             This is a function call that returns C<1> if the perl package named
4228             C<Name::Of::Package> includes a function called C<function_name>
4229             B<and> that function returns a true value when called with no arguments,
4230             otherwise C<undef> is returned.
4231              
4232             This is similar to C<has>, except that it also calls the named function,
4233             testing its return value (unlike the perl function UNIVERSAL::can).
4234             This makes it possible for a 'feature' function to determine its result
4235             value at run time.
4236              
4237             =back
4238              
4239             If the end of a configuration file is reached while still inside a
4240             C<if> scope, a warning will be issued, but parsing will restart on
4241             the next file.
4242              
4243             For example:
4244              
4245             if (version > 3.000000)
4246             header MY_FOO ...
4247             endif
4248              
4249             loadplugin MyPlugin plugintest.pm
4250              
4251             if plugin (MyPlugin)
4252             header MY_PLUGIN_FOO eval:check_for_foo()
4253             score MY_PLUGIN_FOO 0.1
4254             endif
4255              
4256             =item ifplugin PluginModuleName
4257              
4258             An alias for C<if plugin(PluginModuleName)>.
4259              
4260             =item else
4261              
4262             Used to support conditional interpretation of the configuration
4263             file. Lines between this and a corresponding C<endif> line,
4264             will be ignored unless the conditional expression evaluates as false
4265             (in the perl sense; that is, not defined, or 0, or the empty string).
4266              
4267             =item require_version n.nnnnnn
4268              
4269             Indicates that the entire file, from this line on, requires a certain
4270             version of SpamAssassin to run. If a different (older or newer) version
4271             of SpamAssassin tries to read the configuration from this file, it will
4272             output a warning instead, and ignore it.
4273              
4274             Note: The version used is in the internal SpamAssassin version format which is
4275             C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
4276             version. So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.
4277              
4278             =cut
4279              
4280             push (@cmds, {
4281             setting => 'require_version',
4282             type => $CONF_TYPE_STRING,
4283             code => sub {
4284             }
4285       0     });
4286              
4287 92         875 =back
4288              
4289             =head1 TEMPLATE TAGS
4290              
4291             The following C<tags> can be used as placeholders in certain options.
4292             They will be replaced by the corresponding value when they are used.
4293              
4294             Some tags can take an argument (in parentheses). The argument is
4295             optional, and the default is shown below.
4296              
4297             _YESNO_ "Yes" for spam, "No" for nonspam (=ham)
4298             _YESNO(spam_str,ham_str)_ returns the first argument ("Yes" if missing)
4299             for spam, and the second argument ("No" if missing) for ham
4300             _YESNOCAPS_ "YES" for spam, "NO" for nonspam (=ham)
4301             _YESNOCAPS(spam_str,ham_str)_ same as _YESNO(...)_, but uppercased
4302             _SCORE(PAD)_ message score, if PAD is included and is either spaces or
4303             zeroes, then pad scores with that many spaces or zeroes
4304             (default, none) ie: _SCORE(0)_ makes 2.4 become 02.4,
4305             _SCORE(00)_ is 002.4. 12.3 would be 12.3 and 012.3
4306             respectively.
4307             _REQD_ message threshold
4308             _VERSION_ version (eg. 3.0.0 or 3.1.0-r26142-foo1)
4309             _SUBVERSION_ sub-version/code revision date (eg. 2004-01-10)
4310             _RULESVERSION_ comma-separated list of rules versions, retrieved from
4311             an '# UPDATE version' comment in rules files; if there is
4312             more than one set of rules (update channels) the order
4313             is unspecified (currently sorted by names of files);
4314             _HOSTNAME_ hostname of the machine the mail was processed on
4315             _REMOTEHOSTNAME_ hostname of the machine the mail was sent from, only
4316             available with spamd
4317             _REMOTEHOSTADDR_ ip address of the machine the mail was sent from, only
4318             available with spamd
4319             _BAYES_ bayes score
4320             _TOKENSUMMARY_ number of new, neutral, spammy, and hammy tokens found
4321             _BAYESTC_ number of new tokens found
4322             _BAYESTCLEARNED_ number of seen tokens found
4323             _BAYESTCSPAMMY_ number of spammy tokens found
4324             _BAYESTCHAMMY_ number of hammy tokens found
4325             _HAMMYTOKENS(N)_ the N most significant hammy tokens (default, 5)
4326             _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5)
4327             _DATE_ rfc-2822 date of scan
4328             _STARS(*)_ one "*" (use any character) for each full score point
4329             (note: limited to 50 'stars')
4330             _SENDERDOMAIN_ a domain name of the envelope sender address, lowercased
4331             _AUTHORDOMAIN_ a domain name of the author address (the From header
4332             field), lowercased; note that RFC 5322 allows a mail
4333             message to have multiple authors - currently only the
4334             domain name of the first email address is returned
4335             _RELAYSTRUSTED_ relays used and deemed to be trusted (see the
4336             'X-Spam-Relays-Trusted' pseudo-header)
4337             _RELAYSUNTRUSTED_ relays used that can not be trusted (see the
4338             'X-Spam-Relays-Untrusted' pseudo-header)
4339             _RELAYSINTERNAL_ relays used and deemed to be internal (see the
4340             'X-Spam-Relays-Internal' pseudo-header)
4341             _RELAYSEXTERNAL_ relays used and deemed to be external (see the
4342             'X-Spam-Relays-External' pseudo-header)
4343             _LASTEXTERNALIP_ IP address of client in the external-to-internal
4344             SMTP handover
4345             _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal
4346             SMTP handover
4347             _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal
4348             SMTP handover
4349             _AUTOLEARN_ autolearn status ("ham", "no", "spam", "disabled",
4350             "failed", "unavailable")
4351             _AUTOLEARNSCORE_ portion of message score used by autolearn
4352             _TESTS(,)_ tests hit separated by "," (or other separator)
4353             _TESTSSCORES(,)_ as above, except with scores appended (eg. AWL=-3.0,...)
4354             _SUBTESTS(,)_ subtests (start with "__") hit separated by ","
4355             (or other separator)
4356             _SUBTESTSCOLLAPSED(,)_ subtests (start with "__") hit separated by ","
4357             (or other separator) with duplicated rules collapsed
4358             _DCCB_ DCC's "Brand"
4359             _DCCR_ DCC's results
4360             _PYZOR_ Pyzor results
4361             _RBL_ full results for positive RBL queries in DNS URI format
4362             _LANGUAGES_ possible languages of mail
4363             _PREVIEW_ content preview
4364             _REPORT_ terse report of tests hit (for header reports)
4365             _SUBJPREFIX_ subject prefix based on rules, to be prepended to Subject
4366             header by SpamAssassin caller
4367             _SUMMARY_ summary of tests hit for standard report (for body reports)
4368             _CONTACTADDRESS_ contents of the 'report_contact' setting
4369             _HEADER(NAME)_ includes the value of a message header. value is the same
4370             as is found for header rules (see elsewhere in this doc)
4371             _TIMING_ timing breakdown report
4372             _ADDEDHEADERHAM_ resulting header fields as requested by add_header for ham
4373             _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam
4374             _ADDEDHEADER_ same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam
4375              
4376             If a tag reference uses the name of a tag which is not in this list or defined
4377             by a loaded plugin, the reference will be left intact and not replaced by any
4378             value.
4379             All template tag names should be restricted to the character set [A-Za-z0-9(,)].
4380              
4381             Additional, plugin specific, template tags can be found in the documentation for
4382             the following plugins:
4383              
4384             L<Mail::SpamAssassin::Plugin::ASN>
4385             L<Mail::SpamAssassin::Plugin::AWL>
4386             L<Mail::SpamAssassin::Plugin::TxRep>
4387              
4388             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4389             which specifies a format. See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT>
4390             section, below, for details.
4391              
4392             =head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT
4393              
4394             The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4395             which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_>
4396             The following formats are available:
4397              
4398             =over 4
4399              
4400             =item short
4401              
4402             Only the tokens themselves are listed.
4403             I<For example, preference file entry:>
4404              
4405             C<add_header all Spammy _SPAMMYTOKENS(2,short)_>
4406              
4407             I<Results in message header:>
4408              
4409             C<X-Spam-Spammy: remove.php, UD:jpg>
4410              
4411             Indicating that the top two spammy tokens found are C<remove.php>
4412             and C<UD:jpg>. (The token itself follows the last colon, the
4413             text before the colon indicates something about the token.
4414             C<UD> means the token looks like it might be part of a domain name.)
4415              
4416             =item compact
4417              
4418             The token probability, an abbreviated declassification distance (see
4419             example), and the token are listed.
4420             I<For example, preference file entry:>
4421              
4422             C<add_header all Spammy _SPAMMYTOKENS(2,compact)_>
4423              
4424             I<Results in message header:>
4425              
4426             C<X-Spam-Spammy: 0.989-6--remove.php, 0.988-+--UD:jpg>
4427              
4428             Indicating that the probabilities of the top two tokens are 0.989 and
4429             0.988, respectively. The first token has a declassification distance
4430             of 6, meaning that if the token had appeared in at least 6 more ham
4431             messages it would not be considered spammy. The C<+> for the second
4432             token indicates a declassification distance greater than 9.
4433              
4434             =item long
4435              
4436             Probability, declassification distance, number of times seen in a ham
4437             message, number of times seen in a spam message, age and the token are
4438             listed.
4439              
4440             I<For example, preference file entry:>
4441              
4442             C<add_header all Spammy _SPAMMYTOKENS(2,long)_>
4443              
4444             I<Results in message header:>
4445              
4446             C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg>
4447              
4448             In addition to the information provided by the compact option,
4449             the long option shows that the first token appeared in zero
4450             ham messages and four spam messages, and that it was last
4451             seen four days ago. The second token appeared in two ham messages,
4452             25 spam messages and was last seen one day ago.
4453             (Unlike the C<compact> option, the long option shows declassification
4454             distances that are greater than 9.)
4455              
4456             =back
4457              
4458             =cut
4459              
4460             return \@cmds;
4461             }
4462 92         901  
4463             ###########################################################################
4464              
4465             # settings that were once part of core, but are now in (possibly-optional)
4466             # bundled plugins. These will be warned about, but do not generate a fatal
4467             # error when "spamassassin --lint" is run like a normal syntax error would.
4468              
4469             our @MIGRATED_SETTINGS = qw{
4470             ok_languages
4471             };
4472              
4473             ###########################################################################
4474              
4475             my $class = shift;
4476             $class = ref($class) || $class;
4477             my $self = {
4478 92     92 0 348 main => shift,
4479 92   33     668 registered_commands => [],
4480 92         703 }; bless ($self, $class);
4481              
4482             $self->{parser} = Mail::SpamAssassin::Conf::Parser->new($self);
4483 92         388 $self->{parser}->register_commands($self->set_default_commands());
4484              
4485 92         1633 $self->{errors} = 0;
4486 92         2588 $self->{plugins_loaded} = { };
4487              
4488 92         956 $self->{tests} = { };
4489 92         352 $self->{test_types} = { };
4490             $self->{scoreset} = [ {}, {}, {}, {} ];
4491 92         761 $self->{scoreset_current} = 0;
4492 92         286 $self->set_score_set (0);
4493 92         396 $self->{tflags} = { };
4494 92         305 $self->{source_file} = { };
4495 92         511  
4496 92         253 # keep descriptions in a slow but space-efficient single-string
4497 92         436 # data structure
4498             # NOTE: Deprecated usage of TieOneStringHash as of 10/2018, it's an
4499             # absolute pig, doubling config parse time, while benchmarks indicate
4500             # no difference in resident memory size!
4501             $self->{descriptions} = { };
4502             #tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
4503             # or warn "tie failed";
4504 92         341 $self->{subjprefix} = { };
4505              
4506             # after parsing, tests are refiled into these hashes for each test type.
4507 92         242 # this allows e.g. a full-text test to be rewritten as a body test in
4508             # the user's user_prefs file.
4509             $self->{body_tests} = { };
4510             $self->{uri_tests} = { };
4511             $self->{uri_evals} = { }; # not used/implemented yet
4512 92         315 $self->{head_tests} = { };
4513 92         360 $self->{head_evals} = { };
4514 92         279 $self->{body_evals} = { };
4515 92         224 $self->{full_tests} = { };
4516 92         239 $self->{full_evals} = { };
4517 92         199 $self->{rawbody_tests} = { };
4518 92         261 $self->{rawbody_evals} = { };
4519 92         260 $self->{meta_tests} = { };
4520 92         214 $self->{eval_plugins} = { };
4521 92         229 $self->{duplicate_rules} = { };
4522 92         219  
4523 92         244 # testing stuff
4524 92         390 $self->{regression_tests} = { };
4525              
4526             $self->{rewrite_header} = { };
4527 92         362 $self->{want_rebuild_for_type} = { };
4528             $self->{user_defined_rules} = { };
4529 92         259 $self->{headers_spam} = [ ];
4530 92         324 $self->{headers_ham} = [ ];
4531 92         263  
4532 92         262 $self->{bayes_ignore_headers} = [ ];
4533 92         235 $self->{bayes_ignore_from} = { };
4534             $self->{bayes_ignore_to} = { };
4535 92         254  
4536 92         173 $self->{whitelist_auth} = { };
4537 92         189 $self->{def_whitelist_auth} = { };
4538             $self->{whitelist_from} = { };
4539 92         269 $self->{whitelist_allows_relays} = { };
4540 92         193 $self->{blacklist_from} = { };
4541 92         162 $self->{whitelist_from_rcvd} = { };
4542 92         155 $self->{def_whitelist_from_rcvd} = { };
4543 92         195  
4544 92         188 $self->{blacklist_to} = { };
4545 92         184 $self->{whitelist_to} = { };
4546             $self->{more_spam_to} = { };
4547 92         219 $self->{all_spam_to} = { };
4548 92         191  
4549 92         209 $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
4550 92         195 $self->{internal_networks} = $self->new_netset('internal_networks',1);
4551             $self->{msa_networks} = $self->new_netset('msa_networks',0); # no loopback IP
4552 92         660 $self->{trusted_networks_configured} = 0;
4553 92         293 $self->{internal_networks_configured} = 0;
4554 92         318  
4555 92         360 # Make sure we add in X-Spam-Checker-Version
4556 92         374 { my $r = [ "Checker-Version",
4557             "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
4558             push(@{$self->{headers_spam}}, $r);
4559 92         195 push(@{$self->{headers_ham}}, $r);
  92         259  
4560             }
4561 92         536  
  92         285  
4562 92         215 # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
  92         253  
4563             # of 4096 octets as a starting point.
4564             $self->{dns_options}->{edns} = 4096;
4565              
4566             # these should potentially be settable by end-users
4567 92         389 # perhaps via plugin?
4568             $self->{num_check_received} = 9;
4569             $self->{bayes_expiry_pct} = 0.75;
4570             $self->{bayes_expiry_period} = 43200;
4571 92         267 $self->{bayes_expiry_max_exponent} = 9;
4572 92         287  
4573 92         281 $self->{encapsulated_content_description} = 'original message before SpamAssassin';
4574 92         229  
4575             $self;
4576 92         339 }
4577              
4578 92         549 my $self = shift;
4579             if (@_) {
4580             $self->{mtime} = shift;
4581             }
4582 0     0 0 0 return $self->{mtime};
4583 0 0       0 }
4584 0         0  
4585             ###########################################################################
4586 0         0  
4587             my ($self) = @_;
4588             $self->{parser}->parse ($_[1], 1);
4589             }
4590              
4591             my ($self) = @_;
4592 0     0 0 0 $self->{parser}->parse ($_[1], 0);
4593 0         0 }
4594              
4595             ###########################################################################
4596              
4597 91     91 0 261 my ($self, $set) = @_;
4598 91         925 $self->{scores} = $self->{scoreset}->[$set];
4599             $self->{scoreset_current} = $set;
4600             dbg("config: score set $set chosen.");
4601             }
4602              
4603             my($self) = @_;
4604 185     185 0 659 return $self->{scoreset_current};
4605 185         744 }
4606 185         446  
4607 185         1128 my ($self) = @_;
4608             return @rule_types;
4609             }
4610              
4611 433     433 0 1085 my ($self, $test_type, $priority) = @_;
4612 433         1156  
4613             # special case rbl_evals since they do not have a priority
4614             if ($test_type eq 'rbl_evals') {
4615             return keys(%{$self->{$test_type}});
4616 0     0 0 0 }
4617 0         0  
4618             if (defined($priority)) {
4619             return keys(%{$self->{$test_type}->{$priority}});
4620             }
4621 0     0 0 0 else {
4622             my @rules;
4623             foreach my $pri (keys(%{$self->{priorities}})) {
4624 0 0       0 push(@rules, keys(%{$self->{$test_type}->{$pri}}));
4625 0         0 }
  0         0  
4626             return @rules;
4627             }
4628 0 0       0 }
4629 0         0  
  0         0  
4630             my ($self, $test_type, $rulename, $priority) = @_;
4631              
4632 0         0 # special case rbl_evals since they do not have a priority
4633 0         0 if ($test_type eq 'rbl_evals') {
  0         0  
4634 0         0 return @{$self->{$test_type}->{$rulename}};
  0         0  
4635             }
4636 0         0  
4637             if (defined($priority)) {
4638             return $self->{$test_type}->{$priority}->{$rulename};
4639             }
4640             else {
4641 0     0 0 0 foreach my $pri (keys(%{$self->{priorities}})) {
4642             if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4643             return $self->{$test_type}->{$pri}->{$rulename};
4644 0 0       0 }
4645 0         0 }
  0         0  
4646             return; # if we get here we didn't find the rule
4647             }
4648 0 0       0 }
4649 0         0  
4650             my ($self, $test_type, $rulename, $priority) = @_;
4651              
4652 0         0 # special case rbl_evals since they do not have a priority
  0         0  
4653 0 0       0 if ($test_type eq 'rbl_evals') {
4654 0         0 return delete($self->{$test_type}->{$rulename});
4655             }
4656              
4657 0         0 if (defined($priority)) {
4658             return delete($self->{$test_type}->{$priority}->{$rulename});
4659             }
4660             else {
4661             foreach my $pri (keys(%{$self->{priorities}})) {
4662 0     0 0 0 if (exists($self->{$test_type}->{$pri}->{$rulename})) {
4663             return delete($self->{$test_type}->{$pri}->{$rulename});
4664             }
4665 0 0       0 }
4666 0         0 return; # if we get here we didn't find the rule
4667             }
4668             }
4669 0 0       0  
4670 0         0 # trim_rules ($regexp)
4671             #
4672             # Remove all rules that don't match the given regexp (or are sub-rules of
4673 0         0 # meta-tests that match the regexp).
  0         0  
4674 0 0       0  
4675 0         0 my ($self, $regexp) = @_;
4676              
4677             my ($rec, $err) = compile_regexp($regexp, 0);
4678 0         0 if (!$rec) {
4679             die "config: trim_rules: invalid regexp '$regexp': $err";
4680             }
4681              
4682             my @all_rules;
4683              
4684             foreach my $rule_type ($self->get_rule_types()) {
4685             push(@all_rules, $self->get_rule_keys($rule_type));
4686             }
4687              
4688 0     0 0 0 my @rules_to_keep = grep(/$rec/, @all_rules);
4689              
4690 0         0 if (@rules_to_keep == 0) {
4691 0 0       0 die "config: trim_rules: all rules excluded, nothing to test\n";
4692 0         0 }
4693              
4694             my @meta_tests = grep(/$rec/, $self->get_rule_keys('meta_tests'));
4695 0         0 foreach my $meta (@meta_tests) {
4696             push(@rules_to_keep, $self->add_meta_depends($meta))
4697 0         0 }
4698 0         0  
4699             my %rules_to_keep_hash;
4700              
4701 0         0 foreach my $rule (@rules_to_keep) {
4702             $rules_to_keep_hash{$rule} = 1;
4703 0 0       0 }
4704 0         0  
4705             foreach my $rule_type ($self->get_rule_types()) {
4706             foreach my $rulekey ($self->get_rule_keys($rule_type)) {
4707 0         0 $self->delete_rule($rule_type, $rulekey)
4708 0         0 if (!$rules_to_keep_hash{$rulekey});
4709 0         0 }
4710             }
4711             } # trim_rules()
4712 0         0  
4713             my ($self, $meta) = @_;
4714 0         0  
4715 0         0 my @rules;
4716             my @tokens = $self->get_rule_value('meta_tests', $meta) =~ m/(\w+)/g;
4717              
4718 0         0 @tokens = grep(!/^\d+$/, @tokens);
4719 0         0 # @tokens now only consists of sub-rules
4720              
4721 0 0       0 foreach my $token (@tokens) {
4722             die "config: meta test $meta depends on itself\n" if $token eq $meta;
4723             push(@rules, $token);
4724              
4725             # If the sub-rule is a meta-test, recurse
4726             if ($self->get_rule_value('meta_tests', $token)) {
4727 0     0 0 0 push(@rules, $self->add_meta_depends($token));
4728             }
4729 0         0 } # foreach my $token (@tokens)
4730 0         0  
4731             return @rules;
4732 0         0 } # add_meta_depends()
4733              
4734             my ($self, $test_type, $rulename, $priority) = @_;
4735 0         0  
4736 0 0       0 # special case rbl_evals since they do not have a priority
4737 0         0 if ($test_type eq 'rbl_evals') {
4738             return 0 unless ($self->{$test_type}->{$rulename});
4739             return ($self->{scores}->{$rulename});
4740 0 0       0 }
4741 0         0  
4742             # first determine if the rule is defined
4743             if (defined($priority)) {
4744             # we have a specific priority
4745 0         0 return 0 unless ($self->{$test_type}->{$priority}->{$rulename});
4746             }
4747             else {
4748             # no specific priority so we must loop over all currently defined
4749 88     88 0 127 # priorities to see if the rule is defined
4750             my $found_p = 0;
4751             foreach my $pri (keys %{$self->{priorities}}) {
4752 88 50       130 if ($self->{$test_type}->{$pri}->{$rulename}) {
4753 0 0       0 $found_p = 1;
4754 0         0 last;
4755             }
4756             }
4757             return 0 unless ($found_p);
4758 88 50       111 }
4759              
4760 0 0       0 return ($self->{scores}->{$rulename});
4761             }
4762              
4763             ###########################################################################
4764              
4765 88         86 # treats a bitset argument as a bit vector of all possible port numbers (8 kB)
4766 88         84 # and sets bit values to $value (0 or 1) in the specified range of port numbers
  88         195  
4767 88 50       156 #
4768 88         89 my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;
4769 88         92 $port_range_lo = 0 if $port_range_lo < 0;
4770             $port_range_hi = 65535 if $port_range_hi > 65535;
4771             if (!defined $$bitset_ref) { # provide a sensible default
4772 88 50       150 wipe_ports_range($bitset_ref, 1); # turn on all bits 0..65535
4773             vec($$bitset_ref,$_,1) = 0 for 0..1023; # avoid 0 and privileged ports
4774             } elsif ($$bitset_ref eq '') { # repopulate the bitset (late configuration)
4775 88         213 wipe_ports_range($bitset_ref, 0); # turn off all bits 0..65535
4776             }
4777             $value = !$value ? 0 : 1;
4778             for (my $j = $port_range_lo; $j <= $port_range_hi; $j++) {
4779             vec($$bitset_ref,$j,1) = $value;
4780             }
4781             }
4782              
4783             my($bitset_ref, $value) = @_;
4784 1     1 0 3 $value = !$value ? "\000" : "\377";
4785 1 50       3 $$bitset_ref = $value x 8192; # quickly turn all bits 0..65535 on or off
4786 1 50       4 }
4787 1 50       3  
    0          
4788 1         4 ###########################################################################
4789 1         494  
4790             my $self = shift; $self->{parser}->add_to_addrlist(@_);
4791 0         0 }
4792             my $self = shift; $self->{parser}->add_to_addrlist_rcvd(@_);
4793 1 50       3 }
4794 1         3 my $self = shift; $self->{parser}->remove_from_addrlist(@_);
4795 1         5 }
4796             my $self = shift; $self->{parser}->remove_from_addrlist_rcvd(@_);
4797             }
4798              
4799             ###########################################################################
4800 1     1 0 3  
4801 1 50       9 my $self = shift;
4802 1         11 if (@_ == 1) {
4803             # we specified a symbolic name, return the strings
4804             my $name = shift;
4805             my $tests = $self->{regression_tests}->{$name};
4806             return @$tests;
4807             }
4808 34     34 0 2109 else {
  34         105  
4809             # no name asked for, just return the symbolic names we have tests for
4810             return keys %{$self->{regression_tests}};
4811 0     0 0 0 }
  0         0  
4812             }
4813              
4814 0     0 0 0 ###########################################################################
  0         0  
4815              
4816             my ($self, $user) = @_;
4817 0     0 0 0 $self->{parser}->finish_parsing($user);
  0         0  
4818             }
4819              
4820             ###########################################################################
4821              
4822             my ($self) = @_;
4823 0     0 0 0 if (!defined $self->{found_any_rules}) {
4824 0 0       0 $self->{found_any_rules} = (scalar keys %{$self->{tests}} > 0);
4825             }
4826 0         0 return $self->{found_any_rules};
4827 0         0 }
4828 0         0  
4829             ###########################################################################
4830              
4831             my ($self, $rule) = @_;
4832 0         0 # as silly as it looks, localized $1 here prevents an outer $1 from getting
  0         0  
4833             # tainted by the expression or assignment in the next line, bug 6148
4834             local($1);
4835             my $rule_descr = $self->{descriptions}->{$rule};
4836             return $rule_descr;
4837             }
4838              
4839 91     91 0 287 ###########################################################################
4840 91         483  
4841             my($self,$rulename) = @_;
4842             my $type = $self->{test_types}->{$rulename};
4843              
4844             if ($rulename =~ /AUTOLEARNTEST/i) {
4845             dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4846 94     94 0 361 }
4847 94 100       335
4848 91         153 return 0 if (!defined ($type));
  91         1191  
4849              
4850 94         304 if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
4851             return 1;
4852              
4853             } elsif ($type == $TYPE_META_TESTS) {
4854             my $tflags = $self->{tflags}->{$rulename};
4855             $tflags ||= '';
4856 178     178 0 344 if ($tflags =~ m/\bnet\b/i) {
4857             return 0;
4858             } else {
4859 178         427 return 1;
4860 178         437 }
4861 178         574 }
4862              
4863             return 0;
4864             }
4865              
4866             my($self,$rulename) = @_;
4867 13     13 0 42 my $type = $self->{test_types}->{$rulename};
4868 13         38  
4869             if ($rulename =~ /AUTOLEARNTEST/i) {
4870 13 50       54 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
4871 0         0 }
4872              
4873             return 0 if (!defined ($type));
4874 13 50       44  
4875             if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
4876 13 100 100     89 || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
    50          
4877 9         43 {
4878             # some rawbody go off of headers...
4879             return 1;
4880 0         0  
4881 0   0     0 } elsif ($type == $TYPE_META_TESTS) {
4882 0 0       0 my $tflags = $self->{tflags}->{$rulename}; $tflags ||= '';
4883 0         0 if ($tflags =~ m/\bnet\b/i) {
4884             return 0;
4885 0         0 } else {
4886             return 1;
4887             }
4888             }
4889 4         35  
4890             return 0;
4891             }
4892              
4893 4     4 0 12 ###########################################################################
4894 4         10  
4895             my ($self, $package, $path, $silent) = @_;
4896 4 50       28 $self->{main}->{plugins}->load_plugin($package, $path, $silent);
4897 0         0 }
4898              
4899             my ($self, $plugin, $package, $path) = @_;
4900 4 50       14 $self->{plugins_loaded}->{$package} = 1;
4901             }
4902 4 50 33     50  
    0 33        
      33        
4903             my ($self, $pluginobj, $nameofsub) = @_;
4904             $self->{eval_plugins}->{$nameofsub} = $pluginobj;
4905             }
4906 4         15  
4907             ###########################################################################
4908              
4909 0   0     0 my ($self, $source, $dest) = @_;
  0         0  
4910 0 0       0  
4911 0         0 unless (defined $source) {
4912             $source = $self;
4913 0         0 }
4914             unless (defined $dest) {
4915             $dest = $self;
4916             }
4917 0         0  
4918             my %done;
4919              
4920             # keys that should not be copied in ->clone().
4921             # bug 4179: include want_rebuild_for_type, so that if a user rule
4922             # is defined, its method will be recompiled for future scans in
4923 5232     5232 0 11288 # order to *remove* the generated method calls
4924 5232         16063 my @NON_COPIED_KEYS = qw(
4925             main eval_plugins plugins_loaded registered_commands sed_path_cache parser
4926             scoreset scores want_rebuild_for_type
4927             );
4928 2400     2400 0 6164  
4929 2400         21448 # special cases. first, skip anything that cannot be changed
4930             # by users, and the stuff we take care of here
4931             foreach my $var (@NON_COPIED_KEYS) {
4932             $done{$var} = undef;
4933 11652     11652 0 15115 }
4934 11652         35632  
4935             # keys that should can be copied using a ->clone() method, in ->clone()
4936             my @CLONABLE_KEYS = qw(
4937             internal_networks trusted_networks msa_networks
4938             );
4939              
4940 3     3 0 5 foreach my $key (@CLONABLE_KEYS) {
4941             $dest->{$key} = $source->{$key}->clone();
4942 3 100       6 $done{$key} = undef;
4943 1         3 }
4944              
4945 3 100       6 # two-level hashes
4946 2         3 foreach my $key (qw(uri_host_lists askdns)) {
4947             my $v = $source->{$key};
4948             my $dest_key_ref = $dest->{$key} = {}; # must start from scratch!
4949 3         4 while(my($k2,$v2) = each %{$v}) {
4950             %{$dest_key_ref->{$k2}} = %{$v2};
4951             }
4952             $done{$key} = undef;
4953             }
4954              
4955 3         16 # bug 4179: be smarter about cloning the rule-type structures;
4956             # some are like this: $self->{type}->{priority}->{name} = 'value';
4957             # which is an extra level that the below code won't deal with
4958             foreach my $t (@rule_types) {
4959             foreach my $k (keys %{$source->{$t}}) {
4960             my $v = $source->{$t}->{$k};
4961             my $i = ref $v;
4962 3         7 if ($i eq 'HASH') {
4963 27         42 %{$dest->{$t}->{$k}} = %{$v};
4964             }
4965             elsif ($i eq 'ARRAY') {
4966             @{$dest->{$t}->{$k}} = @{$v};
4967 3         9 }
4968             else {
4969             $dest->{$t}->{$k} = $v;
4970             }
4971 3         8 }
4972 9         24 $done{$t} = undef;
4973 9         18 }
4974              
4975             # and now, copy over all the rest -- the less complex cases.
4976             while(my($k,$v) = each %{$source}) {
4977 3         6 next if exists $done{$k}; # we handled it above
4978 6         9 $done{$k} = undef;
4979 6         11 my $i = ref($v);
4980 6         7  
  6         16  
4981 0         0 # Not a reference, or a scalar? Just copy the value over.
  0         0  
  0         0  
4982             if ($i eq '') {
4983 6         12 $dest->{$k} = $v;
4984             }
4985             elsif ($i eq 'SCALAR') {
4986             $dest->{$k} = $$v;
4987             }
4988             elsif ($i eq 'ARRAY') {
4989 3         10 @{$dest->{$k}} = @{$v};
4990 36         31 }
  36         81  
4991 12         17 elsif ($i eq 'HASH') {
4992 12         14 %{$dest->{$k}} = %{$v};
4993 12 50       28 }
    0          
4994 12         11 elsif ($i eq 'Regexp') {
  12         78  
  12         38  
4995             $dest->{$k} = $v;
4996             }
4997 0         0 else {
  0         0  
  0         0  
4998             # throw a warning for debugging -- should never happen in normal usage
4999             warn "config: dup unknown type $k, $i\n";
5000 0         0 }
5001             }
5002              
5003 36         59 foreach my $cmd (@{$self->{registered_commands}}) {
5004             my $k = $cmd->{setting};
5005             next if exists $done{$k}; # we handled it above
5006             $done{$k} = undef;
5007 3         5 $dest->{$k} = $source->{$k};
  760         1472  
5008 757 100       966 }
5009 696         759  
5010 696         696 # scoresets
5011             delete $dest->{scoreset};
5012             for my $i (0 .. 3) {
5013 696 100       980 %{$dest->{scoreset}->[$i]} = %{$source->{scoreset}->[$i]};
    50          
    100          
    50          
    0          
5014 540         819 }
5015              
5016             # deal with $conf->{scores}, it needs to be a reference into the scoreset
5017 0         0 # hash array dealy. Do it at the end since scoreset_current isn't set
5018             # otherwise.
5019             $dest->{scores} = $dest->{scoreset}->[$dest->{scoreset_current}];
5020 27         28  
  27         64  
  27         35  
5021             # ensure we don't copy the path cache from the master
5022             delete $dest->{sed_path_cache};
5023 129         117  
  129         2887  
  129         1485  
5024             return 1;
5025             }
5026 0         0  
5027             ###########################################################################
5028              
5029             my ($self) = @_;
5030 0         0  
5031             if (!$self->{main}->{keep_config_parsing_metadata} &&
5032             !$self->{allow_user_rules})
5033             {
5034 3         6 delete $self->{if_stack};
  3         6  
5035 723         865 #delete $self->{source_file};
5036 723 100       1111 #delete $self->{meta_dependencies};
5037 78         128 }
5038 78         129 }
5039              
5040             my ($self, $netset_name, $add_loopback) = @_;
5041             my $set = Mail::SpamAssassin::NetSet->new($netset_name);
5042 3         41 if ($add_loopback) {
5043 3         7 $set->add_cidr('127.0.0.0/8');
5044 12         14 $set->add_cidr('::1');
  12         265  
  12         78  
5045             }
5046             return $set;
5047             }
5048              
5049             ###########################################################################
5050 3         19  
5051             my ($self) = @_;
5052             #untie %{$self->{descriptions}};
5053 3         5 %{$self} = ();
5054             }
5055 3         66  
5056             ###########################################################################
5057              
5058              
5059             ###########################################################################
5060              
5061 2     2 0 6 # subroutines available to conditionalize rules, for example:
5062             # if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers))
5063 2 50 33     27  
5064              
5065             ###########################################################################
5066 2         7  
5067             1;
5068              
5069             =head1 LOCALI[SZ]ATION
5070              
5071             A line starting with the text C<lang xx> will only be interpreted
5072             if the user is in that locale, allowing test descriptions and
5073 372     372 0 1063 templates to be set for that language.
5074 372         2358  
5075 372 100       839 The locales string should specify either both the language and country, e.g.
5076 248         1106 C<lang pt_BR>, or just the language, e.g. C<lang de>.
5077 248         638  
5078             =head1 SEE ALSO
5079 372         1281  
5080             Mail::SpamAssassin(3)
5081             spamassassin(1)
5082             spamd(1)
5083              
5084             =cut