File Coverage

blib/lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 385 757 50.8
branch 109 340 32.0
condition 44 176 25.0
subroutine 56 83 67.4
pod 31 56 55.3
total 625 1412 44.2


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             package Mail::SpamAssassin;
62 31     31   2926268 use strict;
  31         394  
  31         1192  
63 31     31   216 use warnings;
  31         73  
  31         1260  
64             # use bytes;
65 31     31   237 use re 'taint';
  31         77  
  31         1938  
66              
67             require 5.006_001;
68              
69 30     30   8936 use Mail::SpamAssassin::Logger;
  30         68  
  30         1962  
70 30     30   9313 use Mail::SpamAssassin::Constants;
  30         67  
  30         1517  
71 30     30   8941 use Mail::SpamAssassin::Conf;
  30         113  
  30         1212  
72 30     30   10963 use Mail::SpamAssassin::Conf::SQL;
  30         80  
  30         987  
73 30     30   9148 use Mail::SpamAssassin::Conf::LDAP;
  30         87  
  30         1007  
74 30     30   9233 use Mail::SpamAssassin::PerMsgStatus;
  30         132  
  30         1324  
75 29     29   10485 use Mail::SpamAssassin::Message;
  29         573  
  29         3271  
76 29     29   13753 use Mail::SpamAssassin::PluginHandler;
  29         102  
  29         994  
77 29     29   9419 use Mail::SpamAssassin::DnsResolver;
  29         101  
  29         1103  
78 29     29   9723 use Mail::SpamAssassin::RegistryBoundaries;
  29         84  
  29         1064  
79 29     29   220 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  29         64  
  29         1766  
80 29     29   10137 use Mail::SpamAssassin::Util::ScopedTimer;
  29         91  
  29         963  
81              
82 29     29   228 use Errno qw(ENOENT EACCES);
  29         87  
  29         1898  
83 29     29   212 use File::Basename;
  29         96  
  29         3528  
84 29     29   214 use File::Path;
  29         67  
  29         2666  
85 29     29   201 use File::Spec 0.8;
  29         1001  
  29         804  
86 29     29   163 use Time::HiRes qw(time);
  29         70  
  29         212  
87 29     29   2962 use Cwd;
  29         105  
  29         2739  
88 29     29   218 use Config;
  29         69  
  29         287683  
89              
90             our $VERSION = "3.004003"; # update after release (same format as perl $])
91             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
92             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
93              
94              
95             # Used during the prerelease/release-candidate part of the official release
96             # process. If you hacked up your SA, you should add a version_tag to your .cf
97             # files; this variable should not be modified.
98             our @EXTRA_VERSION = qw();
99              
100             our @ISA = qw();
101              
102             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
103             our $SUB_VERSION = 'svnunknown';
104             if ('$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $' =~ ':') {
105             # Subversion keyword "$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $" has been successfully expanded.
106             # Doesn't happen with automated launchpad builds:
107             # https://bugs.launchpad.net/launchpad/+bug/780916
108             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $ updated by SVN'))[1];
109             }
110              
111              
112             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
113             if ('$LastChangedRevision: 1870940 $' =~ ':') {
114             # Subversion keyword "$LastChangedRevision: 1870940 $" has been successfully expanded.
115             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1870940 $ updated by SVN}[1]));
116             } else {
117             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
118             }
119             }
120              
121             sub Version {
122 36     36 0 487 $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
123 36         1218 return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
124             }
125              
126             our $HOME_URL = "https://spamassassin.apache.org/";
127              
128             # note that the CWD takes priority. This is required in case a user
129             # is testing a new version of SpamAssassin on a machine with an older
130             # version installed. Unless you can come up with a fix for this that
131             # allows "make test" to work, don't change this.
132              
133             our @default_rules_path = (
134             '__local_state_dir__/__version__',
135             '__def_rules_dir__',
136             '__prefix__/share/spamassassin',
137             '/usr/local/share/spamassassin',
138             '/usr/share/spamassassin',
139             );
140              
141             # first 3 are BSDish, latter 2 Linuxish
142             our @site_rules_path = (
143             '__local_rules_dir__',
144             '__prefix__/etc/mail/spamassassin',
145             '__prefix__/etc/spamassassin',
146             '/usr/local/etc/spamassassin',
147             '/usr/pkg/etc/spamassassin',
148             '/usr/etc/spamassassin',
149             '/etc/mail/spamassassin',
150             '/etc/spamassassin',
151             );
152              
153             our @default_prefs_path = (
154             '__local_rules_dir__/user_prefs.template',
155             '__prefix__/etc/mail/spamassassin/user_prefs.template',
156             '__prefix__/share/spamassassin/user_prefs.template',
157             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
158             '__def_rules_dir__/user_prefs.template',
159             '/etc/spamassassin/user_prefs.template',
160             '/etc/mail/spamassassin/user_prefs.template',
161             '/usr/local/share/spamassassin/user_prefs.template',
162             '/usr/share/spamassassin/user_prefs.template',
163             );
164              
165             our @default_userprefs_path = (
166             '~/.spamassassin/user_prefs',
167             );
168              
169             our @default_userstate_dir = (
170             '~/.spamassassin',
171             );
172              
173             ###########################################################################
174              
175             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
176              
177             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
178             reference to the constructor which may contain the following attribute-
179             value pairs.
180              
181             =over 4
182              
183             =item debug
184              
185             This is the debug options used to determine logging level. It exists to
186             allow sections of debug messages (called "facilities") to be enabled or
187             disabled. If this is a string, it is treated as a comma-delimited list
188             of the debug facilities. If it's a hash reference, then the keys are
189             treated as the list of debug facilities and if it's a array reference,
190             then the elements are treated as the list of debug facilities.
191              
192             There are also two special cases: (1) if the special case of "info" is
193             passed as a debug facility, then all informational messages are enabled;
194             (2) if the special case of "all" is passed as a debug facility, then all
195             debugging facilities are enabled.
196              
197             =item rules_filename
198              
199             The filename/directory to load spam-identifying rules from. (optional)
200              
201             =item site_rules_filename
202              
203             The filename/directory to load site-specific spam-identifying rules from.
204             (optional)
205              
206             =item userprefs_filename
207              
208             The filename to load preferences from. (optional)
209              
210             =item userstate_dir
211              
212             The directory user state is stored in. (optional)
213              
214             =item config_tree_recurse
215              
216             Set to C<1> to recurse through directories when reading configuration
217             files, instead of just reading a single level. (optional, default 0)
218              
219             =item config_text
220              
221             The text of all rules and preferences. If you prefer not to load the rules
222             from files, read them in yourself and set this instead. As a result, this will
223             override the settings for C<rules_filename>, C<site_rules_filename>,
224             and C<userprefs_filename>.
225              
226             =item pre_config_text
227              
228             Similar to C<config_text>, this text is placed before config_text to allow an
229             override of config files.
230              
231             =item post_config_text
232              
233             Similar to C<config_text>, this text is placed after config_text to allow an
234             override of config files.
235              
236             =item force_ipv4
237              
238             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
239             to use IPv6. Use if the existing tests for IPv6 availability produce
240             incorrect results or crashes.
241              
242             =item force_ipv6
243              
244             For symmetry with force_ipv4: if set to 1, DNS or other network tests
245             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
246             this setting and use whatever protocol family they are comfortable with.
247              
248             =item require_rules
249              
250             If set to 1, init() will die if no valid rules could be loaded. This is the
251             default behaviour when called by C<spamassassin> or C<spamd>.
252              
253             =item languages_filename
254              
255             If you want to be able to use the language-guessing rule
256             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
257             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
258             need to set this. It should be the path to the B<languages> file normally
259             found in the SpamAssassin B<rules> directory.
260              
261             =item local_tests_only
262              
263             If set to 1, no tests that require internet access will be performed. (default:
264             0)
265              
266             =item need_tags
267              
268             The option provides a way to avoid more expensive processing when it is known
269             in advance that some information will not be needed by a caller.
270              
271             A value of the option can either be a string (a comma-delimited list of tag
272             names), or a reference to a list of individual tag names. A caller may provide
273             the list in advance, specifying his intention to later collect the information
274             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
275             insensitive), it shows that a caller will not be interested in such tag,
276             although there is no guarantee it would save any resources, nor that a tag
277             value will be empty. Currently no built-in tags start with 'NO'. A later
278             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
279             to TIMING,NOASN.
280              
281             For backward compatibility, all tags available as of version 3.2.4 will
282             be available by default (unless disabled by NOtag), even if not requested
283             through need_tags option. Future versions may provide new tags conditionally
284             available.
285              
286             Currently the only tag that needs to be explicitly requested is 'TIMING'.
287             Not requesting it can save a millisecond or two - it mostly serves to
288             illustrate the usage of need_tags.
289              
290             Example:
291             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
292             or:
293             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
294              
295             =item ignore_site_cf_files
296              
297             If set to 1, any rule files found in the C<site_rules_filename> directory will
298             be ignored. *.pre files (used for loading plugins) found in the
299             C<site_rules_filename> directory will still be used. (default: 0)
300              
301             =item dont_copy_prefs
302              
303             If set to 1, the user preferences file will not be created if it doesn't
304             already exist. (default: 0)
305              
306             =item save_pattern_hits
307              
308             If set to 1, the patterns hit can be retrieved from the
309             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
310              
311             =item home_dir_for_helpers
312              
313             If set, the B<HOME> environment variable will be set to this value
314             when using test applications that require their configuration data,
315             such as Razor, Pyzor and DCC.
316              
317             =item username
318              
319             If set, the C<username> attribute will use this as the current user's name.
320             Otherwise, the default is taken from the runtime environment (ie. this process'
321             effective UID under UNIX).
322              
323             =item skip_prng_reseeding
324              
325             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
326             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
327             Perl function should be called during initialization of each child process,
328             soon after forking.
329              
330             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
331             library.
332              
333             This setting requires the caller to decide when to call srand().
334             This choice may be desired to preserve the entropy of a PRNG. The default
335             value of skip_prng_reseeding is false to maintain backward compatibility.
336              
337             This option should only be set by a caller if it calls srand() upon spawning
338             child processes. Unless you are certain you need it, leave this setting as
339             false.
340              
341             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
342             which allows spamd to call srand() right after forking a child process.
343              
344             =back
345              
346             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
347             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
348             configuration files in the usual installed locations using the below variable
349             definitions which can be passed in.
350              
351             =over 4
352              
353             =item PREFIX
354              
355             Used as the root for certain directory paths such as:
356              
357             '__prefix__/etc/mail/spamassassin'
358             '__prefix__/etc/spamassassin'
359              
360             Defaults to "/usr/local".
361              
362             =item DEF_RULES_DIR
363              
364             Location where the default rules are installed. Defaults to
365             "/usr/local/share/spamassassin".
366              
367             =item LOCAL_RULES_DIR
368              
369             Location where the local site rules are installed. Defaults to
370             "/etc/mail/spamassassin".
371              
372             =item LOCAL_STATE_DIR
373              
374             Location of the local state directory, mainly used for installing updates via
375             C<sa-update> and compiling rulesets to native code. Defaults to
376             "/var/lib/spamassassin".
377              
378             =back
379              
380              
381             =cut
382              
383             # undocumented ctor settings:
384             #
385             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
386              
387             sub new {
388 36     36 1 13170555 my $class = shift;
389 36   33     659 $class = ref($class) || $class;
390              
391 36         165 my $self = shift;
392 36 50       286 if (!defined $self) { $self = { }; }
  0         0  
393 36         182 bless ($self, $class);
394              
395             # basic backward compatibility; debug used to be a boolean.
396             # translate that into 'all', which is what it meant before 3.1.0.
397 36 50 33     453 if ($self->{debug} && $self->{debug} eq '1') {
398 0         0 $self->{debug} = 'all';
399             }
400              
401             # enable or disable debugging
402 36         724 Mail::SpamAssassin::Logger::add_facilities($self->{debug});
403              
404             # first debugging information possibly printed should be the version
405 36         402 dbg("generic: SpamAssassin version " . Version());
406              
407             # if the libs are installed in an alternate location, and the caller
408             # didn't set PREFIX, we should have an estimated guess ready, values
409             # substituted at 'make' time
410 36   50     617 $self->{PREFIX} ||= '/usr/local';
411 36   50     505 $self->{DEF_RULES_DIR} ||= '/usr/local/share/spamassassin';
412 36   50     396 $self->{LOCAL_RULES_DIR} ||= '/etc/mail/spamassassin';
413 36   50     405 $self->{LOCAL_STATE_DIR} ||= '/var/lib/spamassassin';
414 36         163 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
  144         750  
415             qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
416              
417 36         154 $self->{needed_tags} = {};
418 36         106 { my $ntags = $self->{need_tags};
  36         112  
419 36 50       385 if (defined $ntags) {
420 0 0       0 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
421 0 0       0 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
422             }
423             }
424             }
425 36 50 33     366 if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
426 0         0 $self->timer_enable();
427             }
428              
429 36   33     1130 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
430 36         764 $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
431              
432 36   50     309 $self->{save_pattern_hits} ||= 0;
433              
434             # Make sure that we clean $PATH if we're tainted
435 36         516 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
436              
437 36 50       176 if (!defined $self->{username}) {
438 36         398 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
439             }
440              
441 36         303 $self->create_locker();
442              
443 36         147 $self;
444             }
445              
446             sub create_locker {
447 36     36 0 106 my ($self) = @_;
448              
449 36         68 my $class;
450 36         96 my $m = $self->{conf}->{lock_method};
451              
452             # let people choose what they want -- even if they may not work on their
453             # OS. (they could be using cygwin!)
454 36 50       231 if ($m eq 'win32') { $class = 'Win32'; }
  0 50       0  
    50          
455 0         0 elsif ($m eq 'flock') { $class = 'Flock'; }
456 0         0 elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
457             else {
458             # OS-specific defaults
459 36 50       201 if (am_running_on_windows()) {
460 0         0 $class = 'Win32';
461             } else {
462 36         191 $class = 'UnixNFSSafe';
463             }
464             }
465              
466             # this could probably be made a little faster; for now I'm going
467             # for slow but safe, by keeping in quotes
468             eval '
469             use Mail::SpamAssassin::Locker::'.$class.';
470             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
471             1;
472 36 50   15   4131 ' or do {
  15     5   8634  
  15     4   48  
  15         636  
  5         101  
  5         40  
  5         370  
  4         122  
  4         43  
  4         371  
473 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
474 0         0 die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
475             };
476              
477 36 50       318 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
478             }
479              
480             ###########################################################################
481              
482             =item parse($message, $parse_now [, $suppl_attrib])
483              
484             Parse will return a Mail::SpamAssassin::Message object with just the
485             headers parsed. When calling this function, there are two optional
486             parameters that can be passed in: $message is either undef (which
487             will use STDIN), a scalar - a string containing an entire message,
488             a reference to such string, an array reference of the message with
489             one line per array element, or either a file glob or an IO::File object
490             which holds the entire contents of the message; and $parse_now, which
491             specifies whether or not to create a MIME tree at parse time or later
492             as necessary.
493              
494             The I<$parse_now> option, by default, is set to false (0). This
495             allows SpamAssassin to not have to generate the tree of internal
496             data nodes if the information is not going to be used. This is
497             handy, for instance, when running C<spamassassin -d>, which only
498             needs the pristine header and body which is always parsed and stored
499             by this function.
500              
501             The optional last argument I<$suppl_attrib> provides a way for a caller
502             to pass additional information about a message to SpamAssassin. It is
503             either undef, or a ref to a hash where each key/value pair provides some
504             supplementary attribute of the message, typically information that cannot
505             be deduced from the message itself, or is hard to do so reliably, or would
506             represent unnecessary work for SpamAssassin to obtain it. The argument will
507             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
508             made available to the rest of the code as well as to plugins. The exact list
509             of attributes will evolve through time, any unknown attribute should be
510             ignored. Possible examples are: SMTP envelope information, a flag indicating
511             that a message as supplied by a caller was truncated due to size limit, an
512             already verified list of DKIM signature objects, or perhaps a list of rule
513             hits predetermined by a caller, which makes another possible way for a
514             caller to provide meta information (instead of having to insert made-up
515             header fields in order to pass information), or maybe just plain rule hits.
516              
517             For more information, please see the C<Mail::SpamAssassin::Message>
518             and C<Mail::SpamAssassin::Message::Node> POD.
519              
520             =cut
521              
522             sub parse {
523 43     43 1 9769 my($self, $message, $parsenow, $suppl_attrib) = @_;
524              
525 43         206 my $start_time = time;
526 43         191 $self->init(1);
527 43         139 my $timer = $self->time_method("parse");
528              
529 43         84 my $master_deadline;
530             # passed in at a function call
531 43 50 66     158 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
532 1         3 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
533             }
534             # found in a config file - overrides passed-in number if lower
535 43 50       190 if ($self->{conf}->{time_limit}) { # defined and nonzero
536 43         205 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
537 43 50 33     193 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
538 43         80 $master_deadline = $time_limit_deadline;
539             }
540             }
541 43 50       139 if (defined $master_deadline) {
542 43         176 dbg("config: time limit %.1f s", $master_deadline - $start_time);
543             }
544              
545             my $msg = Mail::SpamAssassin::Message->new({
546             message=>$message, parsenow=>$parsenow,
547             normalize=>$self->{conf}->{normalize_charset},
548             body_part_scan_size=>$self->{conf}->{body_part_scan_size},
549             rawbody_part_scan_size=>$self->{conf}->{rawbody_part_scan_size},
550 43         1103 master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
551              
552             # bug 5069: The goal here is to get rendering plugins to do things
553             # like OCR, convert doc and pdf to text, etc, though it could be anything
554             # that wants to process the message after it's been parsed.
555 43         293 $self->call_plugins("post_message_parse", { message => $msg });
556              
557 43         221 return $msg;
558             }
559              
560              
561             ###########################################################################
562              
563             =item $status = $f->check ($mail)
564              
565             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
566             to determine if it is spam or not.
567              
568             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
569             used to test or manipulate the mail message.
570              
571             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
572             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
573             object is a "factory". However, if you do this, be sure to call the
574             C<finish()> method on the status objects when you're done with them.
575              
576             =cut
577              
578             sub check {
579 12     12 1 686 my ($self, $mail_obj) = @_;
580              
581 12         38 $self->init(1);
582 12         99 my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
583 12         58 $pms->check();
584 12 50       44 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
585 12         84 $pms;
586             }
587              
588             =item $status = $f->check_message_text ($mailtext)
589              
590             Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
591             is spam or not.
592              
593             Otherwise identical to C<check()> above.
594              
595             =cut
596              
597             sub check_message_text {
598 0     0 1 0 my ($self, $mailtext) = @_;
599 0         0 my $msg = $self->parse($mailtext, 1);
600 0         0 my $result = $self->check($msg);
601              
602             # Kill off the metadata ...
603             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
604             # the message. So killing it here will cause things like
605             # rewrite_message() to fail. <grrr>
606             #
607 0         0 $msg->finish_metadata();
608              
609 0         0 return $result;
610             }
611              
612             ###########################################################################
613              
614             =item $status = $f->learn ($mail, $id, $isspam, $forget)
615              
616             Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
617              
618             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
619             be learnt as non-spam.
620              
621             If C<$forget> is set, the attributes of the mail will be removed from
622             both the non-spam and spam learning databases.
623              
624             C<$id> is an optional message-identification string, used internally
625             to tag the message. If it is C<undef>, the Message-Id of the message
626             will be used. It should be unique to that message.
627              
628             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
629             manipulate the learning process for each mail.
630              
631             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
632             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
633             object is a "factory". However, if you do this, be sure to call the
634             C<finish()> method on the learner objects when you're done with them.
635              
636             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
637             must be called before using this method.
638              
639             =cut
640              
641             sub learn {
642 2     2 1 7 my ($self, $mail_obj, $id, $isspam, $forget) = @_;
643 2         4 local ($_);
644              
645 2         592 require Mail::SpamAssassin::PerMsgLearner;
646 2         10 $self->init(1);
647 2         15 my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
648              
649 2 50       15 if ($forget) {
    50          
650 0         0 dbg("learn: forgetting message");
651 0         0 $msg->forget($id);
652             } elsif ($isspam) {
653 0         0 dbg("learn: learning spam");
654 0         0 $msg->learn_spam($id);
655             } else {
656 2         7 dbg("learn: learning ham");
657 2         6 $msg->learn_ham($id);
658             }
659              
660 2         8 $msg;
661             }
662              
663             ###########################################################################
664              
665             =item $f->init_learner ( [ { opt => val, ... } ] )
666              
667             Initialise learning. You may pass the following attribute-value pairs to this
668             method.
669              
670             =over 4
671              
672             =item caller_will_untie
673              
674             Whether or not the code calling this method will take care of untie'ing
675             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
676              
677             =item force_expire
678              
679             Should an expiration run be forced to occur immediately? (optional, default 0).
680              
681             =item learn_to_journal
682              
683             Should learning data be written to the journal, instead of directly to the
684             databases? (optional, default 0).
685              
686             =item wait_for_lock
687              
688             Whether or not to wait a long time for locks to complete (optional, default 0).
689              
690             =item opportunistic_expire_check_only
691              
692             During the opportunistic journal sync and expire check, don't actually do the
693             expire but report back whether or not it should occur (optional, default 0).
694              
695             =item no_relearn
696              
697             If doing a learn operation, and the message has already been learned as
698             the opposite type, don't re-learn the message.
699              
700             =back
701              
702             =cut
703              
704             sub init_learner {
705 4     4 1 7 my $self = shift;
706 4         7 my $opts = shift;
707 4         13 dbg("learn: initializing learner");
708              
709             # Make sure we're already initialized ...
710 4         14 $self->init(1);
711              
712 4         38 my %kv = (
713             'force_expire' => 'learn_force_expire',
714             'learn_to_journal' => 'learn_to_journal',
715             'caller_will_untie' => 'learn_caller_will_untie',
716             'wait_for_lock' => 'learn_wait_for_lock',
717             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
718             'no_relearn' => 'learn_no_relearn',
719             );
720              
721 4         8 my %ret;
722              
723             # Set any other options that need setting ...
724 4         19 while( my($k,$v) = each %kv ) {
725 24         43 $ret{$k} = $self->{$v};
726 24 100       54 if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
  14         41  
727             }
728              
729 4         15 return \%ret;
730             }
731              
732             ###########################################################################
733              
734             =item $f->rebuild_learner_caches ({ opt => val })
735              
736             Rebuild any cache databases; should be called after the learning process.
737             Options include: C<verbose>, which will output diagnostics to C<stdout>
738             if set to 1.
739              
740             =cut
741              
742             sub rebuild_learner_caches {
743 0     0 1 0 my $self = shift;
744 0         0 my $opts = shift;
745 0 0       0 $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
746 0         0 1;
747             }
748              
749             =item $f->finish_learner ()
750              
751             Finish learning.
752              
753             =cut
754              
755             sub finish_learner {
756 3     3 1 7 my $self = shift;
757 3 100       20 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
758 3         9 1;
759             }
760              
761             =item $f->dump_bayes_db()
762              
763             Dump the contents of the Bayes DB
764              
765             =cut
766              
767             sub dump_bayes_db {
768 0     0 1 0 my($self,@opts) = @_;
769 0 0       0 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
770             }
771              
772             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
773              
774             Signals that the current user has changed (possibly using C<setuid>), meaning
775             that SpamAssassin should close any per-user databases it has open, and re-open
776             using ones appropriate for the new user.
777              
778             Note that this should be called I<after> reading any per-user configuration, as
779             that data may override some paths opened in this method. You may pass the
780             following attribute-value pairs:
781              
782             =over 4
783              
784             =item username
785              
786             The username of the user. This will be used for the C<username> attribute.
787              
788             =item user_dir
789              
790             A directory to use as a 'home directory' for the current user's data,
791             overriding the system default. This directory must be readable and writable by
792             the process. Note that the resulting C<userstate_dir> will be the
793             C<.spamassassin> subdirectory of this dir.
794              
795             =item userstate_dir
796              
797             A directory to use as a directory for the current user's data, overriding the
798             system default. This directory must be readable and writable by the process.
799             The default is C<user_dir/.spamassassin>.
800              
801             =back
802              
803             =cut
804              
805             sub signal_user_changed {
806 0     0 1 0 my $self = shift;
807 0         0 my $opts = shift;
808 0         0 my $set = 0;
809              
810 0         0 my $timer = $self->time_method("signal_user_changed");
811 0         0 dbg("info: user has changed");
812              
813 0 0 0     0 if (defined $opts && $opts->{username}) {
814 0         0 $self->{username} = $opts->{username};
815             } else {
816 0         0 undef $self->{username};
817             }
818 0 0 0     0 if (defined $opts && $opts->{user_dir}) {
819 0         0 $self->{user_dir} = $opts->{user_dir};
820             } else {
821 0         0 undef $self->{user_dir};
822             }
823 0 0 0     0 if (defined $opts && $opts->{userstate_dir}) {
824 0         0 $self->{userstate_dir} = $opts->{userstate_dir};
825             } else {
826 0         0 undef $self->{userstate_dir};
827             }
828              
829             # reopen bayes dbs for this user
830 0 0       0 $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
831 0 0       0 if ($self->{conf}->{use_bayes}) {
832 0         0 require Mail::SpamAssassin::Bayes;
833 0         0 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
834             } else {
835 0 0       0 delete $self->{bayes_scanner} if $self->{bayes_scanner};
836             }
837              
838             # this user may have a different learn_to_journal setting, so reset appropriately
839 0         0 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
840              
841 0 0       0 $set |= 1 unless $self->{local_tests_only};
842 0 0 0     0 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      0        
843              
844 0         0 $self->{conf}->set_score_set ($set);
845              
846             $self->call_plugins("signal_user_changed", {
847             username => $self->{username},
848             userstate_dir => $self->{userstate_dir},
849             user_dir => $self->{user_dir},
850 0         0 });
851              
852 0         0 1;
853             }
854              
855             ###########################################################################
856              
857             =item $f->report_as_spam ($mail, $options)
858              
859             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
860             human-verified spam. This will submit the mail message to live,
861             collaborative, spam-blocker databases, allowing other users to block this
862             message.
863              
864             It will also submit the mail to SpamAssassin's Bayesian learner.
865              
866             Options is an optional reference to a hash of options. Currently these
867             can be:
868              
869             =over 4
870              
871             =item dont_report_to_dcc
872              
873             Inhibits reporting of the spam to DCC.
874              
875             =item dont_report_to_pyzor
876              
877             Inhibits reporting of the spam to Pyzor.
878              
879             =item dont_report_to_razor
880              
881             Inhibits reporting of the spam to Razor.
882              
883             =item dont_report_to_spamcop
884              
885             Inhibits reporting of the spam to SpamCop.
886              
887             =back
888              
889             =cut
890              
891             sub report_as_spam {
892 0     0 1 0 my ($self, $mail, $options) = @_;
893 0         0 local ($_);
894              
895 0         0 $self->init(1);
896 0         0 my $timer = $self->time_method("report_as_spam");
897              
898             # learn as spam if enabled
899 0 0       0 if ( $self->{conf}->{bayes_learn_during_report} ) {
900 0         0 $self->learn ($mail, undef, 1, 0);
901             }
902              
903 0         0 require Mail::SpamAssassin::Reporter;
904 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
905 0         0 $mail->report();
906             }
907              
908             ###########################################################################
909              
910             =item $f->revoke_as_spam ($mail, $options)
911              
912             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
913             human-verified ham (non-spam). This will revoke the mail message from live,
914             collaborative, spam-blocker databases, allowing other users to block this
915             message.
916              
917             It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
918              
919             Options is an optional reference to a hash of options. Currently these
920             can be:
921              
922             =over 4
923              
924             =item dont_report_to_razor
925              
926             Inhibits revoking of the spam to Razor.
927              
928              
929             =back
930              
931             =cut
932              
933             sub revoke_as_spam {
934 0     0 1 0 my ($self, $mail, $options) = @_;
935 0         0 local ($_);
936              
937 0         0 $self->init(1);
938 0         0 my $timer = $self->time_method("revoke_as_spam");
939              
940             # learn as nonspam
941 0         0 $self->learn ($mail, undef, 0, 0);
942              
943 0         0 require Mail::SpamAssassin::Reporter;
944 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
945 0         0 $mail->revoke ();
946             }
947              
948             ###########################################################################
949              
950             =item $f->add_address_to_whitelist ($addr, $cli_p)
951              
952             Given a string containing an email address, add it to the automatic
953             whitelist database.
954              
955             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
956              
957             =cut
958              
959             sub add_address_to_whitelist {
960 0     0 1 0 my ($self, $addr, $cli_p) = @_;
961              
962 0         0 $self->call_plugins("whitelist_address", { address => $addr,
963             cli_p => $cli_p });
964             }
965              
966             ###########################################################################
967              
968             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
969              
970             Given a mail message, find as many addresses in the usual headers (To, Cc, From
971             etc.), and the message body, and add them to the automatic whitelist database.
972              
973             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
974              
975             =cut
976              
977             sub add_all_addresses_to_whitelist {
978 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
979              
980 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
981 0         0 $self->call_plugins("whitelist_address", { address => $addr,
982             cli_p => $cli_p });
983             }
984             }
985              
986             ###########################################################################
987              
988             =item $f->remove_address_from_whitelist ($addr, $cli_p)
989              
990             Given a string containing an email address, remove it from the automatic
991             whitelist database.
992              
993             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
994              
995             =cut
996              
997             sub remove_address_from_whitelist {
998 0     0 1 0 my ($self, $addr, $cli_p) = @_;
999              
1000 0         0 $self->call_plugins("remove_address", { address => $addr,
1001             cli_p => $cli_p });
1002             }
1003              
1004             ###########################################################################
1005              
1006             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
1007              
1008             Given a mail message, find as many addresses in the usual headers (To, Cc, From
1009             etc.), and the message body, and remove them from the automatic whitelist
1010             database.
1011              
1012             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1013              
1014             =cut
1015              
1016             sub remove_all_addresses_from_whitelist {
1017 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1018              
1019 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1020 0         0 $self->call_plugins("remove_address", { address => $addr,
1021             cli_p => $cli_p });
1022             }
1023             }
1024              
1025             ###########################################################################
1026              
1027             =item $f->add_address_to_blacklist ($addr, $cli_p)
1028              
1029             Given a string containing an email address, add it to the automatic
1030             whitelist database with a high score, effectively blacklisting them.
1031              
1032             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1033              
1034             =cut
1035              
1036             sub add_address_to_blacklist {
1037 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1038 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1039             cli_p => $cli_p });
1040             }
1041              
1042             ###########################################################################
1043              
1044             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1045              
1046             Given a mail message, find addresses in the From headers and add them to the
1047             automatic whitelist database with a high score, effectively blacklisting them.
1048              
1049             Note that To and Cc addresses are not used.
1050              
1051             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1052              
1053             =cut
1054              
1055             sub add_all_addresses_to_blacklist {
1056 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1057              
1058 0         0 $self->init(1);
1059              
1060 0         0 my @addrlist;
1061 0         0 my @hdrs = $mail_obj->get_header('From');
1062 0 0       0 if ($#hdrs >= 0) {
1063 0         0 push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1064             }
1065              
1066 0         0 foreach my $addr (@addrlist) {
1067 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1068             cli_p => $cli_p });
1069             }
1070              
1071             }
1072              
1073             ###########################################################################
1074              
1075             =item $text = $f->remove_spamassassin_markup ($mail)
1076              
1077             Returns the text of the message, with any SpamAssassin-added text (such
1078             as the report, or X-Spam-Status headers) stripped.
1079              
1080             Note that the B<$mail> object is not modified.
1081              
1082             Warning: if the input message in B<$mail> contains a mixture of CR-LF
1083             (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1084             to use one or the other consistently throughout.
1085              
1086             =cut
1087              
1088             sub remove_spamassassin_markup {
1089 0     0 1 0 my ($self, $mail_obj) = @_;
1090 0         0 local ($_);
1091              
1092 0         0 my $timer = $self->time_method("remove_spamassassin_markup");
1093 0   0     0 my $mbox = $mail_obj->get_mbox_separator() || '';
1094              
1095 0         0 dbg("markup: removing markup");
1096              
1097             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1098             # if we have definitive proof it's not an encapsulated message.
1099 0   0     0 my $ct = $mail_obj->get_header("Content-Type") || '';
1100 0 0       0 if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1101              
1102             # Ok, this is a possible encapsulated message, search for the
1103             # appropriate mime part and deal with it if necessary.
1104 0         0 my $boundary = "\Q$1\E";
1105 0         0 my @msg = split(/^/,$mail_obj->get_pristine_body());
1106              
1107 0         0 my $flag = 0;
1108 0         0 $ct = '';
1109 0         0 my $cd = '';
1110 0         0 for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1111             # only look at mime part headers
1112 0 0 0     0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1113              
1114 0 0       0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1115              
1116             # Ok, we found the encapsulated piece ...
1117 0 0 0     0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
      0        
1118             ($ct eq "message/rfc822" &&
1119             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1120             {
1121 0         0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1122              
1123             # find the end and chop it off
1124 0         0 for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1125 0 0       0 if ( $msg[$i] =~ /^--$boundary/ ) {
1126 0 0       0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1127             # will remove the blank line (not sure it'll always be
1128             # there) and everything below. don't worry, the splice
1129             # guarantees the for will stop ...
1130             }
1131             }
1132              
1133             # Ok, we're done. Return the rewritten message.
1134 0         0 return join('', $mbox, @msg);
1135             }
1136              
1137 0         0 $flag = 0;
1138 0         0 $ct = '';
1139 0         0 $cd = '';
1140 0         0 next;
1141             }
1142              
1143             # Ok, we're in the mime header ... Capture the appropriate headers...
1144 0         0 $flag = 1;
1145 0 0       0 if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
    0          
1146 0         0 $ct = $1;
1147             }
1148             elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1149 0         0 $cd = $1;
1150             }
1151             }
1152             }
1153              
1154             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1155             # So treat it like a "report_safe 0" message.
1156 0         0 my $hdrs = $mail_obj->get_pristine_header();
1157 0         0 my $body = $mail_obj->get_pristine_body();
1158              
1159             # remove DOS line endings
1160 0         0 $hdrs =~ s/\r//gs;
1161              
1162             # unfold SA added headers, but not X-Spam-Prev headers ...
1163 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1164 0         0 1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1165 0         0 $hdrs =~ s/^\n//;
1166              
1167             ###########################################################################
1168             # Backward Compatibility, pre 3.0.x.
1169              
1170             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1171 0         0 $self->init(1);
1172 0         0 foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
  0         0  
1173             # let the 3.0 decoding do it...
1174 0 0       0 next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1175              
1176 0         0 dbg("markup: removing markup in $header");
1177 0 0       0 if ($header eq 'Subject') {
1178 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1179 0         0 $tag = quotemeta($tag);
1180 0         0 $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1181 0         0 $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1182 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1183 0         0 1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1184             } else {
1185 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1186             }
1187             }
1188              
1189             # Now deal with report cleansing from 2.4x and previous.
1190             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1191             # followed by another "SPAM: ----.+" line, followed by a blank line.
1192 0         0 1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1193             ###########################################################################
1194              
1195             # 3.0 version -- support for previously-nonexistent Subject hdr.
1196             # ensure the Subject line didn't *really* contain "(nonexistent)" in
1197             # the original message!
1198 0 0 0     0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1199             && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1200             {
1201 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1202 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1203             }
1204              
1205             # 3.0 version -- revert from X-Spam-Prev to original ...
1206 0         0 while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1207 0         0 my($hdr, $name) = ($1,$2);
1208              
1209             # If the rewritten version doesn't exist, we should deal with it anyway...
1210 0 0       0 unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1211 0         0 $hdrs =~ s/\n\n/\n$hdr\n/;
1212             }
1213             }
1214              
1215             # remove any other X-Spam headers we added, will be unfolded
1216 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1217 0         0 1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1218 0         0 $hdrs =~ s/^\n//;
1219              
1220             # re-add DOS line endings
1221 0 0       0 if ($mail_obj->{line_ending} ne "\n") {
1222 0         0 $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1223             }
1224              
1225             # Put the whole thing back together ...
1226 0         0 return join ('', $mbox, $hdrs, $body);
1227             }
1228              
1229             ###########################################################################
1230              
1231             =item $f->read_scoreonly_config ($filename)
1232              
1233             Read a configuration file and parse user preferences from it.
1234              
1235             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1236             In other words, they include scoring options, scores, whitelists and
1237             blacklists, and so on, but do not include rule definitions, privileged
1238             settings, etc. unless C<allow_user_rules> is enabled; and they never include
1239             the administrator settings.
1240              
1241             =cut
1242              
1243             sub read_scoreonly_config {
1244 0     0 1 0 my ($self, $filename) = @_;
1245              
1246 0         0 my $timer = $self->time_method("read_scoreonly_config");
1247 0         0 local *IN;
1248 0 0       0 if (!open(IN,"<$filename")) {
1249             # the file may not exist; this should not be verbose
1250 0         0 dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1251 0         0 return;
1252             }
1253              
1254 0         0 my($inbuf,$nread,$text); $text = '';
  0         0  
1255 0         0 while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
  0         0  
1256 0 0       0 defined $nread or die "error reading $filename: $!";
1257 0 0       0 close IN or die "error closing $filename: $!";
1258 0         0 undef $inbuf;
1259              
1260 0         0 $text = "file start $filename\n" . $text;
1261             # add an extra \n in case file did not end in one.
1262 0         0 $text .= "\nfile end $filename\n";
1263              
1264 0         0 $self->{conf}->{main} = $self;
1265 0         0 $self->{conf}->parse_scores_only ($text);
1266 0         0 $self->{conf}->finish_parsing(1);
1267              
1268 0         0 delete $self->{conf}->{main}; # to allow future GC'ing
1269             }
1270              
1271             ###########################################################################
1272              
1273             =item $f->load_scoreonly_sql ($username)
1274              
1275             Read configuration parameters from SQL database and parse scores from it. This
1276             will only take effect if the perl C<DBI> module is installed, and the
1277             configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
1278             C<user_scores_sql_password> are set correctly.
1279              
1280             The username in C<$username> will also be used for the C<username> attribute of
1281             the Mail::SpamAssassin object.
1282              
1283             =cut
1284              
1285             sub load_scoreonly_sql {
1286 0     0 1 0 my ($self, $username) = @_;
1287              
1288 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1289 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1290 0         0 $self->{username} = $username;
1291 0 0       0 unless ($src->load($username)) {
1292 0         0 return 0;
1293             }
1294 0         0 return 1;
1295             }
1296              
1297             ###########################################################################
1298              
1299             =item $f->load_scoreonly_ldap ($username)
1300              
1301             Read configuration parameters from an LDAP server and parse scores from it.
1302             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1303             installed, and the configuration parameters C<user_scores_dsn>,
1304             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1305             correctly.
1306              
1307             The username in C<$username> will also be used for the C<username> attribute of
1308             the Mail::SpamAssassin object.
1309              
1310             =cut
1311              
1312             sub load_scoreonly_ldap {
1313 0     0 1 0 my ($self, $username) = @_;
1314              
1315 0         0 dbg("config: load_scoreonly_ldap($username)");
1316 0         0 my $timer = $self->time_method("load_scoreonly_ldap");
1317 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1318 0         0 $self->{username} = $username;
1319 0         0 $src->load($username);
1320             }
1321              
1322             ###########################################################################
1323              
1324             =item $f->set_persistent_address_list_factory ($factoryobj)
1325              
1326             Set the persistent address list factory, used to create objects for the
1327             automatic whitelist algorithm's persistent-storage back-end. See
1328             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1329             must implement, and the API the objects they produce must implement.
1330              
1331             =cut
1332              
1333             sub set_persistent_address_list_factory {
1334 4     4 1 16 my ($self, $fac) = @_;
1335 4         14 $self->{pers_addr_list_factory} = $fac;
1336             }
1337              
1338             ###########################################################################
1339              
1340             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1341              
1342             Compile all patterns, load all configuration files, and load all
1343             possibly-required Perl modules.
1344              
1345             Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1346             plan to fork() or start a new perl interpreter thread to process a message,
1347             this is suboptimal, as each process/thread will have to perform these actions.
1348              
1349             Call this function in the master thread or process to perform the actions
1350             straight away, so that the sub-processes will not have to.
1351              
1352             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1353             configuration without reading the per-user configuration file and it will
1354             assume that you will call C<read_scoreonly_config> at a later point.
1355              
1356             If C<$keep_userstate> is true, compile_now() will revert any configuration
1357             options which have a default with I<__userstate__> in it post-init(),
1358             and then re-change the option before returning. This lets you change
1359             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1360             files there as necessary without disturbing the actual files as changed
1361             by a configuration option. By default, this is disabled.
1362              
1363             =cut
1364              
1365             sub compile_now {
1366 1     1 1 2081 my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1367              
1368 1         53 my $timer = $self->time_method("compile_now");
1369              
1370             # Backup default values which deal with userstate.
1371             # This is done so we can create any new files in, presumably, a temp dir.
1372             # see bug 2762 for more details.
1373 1         2 my %backup;
1374 1 50 33     5 if (defined $deal_with_userstate && $deal_with_userstate) {
1375 0         0 while(my($k,$v) = each %{$self->{conf}}) {
  0         0  
1376 0 0 0     0 $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
      0        
1377             }
1378             }
1379              
1380 1         20 $self->init($use_user_prefs);
1381              
1382             # if init() didn't change the value from default, forget about it.
1383             # if the value is different, remember the new version, and reset the default.
1384 1         9 while(my($k,$v) = each %backup) {
1385 0 0       0 if ($self->{conf}->{$k} eq $v) {
1386 0         0 delete $backup{$k};
1387             }
1388             else {
1389 0         0 my $backup = $backup{$k};
1390 0         0 $backup{$k} = $self->{conf}->{$k};
1391 0         0 $self->{conf}->{$k} = $backup;
1392             }
1393             }
1394              
1395 1         3 dbg("ignore: test message to precompile patterns and load modules");
1396              
1397             # tell plugins we are about to send a message for compiling purposes
1398 1         12 $self->call_plugins("compile_now_start",
1399             { use_user_prefs => $use_user_prefs,
1400             keep_userstate => $deal_with_userstate});
1401              
1402             # note: this may incur network access. Good. We want to make sure
1403             # as much as possible is preloaded!
1404 1         29 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1405             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1406             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1407              
1408 1         10 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1409 1         25 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1410             { disable_auto_learning => 1 } );
1411              
1412             # We want to turn off the bayes rules for this test msg
1413 1         4 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1414 1         3 $self->{conf}->{use_bayes_rules} = 0;
1415 1         6 $status->check();
1416 1         5 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1417 1         7 $status->finish();
1418 1         11 $mail->finish();
1419 1         7 $self->finish_learner();
1420              
1421 1         7 $self->{conf}->free_uncompiled_rule_source();
1422              
1423             # load SQL modules now as well
1424 1         5 my $dsn = $self->{conf}->{user_scores_dsn};
1425 1 50       5 if ($dsn ne '') {
1426 0 0       0 if ($dsn =~ /^ldap:/i) {
1427 0         0 Mail::SpamAssassin::Conf::LDAP::load_modules();
1428             } else {
1429 0         0 Mail::SpamAssassin::Conf::SQL::load_modules();
1430             }
1431             }
1432              
1433             # make sure things are ready for scanning
1434 1 50       3 $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1435 1         7 $self->call_plugins("compile_now_finish",
1436             { use_user_prefs => $use_user_prefs,
1437             keep_userstate => $deal_with_userstate});
1438              
1439             # Reset any non-default values to the post-init() version.
1440 1         9 while(my($k,$v) = each %backup) {
1441 0         0 $self->{conf}->{$k} = $v;
1442             }
1443              
1444             # clear sed_path_cache
1445 1         10 delete $self->{conf}->{sed_path_cache};
1446              
1447 1         8 1;
1448             }
1449              
1450             ###########################################################################
1451              
1452             =item $f->debug_diagnostics ()
1453              
1454             Output some diagnostic information, useful for debugging SpamAssassin
1455             problems.
1456              
1457             =cut
1458              
1459             sub debug_diagnostics {
1460 0     0 1 0 my ($self) = @_;
1461              
1462             # load this class lazily, to avoid overhead when this method isn't
1463             # called.
1464 0         0 eval {
1465 0         0 require Mail::SpamAssassin::Util::DependencyInfo;
1466 0         0 dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1467             };
1468             }
1469              
1470             ###########################################################################
1471              
1472             =item $failed = $f->lint_rules ()
1473              
1474             Syntax-check the current set of rules. Returns the number of
1475             syntax errors discovered, or 0 if the configuration is valid.
1476              
1477             =cut
1478              
1479             sub lint_rules {
1480 0     0 1 0 my ($self) = @_;
1481              
1482 0         0 dbg("ignore: using a test message to lint rules");
1483 0         0 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1484             "Subject: \n",
1485             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1486             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1487              
1488 0         0 $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1489 0         0 $self->{syntax_errors} = 0;
1490              
1491 0         0 my $olddcp = $self->{dont_copy_prefs};
1492 0         0 $self->{dont_copy_prefs} = 1;
1493              
1494 0         0 $self->init(1);
1495 0         0 $self->{syntax_errors} += $self->{conf}->{errors};
1496              
1497 0         0 $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1498              
1499             # bug 5048: override settings to ensure a faster lint
1500 0         0 $self->{'conf'}->{'use_auto_whitelist'} = 0;
1501 0         0 $self->{'conf'}->{'bayes_auto_learn'} = 0;
1502              
1503 0         0 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1504 0         0 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1505             { disable_auto_learning => 1 } );
1506 0         0 $status->check();
1507              
1508 0         0 $self->{syntax_errors} += $status->{rule_errors};
1509 0         0 $status->finish();
1510 0         0 $mail->finish();
1511 0 0       0 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1512 0         0 return ($self->{syntax_errors});
1513             }
1514              
1515             ###########################################################################
1516              
1517             =item $f->finish()
1518              
1519             Destroy this object, so that it will be garbage-collected once it
1520             goes out of scope. The object will no longer be usable after this
1521             method is called.
1522              
1523             =cut
1524              
1525             sub finish {
1526 20     20 1 18442 my ($self) = @_;
1527              
1528 20         92 $self->timer_start("finish");
1529             $self->call_plugins("finish_tests", { conf => $self->{conf},
1530 20         114 main => $self });
1531              
1532 20         112 $self->{conf}->finish(); delete $self->{conf};
  20         62  
1533 20         113 $self->{plugins}->finish(); delete $self->{plugins};
  20         65  
1534              
1535 20 100       63 if ($self->{bayes_scanner}) {
1536 7         44 $self->{bayes_scanner}->finish();
1537 7         23 delete $self->{bayes_scanner};
1538             }
1539              
1540 20 50       191 $self->{resolver}->finish() if $self->{resolver};
1541              
1542 20         101 $self->timer_end("finish");
1543 20         36 %{$self} = ();
  20         501  
1544             }
1545              
1546             ###########################################################################
1547             # timers: bug 5356
1548              
1549             sub timer_enable {
1550 0     0 0 0 my ($self) = @_;
1551 0 0       0 dbg("config: timing enabled") if !$self->{timer_enabled};
1552 0         0 $self->{timer_enabled} = 1;
1553             }
1554              
1555             sub timer_disable {
1556 0     0 0 0 my ($self) = @_;
1557 0 0       0 dbg("config: timing disabled") if $self->{timer_enabled};
1558 0         0 $self->{timer_enabled} = 0;
1559             }
1560              
1561             # discard all timers, start afresh
1562             sub timer_reset {
1563 3     3 0 18716 my ($self) = @_;
1564 3         11 delete $self->{timers};
1565 3         9 delete $self->{timers_order};
1566             }
1567              
1568             sub timer_start {
1569 20     20 0 81 my ($self, $name) = @_;
1570              
1571 20 50       76 return unless $self->{timer_enabled};
1572             # dbg("timing: '$name' starting");
1573              
1574 0 0       0 if (!exists $self->{timers}->{$name}) {
1575 0         0 push @{$self->{timers_order}}, $name;
  0         0  
1576             }
1577            
1578 0         0 $self->{timers}->{$name}->{start} = Time::HiRes::time();
1579             # note that this will reset any existing, unstopped timer of that name;
1580             # that's ok
1581             }
1582              
1583             sub timer_end {
1584 20     20 0 78 my ($self, $name) = @_;
1585 20 50       70 return unless $self->{timer_enabled};
1586              
1587 0         0 my $t = $self->{timers}->{$name};
1588 0         0 $t->{end} = time;
1589              
1590 0 0       0 if (!$t->{start}) {
1591 0         0 warn "timer_end('$name') with no timer_start";
1592 0         0 return;
1593             }
1594              
1595             # add to any existing elapsed time for this event, since
1596             # we may call the same timer name multiple times -- this is ok,
1597             # as long as they are not nested
1598 0         0 my $dt = $t->{end} - $t->{start};
1599 0 0       0 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1600 0 0       0 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
  0         0  
1601 0         0 else { $t->{elapsed} = $dt }
1602             }
1603              
1604             sub time_method {
1605 447     447 0 1174 my ($self, $name) = @_;
1606 447 50       1541 return unless $self->{timer_enabled};
1607 0         0 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1608             }
1609              
1610             sub timer_report {
1611 0     0 0 0 my ($self) = @_;
1612              
1613 0         0 my $earliest;
1614             my $latest;
1615              
1616 0         0 while (my($name,$h) = each(%{$self->{timers}})) {
  0         0  
1617             # dbg("timing: %s - %s", $name, join(", ",
1618             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1619 0         0 my $start = $h->{start};
1620 0 0 0     0 if (defined $start && (!defined $earliest || $earliest > $start)) {
      0        
1621 0         0 $earliest = $start;
1622             }
1623 0         0 my $end = $h->{end};
1624 0 0 0     0 if (defined $end && (!defined $latest || $latest < $end)) {
      0        
1625 0         0 $latest = $end;
1626             }
1627 0 0 0     0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1628             }
1629 0 0 0     0 my $total =
1630             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1631 0         0 my @str;
1632 0         0 foreach my $name (@{$self->{timers_order}}) {
  0         0  
1633 0   0     0 my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1634 0 0 0     0 my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1635 0 0       0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
    0          
1636 0         0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1637             }
1638              
1639 0         0 return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1640             }
1641              
1642             ###########################################################################
1643             # non-public methods.
1644              
1645             sub init {
1646 96     96 0 905 my ($self, $use_user_pref) = @_;
1647              
1648             # Allow init() to be called multiple times, but only run once.
1649 96 100       368 if (defined $self->{_initted}) {
1650             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1651 61 50       256 if ($self->{_initted} != $$) {
1652 0         0 $self->{_initted} = $$;
1653 0 0       0 srand if !$self->{skip_prng_reseeding};
1654 0         0 $self->{resolver}->reinit_post_fork();
1655             }
1656 61         134 return;
1657             }
1658              
1659 35         263 my $timer = $self->time_method("init");
1660             # Note that this PID has run init()
1661 35         252 $self->{_initted} = $$;
1662              
1663             #fix spamd reading root prefs file
1664 35 100       123 if (!defined $use_user_pref) {
1665 7         18 $use_user_pref = 1;
1666             }
1667              
1668 35 100       134 if (!defined $self->{config_text}) {
1669 23         195 $self->{config_text} = '';
1670              
1671             # read a file called "init.pre" in site rules dir *before* all others;
1672             # even the system config.
1673 23         81 my $siterules = $self->{site_rules_filename};
1674 23   33     79 $siterules ||= $self->first_existing_path (@site_rules_path);
1675              
1676 23         66 my $sysrules = $self->{rules_filename};
1677 23   33     70 $sysrules ||= $self->first_existing_path (@default_rules_path);
1678              
1679 23 50       71 if ($siterules) {
1680 23         170 $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1681             }
1682             else {
1683 0         0 warn "config: could not find site rules directory\n";
1684             }
1685              
1686 23 50       136 if ($sysrules) {
1687 23         115 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1688             }
1689             else {
1690 0         0 warn "config: could not find sys rules directory\n";
1691             }
1692              
1693 23 50       161 if ($sysrules) {
1694 23         127 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1695 23 50 66     204 if ($self->{require_rules} && $cftext !~ /\S/) {
1696 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1697             }
1698 23         2480 $self->{config_text} .= $cftext;
1699             }
1700              
1701 23 50       217 if (!$self->{languages_filename}) {
1702 23         188 $self->{languages_filename} = $self->find_rule_support_file("languages");
1703             }
1704              
1705 23 50 33     310 if ($siterules && !$self->{ignore_site_cf_files}) {
1706 23         109 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1707             }
1708              
1709 23 100       146 if ( $use_user_pref != 0 ) {
1710 7         32 $self->get_and_create_userstate_dir();
1711              
1712             # user prefs file
1713 7         21 my $fname = $self->{userprefs_filename};
1714 7   33     27 $fname ||= $self->first_existing_path (@default_userprefs_path);
1715              
1716 7 100       39 if (!$self->{dont_copy_prefs}) {
1717             # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1718             # just use the last entry in the array as the default path.
1719 6   33     19 $fname ||= $self->sed_path($default_userprefs_path[-1]);
1720              
1721 6 50       102 my $stat_errn = stat($fname) ? 0 : 0+$!;
1722 6 50 33     85 if ($stat_errn == 0 && -f _) {
    0          
    0          
    0          
1723             # exists and is a regular file, nothing to do
1724             } elsif ($stat_errn == 0) {
1725 0         0 warn "config: default user preference file $fname is not a regular file\n";
1726             } elsif ($stat_errn != ENOENT) {
1727 0         0 warn "config: default user preference file $fname not accessible: $!\n";
1728             } elsif (!$self->create_default_prefs($fname)) {
1729 0         0 warn "config: failed to create default user preference file $fname\n";
1730             }
1731             }
1732              
1733 7         33 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1734             }
1735             }
1736              
1737 35 50       132 if ($self->{pre_config_text}) {
1738 0         0 $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1739             }
1740 35 100       140 if ($self->{post_config_text}) {
1741 2         30 $self->{config_text} .= $self->{post_config_text};
1742             }
1743              
1744 35 50       390 if ($self->{config_text} !~ /\S/) {
1745 0         0 my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1746 0 0       0 if ($self->{require_rules}) {
1747 0         0 die $m;
1748             } else {
1749 0         0 warn $m;
1750             }
1751             }
1752              
1753             # Go and parse the config!
1754 35         128 $self->{conf}->{main} = $self;
1755 35 50       153 if (would_log('dbg', 'config_text') > 1) {
1756 0         0 dbg('config_text: '.$self->{config_text});
1757             }
1758 35         292 $self->{conf}->parse_rules ($self->{config_text});
1759 35         419 $self->{conf}->finish_parsing(0);
1760 35         87 delete $self->{conf}->{main}; # to allow future GC'ing
1761              
1762 35         124 undef $self->{config_text}; # ensure it's actually freed
1763 35         85 delete $self->{config_text};
1764              
1765 35 50 66     178 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1766 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1767             }
1768              
1769             # Initialize the Bayes subsystem
1770 35 100       177 if ($self->{conf}->{use_bayes}) {
1771 22         6090 require Mail::SpamAssassin::Bayes;
1772 22         338 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1773             }
1774 35         132 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1775              
1776             # Figure out/set our initial scoreset
1777 35         79 my $set = 0;
1778 35 100       121 $set |= 1 unless $self->{local_tests_only};
1779 35 50 100     240 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      66        
1780 35         226 $self->{conf}->set_score_set ($set);
1781              
1782 35 50       132 if ($self->{only_these_rules}) {
1783 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1784             }
1785              
1786 35 50       115 if (!$self->{timer_enabled}) {
1787             # enable timing implicitly if _TIMING_ is used in add_header templates
1788 35         64 foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
  35         105  
1789 35         153 @{$self->{conf}->{'headers_spam'}}) {
1790 180 50       673 if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
  0         0  
  0         0  
1791             }
1792             }
1793              
1794             # should be called only after configuration has been parsed
1795 35         590 $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1796              
1797             # TODO -- open DNS cache etc. if necessary
1798             }
1799              
1800             sub read_cf {
1801 53     53 0 177 my ($self, $allpaths, $desc) = @_;
1802 53         219 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1803             }
1804              
1805             sub read_pre {
1806 46     46 0 229 my ($self, $allpaths, $desc) = @_;
1807 46         335 return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1808             }
1809              
1810             sub _read_cf_pre {
1811 99     99   287 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1812              
1813 99 50       233 return '' unless defined ($allpaths);
1814              
1815 99         228 my $txt = '';
1816 99         474 foreach my $path (split("\000", $allpaths))
1817             {
1818 99         509 dbg("config: using \"$path\" for $desc");
1819              
1820 99 100       1711 my $stat_errn = stat($path) ? 0 : 0+$!;
1821 99 100 33     536 if ($stat_errn == ENOENT) {
    50 33        
    100          
    50          
1822             # no file or directory
1823             } elsif ($stat_errn != 0) {
1824 0         0 dbg("config: file or directory $path not accessible: $!");
1825             } elsif (-d _) {
1826 92         323 foreach my $file ($self->$filelistmethod($path)) {
1827 580         1711 $txt .= read_cf_file($file);
1828             }
1829             } elsif (-f _ && -s _ && -r _) {
1830 0         0 $txt .= read_cf_file($path);
1831             }
1832             }
1833              
1834 99         3257 return $txt;
1835             }
1836              
1837              
1838             sub read_cf_file {
1839 580     580 0 1250 my($path) = @_;
1840 580         875 my $txt = '';
1841              
1842 580         1231 local *IN;
1843 580 50       18268 if (open (IN, "<".$path)) {
1844              
1845 580         1505 my($inbuf,$nread); $txt = '';
  580         987  
1846 580         10287 while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
  603         4947  
1847 580 50       1557 defined $nread or die "error reading $path: $!";
1848 580 50       4740 close IN or die "error closing $path: $!";
1849 580         1342 undef $inbuf;
1850              
1851 580         4105 $txt = "file start $path\n" . $txt;
1852             # add an extra \n in case file did not end in one.
1853 580         1856 $txt .= "\nfile end $path\n";
1854              
1855 580         2441 dbg("config: read file $path");
1856             }
1857             else {
1858 0         0 warn "config: cannot open \"$path\": $!\n";
1859             }
1860              
1861 580         6175 return $txt;
1862             }
1863              
1864             sub get_and_create_userstate_dir {
1865 7     7 0 25 my ($self, $dir) = @_;
1866              
1867 7         14 my $fname;
1868              
1869             # If vpopmail is enabled then set fname to virtual homedir
1870             # precedence: dir, userstate_dir, derive from user_dir, system default
1871 7 50       36 if (defined $dir) {
    100          
    50          
1872 0         0 $fname = File::Spec->catdir ($dir, ".spamassassin");
1873             }
1874             elsif (defined $self->{userstate_dir}) {
1875 6         17 $fname = $self->{userstate_dir};
1876             }
1877             elsif (defined $self->{user_dir}) {
1878 0         0 $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1879             }
1880              
1881 7   66     43 $fname ||= $self->first_existing_path (@default_userstate_dir);
1882              
1883             # bug 4932: use the last default_userstate_dir entry if none of the others
1884             # already exist
1885 7   33     22 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1886              
1887 7 100       25 if (!$self->{dont_copy_prefs}) {
1888 6         43 dbg("config: using \"$fname\" for user state dir");
1889             }
1890              
1891             # if this is not a dir, not readable, or we are unable to create the dir,
1892             # this is not (yet) a serious error; in fact, it's not even worth
1893             # a warning at all times, so use dbg(). see bug 6268
1894 7 50       114 my $stat_errn = stat($fname) ? 0 : 0+$!;
1895 7 50 33     125 if ($stat_errn == 0 && !-d _) {
    50 33        
1896 0         0 dbg("config: $fname exists but is not a directory");
1897             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1898 0         0 dbg("config: error accessing $fname: $!");
1899             } else { # does not exist, create it
1900             eval {
1901 7         517 mkpath($fname, 0, 0700); 1;
  7         38  
1902 7 50       22 } or do {
1903 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
1904 0         0 dbg("config: mkdir $fname failed: $eval_stat");
1905             };
1906             }
1907              
1908 7         22 $fname;
1909             }
1910              
1911             =item $fullpath = $f->find_rule_support_file ($filename)
1912              
1913             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1914             in the system-wide rules directory, and return its full path if
1915             it exists, or undef if it doesn't exist.
1916              
1917             (This API was added in SpamAssassin 3.1.1.)
1918              
1919             =cut
1920              
1921             sub find_rule_support_file {
1922 23     23 1 106 my ($self, $filename) = @_;
1923              
1924             return $self->first_existing_path(
1925 23         117 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
  115         282  
  115         706  
  115         514  
1926             }
1927              
1928             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1929              
1930             Copy default preferences file into home directory for later use and
1931             modification, if it does not already exist and C<dont_copy_prefs> is
1932             not set.
1933              
1934             =cut
1935              
1936             sub create_default_prefs {
1937             # $userdir will only exist if vpopmail config is enabled thru spamd
1938             # Its value will be the virtual user's maildir
1939             #
1940 0     0 1 0 my ($self, $fname, $user, $userdir) = @_;
1941              
1942 0 0       0 if ($self->{dont_copy_prefs}) {
1943 0         0 return(0);
1944             }
1945              
1946             # if ($userdir && $userdir ne $self->{user_dir}) {
1947             # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1948             # }
1949              
1950 0 0       0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1951 0 0       0 if ($stat_errn == 0) {
    0          
1952             # fine, it already exists
1953             } elsif ($stat_errn != ENOENT) {
1954 0         0 dbg("config: cannot access user preferences file $fname: $!");
1955             } else {
1956             # Pass on the value of $userdir for virtual users in vpopmail
1957             # otherwise it is empty and the user's normal homedir is used
1958 0         0 $self->get_and_create_userstate_dir($userdir);
1959              
1960             # copy in the default one for later editing
1961 0         0 my $defprefs =
1962             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1963              
1964 0         0 local(*IN,*OUT);
1965 0         0 $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1966 0 0       0 if (!defined $defprefs) {
    0          
    0          
1967 0         0 warn "config: can not determine default prefs path\n";
1968             } elsif (!open(IN, "<$defprefs")) {
1969 0         0 warn "config: cannot open $defprefs: $!\n";
1970             } elsif (!open(OUT, ">$fname")) {
1971 0         0 warn "config: cannot create user preferences file $fname: $!\n";
1972             } else {
1973             # former code skipped lines beginning with '#* ', the following copy
1974             # procedure no longer does so, as it avoids reading line-by-line
1975 0         0 my($inbuf,$nread);
1976 0         0 while ( $nread=read(IN,$inbuf,16384) ) {
1977 0 0       0 print OUT $inbuf or die "cannot write to $fname: $!";
1978             }
1979 0 0       0 defined $nread or die "error reading $defprefs: $!";
1980 0         0 undef $inbuf;
1981 0 0       0 close OUT or die "error closing $fname: $!";
1982 0 0       0 close IN or die "error closing $defprefs: $!";
1983              
1984 0 0 0     0 if (($< == 0) && ($> == 0) && defined($user)) { # chown it
      0        
1985 0         0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1986 0 0       0 unless (chown($uid, $gid, $fname)) {
1987 0         0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1988             }
1989             }
1990 0         0 warn "config: created user preferences file: $fname\n";
1991 0         0 return(1);
1992             }
1993             }
1994              
1995 0         0 return(0);
1996             }
1997              
1998             ###########################################################################
1999              
2000             sub expand_name {
2001 1     1 0 5 my ($self, $name) = @_;
2002 1   50     22 my $home = $self->{user_dir} || $ENV{HOME} || '';
2003              
2004 1 50       6 if (am_running_on_windows()) {
2005 0   0     0 my $userprofile = $ENV{USERPROFILE} || '';
2006              
2007 0 0 0     0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
2008 0 0       0 return $userprofile if ($userprofile =~ m/^\\\\/o);
2009              
2010 0 0 0     0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
2011 0 0       0 return $home if ($home =~ m/^\\\\/o);
2012              
2013 0         0 return '';
2014             } else {
2015 1 50 33     28 return $home if ($home && $home =~ /\//o);
2016 0 0       0 return (getpwnam($name))[7] if ($name ne '');
2017 0         0 return (getpwuid($>))[7];
2018             }
2019             }
2020              
2021             sub sed_path {
2022 117     117 0 404 my ($self, $path) = @_;
2023 117 50       423 return if !defined $path;
2024              
2025 117 100       656 if (exists($self->{conf}->{sed_path_cache}->{$path})) {
2026 50         482 return $self->{conf}->{sed_path_cache}->{$path};
2027             }
2028              
2029 67         148 my $orig_path = $path;
2030              
2031 67 0       260 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
  0         0  
2032 67 50       265 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
  24         184  
2033 67 50       274 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
  24         154  
2034 67 0 33     170 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
  1         7  
2035 67 0       132 $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
  0         0  
2036 67         135 $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
  0         0  
2037 67         227 $path =~ s/__version__/${VERSION}/gs;
2038 67         132 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
  1         4  
2039              
2040 67         304 $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
2041 67         303 $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
2042 67         322 return $path;
2043             }
2044              
2045             sub get_perl_major_version {
2046 0     0 0 0 my $self = shift;
2047 0 0       0 $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2048 0         0 return $1;
2049             }
2050              
2051             sub first_existing_path {
2052 25     25 0 81 my $self = shift;
2053 25         48 my $path;
2054 25         167 foreach my $p (@_) {
2055 52         280 $path = $self->sed_path ($p);
2056 52 50       151 if (defined $path) {
2057 52 100       1360 my($errn) = stat($path) ? 0 : 0+$!;
2058 52 100       317 if ($errn == ENOENT) { } # does not exist
    50          
2059 0         0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2060 24         167 else { return $path }
2061             }
2062             }
2063 1         4 return;
2064             }
2065              
2066             ###########################################################################
2067              
2068             sub get_cf_files_in_dir {
2069 46     46 0 176 my ($self, $dir) = @_;
2070 46         134 return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2071             }
2072              
2073             sub get_pre_files_in_dir {
2074 46     46 0 127 my ($self, $dir) = @_;
2075 46         226 return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2076             }
2077              
2078             sub _get_cf_pre_files_in_dir {
2079 92     92   386 my ($self, $dir, $type) = @_;
2080              
2081 92 50       293 if ($self->{config_tree_recurse}) {
2082 0         0 my @cfs;
2083              
2084             # use "eval" to avoid loading File::Find unless this is specified
2085             eval ' use File::Find qw();
2086             File::Find::find(
2087             { untaint => 1,
2088             follow => 1,
2089             wanted =>
2090             sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
2091             }, $dir); 1;
2092 0 0       0 ' or do {
2093 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
2094 0         0 die "_get_cf_pre_files_in_dir error: $eval_stat";
2095             };
2096 0         0 @cfs = sort { $a cmp $b } @cfs;
  0         0  
2097 0         0 return @cfs;
2098             }
2099             else {
2100 92 50       2412 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2101 92 100 100     3484 my @cfs = grep { $_ ne '.' && $_ ne '..' &&
  1352   100     21429  
2102             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2103 92         1385 closedir SA_CF_DIR;
2104              
2105 92         722 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
  580         2762  
  1184         2031  
2106             }
2107             }
2108              
2109             ###########################################################################
2110              
2111             sub have_plugin {
2112 176     176 0 346 my ($self, $subname) = @_;
2113              
2114             # We could potentially get called after a finish(), so just return.
2115 176 50       386 return unless $self->{plugins};
2116              
2117 176         434 return $self->{plugins}->have_callback ($subname);
2118             }
2119              
2120             sub call_plugins {
2121 2114     2114 0 23572 my $self = shift;
2122 2114         3598 my $subname = shift;
2123              
2124             # We could potentially get called after a finish(), so just return.
2125 2114 50       5572 return unless $self->{plugins};
2126              
2127             # Use some calls ourself too
2128 2114 100       4136 if ($subname eq 'finish_parsing_end') {
2129             # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
2130             # read. Plugins can also now use {valid_tlds_re} to one time compile
2131             # regexes in finish_parsing_end.
2132 35         633 $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
2133             }
2134              
2135             # safety net in case some plugin changes global settings, Bug 6218
2136 2114         7179 local $/ = $/; # prevent underlying modules from changing the global $/
2137              
2138 2114         6946 return $self->{plugins}->callback($subname, @_);
2139             }
2140              
2141             ###########################################################################
2142              
2143             sub find_all_addrs_in_mail {
2144 0     0 0 0 my ($self, $mail_obj) = @_;
2145              
2146 0         0 $self->init(1);
2147              
2148 0         0 my @addrlist;
2149 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2150             Errors-To Mail-Followup-To))
2151             {
2152 0         0 my @hdrs = $mail_obj->get_header($header);
2153 0 0       0 if ($#hdrs < 0) { next; }
  0         0  
2154 0         0 push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2155             }
2156              
2157             # find addrs in body, too
2158 0         0 foreach my $line (@{$mail_obj->get_body()}) {
  0         0  
2159 0         0 push (@addrlist, $self->find_all_addrs_in_line($line));
2160             }
2161              
2162 0         0 my @ret;
2163             my %done;
2164              
2165 0         0 foreach (@addrlist) {
2166 0         0 s/^mailto://; # from Outlook "forwarded" message
2167 0 0       0 next if defined ($done{$_}); $done{$_} = 1;
  0         0  
2168 0         0 push (@ret, $_);
2169             }
2170              
2171 0         0 @ret;
2172             }
2173              
2174             sub find_all_addrs_in_line {
2175 88     88 0 312 my ($self, $line) = @_;
2176              
2177             # a more permissive pattern based on "dot-atom" as per RFC2822
2178 88         279 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2179 88         151 my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2180              
2181 88         164 my @addrs;
2182             my %seen;
2183 88         1064 while ($line =~ s/(?:mailto:)?\s*
2184             ($ID_PATTERN \@
2185             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2186             {
2187 137         402 my $addr = $1;
2188 137         282 $addr =~ s/^mailto://;
2189 137 100       459 next if (defined ($seen{$addr})); $seen{$addr} = 1;
  127         365  
2190 127         1612 push (@addrs, $addr);
2191             }
2192              
2193 88         537 return @addrs;
2194             }
2195              
2196             ###########################################################################
2197              
2198             # sa_die -- used to die with a useful exit code.
2199              
2200             sub sa_die {
2201 0     0 0 0 my $exitcode = shift;
2202 0         0 warn @_;
2203 0         0 exit $exitcode;
2204             }
2205              
2206             ###########################################################################
2207              
2208             =item $f->copy_config ( [ $source ], [ $dest ] )
2209              
2210             Used for daemons to keep a persistent Mail::SpamAssassin object's
2211             configuration correct if switching between users. Pass an associative
2212             array reference as either $source or $dest, and set the other to 'undef'
2213             so that the object will use its current configuration. i.e.:
2214              
2215             # create object w/ configuration
2216             my $spamtest = Mail::SpamAssassin->new( ... );
2217              
2218             # backup configuration to %conf_backup
2219             my %conf_backup;
2220             $spamtest->copy_config(undef, \%conf_backup) ||
2221             die "config: error returned from copy_config!\n";
2222              
2223             ... do stuff, perhaps modify the config, etc ...
2224              
2225             # reset the configuration back to the original
2226             $spamtest->copy_config(\%conf_backup, undef) ||
2227             die "config: error returned from copy_config!\n";
2228              
2229             Note that the contents of the associative arrays should be considered
2230             opaque by calling code.
2231              
2232             =cut
2233              
2234             sub copy_config {
2235 0     0 1 0 my ($self, $source, $dest) = @_;
2236              
2237             # At least one of either source or dest needs to be a hash reference ...
2238 0 0 0     0 unless ((defined $source && ref($source) eq 'HASH') ||
      0        
      0        
2239             (defined $dest && ref($dest) eq 'HASH'))
2240             {
2241 0         0 return 0;
2242             }
2243              
2244 0         0 my $timer = $self->time_method("copy_config");
2245              
2246             # let the Conf object itself do all the heavy lifting. It's better
2247             # than having this class know all about that class' internals...
2248 0 0       0 if (defined $source) {
2249 0         0 dbg ("config: copying current conf from backup");
2250             }
2251             else {
2252 0         0 dbg ("config: copying current conf to backup");
2253             }
2254 0         0 return $self->{conf}->clone($source, $dest);
2255             }
2256              
2257             ###########################################################################
2258              
2259             =item @plugins = $f->get_loaded_plugins_list ( )
2260              
2261             Return the list of plugins currently loaded by this SpamAssassin object's
2262             configuration; each entry in the list is an object of type
2263             C<Mail::SpamAssassin::Plugin>.
2264              
2265             (This API was added in SpamAssassin 3.2.0.)
2266              
2267             =cut
2268              
2269             sub get_loaded_plugins_list {
2270 0     0 1 0 my ($self) = @_;
2271 0         0 return $self->{plugins}->get_loaded_plugins_list();
2272             }
2273              
2274             1;
2275             __END__
2276              
2277             ###########################################################################
2278              
2279             =back
2280              
2281             =head1 PREREQUISITES
2282              
2283             C<HTML::Parser>
2284             C<Sys::Syslog>
2285              
2286             =head1 MORE DOCUMENTATION
2287              
2288             See also E<lt>https://spamassassin.apache.org/E<gt> and
2289             E<lt>https://wiki.apache.org/spamassassin/E<gt> for more information.
2290              
2291             =head1 SEE ALSO
2292              
2293             Mail::SpamAssassin::Conf(3)
2294             Mail::SpamAssassin::PerMsgStatus(3)
2295             spamassassin(1)
2296             sa-update(1)
2297              
2298             =head1 BUGS
2299              
2300             See E<lt>https://issues.apache.org/SpamAssassin/E<gt>
2301              
2302             =head1 AUTHORS
2303              
2304             The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>
2305              
2306             =head1 COPYRIGHT
2307              
2308             SpamAssassin is distributed under the Apache License, Version 2.0, as
2309             described in the file C<LICENSE> included with the distribution.
2310              
2311             =head1 AVAILABILITY
2312              
2313             The latest version of this library is likely to be available from CPAN
2314             as well as:
2315              
2316             E<lt>https://spamassassin.apache.org/E<gt>
2317              
2318             =cut