File Coverage

blib/lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 384 756 50.7
branch 106 338 31.3
condition 43 176 24.4
subroutine 56 83 67.4
pod 31 56 55.3
total 620 1409 44.0


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             package Mail::SpamAssassin;
62 30     30   5111869 use strict;
  30         208  
  30         1008  
63 30     30   198 use warnings;
  30         63  
  30         1224  
64             # use bytes;
65 30     30   214 use re 'taint';
  30         66  
  30         1869  
66              
67             require 5.006_001;
68              
69 29     29   8610 use Mail::SpamAssassin::Logger;
  29         66  
  29         2134  
70 29     29   9229 use Mail::SpamAssassin::Constants;
  29         68  
  29         1501  
71 29     29   8753 use Mail::SpamAssassin::Conf;
  29         110  
  29         1323  
72 29     29   10904 use Mail::SpamAssassin::Conf::SQL;
  29         80  
  29         889  
73 29     29   9080 use Mail::SpamAssassin::Conf::LDAP;
  29         76  
  29         933  
74 29     29   8810 use Mail::SpamAssassin::PerMsgStatus;
  29         124  
  29         1243  
75 29     29   10401 use Mail::SpamAssassin::Message;
  29         556  
  29         3238  
76 29     29   14917 use Mail::SpamAssassin::PluginHandler;
  29         88  
  29         972  
77 29     29   9307 use Mail::SpamAssassin::DnsResolver;
  29         105  
  29         1063  
78 29     29   10902 use Mail::SpamAssassin::RegistryBoundaries;
  29         85  
  29         1110  
79 29     29   216 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  29         67  
  29         8135  
80 29     29   11380 use Mail::SpamAssassin::Util::ScopedTimer;
  29         83  
  29         1016  
81              
82 29     29   196 use Errno qw(ENOENT EACCES);
  29         65  
  29         1813  
83 29     29   204 use File::Basename;
  29         61  
  29         3495  
84 29     29   208 use File::Path;
  29         54  
  29         2783  
85 29     29   198 use File::Spec 0.8;
  29         954  
  29         844  
86 29     29   183 use Time::HiRes qw(time);
  29         82  
  29         233  
87 29     29   3349 use Cwd;
  29         86  
  29         2641  
88 29     29   199 use Config;
  29         59  
  29         274926  
89              
90             our $VERSION = "3.004002"; # update after release (same format as perl $])
91             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
92             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
93              
94              
95             # Used during the prerelease/release-candidate part of the official release
96             # process. If you hacked up your SA, you should add a version_tag to your .cf
97             # files; this variable should not be modified.
98             our @EXTRA_VERSION = qw();
99              
100             our @ISA = qw();
101              
102             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
103             our $SUB_VERSION = 'svnunknown';
104             if ('$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $' =~ ':') {
105             # Subversion keyword "$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $" has been successfully expanded.
106             # Doesn't happen with automated launchpad builds:
107             # https://bugs.launchpad.net/launchpad/+bug/780916
108             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $ updated by SVN'))[1];
109             }
110              
111              
112             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
113             if ('$LastChangedRevision: 1840870 $' =~ ':') {
114             # Subversion keyword "$LastChangedRevision: 1840870 $" has been successfully expanded.
115             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1840870 $ updated by SVN}[1]));
116             } else {
117             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
118             }
119             }
120              
121             sub Version {
122 25     25 0 446 $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
123 25         913 return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
124             }
125              
126             our $HOME_URL = "http://spamassassin.apache.org/";
127              
128             # note that the CWD takes priority. This is required in case a user
129             # is testing a new version of SpamAssassin on a machine with an older
130             # version installed. Unless you can come up with a fix for this that
131             # allows "make test" to work, don't change this.
132              
133             our @default_rules_path = (
134             '__local_state_dir__/__version__',
135             '__def_rules_dir__',
136             '__prefix__/share/spamassassin',
137             '/usr/local/share/spamassassin',
138             '/usr/share/spamassassin',
139             );
140              
141             # first 3 are BSDish, latter 2 Linuxish
142             our @site_rules_path = (
143             '__local_rules_dir__',
144             '__prefix__/etc/mail/spamassassin',
145             '__prefix__/etc/spamassassin',
146             '/usr/local/etc/spamassassin',
147             '/usr/pkg/etc/spamassassin',
148             '/usr/etc/spamassassin',
149             '/etc/mail/spamassassin',
150             '/etc/spamassassin',
151             );
152              
153             our @default_prefs_path = (
154             '__local_rules_dir__/user_prefs.template',
155             '__prefix__/etc/mail/spamassassin/user_prefs.template',
156             '__prefix__/share/spamassassin/user_prefs.template',
157             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
158             '__def_rules_dir__/user_prefs.template',
159             '/etc/spamassassin/user_prefs.template',
160             '/etc/mail/spamassassin/user_prefs.template',
161             '/usr/local/share/spamassassin/user_prefs.template',
162             '/usr/share/spamassassin/user_prefs.template',
163             );
164              
165             our @default_userprefs_path = (
166             '~/.spamassassin/user_prefs',
167             );
168              
169             our @default_userstate_dir = (
170             '~/.spamassassin',
171             );
172              
173             ###########################################################################
174              
175             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
176              
177             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
178             reference to the constructor which may contain the following attribute-
179             value pairs.
180              
181             =over 4
182              
183             =item debug
184              
185             This is the debug options used to determine logging level. It exists to
186             allow sections of debug messages (called "facilities") to be enabled or
187             disabled. If this is a string, it is treated as a comma-delimited list
188             of the debug facilities. If it's a hash reference, then the keys are
189             treated as the list of debug facilities and if it's a array reference,
190             then the elements are treated as the list of debug facilities.
191              
192             There are also two special cases: (1) if the special case of "info" is
193             passed as a debug facility, then all informational messages are enabled;
194             (2) if the special case of "all" is passed as a debug facility, then all
195             debugging facilities are enabled.
196              
197             =item rules_filename
198              
199             The filename/directory to load spam-identifying rules from. (optional)
200              
201             =item site_rules_filename
202              
203             The filename/directory to load site-specific spam-identifying rules from.
204             (optional)
205              
206             =item userprefs_filename
207              
208             The filename to load preferences from. (optional)
209              
210             =item userstate_dir
211              
212             The directory user state is stored in. (optional)
213              
214             =item config_tree_recurse
215              
216             Set to C<1> to recurse through directories when reading configuration
217             files, instead of just reading a single level. (optional, default 0)
218              
219             =item config_text
220              
221             The text of all rules and preferences. If you prefer not to load the rules
222             from files, read them in yourself and set this instead. As a result, this will
223             override the settings for C<rules_filename>, C<site_rules_filename>,
224             and C<userprefs_filename>.
225              
226             =item pre_config_text
227              
228             Similar to C<config_text>, this text is placed before config_text to allow an
229             override of config files.
230              
231             =item post_config_text
232              
233             Similar to C<config_text>, this text is placed after config_text to allow an
234             override of config files.
235              
236             =item force_ipv4
237              
238             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
239             to use IPv6. Use if the existing tests for IPv6 availability produce
240             incorrect results or crashes.
241              
242             =item force_ipv6
243              
244             For symmetry with force_ipv4: if set to 1, DNS or other network tests
245             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
246             this setting and use whatever protocol family they are comfortable with.
247              
248             =item require_rules
249              
250             If set to 1, init() will die if no valid rules could be loaded. This is the
251             default behaviour when called by C<spamassassin> or C<spamd>.
252              
253             =item languages_filename
254              
255             If you want to be able to use the language-guessing rule
256             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
257             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
258             need to set this. It should be the path to the B<languages> file normally
259             found in the SpamAssassin B<rules> directory.
260              
261             =item local_tests_only
262              
263             If set to 1, no tests that require internet access will be performed. (default:
264             0)
265              
266             =item need_tags
267              
268             The option provides a way to avoid more expensive processing when it is known
269             in advance that some information will not be needed by a caller.
270              
271             A value of the option can either be a string (a comma-delimited list of tag
272             names), or a reference to a list of individual tag names. A caller may provide
273             the list in advance, specifying his intention to later collect the information
274             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
275             insensitive), it shows that a caller will not be interested in such tag,
276             although there is no guarantee it would save any resources, nor that a tag
277             value will be empty. Currently no built-in tags start with 'NO'. A later
278             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
279             to TIMING,NOASN.
280              
281             For backward compatibility, all tags available as of version 3.2.4 will
282             be available by default (unless disabled by NOtag), even if not requested
283             through need_tags option. Future versions may provide new tags conditionally
284             available.
285              
286             Currently the only tag that needs to be explicitly requested is 'TIMING'.
287             Not requesting it can save a millisecond or two - it mostly serves to
288             illustrate the usage of need_tags.
289              
290             Example:
291             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
292             or:
293             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
294              
295             =item ignore_site_cf_files
296              
297             If set to 1, any rule files found in the C<site_rules_filename> directory will
298             be ignored. *.pre files (used for loading plugins) found in the
299             C<site_rules_filename> directory will still be used. (default: 0)
300              
301             =item dont_copy_prefs
302              
303             If set to 1, the user preferences file will not be created if it doesn't
304             already exist. (default: 0)
305              
306             =item save_pattern_hits
307              
308             If set to 1, the patterns hit can be retrieved from the
309             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
310              
311             =item home_dir_for_helpers
312              
313             If set, the B<HOME> environment variable will be set to this value
314             when using test applications that require their configuration data,
315             such as Razor, Pyzor and DCC.
316              
317             =item username
318              
319             If set, the C<username> attribute will use this as the current user's name.
320             Otherwise, the default is taken from the runtime environment (ie. this process'
321             effective UID under UNIX).
322              
323             =item skip_prng_reseeding
324              
325             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
326             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
327             Perl function should be called during initialization of each child process,
328             soon after forking.
329              
330             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
331             library.
332              
333             This setting requires the caller to decide when to call srand().
334             This choice may be desired to preserve the entropy of a PRNG. The default
335             value of skip_prng_reseeding is false to maintain backward compatibility.
336              
337             This option should only be set by a caller if it calls srand() upon spawning
338             child processes. Unless you are certain you need it, leave this setting as
339             false.
340              
341             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
342             which allows spamd to call srand() right after forking a child process.
343              
344             =back
345              
346             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
347             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
348             configuration files in the usual installed locations using the below variable
349             definitions which can be passed in.
350              
351             =over 4
352              
353             =item PREFIX
354              
355             Used as the root for certain directory paths such as:
356              
357             '__prefix__/etc/mail/spamassassin'
358             '__prefix__/etc/spamassassin'
359              
360             Defaults to "/usr/local".
361              
362             =item DEF_RULES_DIR
363              
364             Location where the default rules are installed. Defaults to
365             "/usr/local/share/spamassassin".
366              
367             =item LOCAL_RULES_DIR
368              
369             Location where the local site rules are installed. Defaults to
370             "/etc/mail/spamassassin".
371              
372             =item LOCAL_STATE_DIR
373              
374             Location of the local state directory, mainly used for installing updates via
375             C<sa-update> and compiling rulesets to native code. Defaults to
376             "/var/lib/spamassassin".
377              
378             =back
379              
380              
381             =cut
382              
383             # undocumented ctor settings:
384             #
385             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
386              
387             sub new {
388 25     25 1 12758046 my $class = shift;
389 25   33     684 $class = ref($class) || $class;
390              
391 25         175 my $self = shift;
392 25 50       283 if (!defined $self) { $self = { }; }
  0         0  
393 25         164 bless ($self, $class);
394              
395             # basic backward compatibility; debug used to be a boolean.
396             # translate that into 'all', which is what it meant before 3.1.0.
397 25 50 33     462 if ($self->{debug} && $self->{debug} eq '1') {
398 0         0 $self->{debug} = 'all';
399             }
400              
401             # enable or disable debugging
402 25         616 Mail::SpamAssassin::Logger::add_facilities($self->{debug});
403              
404             # first debugging information possibly printed should be the version
405 25         362 dbg("generic: SpamAssassin version " . Version());
406              
407             # if the libs are installed in an alternate location, and the caller
408             # didn't set PREFIX, we should have an estimated guess ready, values
409             # substituted at 'make' time
410 25   50     551 $self->{PREFIX} ||= '/usr/local';
411 25   50     408 $self->{DEF_RULES_DIR} ||= '/usr/local/share/spamassassin';
412 25   50     352 $self->{LOCAL_RULES_DIR} ||= '/etc/mail/spamassassin';
413 25   50     402 $self->{LOCAL_STATE_DIR} ||= '/var/lib/spamassassin';
414 25         106 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
  100         635  
415             qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
416              
417 25         147 $self->{needed_tags} = {};
418 25         109 { my $ntags = $self->{need_tags};
  25         110  
419 25 50       351 if (defined $ntags) {
420 0 0       0 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
421 0 0       0 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
422             }
423             }
424             }
425 25 50 33     339 if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
426 0         0 $self->timer_enable();
427             }
428              
429 25   33     1120 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
430 25         607 $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
431 25         666 $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
432              
433 25   50     354 $self->{save_pattern_hits} ||= 0;
434              
435             # Make sure that we clean $PATH if we're tainted
436 25         904 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
437              
438 25 50       98 if (!defined $self->{username}) {
439 25         336 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
440             }
441              
442 25         327 $self->create_locker();
443              
444 25         127 $self;
445             }
446              
447             sub create_locker {
448 25     25 0 91 my ($self) = @_;
449              
450 25         47 my $class;
451 25         78 my $m = $self->{conf}->{lock_method};
452              
453             # let people choose what they want -- even if they may not work on their
454             # OS. (they could be using cygwin!)
455 25 50       174 if ($m eq 'win32') { $class = 'Win32'; }
  0 50       0  
    50          
456 0         0 elsif ($m eq 'flock') { $class = 'Flock'; }
457 0         0 elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
458             else {
459             # OS-specific defaults
460 25 50       162 if (am_running_on_windows()) {
461 0         0 $class = 'Win32';
462             } else {
463 25         162 $class = 'UnixNFSSafe';
464             }
465             }
466              
467             # this could probably be made a little faster; for now I'm going
468             # for slow but safe, by keeping in quotes
469             eval '
470             use Mail::SpamAssassin::Locker::'.$class.';
471             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
472             1;
473 25 50   15   2823 ' or do {
  15     4   8969  
  15     3   44  
  15         562  
  4         78  
  4         23  
  4         270  
  3         88  
  3         28  
  3         227  
474 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
475 0         0 die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
476             };
477              
478 25 50       275 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
479             }
480              
481             ###########################################################################
482              
483             =item parse($message, $parse_now [, $suppl_attrib])
484              
485             Parse will return a Mail::SpamAssassin::Message object with just the
486             headers parsed. When calling this function, there are two optional
487             parameters that can be passed in: $message is either undef (which
488             will use STDIN), a scalar - a string containing an entire message,
489             a reference to such string, an array reference of the message with
490             one line per array element, or either a file glob or an IO::File object
491             which holds the entire contents of the message; and $parse_now, which
492             specifies whether or not to create a MIME tree at parse time or later
493             as necessary.
494              
495             The I<$parse_now> option, by default, is set to false (0). This
496             allows SpamAssassin to not have to generate the tree of internal
497             data nodes if the information is not going to be used. This is
498             handy, for instance, when running C<spamassassin -d>, which only
499             needs the pristine header and body which is always parsed and stored
500             by this function.
501              
502             The optional last argument I<$suppl_attrib> provides a way for a caller
503             to pass additional information about a message to SpamAssassin. It is
504             either undef, or a ref to a hash where each key/value pair provides some
505             supplementary attribute of the message, typically information that cannot
506             be deduced from the message itself, or is hard to do so reliably, or would
507             represent unnecessary work for SpamAssassin to obtain it. The argument will
508             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
509             made available to the rest of the code as well as to plugins. The exact list
510             of attributes will evolve through time, any unknown attribute should be
511             ignored. Possible examples are: SMTP envelope information, a flag indicating
512             that a message as supplied by a caller was truncated due to size limit, an
513             already verified list of DKIM signature objects, or perhaps a list of rule
514             hits predetermined by a caller, which makes another possible way for a
515             caller to provide meta information (instead of having to insert made-up
516             header fields in order to pass information), or maybe just plain rule hits.
517              
518             For more information, please see the C<Mail::SpamAssassin::Message>
519             and C<Mail::SpamAssassin::Message::Node> POD.
520              
521             =cut
522              
523             sub parse {
524 31     31 1 16104 my($self, $message, $parsenow, $suppl_attrib) = @_;
525              
526 31         139 my $start_time = time;
527 31         178 $self->init(1);
528 31         125 my $timer = $self->time_method("parse");
529              
530 31         68 my $master_deadline;
531             # passed in at a function call
532 31 50 66     117 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
533 1         3 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
534             }
535             # found in a config file - overrides passed-in number if lower
536 31 50       122 if ($self->{conf}->{time_limit}) { # defined and nonzero
537 31         118 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
538 31 50 33     132 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
539 31         61 $master_deadline = $time_limit_deadline;
540             }
541             }
542 31 50       88 if (defined $master_deadline) {
543 31         144 dbg("config: time limit %.1f s", $master_deadline - $start_time);
544             }
545              
546             my $msg = Mail::SpamAssassin::Message->new({
547             message=>$message, parsenow=>$parsenow,
548             normalize=>$self->{conf}->{normalize_charset},
549 31         864 master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
550              
551             # bug 5069: The goal here is to get rendering plugins to do things
552             # like OCR, convert doc and pdf to text, etc, though it could be anything
553             # that wants to process the message after it's been parsed.
554 31         209 $self->call_plugins("post_message_parse", { message => $msg });
555              
556 31         135 return $msg;
557             }
558              
559              
560             ###########################################################################
561              
562             =item $status = $f->check ($mail)
563              
564             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
565             to determine if it is spam or not.
566              
567             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
568             used to test or manipulate the mail message.
569              
570             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
571             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
572             object is a "factory". However, if you do this, be sure to call the
573             C<finish()> method on the status objects when you're done with them.
574              
575             =cut
576              
577             sub check {
578 12     12 1 796 my ($self, $mail_obj) = @_;
579              
580 12         46 $self->init(1);
581 12         96 my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
582 12         59 $pms->check();
583 12 50       47 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
584 12         87 $pms;
585             }
586              
587             =item $status = $f->check_message_text ($mailtext)
588              
589             Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
590             is spam or not.
591              
592             Otherwise identical to C<check()> above.
593              
594             =cut
595              
596             sub check_message_text {
597 0     0 1 0 my ($self, $mailtext) = @_;
598 0         0 my $msg = $self->parse($mailtext, 1);
599 0         0 my $result = $self->check($msg);
600              
601             # Kill off the metadata ...
602             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
603             # the message. So killing it here will cause things like
604             # rewrite_message() to fail. <grrr>
605             #
606 0         0 $msg->finish_metadata();
607              
608 0         0 return $result;
609             }
610              
611             ###########################################################################
612              
613             =item $status = $f->learn ($mail, $id, $isspam, $forget)
614              
615             Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
616              
617             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
618             be learnt as non-spam.
619              
620             If C<$forget> is set, the attributes of the mail will be removed from
621             both the non-spam and spam learning databases.
622              
623             C<$id> is an optional message-identification string, used internally
624             to tag the message. If it is C<undef>, the Message-Id of the message
625             will be used. It should be unique to that message.
626              
627             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
628             manipulate the learning process for each mail.
629              
630             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
631             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
632             object is a "factory". However, if you do this, be sure to call the
633             C<finish()> method on the learner objects when you're done with them.
634              
635             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
636             must be called before using this method.
637              
638             =cut
639              
640             sub learn {
641 2     2 1 7 my ($self, $mail_obj, $id, $isspam, $forget) = @_;
642 2         3 local ($_);
643              
644 2         595 require Mail::SpamAssassin::PerMsgLearner;
645 2         9 $self->init(1);
646 2         10 my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
647              
648 2 50       7 if ($forget) {
    50          
649 0         0 dbg("learn: forgetting message");
650 0         0 $msg->forget($id);
651             } elsif ($isspam) {
652 0         0 dbg("learn: learning spam");
653 0         0 $msg->learn_spam($id);
654             } else {
655 2         7 dbg("learn: learning ham");
656 2         7 $msg->learn_ham($id);
657             }
658              
659 2         7 $msg;
660             }
661              
662             ###########################################################################
663              
664             =item $f->init_learner ( [ { opt => val, ... } ] )
665              
666             Initialise learning. You may pass the following attribute-value pairs to this
667             method.
668              
669             =over 4
670              
671             =item caller_will_untie
672              
673             Whether or not the code calling this method will take care of untie'ing
674             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
675              
676             =item force_expire
677              
678             Should an expiration run be forced to occur immediately? (optional, default 0).
679              
680             =item learn_to_journal
681              
682             Should learning data be written to the journal, instead of directly to the
683             databases? (optional, default 0).
684              
685             =item wait_for_lock
686              
687             Whether or not to wait a long time for locks to complete (optional, default 0).
688              
689             =item opportunistic_expire_check_only
690              
691             During the opportunistic journal sync and expire check, don't actually do the
692             expire but report back whether or not it should occur (optional, default 0).
693              
694             =item no_relearn
695              
696             If doing a learn operation, and the message has already been learned as
697             the opposite type, don't re-learn the message.
698              
699             =back
700              
701             =cut
702              
703             sub init_learner {
704 4     4 1 7 my $self = shift;
705 4         5 my $opts = shift;
706 4         14 dbg("learn: initializing learner");
707              
708             # Make sure we're already initialized ...
709 4         17 $self->init(1);
710              
711 4         27 my %kv = (
712             'force_expire' => 'learn_force_expire',
713             'learn_to_journal' => 'learn_to_journal',
714             'caller_will_untie' => 'learn_caller_will_untie',
715             'wait_for_lock' => 'learn_wait_for_lock',
716             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
717             'no_relearn' => 'learn_no_relearn',
718             );
719              
720 4         5 my %ret;
721              
722             # Set any other options that need setting ...
723 4         14 while( my($k,$v) = each %kv ) {
724 24         33 $ret{$k} = $self->{$v};
725 24 100       43 if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
  14         33  
726             }
727              
728 4         13 return \%ret;
729             }
730              
731             ###########################################################################
732              
733             =item $f->rebuild_learner_caches ({ opt => val })
734              
735             Rebuild any cache databases; should be called after the learning process.
736             Options include: C<verbose>, which will output diagnostics to C<stdout>
737             if set to 1.
738              
739             =cut
740              
741             sub rebuild_learner_caches {
742 0     0 1 0 my $self = shift;
743 0         0 my $opts = shift;
744 0 0       0 $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
745 0         0 1;
746             }
747              
748             =item $f->finish_learner ()
749              
750             Finish learning.
751              
752             =cut
753              
754             sub finish_learner {
755 3     3 1 6 my $self = shift;
756 3 100       16 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
757 3         8 1;
758             }
759              
760             =item $f->dump_bayes_db()
761              
762             Dump the contents of the Bayes DB
763              
764             =cut
765              
766             sub dump_bayes_db {
767 0     0 1 0 my($self,@opts) = @_;
768 0 0       0 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
769             }
770              
771             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
772              
773             Signals that the current user has changed (possibly using C<setuid>), meaning
774             that SpamAssassin should close any per-user databases it has open, and re-open
775             using ones appropriate for the new user.
776              
777             Note that this should be called I<after> reading any per-user configuration, as
778             that data may override some paths opened in this method. You may pass the
779             following attribute-value pairs:
780              
781             =over 4
782              
783             =item username
784              
785             The username of the user. This will be used for the C<username> attribute.
786              
787             =item user_dir
788              
789             A directory to use as a 'home directory' for the current user's data,
790             overriding the system default. This directory must be readable and writable by
791             the process. Note that the resulting C<userstate_dir> will be the
792             C<.spamassassin> subdirectory of this dir.
793              
794             =item userstate_dir
795              
796             A directory to use as a directory for the current user's data, overriding the
797             system default. This directory must be readable and writable by the process.
798             The default is C<user_dir/.spamassassin>.
799              
800             =back
801              
802             =cut
803              
804             sub signal_user_changed {
805 0     0 1 0 my $self = shift;
806 0         0 my $opts = shift;
807 0         0 my $set = 0;
808              
809 0         0 my $timer = $self->time_method("signal_user_changed");
810 0         0 dbg("info: user has changed");
811              
812 0 0 0     0 if (defined $opts && $opts->{username}) {
813 0         0 $self->{username} = $opts->{username};
814             } else {
815 0         0 undef $self->{username};
816             }
817 0 0 0     0 if (defined $opts && $opts->{user_dir}) {
818 0         0 $self->{user_dir} = $opts->{user_dir};
819             } else {
820 0         0 undef $self->{user_dir};
821             }
822 0 0 0     0 if (defined $opts && $opts->{userstate_dir}) {
823 0         0 $self->{userstate_dir} = $opts->{userstate_dir};
824             } else {
825 0         0 undef $self->{userstate_dir};
826             }
827              
828             # reopen bayes dbs for this user
829 0 0       0 $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
830 0 0       0 if ($self->{conf}->{use_bayes}) {
831 0         0 require Mail::SpamAssassin::Bayes;
832 0         0 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
833             } else {
834 0 0       0 delete $self->{bayes_scanner} if $self->{bayes_scanner};
835             }
836              
837             # this user may have a different learn_to_journal setting, so reset appropriately
838 0         0 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
839              
840 0 0       0 $set |= 1 unless $self->{local_tests_only};
841 0 0 0     0 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      0        
842              
843 0         0 $self->{conf}->set_score_set ($set);
844              
845             $self->call_plugins("signal_user_changed", {
846             username => $self->{username},
847             userstate_dir => $self->{userstate_dir},
848             user_dir => $self->{user_dir},
849 0         0 });
850              
851 0         0 1;
852             }
853              
854             ###########################################################################
855              
856             =item $f->report_as_spam ($mail, $options)
857              
858             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
859             human-verified spam. This will submit the mail message to live,
860             collaborative, spam-blocker databases, allowing other users to block this
861             message.
862              
863             It will also submit the mail to SpamAssassin's Bayesian learner.
864              
865             Options is an optional reference to a hash of options. Currently these
866             can be:
867              
868             =over 4
869              
870             =item dont_report_to_dcc
871              
872             Inhibits reporting of the spam to DCC.
873              
874             =item dont_report_to_pyzor
875              
876             Inhibits reporting of the spam to Pyzor.
877              
878             =item dont_report_to_razor
879              
880             Inhibits reporting of the spam to Razor.
881              
882             =item dont_report_to_spamcop
883              
884             Inhibits reporting of the spam to SpamCop.
885              
886             =back
887              
888             =cut
889              
890             sub report_as_spam {
891 0     0 1 0 my ($self, $mail, $options) = @_;
892 0         0 local ($_);
893              
894 0         0 $self->init(1);
895 0         0 my $timer = $self->time_method("report_as_spam");
896              
897             # learn as spam if enabled
898 0 0       0 if ( $self->{conf}->{bayes_learn_during_report} ) {
899 0         0 $self->learn ($mail, undef, 1, 0);
900             }
901              
902 0         0 require Mail::SpamAssassin::Reporter;
903 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
904 0         0 $mail->report();
905             }
906              
907             ###########################################################################
908              
909             =item $f->revoke_as_spam ($mail, $options)
910              
911             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
912             human-verified ham (non-spam). This will revoke the mail message from live,
913             collaborative, spam-blocker databases, allowing other users to block this
914             message.
915              
916             It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
917              
918             Options is an optional reference to a hash of options. Currently these
919             can be:
920              
921             =over 4
922              
923             =item dont_report_to_razor
924              
925             Inhibits revoking of the spam to Razor.
926              
927              
928             =back
929              
930             =cut
931              
932             sub revoke_as_spam {
933 0     0 1 0 my ($self, $mail, $options) = @_;
934 0         0 local ($_);
935              
936 0         0 $self->init(1);
937 0         0 my $timer = $self->time_method("revoke_as_spam");
938              
939             # learn as nonspam
940 0         0 $self->learn ($mail, undef, 0, 0);
941              
942 0         0 require Mail::SpamAssassin::Reporter;
943 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
944 0         0 $mail->revoke ();
945             }
946              
947             ###########################################################################
948              
949             =item $f->add_address_to_whitelist ($addr, $cli_p)
950              
951             Given a string containing an email address, add it to the automatic
952             whitelist database.
953              
954             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
955              
956             =cut
957              
958             sub add_address_to_whitelist {
959 0     0 1 0 my ($self, $addr, $cli_p) = @_;
960              
961 0         0 $self->call_plugins("whitelist_address", { address => $addr,
962             cli_p => $cli_p });
963             }
964              
965             ###########################################################################
966              
967             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
968              
969             Given a mail message, find as many addresses in the usual headers (To, Cc, From
970             etc.), and the message body, and add them to the automatic whitelist database.
971              
972             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
973              
974             =cut
975              
976             sub add_all_addresses_to_whitelist {
977 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
978              
979 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
980 0         0 $self->call_plugins("whitelist_address", { address => $addr,
981             cli_p => $cli_p });
982             }
983             }
984              
985             ###########################################################################
986              
987             =item $f->remove_address_from_whitelist ($addr, $cli_p)
988              
989             Given a string containing an email address, remove it from the automatic
990             whitelist database.
991              
992             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
993              
994             =cut
995              
996             sub remove_address_from_whitelist {
997 0     0 1 0 my ($self, $addr, $cli_p) = @_;
998              
999 0         0 $self->call_plugins("remove_address", { address => $addr,
1000             cli_p => $cli_p });
1001             }
1002              
1003             ###########################################################################
1004              
1005             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
1006              
1007             Given a mail message, find as many addresses in the usual headers (To, Cc, From
1008             etc.), and the message body, and remove them from the automatic whitelist
1009             database.
1010              
1011             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1012              
1013             =cut
1014              
1015             sub remove_all_addresses_from_whitelist {
1016 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1017              
1018 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1019 0         0 $self->call_plugins("remove_address", { address => $addr,
1020             cli_p => $cli_p });
1021             }
1022             }
1023              
1024             ###########################################################################
1025              
1026             =item $f->add_address_to_blacklist ($addr, $cli_p)
1027              
1028             Given a string containing an email address, add it to the automatic
1029             whitelist database with a high score, effectively blacklisting them.
1030              
1031             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1032              
1033             =cut
1034              
1035             sub add_address_to_blacklist {
1036 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1037 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1038             cli_p => $cli_p });
1039             }
1040              
1041             ###########################################################################
1042              
1043             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1044              
1045             Given a mail message, find addresses in the From headers and add them to the
1046             automatic whitelist database with a high score, effectively blacklisting them.
1047              
1048             Note that To and Cc addresses are not used.
1049              
1050             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1051              
1052             =cut
1053              
1054             sub add_all_addresses_to_blacklist {
1055 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1056              
1057 0         0 $self->init(1);
1058              
1059 0         0 my @addrlist;
1060 0         0 my @hdrs = $mail_obj->get_header('From');
1061 0 0       0 if ($#hdrs >= 0) {
1062 0         0 push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1063             }
1064              
1065 0         0 foreach my $addr (@addrlist) {
1066 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1067             cli_p => $cli_p });
1068             }
1069              
1070             }
1071              
1072             ###########################################################################
1073              
1074             =item $text = $f->remove_spamassassin_markup ($mail)
1075              
1076             Returns the text of the message, with any SpamAssassin-added text (such
1077             as the report, or X-Spam-Status headers) stripped.
1078              
1079             Note that the B<$mail> object is not modified.
1080              
1081             Warning: if the input message in B<$mail> contains a mixture of CR-LF
1082             (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1083             to use one or the other consistently throughout.
1084              
1085             =cut
1086              
1087             sub remove_spamassassin_markup {
1088 0     0 1 0 my ($self, $mail_obj) = @_;
1089 0         0 local ($_);
1090              
1091 0         0 my $timer = $self->time_method("remove_spamassassin_markup");
1092 0   0     0 my $mbox = $mail_obj->get_mbox_separator() || '';
1093              
1094 0         0 dbg("markup: removing markup");
1095              
1096             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1097             # if we have definitive proof it's not an encapsulated message.
1098 0   0     0 my $ct = $mail_obj->get_header("Content-Type") || '';
1099 0 0       0 if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1100              
1101             # Ok, this is a possible encapsulated message, search for the
1102             # appropriate mime part and deal with it if necessary.
1103 0         0 my $boundary = "\Q$1\E";
1104 0         0 my @msg = split(/^/,$mail_obj->get_pristine_body());
1105              
1106 0         0 my $flag = 0;
1107 0         0 $ct = '';
1108 0         0 my $cd = '';
1109 0         0 for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1110             # only look at mime part headers
1111 0 0 0     0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1112              
1113 0 0       0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1114              
1115             # Ok, we found the encapsulated piece ...
1116 0 0 0     0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
      0        
1117             ($ct eq "message/rfc822" &&
1118             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1119             {
1120 0         0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1121              
1122             # find the end and chop it off
1123 0         0 for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1124 0 0       0 if ( $msg[$i] =~ /^--$boundary/ ) {
1125 0 0       0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1126             # will remove the blank line (not sure it'll always be
1127             # there) and everything below. don't worry, the splice
1128             # guarantees the for will stop ...
1129             }
1130             }
1131              
1132             # Ok, we're done. Return the rewritten message.
1133 0         0 return join('', $mbox, @msg);
1134             }
1135              
1136 0         0 $flag = 0;
1137 0         0 $ct = '';
1138 0         0 $cd = '';
1139 0         0 next;
1140             }
1141              
1142             # Ok, we're in the mime header ... Capture the appropriate headers...
1143 0         0 $flag = 1;
1144 0 0       0 if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
    0          
1145 0         0 $ct = $1;
1146             }
1147             elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1148 0         0 $cd = $1;
1149             }
1150             }
1151             }
1152              
1153             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1154             # So treat it like a "report_safe 0" message.
1155 0         0 my $hdrs = $mail_obj->get_pristine_header();
1156 0         0 my $body = $mail_obj->get_pristine_body();
1157              
1158             # remove DOS line endings
1159 0         0 $hdrs =~ s/\r//gs;
1160              
1161             # unfold SA added headers, but not X-Spam-Prev headers ...
1162 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1163 0         0 1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1164 0         0 $hdrs =~ s/^\n//;
1165              
1166             ###########################################################################
1167             # Backward Compatibilty, pre 3.0.x.
1168              
1169             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1170 0         0 $self->init(1);
1171 0         0 foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
  0         0  
1172             # let the 3.0 decoding do it...
1173 0 0       0 next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1174              
1175 0         0 dbg("markup: removing markup in $header");
1176 0 0       0 if ($header eq 'Subject') {
1177 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1178 0         0 $tag = quotemeta($tag);
1179 0         0 $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1180 0         0 $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1181 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1182 0         0 1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1183             } else {
1184 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1185             }
1186             }
1187              
1188             # Now deal with report cleansing from 2.4x and previous.
1189             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1190             # followed by another "SPAM: ----.+" line, followed by a blank line.
1191 0         0 1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1192             ###########################################################################
1193              
1194             # 3.0 version -- support for previously-nonexistent Subject hdr.
1195             # ensure the Subject line didn't *really* contain "(nonexistent)" in
1196             # the original message!
1197 0 0 0     0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1198             && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1199             {
1200 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1201 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1202             }
1203              
1204             # 3.0 version -- revert from X-Spam-Prev to original ...
1205 0         0 while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1206 0         0 my($hdr, $name) = ($1,$2);
1207              
1208             # If the rewritten version doesn't exist, we should deal with it anyway...
1209 0 0       0 unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1210 0         0 $hdrs =~ s/\n\n/\n$hdr\n/;
1211             }
1212             }
1213              
1214             # remove any other X-Spam headers we added, will be unfolded
1215 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1216 0         0 1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1217 0         0 $hdrs =~ s/^\n//;
1218              
1219             # re-add DOS line endings
1220 0 0       0 if ($mail_obj->{line_ending} ne "\n") {
1221 0         0 $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1222             }
1223              
1224             # Put the whole thing back together ...
1225 0         0 return join ('', $mbox, $hdrs, $body);
1226             }
1227              
1228             ###########################################################################
1229              
1230             =item $f->read_scoreonly_config ($filename)
1231              
1232             Read a configuration file and parse user preferences from it.
1233              
1234             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1235             In other words, they include scoring options, scores, whitelists and
1236             blacklists, and so on, but do not include rule definitions, privileged
1237             settings, etc. unless C<allow_user_rules> is enabled; and they never include
1238             the administrator settings.
1239              
1240             =cut
1241              
1242             sub read_scoreonly_config {
1243 0     0 1 0 my ($self, $filename) = @_;
1244              
1245 0         0 my $timer = $self->time_method("read_scoreonly_config");
1246 0         0 local *IN;
1247 0 0       0 if (!open(IN,"<$filename")) {
1248             # the file may not exist; this should not be verbose
1249 0         0 dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1250 0         0 return;
1251             }
1252              
1253 0         0 my($inbuf,$nread,$text); $text = '';
  0         0  
1254 0         0 while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
  0         0  
1255 0 0       0 defined $nread or die "error reading $filename: $!";
1256 0 0       0 close IN or die "error closing $filename: $!";
1257 0         0 undef $inbuf;
1258              
1259 0         0 $text = "file start $filename\n" . $text;
1260             # add an extra \n in case file did not end in one.
1261 0         0 $text .= "\nfile end $filename\n";
1262              
1263 0         0 $self->{conf}->{main} = $self;
1264 0         0 $self->{conf}->parse_scores_only ($text);
1265 0         0 $self->{conf}->finish_parsing(1);
1266              
1267 0         0 delete $self->{conf}->{main}; # to allow future GC'ing
1268             }
1269              
1270             ###########################################################################
1271              
1272             =item $f->load_scoreonly_sql ($username)
1273              
1274             Read configuration paramaters from SQL database and parse scores from it. This
1275             will only take effect if the perl C<DBI> module is installed, and the
1276             configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
1277             C<user_scores_sql_password> are set correctly.
1278              
1279             The username in C<$username> will also be used for the C<username> attribute of
1280             the Mail::SpamAssassin object.
1281              
1282             =cut
1283              
1284             sub load_scoreonly_sql {
1285 0     0 1 0 my ($self, $username) = @_;
1286              
1287 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1288 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1289 0         0 $self->{username} = $username;
1290 0 0       0 unless ($src->load($username)) {
1291 0         0 return 0;
1292             }
1293 0         0 return 1;
1294             }
1295              
1296             ###########################################################################
1297              
1298             =item $f->load_scoreonly_ldap ($username)
1299              
1300             Read configuration paramaters from an LDAP server and parse scores from it.
1301             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1302             installed, and the configuration parameters C<user_scores_dsn>,
1303             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1304             correctly.
1305              
1306             The username in C<$username> will also be used for the C<username> attribute of
1307             the Mail::SpamAssassin object.
1308              
1309             =cut
1310              
1311             sub load_scoreonly_ldap {
1312 0     0 1 0 my ($self, $username) = @_;
1313              
1314 0         0 dbg("config: load_scoreonly_ldap($username)");
1315 0         0 my $timer = $self->time_method("load_scoreonly_ldap");
1316 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1317 0         0 $self->{username} = $username;
1318 0         0 $src->load($username);
1319             }
1320              
1321             ###########################################################################
1322              
1323             =item $f->set_persistent_address_list_factory ($factoryobj)
1324              
1325             Set the persistent address list factory, used to create objects for the
1326             automatic whitelist algorithm's persistent-storage back-end. See
1327             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1328             must implement, and the API the objects they produce must implement.
1329              
1330             =cut
1331              
1332             sub set_persistent_address_list_factory {
1333 3     3 1 11 my ($self, $fac) = @_;
1334 3         12 $self->{pers_addr_list_factory} = $fac;
1335             }
1336              
1337             ###########################################################################
1338              
1339             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1340              
1341             Compile all patterns, load all configuration files, and load all
1342             possibly-required Perl modules.
1343              
1344             Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1345             plan to fork() or start a new perl interpreter thread to process a message,
1346             this is suboptimal, as each process/thread will have to perform these actions.
1347              
1348             Call this function in the master thread or process to perform the actions
1349             straightaway, so that the sub-processes will not have to.
1350              
1351             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1352             configuration without reading the per-user configuration file and it will
1353             assume that you will call C<read_scoreonly_config> at a later point.
1354              
1355             If C<$keep_userstate> is true, compile_now() will revert any configuration
1356             options which have a default with I<__userstate__> in it post-init(),
1357             and then re-change the option before returning. This lets you change
1358             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1359             files there as necessary without disturbing the actual files as changed
1360             by a configuration option. By default, this is disabled.
1361              
1362             =cut
1363              
1364             sub compile_now {
1365 1     1 1 1512 my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1366              
1367 1         13 my $timer = $self->time_method("compile_now");
1368              
1369             # Backup default values which deal with userstate.
1370             # This is done so we can create any new files in, presumably, a temp dir.
1371             # see bug 2762 for more details.
1372 1         3 my %backup;
1373 1 50 33     6 if (defined $deal_with_userstate && $deal_with_userstate) {
1374 0         0 while(my($k,$v) = each %{$self->{conf}}) {
  0         0  
1375 0 0 0     0 $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
      0        
1376             }
1377             }
1378              
1379 1         20 $self->init($use_user_prefs);
1380              
1381             # if init() didn't change the value from default, forget about it.
1382             # if the value is different, remember the new version, and reset the default.
1383 1         8 while(my($k,$v) = each %backup) {
1384 0 0       0 if ($self->{conf}->{$k} eq $v) {
1385 0         0 delete $backup{$k};
1386             }
1387             else {
1388 0         0 my $backup = $backup{$k};
1389 0         0 $backup{$k} = $self->{conf}->{$k};
1390 0         0 $self->{conf}->{$k} = $backup;
1391             }
1392             }
1393              
1394 1         5 dbg("ignore: test message to precompile patterns and load modules");
1395              
1396             # tell plugins we are about to send a message for compiling purposes
1397 1         8 $self->call_plugins("compile_now_start",
1398             { use_user_prefs => $use_user_prefs,
1399             keep_userstate => $deal_with_userstate});
1400              
1401             # note: this may incur network access. Good. We want to make sure
1402             # as much as possible is preloaded!
1403 1         42 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1404             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1405             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1406              
1407 1         10 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1408 1         36 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1409             { disable_auto_learning => 1 } );
1410              
1411             # We want to turn off the bayes rules for this test msg
1412 1         5 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1413 1         2 $self->{conf}->{use_bayes_rules} = 0;
1414 1         11 $status->check();
1415 1         5 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1416 1         8 $status->finish();
1417 1         10 $mail->finish();
1418 1         13 $self->finish_learner();
1419              
1420 1         7 $self->{conf}->free_uncompiled_rule_source();
1421              
1422             # load SQL modules now as well
1423 1         4 my $dsn = $self->{conf}->{user_scores_dsn};
1424 1 50       5 if ($dsn ne '') {
1425 0 0       0 if ($dsn =~ /^ldap:/i) {
1426 0         0 Mail::SpamAssassin::Conf::LDAP::load_modules();
1427             } else {
1428 0         0 Mail::SpamAssassin::Conf::SQL::load_modules();
1429             }
1430             }
1431              
1432             # make sure things are ready for scanning
1433 1 50       4 $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1434 1         8 $self->call_plugins("compile_now_finish",
1435             { use_user_prefs => $use_user_prefs,
1436             keep_userstate => $deal_with_userstate});
1437              
1438             # Reset any non-default values to the post-init() version.
1439 1         8 while(my($k,$v) = each %backup) {
1440 0         0 $self->{conf}->{$k} = $v;
1441             }
1442              
1443             # clear sed_path_cache
1444 1         10 delete $self->{conf}->{sed_path_cache};
1445              
1446 1         8 1;
1447             }
1448              
1449             ###########################################################################
1450              
1451             =item $f->debug_diagnostics ()
1452              
1453             Output some diagnostic information, useful for debugging SpamAssassin
1454             problems.
1455              
1456             =cut
1457              
1458             sub debug_diagnostics {
1459 0     0 1 0 my ($self) = @_;
1460              
1461             # load this class lazily, to avoid overhead when this method isn't
1462             # called.
1463 0         0 eval {
1464 0         0 require Mail::SpamAssassin::Util::DependencyInfo;
1465 0         0 dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1466             };
1467             }
1468              
1469             ###########################################################################
1470              
1471             =item $failed = $f->lint_rules ()
1472              
1473             Syntax-check the current set of rules. Returns the number of
1474             syntax errors discovered, or 0 if the configuration is valid.
1475              
1476             =cut
1477              
1478             sub lint_rules {
1479 0     0 1 0 my ($self) = @_;
1480              
1481 0         0 dbg("ignore: using a test message to lint rules");
1482 0         0 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1483             "Subject: \n",
1484             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1485             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1486              
1487 0         0 $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1488 0         0 $self->{syntax_errors} = 0;
1489              
1490 0         0 my $olddcp = $self->{dont_copy_prefs};
1491 0         0 $self->{dont_copy_prefs} = 1;
1492              
1493 0         0 $self->init(1);
1494 0         0 $self->{syntax_errors} += $self->{conf}->{errors};
1495              
1496 0         0 $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1497              
1498             # bug 5048: override settings to ensure a faster lint
1499 0         0 $self->{'conf'}->{'use_auto_whitelist'} = 0;
1500 0         0 $self->{'conf'}->{'bayes_auto_learn'} = 0;
1501              
1502 0         0 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1503 0         0 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1504             { disable_auto_learning => 1 } );
1505 0         0 $status->check();
1506              
1507 0         0 $self->{syntax_errors} += $status->{rule_errors};
1508 0         0 $status->finish();
1509 0         0 $mail->finish();
1510 0 0       0 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1511 0         0 return ($self->{syntax_errors});
1512             }
1513              
1514             ###########################################################################
1515              
1516             =item $f->finish()
1517              
1518             Destroy this object, so that it will be garbage-collected once it
1519             goes out of scope. The object will no longer be usable after this
1520             method is called.
1521              
1522             =cut
1523              
1524             sub finish {
1525 8     8 1 12958 my ($self) = @_;
1526              
1527 8         39 $self->timer_start("finish");
1528             $self->call_plugins("finish_tests", { conf => $self->{conf},
1529 8         43 main => $self });
1530              
1531 8         57 $self->{conf}->finish(); delete $self->{conf};
  8         29  
1532 8         51 $self->{plugins}->finish(); delete $self->{plugins};
  8         45  
1533              
1534 8 100       33 if ($self->{bayes_scanner}) {
1535 7         34 $self->{bayes_scanner}->finish();
1536 7         23 delete $self->{bayes_scanner};
1537             }
1538              
1539 8 50       62 $self->{resolver}->finish() if $self->{resolver};
1540              
1541 8         40 $self->timer_end("finish");
1542 8         13 %{$self} = ();
  8         173  
1543             }
1544              
1545             ###########################################################################
1546             # timers: bug 5356
1547              
1548             sub timer_enable {
1549 0     0 0 0 my ($self) = @_;
1550 0 0       0 dbg("config: timing enabled") if !$self->{timer_enabled};
1551 0         0 $self->{timer_enabled} = 1;
1552             }
1553              
1554             sub timer_disable {
1555 0     0 0 0 my ($self) = @_;
1556 0 0       0 dbg("config: timing disabled") if $self->{timer_enabled};
1557 0         0 $self->{timer_enabled} = 0;
1558             }
1559              
1560             # discard all timers, start afresh
1561             sub timer_reset {
1562 3     3 0 11187 my ($self) = @_;
1563 3         10 delete $self->{timers};
1564 3         11 delete $self->{timers_order};
1565             }
1566              
1567             sub timer_start {
1568 8     8 0 45 my ($self, $name) = @_;
1569              
1570 8 50       41 return unless $self->{timer_enabled};
1571             # dbg("timing: '$name' starting");
1572              
1573 0 0       0 if (!exists $self->{timers}->{$name}) {
1574 0         0 push @{$self->{timers_order}}, $name;
  0         0  
1575             }
1576            
1577 0         0 $self->{timers}->{$name}->{start} = Time::HiRes::time();
1578             # note that this will reset any existing, unstopped timer of that name;
1579             # that's ok
1580             }
1581              
1582             sub timer_end {
1583 8     8 0 48 my ($self, $name) = @_;
1584 8 50       42 return unless $self->{timer_enabled};
1585              
1586 0         0 my $t = $self->{timers}->{$name};
1587 0         0 $t->{end} = time;
1588              
1589 0 0       0 if (!$t->{start}) {
1590 0         0 warn "timer_end('$name') with no timer_start";
1591 0         0 return;
1592             }
1593              
1594             # add to any existing elapsed time for this event, since
1595             # we may call the same timer name multiple times -- this is ok,
1596             # as long as they are not nested
1597 0         0 my $dt = $t->{end} - $t->{start};
1598 0 0       0 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1599 0 0       0 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
  0         0  
1600 0         0 else { $t->{elapsed} = $dt }
1601             }
1602              
1603             sub time_method {
1604 354     354 0 935 my ($self, $name) = @_;
1605 354 50       1162 return unless $self->{timer_enabled};
1606 0         0 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1607             }
1608              
1609             sub timer_report {
1610 0     0 0 0 my ($self) = @_;
1611              
1612 0         0 my $earliest;
1613             my $latest;
1614              
1615 0         0 while (my($name,$h) = each(%{$self->{timers}})) {
  0         0  
1616             # dbg("timing: %s - %s", $name, join(", ",
1617             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1618 0         0 my $start = $h->{start};
1619 0 0 0     0 if (defined $start && (!defined $earliest || $earliest > $start)) {
      0        
1620 0         0 $earliest = $start;
1621             }
1622 0         0 my $end = $h->{end};
1623 0 0 0     0 if (defined $end && (!defined $latest || $latest < $end)) {
      0        
1624 0         0 $latest = $end;
1625             }
1626 0 0 0     0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1627             }
1628 0 0 0     0 my $total =
1629             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1630 0         0 my @str;
1631 0         0 foreach my $name (@{$self->{timers_order}}) {
  0         0  
1632 0   0     0 my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1633 0 0 0     0 my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1634 0 0       0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
    0          
1635 0         0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1636             }
1637              
1638 0         0 return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1639             }
1640              
1641             ###########################################################################
1642             # non-public methods.
1643              
1644             sub init {
1645 73     73 0 698 my ($self, $use_user_pref) = @_;
1646              
1647             # Allow init() to be called multiple times, but only run once.
1648 73 100       259 if (defined $self->{_initted}) {
1649             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1650 49 50       226 if ($self->{_initted} != $$) {
1651 0         0 $self->{_initted} = $$;
1652 0 0       0 srand if !$self->{skip_prng_reseeding};
1653 0         0 $self->{resolver}->reinit_post_fork();
1654             }
1655 49         108 return;
1656             }
1657              
1658 24         190 my $timer = $self->time_method("init");
1659             # Note that this PID has run init()
1660 24         187 $self->{_initted} = $$;
1661              
1662             #fix spamd reading root prefs file
1663 24 100       102 if (!defined $use_user_pref) {
1664 7         11 $use_user_pref = 1;
1665             }
1666              
1667 24 50       82 if (!defined $self->{config_text}) {
1668 24         233 $self->{config_text} = '';
1669              
1670             # read a file called "init.pre" in site rules dir *before* all others;
1671             # even the system config.
1672 24         82 my $siterules = $self->{site_rules_filename};
1673 24   33     84 $siterules ||= $self->first_existing_path (@site_rules_path);
1674              
1675 24         57 my $sysrules = $self->{rules_filename};
1676 24   33     70 $sysrules ||= $self->first_existing_path (@default_rules_path);
1677              
1678 24 50       78 if ($siterules) {
1679 24         200 $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1680             }
1681             else {
1682 0         0 warn "config: could not find site rules directory\n";
1683             }
1684              
1685 24 50       106 if ($sysrules) {
1686 24         121 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1687             }
1688             else {
1689 0         0 warn "config: could not find sys rules directory\n";
1690             }
1691              
1692 24 50       126 if ($sysrules) {
1693 24         135 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1694 24 50 33     163 if ($self->{require_rules} && $cftext !~ /\S/) {
1695 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1696             }
1697 24         2150 $self->{config_text} .= $cftext;
1698             }
1699              
1700 24 50       133 if (!$self->{languages_filename}) {
1701 24         185 $self->{languages_filename} = $self->find_rule_support_file("languages");
1702             }
1703              
1704 24 50 33     485 if ($siterules && !$self->{ignore_site_cf_files}) {
1705 24         138 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1706             }
1707              
1708 24 100       139 if ( $use_user_pref != 0 ) {
1709 7         35 $self->get_and_create_userstate_dir();
1710              
1711             # user prefs file
1712 7         19 my $fname = $self->{userprefs_filename};
1713 7   33     20 $fname ||= $self->first_existing_path (@default_userprefs_path);
1714              
1715 7 100       22 if (!$self->{dont_copy_prefs}) {
1716             # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1717             # just use the last entry in the array as the default path.
1718 6   33     15 $fname ||= $self->sed_path($default_userprefs_path[-1]);
1719              
1720 6 50       90 my $stat_errn = stat($fname) ? 0 : 0+$!;
1721 6 50 33     84 if ($stat_errn == 0 && -f _) {
    0          
    0          
    0          
1722             # exists and is a regular file, nothing to do
1723             } elsif ($stat_errn == 0) {
1724 0         0 warn "config: default user preference file $fname is not a regular file\n";
1725             } elsif ($stat_errn != ENOENT) {
1726 0         0 warn "config: default user preference file $fname not accessible: $!\n";
1727             } elsif (!$self->create_default_prefs($fname)) {
1728 0         0 warn "config: failed to create default user preference file $fname\n";
1729             }
1730             }
1731              
1732 7         40 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1733             }
1734             }
1735              
1736 24 50       105 if ($self->{pre_config_text}) {
1737 0         0 $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1738             }
1739 24 100       86 if ($self->{post_config_text}) {
1740 2         37 $self->{config_text} .= $self->{post_config_text};
1741             }
1742              
1743 24 50       256 if ($self->{config_text} !~ /\S/) {
1744 0         0 my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1745 0 0       0 if ($self->{require_rules}) {
1746 0         0 die $m;
1747             } else {
1748 0         0 warn $m;
1749             }
1750             }
1751              
1752             # Go and parse the config!
1753 24         115 $self->{conf}->{main} = $self;
1754 24 50       146 if (would_log('dbg', 'config_text') > 1) {
1755 0         0 dbg('config_text: '.$self->{config_text});
1756             }
1757 24         165 $self->{conf}->parse_rules ($self->{config_text});
1758 24         321 $self->{conf}->finish_parsing(0);
1759 24         62 delete $self->{conf}->{main}; # to allow future GC'ing
1760              
1761 24         76 undef $self->{config_text}; # ensure it's actually freed
1762 24         63 delete $self->{config_text};
1763              
1764 24 50 33     120 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1765 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1766             }
1767              
1768             # Initialize the Bayes subsystem
1769 24 100       106 if ($self->{conf}->{use_bayes}) {
1770 23         7273 require Mail::SpamAssassin::Bayes;
1771 23         351 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1772             }
1773 24         122 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1774              
1775             # Figure out/set our initial scoreset
1776 24         56 my $set = 0;
1777 24 100       96 $set |= 1 unless $self->{local_tests_only};
1778 24 50 100     256 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      66        
1779 24         187 $self->{conf}->set_score_set ($set);
1780              
1781 24 50       89 if ($self->{only_these_rules}) {
1782 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1783             }
1784              
1785 24 50       108 if (!$self->{timer_enabled}) {
1786             # enable timing implicitly if _TIMING_ is used in add_header templates
1787 24         63 foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
  24         95  
1788 24         122 @{$self->{conf}->{'headers_spam'}}) {
1789 163 50       474 if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
  0         0  
  0         0  
1790             }
1791             }
1792              
1793             # should be called only after configuration has been parsed
1794 24         533 $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1795              
1796             # TODO -- open DNS cache etc. if necessary
1797             }
1798              
1799             sub read_cf {
1800 55     55 0 213 my ($self, $allpaths, $desc) = @_;
1801 55         181 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1802             }
1803              
1804             sub read_pre {
1805 48     48 0 210 my ($self, $allpaths, $desc) = @_;
1806 48         371 return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1807             }
1808              
1809             sub _read_cf_pre {
1810 103     103   248 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1811              
1812 103 50       216 return '' unless defined ($allpaths);
1813              
1814 103         209 my $txt = '';
1815 103         434 foreach my $path (split("\000", $allpaths))
1816             {
1817 103         521 dbg("config: using \"$path\" for $desc");
1818              
1819 103 100       1547 my $stat_errn = stat($path) ? 0 : 0+$!;
1820 103 100 66     506 if ($stat_errn == ENOENT) {
    50 33        
    100          
    50          
1821             # no file or directory
1822             } elsif ($stat_errn != 0) {
1823 0         0 dbg("config: file or directory $path not accessible: $!");
1824             } elsif (-d _) {
1825 94         365 foreach my $file ($self->$filelistmethod($path)) {
1826 545         1081 $txt .= read_cf_file($file);
1827             }
1828             } elsif (-f _ && -s _ && -r _) {
1829 0         0 $txt .= read_cf_file($path);
1830             }
1831             }
1832              
1833 103         2167 return $txt;
1834             }
1835              
1836              
1837             sub read_cf_file {
1838 545     545 0 869 my($path) = @_;
1839 545         679 my $txt = '';
1840              
1841 545         1119 local *IN;
1842 545 50       14311 if (open (IN, "<".$path)) {
1843              
1844 545         1213 my($inbuf,$nread); $txt = '';
  545         803  
1845 545         8041 while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
  568         3292  
1846 545 50       975 defined $nread or die "error reading $path: $!";
1847 545 50       3654 close IN or die "error closing $path: $!";
1848 545         1037 undef $inbuf;
1849              
1850 545         2390 $txt = "file start $path\n" . $txt;
1851             # add an extra \n in case file did not end in one.
1852 545         1084 $txt .= "\nfile end $path\n";
1853              
1854 545         1686 dbg("config: read file $path");
1855             }
1856             else {
1857 0         0 warn "config: cannot open \"$path\": $!\n";
1858             }
1859              
1860 545         4276 return $txt;
1861             }
1862              
1863             sub get_and_create_userstate_dir {
1864 7     7 0 26 my ($self, $dir) = @_;
1865              
1866 7         15 my $fname;
1867              
1868             # If vpopmail is enabled then set fname to virtual homedir
1869             # precedence: dir, userstate_dir, derive from user_dir, system default
1870 7 50       36 if (defined $dir) {
    100          
    50          
1871 0         0 $fname = File::Spec->catdir ($dir, ".spamassassin");
1872             }
1873             elsif (defined $self->{userstate_dir}) {
1874 6         15 $fname = $self->{userstate_dir};
1875             }
1876             elsif (defined $self->{user_dir}) {
1877 0         0 $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1878             }
1879              
1880 7   66     36 $fname ||= $self->first_existing_path (@default_userstate_dir);
1881              
1882             # bug 4932: use the last default_userstate_dir entry if none of the others
1883             # already exist
1884 7   33     22 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1885              
1886 7 100       26 if (!$self->{dont_copy_prefs}) {
1887 6         45 dbg("config: using \"$fname\" for user state dir");
1888             }
1889              
1890             # if this is not a dir, not readable, or we are unable to create the dir,
1891             # this is not (yet) a serious error; in fact, it's not even worth
1892             # a warning at all times, so use dbg(). see bug 6268
1893 7 50       108 my $stat_errn = stat($fname) ? 0 : 0+$!;
1894 7 50 33     107 if ($stat_errn == 0 && !-d _) {
    50 33        
1895 0         0 dbg("config: $fname exists but is not a directory");
1896             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1897 0         0 dbg("config: error accessing $fname: $!");
1898             } else { # does not exist, create it
1899             eval {
1900 7         388 mkpath($fname, 0, 0700); 1;
  7         56  
1901 7 50       19 } or do {
1902 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
1903 0         0 dbg("config: mkdir $fname failed: $eval_stat");
1904             };
1905             }
1906              
1907 7         18 $fname;
1908             }
1909              
1910             =item $fullpath = $f->find_rule_support_file ($filename)
1911              
1912             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1913             in the system-wide rules directory, and return its full path if
1914             it exists, or undef if it doesn't exist.
1915              
1916             (This API was added in SpamAssassin 3.1.1.)
1917              
1918             =cut
1919              
1920             sub find_rule_support_file {
1921 24     24 1 130 my ($self, $filename) = @_;
1922              
1923             return $self->first_existing_path(
1924 24         116 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
  120         247  
  120         652  
  120         511  
1925             }
1926              
1927             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1928              
1929             Copy default preferences file into home directory for later use and
1930             modification, if it does not already exist and C<dont_copy_prefs> is
1931             not set.
1932              
1933             =cut
1934              
1935             sub create_default_prefs {
1936             # $userdir will only exist if vpopmail config is enabled thru spamd
1937             # Its value will be the virtual user's maildir
1938             #
1939 0     0 1 0 my ($self, $fname, $user, $userdir) = @_;
1940              
1941 0 0       0 if ($self->{dont_copy_prefs}) {
1942 0         0 return(0);
1943             }
1944              
1945             # if ($userdir && $userdir ne $self->{user_dir}) {
1946             # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1947             # }
1948              
1949 0 0       0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1950 0 0       0 if ($stat_errn == 0) {
    0          
1951             # fine, it already exists
1952             } elsif ($stat_errn != ENOENT) {
1953 0         0 dbg("config: cannot access user preferences file $fname: $!");
1954             } else {
1955             # Pass on the value of $userdir for virtual users in vpopmail
1956             # otherwise it is empty and the user's normal homedir is used
1957 0         0 $self->get_and_create_userstate_dir($userdir);
1958              
1959             # copy in the default one for later editing
1960 0         0 my $defprefs =
1961             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1962              
1963 0         0 local(*IN,*OUT);
1964 0         0 $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1965 0 0       0 if (!defined $defprefs) {
    0          
    0          
1966 0         0 warn "config: can not determine default prefs path\n";
1967             } elsif (!open(IN, "<$defprefs")) {
1968 0         0 warn "config: cannot open $defprefs: $!\n";
1969             } elsif (!open(OUT, ">$fname")) {
1970 0         0 warn "config: cannot create user preferences file $fname: $!\n";
1971             } else {
1972             # former code skipped lines beginning with '#* ', the following copy
1973             # procedure no longer does so, as it avoids reading line-by-line
1974 0         0 my($inbuf,$nread);
1975 0         0 while ( $nread=read(IN,$inbuf,16384) ) {
1976 0 0       0 print OUT $inbuf or die "cannot write to $fname: $!";
1977             }
1978 0 0       0 defined $nread or die "error reading $defprefs: $!";
1979 0         0 undef $inbuf;
1980 0 0       0 close OUT or die "error closing $fname: $!";
1981 0 0       0 close IN or die "error closing $defprefs: $!";
1982              
1983 0 0 0     0 if (($< == 0) && ($> == 0) && defined($user)) { # chown it
      0        
1984 0         0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1985 0 0       0 unless (chown($uid, $gid, $fname)) {
1986 0         0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1987             }
1988             }
1989 0         0 warn "config: created user preferences file: $fname\n";
1990 0         0 return(1);
1991             }
1992             }
1993              
1994 0         0 return(0);
1995             }
1996              
1997             ###########################################################################
1998              
1999             sub expand_name {
2000 1     1 0 5 my ($self, $name) = @_;
2001 1   50     17 my $home = $self->{user_dir} || $ENV{HOME} || '';
2002              
2003 1 50       8 if (am_running_on_windows()) {
2004 0   0     0 my $userprofile = $ENV{USERPROFILE} || '';
2005              
2006 0 0 0     0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
2007 0 0       0 return $userprofile if ($userprofile =~ m/^\\\\/o);
2008              
2009 0 0 0     0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
2010 0 0       0 return $home if ($home =~ m/^\\\\/o);
2011              
2012 0         0 return '';
2013             } else {
2014 1 50 33     23 return $home if ($home && $home =~ /\//o);
2015 0 0       0 return (getpwnam($name))[7] if ($name ne '');
2016 0         0 return (getpwuid($>))[7];
2017             }
2018             }
2019              
2020             sub sed_path {
2021 115     115 0 293 my ($self, $path) = @_;
2022 115 50       284 return if !defined $path;
2023              
2024 115 100       443 if (exists($self->{conf}->{sed_path_cache}->{$path})) {
2025 48         421 return $self->{conf}->{sed_path_cache}->{$path};
2026             }
2027              
2028 67         119 my $orig_path = $path;
2029              
2030 67 0       248 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
  0         0  
2031 67 50       229 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
  25         162  
2032 67 50       207 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
  25         151  
2033 67 0 33     143 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
  1         7  
2034 67 0       125 $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
  0         0  
2035 67         137 $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
  0         0  
2036 67         241 $path =~ s/__version__/${VERSION}/gs;
2037 67         134 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
  1         5  
2038              
2039 67         245 $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
2040 67         272 $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
2041 67         205 return $path;
2042             }
2043              
2044             sub get_perl_major_version {
2045 0     0 0 0 my $self = shift;
2046 0 0       0 $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2047 0         0 return $1;
2048             }
2049              
2050             sub first_existing_path {
2051 26     26 0 74 my $self = shift;
2052 26         68 my $path;
2053 26         165 foreach my $p (@_) {
2054 54         251 $path = $self->sed_path ($p);
2055 54 50       158 if (defined $path) {
2056 54 100       1174 my($errn) = stat($path) ? 0 : 0+$!;
2057 54 100       237 if ($errn == ENOENT) { } # does not exist
    50          
2058 0         0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2059 25         140 else { return $path }
2060             }
2061             }
2062 1         5 return;
2063             }
2064              
2065             ###########################################################################
2066              
2067             sub get_cf_files_in_dir {
2068 47     47 0 128 my ($self, $dir) = @_;
2069 47         130 return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2070             }
2071              
2072             sub get_pre_files_in_dir {
2073 47     47 0 130 my ($self, $dir) = @_;
2074 47         157 return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2075             }
2076              
2077             sub _get_cf_pre_files_in_dir {
2078 94     94   298 my ($self, $dir, $type) = @_;
2079              
2080 94 50       232 if ($self->{config_tree_recurse}) {
2081 0         0 my @cfs;
2082              
2083             # use "eval" to avoid loading File::Find unless this is specified
2084             eval ' use File::Find qw();
2085             File::Find::find(
2086             { untaint => 1,
2087             follow => 1,
2088             wanted =>
2089             sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
2090             }, $dir); 1;
2091 0 0       0 ' or do {
2092 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
2093 0         0 die "_get_cf_pre_files_in_dir error: $eval_stat";
2094             };
2095 0         0 @cfs = sort { $a cmp $b } @cfs;
  0         0  
2096 0         0 return @cfs;
2097             }
2098             else {
2099 94 50       2062 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2100 94 100 100     2573 my @cfs = grep { $_ ne '.' && $_ ne '..' &&
  1278   100     13933  
2101             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2102 94         1147 closedir SA_CF_DIR;
2103              
2104 94         667 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
  545         1197  
  1075         1226  
2105             }
2106             }
2107              
2108             ###########################################################################
2109              
2110             sub have_plugin {
2111 133     133 0 192 my ($self, $subname) = @_;
2112              
2113             # We could potentially get called after a finish(), so just return.
2114 133 50       262 return unless $self->{plugins};
2115              
2116 133         299 return $self->{plugins}->have_callback ($subname);
2117             }
2118              
2119             sub call_plugins {
2120 1947     1947 0 19880 my $self = shift;
2121              
2122             # We could potentially get called after a finish(), so just return.
2123 1947 50       3996 return unless $self->{plugins};
2124              
2125             # safety net in case some plugin changes global settings, Bug 6218
2126 1947         5854 local $/ = $/; # prevent underlying modules from changing the global $/
2127              
2128 1947         2992 my $subname = shift;
2129 1947         5365 return $self->{plugins}->callback($subname, @_);
2130             }
2131              
2132             ###########################################################################
2133              
2134             sub find_all_addrs_in_mail {
2135 0     0 0 0 my ($self, $mail_obj) = @_;
2136              
2137 0         0 $self->init(1);
2138              
2139 0         0 my @addrlist;
2140 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2141             Errors-To Mail-Followup-To))
2142             {
2143 0         0 my @hdrs = $mail_obj->get_header($header);
2144 0 0       0 if ($#hdrs < 0) { next; }
  0         0  
2145 0         0 push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2146             }
2147              
2148             # find addrs in body, too
2149 0         0 foreach my $line (@{$mail_obj->get_body()}) {
  0         0  
2150 0         0 push (@addrlist, $self->find_all_addrs_in_line($line));
2151             }
2152              
2153 0         0 my @ret;
2154             my %done;
2155              
2156 0         0 foreach (@addrlist) {
2157 0         0 s/^mailto://; # from Outlook "forwarded" message
2158 0 0       0 next if defined ($done{$_}); $done{$_} = 1;
  0         0  
2159 0         0 push (@ret, $_);
2160             }
2161              
2162 0         0 @ret;
2163             }
2164              
2165             sub find_all_addrs_in_line {
2166 84     84 0 207 my ($self, $line) = @_;
2167              
2168             # a more permissive pattern based on "dot-atom" as per RFC2822
2169 84         183 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2170 84         124 my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2171              
2172 84         120 my @addrs;
2173             my %seen;
2174 84         820 while ($line =~ s/(?:mailto:)?\s*
2175             ($ID_PATTERN \@
2176             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2177             {
2178 134         292 my $addr = $1;
2179 134         247 $addr =~ s/^mailto://;
2180 134 100       355 next if (defined ($seen{$addr})); $seen{$addr} = 1;
  124         259  
2181 124         1483 push (@addrs, $addr);
2182             }
2183              
2184 84         336 return @addrs;
2185             }
2186              
2187             ###########################################################################
2188              
2189             # sa_die -- used to die with a useful exit code.
2190              
2191             sub sa_die {
2192 0     0 0 0 my $exitcode = shift;
2193 0         0 warn @_;
2194 0         0 exit $exitcode;
2195             }
2196              
2197             ###########################################################################
2198              
2199             =item $f->copy_config ( [ $source ], [ $dest ] )
2200              
2201             Used for daemons to keep a persistent Mail::SpamAssassin object's
2202             configuration correct if switching between users. Pass an associative
2203             array reference as either $source or $dest, and set the other to 'undef'
2204             so that the object will use its current configuration. i.e.:
2205              
2206             # create object w/ configuration
2207             my $spamtest = Mail::SpamAssassin->new( ... );
2208              
2209             # backup configuration to %conf_backup
2210             my %conf_backup;
2211             $spamtest->copy_config(undef, \%conf_backup) ||
2212             die "config: error returned from copy_config!\n";
2213              
2214             ... do stuff, perhaps modify the config, etc ...
2215              
2216             # reset the configuration back to the original
2217             $spamtest->copy_config(\%conf_backup, undef) ||
2218             die "config: error returned from copy_config!\n";
2219              
2220             Note that the contents of the associative arrays should be considered
2221             opaque by calling code.
2222              
2223             =cut
2224              
2225             sub copy_config {
2226 0     0 1 0 my ($self, $source, $dest) = @_;
2227              
2228             # At least one of either source or dest needs to be a hash reference ...
2229 0 0 0     0 unless ((defined $source && ref($source) eq 'HASH') ||
      0        
      0        
2230             (defined $dest && ref($dest) eq 'HASH'))
2231             {
2232 0         0 return 0;
2233             }
2234              
2235 0         0 my $timer = $self->time_method("copy_config");
2236              
2237             # let the Conf object itself do all the heavy lifting. It's better
2238             # than having this class know all about that class' internals...
2239 0 0       0 if (defined $source) {
2240 0         0 dbg ("config: copying current conf from backup");
2241             }
2242             else {
2243 0         0 dbg ("config: copying current conf to backup");
2244             }
2245 0         0 return $self->{conf}->clone($source, $dest);
2246             }
2247              
2248             ###########################################################################
2249              
2250             =item @plugins = $f->get_loaded_plugins_list ( )
2251              
2252             Return the list of plugins currently loaded by this SpamAssassin object's
2253             configuration; each entry in the list is an object of type
2254             C<Mail::SpamAssassin::Plugin>.
2255              
2256             (This API was added in SpamAssassin 3.2.0.)
2257              
2258             =cut
2259              
2260             sub get_loaded_plugins_list {
2261 0     0 1 0 my ($self) = @_;
2262 0         0 return $self->{plugins}->get_loaded_plugins_list();
2263             }
2264              
2265             1;
2266             __END__
2267              
2268             ###########################################################################
2269              
2270             =back
2271              
2272             =head1 PREREQUISITES
2273              
2274             C<HTML::Parser>
2275             C<Sys::Syslog>
2276              
2277             =head1 MORE DOCUMENTATION
2278              
2279             See also E<lt>http://spamassassin.apache.org/E<gt> and
2280             E<lt>http://wiki.apache.org/spamassassin/E<gt> for more information.
2281              
2282             =head1 SEE ALSO
2283              
2284             Mail::SpamAssassin::Conf(3)
2285             Mail::SpamAssassin::PerMsgStatus(3)
2286             spamassassin(1)
2287             sa-update(1)
2288              
2289             =head1 BUGS
2290              
2291             See E<lt>http://issues.apache.org/SpamAssassin/E<gt>
2292              
2293             =head1 AUTHORS
2294              
2295             The SpamAssassin(tm) Project E<lt>http://spamassassin.apache.org/E<gt>
2296              
2297             =head1 COPYRIGHT
2298              
2299             SpamAssassin is distributed under the Apache License, Version 2.0, as
2300             described in the file C<LICENSE> included with the distribution.
2301              
2302             =head1 AVAILABILITY
2303              
2304             The latest version of this library is likely to be available from CPAN
2305             as well as:
2306              
2307             E<lt>http://spamassassin.apache.org/E<gt>
2308              
2309             =cut