File Coverage

lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 439 762 57.6
branch 115 338 34.0
condition 61 176 34.6
subroutine 60 85 70.5
pod 31 56 55.3
total 706 1417 49.8


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             package Mail::SpamAssassin;
62 14     14   867423 use strict;
  14         77  
  14         717  
63 14     14   141 use warnings;
  14         69  
  14         990  
64             # use bytes;
65 14     14   147 use re 'taint';
  14         50  
  14         1190  
66              
67             require 5.006_001;
68              
69 14     14   3876 use Mail::SpamAssassin::Logger;
  14         63  
  14         1441  
70 14     14   3941 use Mail::SpamAssassin::Constants;
  14         36  
  14         799  
71 14     14   3605 use Mail::SpamAssassin::Conf;
  14         55  
  14         707  
72 14     14   4382 use Mail::SpamAssassin::Conf::SQL;
  14         50  
  14         549  
73 14     14   3616 use Mail::SpamAssassin::Conf::LDAP;
  14         61  
  14         539  
74 14     14   3536 use Mail::SpamAssassin::PerMsgStatus;
  14         59  
  14         656  
75 14     14   3997 use Mail::SpamAssassin::Message;
  14         272  
  14         1477  
76 14     14   6209 use Mail::SpamAssassin::PluginHandler;
  14         52  
  14         552  
77 14     14   3936 use Mail::SpamAssassin::DnsResolver;
  14         53  
  14         608  
78 14     14   3886 use Mail::SpamAssassin::RegistryBoundaries;
  14         69  
  14         625  
79 14     14   118 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  14         50  
  14         1000  
80 12     12   4069 use Mail::SpamAssassin::Util::ScopedTimer;
  12         35  
  12         438  
81              
82 12     12   92 use Errno qw(ENOENT EACCES);
  12         28  
  12         812  
83 12     12   95 use File::Basename;
  12         7476  
  12         1519  
84 11     11   78 use File::Path;
  11         22  
  11         1082  
85 11     11   88 use File::Spec 0.8;
  11         398  
  11         392  
86 11     11   73 use Time::HiRes qw(time);
  11         30  
  11         87  
87 11     11   1121 use Cwd;
  11         42  
  11         1040  
88 11     11   81 use Config;
  11         21  
  11         123161  
89              
90             our $VERSION = "3.004002"; # update after release (same format as perl $])
91             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
92             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
93              
94              
95             # Used during the prerelease/release-candidate part of the official release
96             # process. If you hacked up your SA, you should add a version_tag to your .cf
97             # files; this variable should not be modified.
98             our @EXTRA_VERSION = qw();
99              
100             our @ISA = qw();
101              
102             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
103             our $SUB_VERSION = 'svnunknown';
104             if ('$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $' =~ ':') {
105             # Subversion keyword "$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $" has been successfully expanded.
106             # Doesn't happen with automated launchpad builds:
107             # https://bugs.launchpad.net/launchpad/+bug/780916
108             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2018-09-13 21:25:10 -0400 (Thu, 13 Sep 2018) $ updated by SVN'))[1];
109             }
110              
111              
112             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
113             if ('$LastChangedRevision: 1840870 $' =~ ':') {
114             # Subversion keyword "$LastChangedRevision: 1840870 $" has been successfully expanded.
115             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1840870 $ updated by SVN}[1]));
116             } else {
117             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
118             }
119             }
120              
121             sub Version {
122 126     126 0 923 $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
123 126         1556 return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
124             }
125              
126             our $HOME_URL = "http://spamassassin.apache.org/";
127              
128             # note that the CWD takes priority. This is required in case a user
129             # is testing a new version of SpamAssassin on a machine with an older
130             # version installed. Unless you can come up with a fix for this that
131             # allows "make test" to work, don't change this.
132              
133             our @default_rules_path = (
134             './rules', # REMOVEFORINST
135             '../rules', # REMOVEFORINST
136             '__local_state_dir__/__version__',
137             '__def_rules_dir__',
138             '__prefix__/share/spamassassin',
139             '/usr/local/share/spamassassin',
140             '/usr/share/spamassassin',
141             );
142              
143             # first 3 are BSDish, latter 2 Linuxish
144             our @site_rules_path = (
145             '__local_rules_dir__',
146             '__prefix__/etc/mail/spamassassin',
147             '__prefix__/etc/spamassassin',
148             '/usr/local/etc/spamassassin',
149             '/usr/pkg/etc/spamassassin',
150             '/usr/etc/spamassassin',
151             '/etc/mail/spamassassin',
152             '/etc/spamassassin',
153             );
154              
155             our @default_prefs_path = (
156             '__local_rules_dir__/user_prefs.template',
157             '__prefix__/etc/mail/spamassassin/user_prefs.template',
158             '__prefix__/share/spamassassin/user_prefs.template',
159             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
160             '__def_rules_dir__/user_prefs.template',
161             '/etc/spamassassin/user_prefs.template',
162             '/etc/mail/spamassassin/user_prefs.template',
163             '/usr/local/share/spamassassin/user_prefs.template',
164             '/usr/share/spamassassin/user_prefs.template',
165             );
166              
167             our @default_userprefs_path = (
168             '~/.spamassassin/user_prefs',
169             );
170              
171             our @default_userstate_dir = (
172             '~/.spamassassin',
173             );
174              
175             ###########################################################################
176              
177             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
178              
179             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
180             reference to the constructor which may contain the following attribute-
181             value pairs.
182              
183             =over 4
184              
185             =item debug
186              
187             This is the debug options used to determine logging level. It exists to
188             allow sections of debug messages (called "facilities") to be enabled or
189             disabled. If this is a string, it is treated as a comma-delimited list
190             of the debug facilities. If it's a hash reference, then the keys are
191             treated as the list of debug facilities and if it's a array reference,
192             then the elements are treated as the list of debug facilities.
193              
194             There are also two special cases: (1) if the special case of "info" is
195             passed as a debug facility, then all informational messages are enabled;
196             (2) if the special case of "all" is passed as a debug facility, then all
197             debugging facilities are enabled.
198              
199             =item rules_filename
200              
201             The filename/directory to load spam-identifying rules from. (optional)
202              
203             =item site_rules_filename
204              
205             The filename/directory to load site-specific spam-identifying rules from.
206             (optional)
207              
208             =item userprefs_filename
209              
210             The filename to load preferences from. (optional)
211              
212             =item userstate_dir
213              
214             The directory user state is stored in. (optional)
215              
216             =item config_tree_recurse
217              
218             Set to C<1> to recurse through directories when reading configuration
219             files, instead of just reading a single level. (optional, default 0)
220              
221             =item config_text
222              
223             The text of all rules and preferences. If you prefer not to load the rules
224             from files, read them in yourself and set this instead. As a result, this will
225             override the settings for C<rules_filename>, C<site_rules_filename>,
226             and C<userprefs_filename>.
227              
228             =item pre_config_text
229              
230             Similar to C<config_text>, this text is placed before config_text to allow an
231             override of config files.
232              
233             =item post_config_text
234              
235             Similar to C<config_text>, this text is placed after config_text to allow an
236             override of config files.
237              
238             =item force_ipv4
239              
240             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
241             to use IPv6. Use if the existing tests for IPv6 availability produce
242             incorrect results or crashes.
243              
244             =item force_ipv6
245              
246             For symmetry with force_ipv4: if set to 1, DNS or other network tests
247             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
248             this setting and use whatever protocol family they are comfortable with.
249              
250             =item require_rules
251              
252             If set to 1, init() will die if no valid rules could be loaded. This is the
253             default behaviour when called by C<spamassassin> or C<spamd>.
254              
255             =item languages_filename
256              
257             If you want to be able to use the language-guessing rule
258             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
259             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
260             need to set this. It should be the path to the B<languages> file normally
261             found in the SpamAssassin B<rules> directory.
262              
263             =item local_tests_only
264              
265             If set to 1, no tests that require internet access will be performed. (default:
266             0)
267              
268             =item need_tags
269              
270             The option provides a way to avoid more expensive processing when it is known
271             in advance that some information will not be needed by a caller.
272              
273             A value of the option can either be a string (a comma-delimited list of tag
274             names), or a reference to a list of individual tag names. A caller may provide
275             the list in advance, specifying his intention to later collect the information
276             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
277             insensitive), it shows that a caller will not be interested in such tag,
278             although there is no guarantee it would save any resources, nor that a tag
279             value will be empty. Currently no built-in tags start with 'NO'. A later
280             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
281             to TIMING,NOASN.
282              
283             For backward compatibility, all tags available as of version 3.2.4 will
284             be available by default (unless disabled by NOtag), even if not requested
285             through need_tags option. Future versions may provide new tags conditionally
286             available.
287              
288             Currently the only tag that needs to be explicitly requested is 'TIMING'.
289             Not requesting it can save a millisecond or two - it mostly serves to
290             illustrate the usage of need_tags.
291              
292             Example:
293             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
294             or:
295             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
296              
297             =item ignore_site_cf_files
298              
299             If set to 1, any rule files found in the C<site_rules_filename> directory will
300             be ignored. *.pre files (used for loading plugins) found in the
301             C<site_rules_filename> directory will still be used. (default: 0)
302              
303             =item dont_copy_prefs
304              
305             If set to 1, the user preferences file will not be created if it doesn't
306             already exist. (default: 0)
307              
308             =item save_pattern_hits
309              
310             If set to 1, the patterns hit can be retrieved from the
311             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
312              
313             =item home_dir_for_helpers
314              
315             If set, the B<HOME> environment variable will be set to this value
316             when using test applications that require their configuration data,
317             such as Razor, Pyzor and DCC.
318              
319             =item username
320              
321             If set, the C<username> attribute will use this as the current user's name.
322             Otherwise, the default is taken from the runtime environment (ie. this process'
323             effective UID under UNIX).
324              
325             =item skip_prng_reseeding
326              
327             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
328             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
329             Perl function should be called during initialization of each child process,
330             soon after forking.
331              
332             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
333             library.
334              
335             This setting requires the caller to decide when to call srand().
336             This choice may be desired to preserve the entropy of a PRNG. The default
337             value of skip_prng_reseeding is false to maintain backward compatibility.
338              
339             This option should only be set by a caller if it calls srand() upon spawning
340             child processes. Unless you are certain you need it, leave this setting as
341             false.
342              
343             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
344             which allows spamd to call srand() right after forking a child process.
345              
346             =back
347              
348             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
349             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
350             configuration files in the usual installed locations using the below variable
351             definitions which can be passed in.
352              
353             =over 4
354              
355             =item PREFIX
356              
357             Used as the root for certain directory paths such as:
358              
359             '__prefix__/etc/mail/spamassassin'
360             '__prefix__/etc/spamassassin'
361              
362             Defaults to "@@PREFIX@@".
363              
364             =item DEF_RULES_DIR
365              
366             Location where the default rules are installed. Defaults to
367             "@@DEF_RULES_DIR@@".
368              
369             =item LOCAL_RULES_DIR
370              
371             Location where the local site rules are installed. Defaults to
372             "@@LOCAL_RULES_DIR@@".
373              
374             =item LOCAL_STATE_DIR
375              
376             Location of the local state directory, mainly used for installing updates via
377             C<sa-update> and compiling rulesets to native code. Defaults to
378             "@@LOCAL_STATE_DIR@@".
379              
380             =back
381              
382              
383             =cut
384              
385             # undocumented ctor settings:
386             #
387             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
388              
389             sub new {
390 56     56 1 76568 my $class = shift;
391 56   33     578 $class = ref($class) || $class;
392              
393 56         150 my $self = shift;
394 56 50       233 if (!defined $self) { $self = { }; }
  0         0  
395 56         198 bless ($self, $class);
396              
397             # basic backward compatibility; debug used to be a boolean.
398             # translate that into 'all', which is what it meant before 3.1.0.
399 56 50 33     384 if ($self->{debug} && $self->{debug} eq '1') {
400 0         0 $self->{debug} = 'all';
401             }
402              
403             # enable or disable debugging
404 56         666 Mail::SpamAssassin::Logger::add_facilities($self->{debug});
405              
406             # first debugging information possibly printed should be the version
407 56         305 dbg("generic: SpamAssassin version " . Version());
408              
409             # if the libs are installed in an alternate location, and the caller
410             # didn't set PREFIX, we should have an estimated guess ready, values
411             # substituted at 'make' time
412 56   50     605 $self->{PREFIX} ||= '@@PREFIX@@';
413 56   50     512 $self->{DEF_RULES_DIR} ||= '@@DEF_RULES_DIR@@';
414 56   50     416 $self->{LOCAL_RULES_DIR} ||= '@@LOCAL_RULES_DIR@@';
415 56   50     428 $self->{LOCAL_STATE_DIR} ||= '@@LOCAL_STATE_DIR@@';
416 56         197 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
  224         889  
417             qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
418              
419 56         201 $self->{needed_tags} = {};
420 56         129 { my $ntags = $self->{need_tags};
  56         136  
421 56 50       320 if (defined $ntags) {
422 0 0       0 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
423 0 0       0 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
424             }
425             }
426             }
427 56 50 33     301 if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
428 0         0 $self->timer_enable();
429             }
430              
431 56   33     1086 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
432 56         820 $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
433 56         928 $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
434              
435 56   50     501 $self->{save_pattern_hits} ||= 0;
436              
437             # Make sure that we clean $PATH if we're tainted
438 56         498 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
439              
440 56 50       293 if (!defined $self->{username}) {
441 56         480 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
442             }
443              
444 56         432 $self->create_locker();
445              
446 56         220 $self;
447             }
448              
449             sub create_locker {
450 56     56 0 205 my ($self) = @_;
451              
452 56         124 my $class;
453 56         239 my $m = $self->{conf}->{lock_method};
454              
455             # let people choose what they want -- even if they may not work on their
456             # OS. (they could be using cygwin!)
457 56 50       365 if ($m eq 'win32') { $class = 'Win32'; }
  0 50       0  
    50          
458 0         0 elsif ($m eq 'flock') { $class = 'Flock'; }
459 0         0 elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
460             else {
461             # OS-specific defaults
462 56 50       241 if (am_running_on_windows()) {
463 0         0 $class = 'Win32';
464             } else {
465 56         145 $class = 'UnixNFSSafe';
466             }
467             }
468              
469             # this could probably be made a little faster; for now I'm going
470             # for slow but safe, by keeping in quotes
471             eval '
472             use Mail::SpamAssassin::Locker::'.$class.';
473             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
474             1;
475 11 50   11   4763 ' or do {
  11         32  
  11         429  
  56         7555  
476 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
477 0         0 die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
478             };
479              
480 56 50       422 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
481             }
482              
483             ###########################################################################
484              
485             =item parse($message, $parse_now [, $suppl_attrib])
486              
487             Parse will return a Mail::SpamAssassin::Message object with just the
488             headers parsed. When calling this function, there are two optional
489             parameters that can be passed in: $message is either undef (which
490             will use STDIN), a scalar - a string containing an entire message,
491             a reference to such string, an array reference of the message with
492             one line per array element, or either a file glob or an IO::File object
493             which holds the entire contents of the message; and $parse_now, which
494             specifies whether or not to create a MIME tree at parse time or later
495             as necessary.
496              
497             The I<$parse_now> option, by default, is set to false (0). This
498             allows SpamAssassin to not have to generate the tree of internal
499             data nodes if the information is not going to be used. This is
500             handy, for instance, when running C<spamassassin -d>, which only
501             needs the pristine header and body which is always parsed and stored
502             by this function.
503              
504             The optional last argument I<$suppl_attrib> provides a way for a caller
505             to pass additional information about a message to SpamAssassin. It is
506             either undef, or a ref to a hash where each key/value pair provides some
507             supplementary attribute of the message, typically information that cannot
508             be deduced from the message itself, or is hard to do so reliably, or would
509             represent unnecessary work for SpamAssassin to obtain it. The argument will
510             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
511             made available to the rest of the code as well as to plugins. The exact list
512             of attributes will evolve through time, any unknown attribute should be
513             ignored. Possible examples are: SMTP envelope information, a flag indicating
514             that a message as supplied by a caller was truncated due to size limit, an
515             already verified list of DKIM signature objects, or perhaps a list of rule
516             hits predetermined by a caller, which makes another possible way for a
517             caller to provide meta information (instead of having to insert made-up
518             header fields in order to pass information), or maybe just plain rule hits.
519              
520             For more information, please see the C<Mail::SpamAssassin::Message>
521             and C<Mail::SpamAssassin::Message::Node> POD.
522              
523             =cut
524              
525             sub parse {
526 102     102 1 5839 my($self, $message, $parsenow, $suppl_attrib) = @_;
527              
528 102         463 my $start_time = time;
529 102         489 $self->init(1);
530 102         448 my $timer = $self->time_method("parse");
531              
532 102         276 my $master_deadline;
533             # passed in at a function call
534 102 50 66     553 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
535 48         128 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
536             }
537             # found in a config file - overrides passed-in number if lower
538 102 50       419 if ($self->{conf}->{time_limit}) { # defined and nonzero
539 102         404 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
540 102 50 33     386 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
541 102         196 $master_deadline = $time_limit_deadline;
542             }
543             }
544 102 50       358 if (defined $master_deadline) {
545 102         411 dbg("config: time limit %.1f s", $master_deadline - $start_time);
546             }
547              
548             my $msg = Mail::SpamAssassin::Message->new({
549             message=>$message, parsenow=>$parsenow,
550             normalize=>$self->{conf}->{normalize_charset},
551 102         2118 master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
552              
553             # bug 5069: The goal here is to get rendering plugins to do things
554             # like OCR, convert doc and pdf to text, etc, though it could be anything
555             # that wants to process the message after it's been parsed.
556 102         812 $self->call_plugins("post_message_parse", { message => $msg });
557              
558 102         361 return $msg;
559             }
560              
561              
562             ###########################################################################
563              
564             =item $status = $f->check ($mail)
565              
566             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
567             to determine if it is spam or not.
568              
569             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
570             used to test or manipulate the mail message.
571              
572             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
573             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
574             object is a "factory". However, if you do this, be sure to call the
575             C<finish()> method on the status objects when you're done with them.
576              
577             =cut
578              
579             sub check {
580 35     35 1 176 my ($self, $mail_obj) = @_;
581              
582 35         141 $self->init(1);
583 35         268 my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
584 35         161 $pms->check();
585 34 50       192 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
586 34         116 $pms;
587             }
588              
589             =item $status = $f->check_message_text ($mailtext)
590              
591             Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
592             is spam or not.
593              
594             Otherwise identical to C<check()> above.
595              
596             =cut
597              
598             sub check_message_text {
599 32     32 1 116 my ($self, $mailtext) = @_;
600 32         183 my $msg = $self->parse($mailtext, 1);
601 32         145 my $result = $self->check($msg);
602              
603             # Kill off the metadata ...
604             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
605             # the message. So killing it here will cause things like
606             # rewrite_message() to fail. <grrr>
607             #
608 32         248 $msg->finish_metadata();
609              
610 32         159 return $result;
611             }
612              
613             ###########################################################################
614              
615             =item $status = $f->learn ($mail, $id, $isspam, $forget)
616              
617             Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
618              
619             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
620             be learnt as non-spam.
621              
622             If C<$forget> is set, the attributes of the mail will be removed from
623             both the non-spam and spam learning databases.
624              
625             C<$id> is an optional message-identification string, used internally
626             to tag the message. If it is C<undef>, the Message-Id of the message
627             will be used. It should be unique to that message.
628              
629             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
630             manipulate the learning process for each mail.
631              
632             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
633             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
634             object is a "factory". However, if you do this, be sure to call the
635             C<finish()> method on the learner objects when you're done with them.
636              
637             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
638             must be called before using this method.
639              
640             =cut
641              
642             sub learn {
643 0     0 1 0 my ($self, $mail_obj, $id, $isspam, $forget) = @_;
644 0         0 local ($_);
645              
646 0         0 require Mail::SpamAssassin::PerMsgLearner;
647 0         0 $self->init(1);
648 0         0 my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
649              
650 0 0       0 if ($forget) {
    0          
651 0         0 dbg("learn: forgetting message");
652 0         0 $msg->forget($id);
653             } elsif ($isspam) {
654 0         0 dbg("learn: learning spam");
655 0         0 $msg->learn_spam($id);
656             } else {
657 0         0 dbg("learn: learning ham");
658 0         0 $msg->learn_ham($id);
659             }
660              
661 0         0 $msg;
662             }
663              
664             ###########################################################################
665              
666             =item $f->init_learner ( [ { opt => val, ... } ] )
667              
668             Initialise learning. You may pass the following attribute-value pairs to this
669             method.
670              
671             =over 4
672              
673             =item caller_will_untie
674              
675             Whether or not the code calling this method will take care of untie'ing
676             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
677              
678             =item force_expire
679              
680             Should an expiration run be forced to occur immediately? (optional, default 0).
681              
682             =item learn_to_journal
683              
684             Should learning data be written to the journal, instead of directly to the
685             databases? (optional, default 0).
686              
687             =item wait_for_lock
688              
689             Whether or not to wait a long time for locks to complete (optional, default 0).
690              
691             =item opportunistic_expire_check_only
692              
693             During the opportunistic journal sync and expire check, don't actually do the
694             expire but report back whether or not it should occur (optional, default 0).
695              
696             =item no_relearn
697              
698             If doing a learn operation, and the message has already been learned as
699             the opposite type, don't re-learn the message.
700              
701             =back
702              
703             =cut
704              
705             sub init_learner {
706 0     0 1 0 my $self = shift;
707 0         0 my $opts = shift;
708 0         0 dbg("learn: initializing learner");
709              
710             # Make sure we're already initialized ...
711 0         0 $self->init(1);
712              
713 0         0 my %kv = (
714             'force_expire' => 'learn_force_expire',
715             'learn_to_journal' => 'learn_to_journal',
716             'caller_will_untie' => 'learn_caller_will_untie',
717             'wait_for_lock' => 'learn_wait_for_lock',
718             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
719             'no_relearn' => 'learn_no_relearn',
720             );
721              
722 0         0 my %ret;
723              
724             # Set any other options that need setting ...
725 0         0 while( my($k,$v) = each %kv ) {
726 0         0 $ret{$k} = $self->{$v};
727 0 0       0 if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
  0         0  
728             }
729              
730 0         0 return \%ret;
731             }
732              
733             ###########################################################################
734              
735             =item $f->rebuild_learner_caches ({ opt => val })
736              
737             Rebuild any cache databases; should be called after the learning process.
738             Options include: C<verbose>, which will output diagnostics to C<stdout>
739             if set to 1.
740              
741             =cut
742              
743             sub rebuild_learner_caches {
744 0     0 1 0 my $self = shift;
745 0         0 my $opts = shift;
746 0 0       0 $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
747 0         0 1;
748             }
749              
750             =item $f->finish_learner ()
751              
752             Finish learning.
753              
754             =cut
755              
756             sub finish_learner {
757 1     1 1 2 my $self = shift;
758 1 50       7 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
759 1         2 1;
760             }
761              
762             =item $f->dump_bayes_db()
763              
764             Dump the contents of the Bayes DB
765              
766             =cut
767              
768             sub dump_bayes_db {
769 0     0 1 0 my($self,@opts) = @_;
770 0 0       0 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
771             }
772              
773             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
774              
775             Signals that the current user has changed (possibly using C<setuid>), meaning
776             that SpamAssassin should close any per-user databases it has open, and re-open
777             using ones appropriate for the new user.
778              
779             Note that this should be called I<after> reading any per-user configuration, as
780             that data may override some paths opened in this method. You may pass the
781             following attribute-value pairs:
782              
783             =over 4
784              
785             =item username
786              
787             The username of the user. This will be used for the C<username> attribute.
788              
789             =item user_dir
790              
791             A directory to use as a 'home directory' for the current user's data,
792             overriding the system default. This directory must be readable and writable by
793             the process. Note that the resulting C<userstate_dir> will be the
794             C<.spamassassin> subdirectory of this dir.
795              
796             =item userstate_dir
797              
798             A directory to use as a directory for the current user's data, overriding the
799             system default. This directory must be readable and writable by the process.
800             The default is C<user_dir/.spamassassin>.
801              
802             =back
803              
804             =cut
805              
806             sub signal_user_changed {
807 2     2 1 194 my $self = shift;
808 2         4 my $opts = shift;
809 2         3 my $set = 0;
810              
811 2         6 my $timer = $self->time_method("signal_user_changed");
812 2         6 dbg("info: user has changed");
813              
814 2 50 33     23 if (defined $opts && $opts->{username}) {
815 2         5 $self->{username} = $opts->{username};
816             } else {
817 0         0 undef $self->{username};
818             }
819 2 50 33     11 if (defined $opts && $opts->{user_dir}) {
820 2         5 $self->{user_dir} = $opts->{user_dir};
821             } else {
822 0         0 undef $self->{user_dir};
823             }
824 2 50 33     9 if (defined $opts && $opts->{userstate_dir}) {
825 0         0 $self->{userstate_dir} = $opts->{userstate_dir};
826             } else {
827 2         5 undef $self->{userstate_dir};
828             }
829              
830             # reopen bayes dbs for this user
831 2 100       91 $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
832 2 100       7 if ($self->{conf}->{use_bayes}) {
833 1         8 require Mail::SpamAssassin::Bayes;
834 1         8 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
835             } else {
836 1 50       12 delete $self->{bayes_scanner} if $self->{bayes_scanner};
837             }
838              
839             # this user may have a different learn_to_journal setting, so reset appropriately
840 2         7 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
841              
842 2 50       6 $set |= 1 unless $self->{local_tests_only};
843 2 0 66     9 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      33        
844              
845 2         9 $self->{conf}->set_score_set ($set);
846              
847             $self->call_plugins("signal_user_changed", {
848             username => $self->{username},
849             userstate_dir => $self->{userstate_dir},
850             user_dir => $self->{user_dir},
851 2         13 });
852              
853 2         8 1;
854             }
855              
856             ###########################################################################
857              
858             =item $f->report_as_spam ($mail, $options)
859              
860             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
861             human-verified spam. This will submit the mail message to live,
862             collaborative, spam-blocker databases, allowing other users to block this
863             message.
864              
865             It will also submit the mail to SpamAssassin's Bayesian learner.
866              
867             Options is an optional reference to a hash of options. Currently these
868             can be:
869              
870             =over 4
871              
872             =item dont_report_to_dcc
873              
874             Inhibits reporting of the spam to DCC.
875              
876             =item dont_report_to_pyzor
877              
878             Inhibits reporting of the spam to Pyzor.
879              
880             =item dont_report_to_razor
881              
882             Inhibits reporting of the spam to Razor.
883              
884             =item dont_report_to_spamcop
885              
886             Inhibits reporting of the spam to SpamCop.
887              
888             =back
889              
890             =cut
891              
892             sub report_as_spam {
893 0     0 1 0 my ($self, $mail, $options) = @_;
894 0         0 local ($_);
895              
896 0         0 $self->init(1);
897 0         0 my $timer = $self->time_method("report_as_spam");
898              
899             # learn as spam if enabled
900 0 0       0 if ( $self->{conf}->{bayes_learn_during_report} ) {
901 0         0 $self->learn ($mail, undef, 1, 0);
902             }
903              
904 0         0 require Mail::SpamAssassin::Reporter;
905 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
906 0         0 $mail->report();
907             }
908              
909             ###########################################################################
910              
911             =item $f->revoke_as_spam ($mail, $options)
912              
913             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
914             human-verified ham (non-spam). This will revoke the mail message from live,
915             collaborative, spam-blocker databases, allowing other users to block this
916             message.
917              
918             It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
919              
920             Options is an optional reference to a hash of options. Currently these
921             can be:
922              
923             =over 4
924              
925             =item dont_report_to_razor
926              
927             Inhibits revoking of the spam to Razor.
928              
929              
930             =back
931              
932             =cut
933              
934             sub revoke_as_spam {
935 0     0 1 0 my ($self, $mail, $options) = @_;
936 0         0 local ($_);
937              
938 0         0 $self->init(1);
939 0         0 my $timer = $self->time_method("revoke_as_spam");
940              
941             # learn as nonspam
942 0         0 $self->learn ($mail, undef, 0, 0);
943              
944 0         0 require Mail::SpamAssassin::Reporter;
945 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
946 0         0 $mail->revoke ();
947             }
948              
949             ###########################################################################
950              
951             =item $f->add_address_to_whitelist ($addr, $cli_p)
952              
953             Given a string containing an email address, add it to the automatic
954             whitelist database.
955              
956             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
957              
958             =cut
959              
960             sub add_address_to_whitelist {
961 0     0 1 0 my ($self, $addr, $cli_p) = @_;
962              
963 0         0 $self->call_plugins("whitelist_address", { address => $addr,
964             cli_p => $cli_p });
965             }
966              
967             ###########################################################################
968              
969             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
970              
971             Given a mail message, find as many addresses in the usual headers (To, Cc, From
972             etc.), and the message body, and add them to the automatic whitelist database.
973              
974             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
975              
976             =cut
977              
978             sub add_all_addresses_to_whitelist {
979 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
980              
981 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
982 0         0 $self->call_plugins("whitelist_address", { address => $addr,
983             cli_p => $cli_p });
984             }
985             }
986              
987             ###########################################################################
988              
989             =item $f->remove_address_from_whitelist ($addr, $cli_p)
990              
991             Given a string containing an email address, remove it from the automatic
992             whitelist database.
993              
994             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
995              
996             =cut
997              
998             sub remove_address_from_whitelist {
999 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1000              
1001 0         0 $self->call_plugins("remove_address", { address => $addr,
1002             cli_p => $cli_p });
1003             }
1004              
1005             ###########################################################################
1006              
1007             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
1008              
1009             Given a mail message, find as many addresses in the usual headers (To, Cc, From
1010             etc.), and the message body, and remove them from the automatic whitelist
1011             database.
1012              
1013             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1014              
1015             =cut
1016              
1017             sub remove_all_addresses_from_whitelist {
1018 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1019              
1020 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1021 0         0 $self->call_plugins("remove_address", { address => $addr,
1022             cli_p => $cli_p });
1023             }
1024             }
1025              
1026             ###########################################################################
1027              
1028             =item $f->add_address_to_blacklist ($addr, $cli_p)
1029              
1030             Given a string containing an email address, add it to the automatic
1031             whitelist database with a high score, effectively blacklisting them.
1032              
1033             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1034              
1035             =cut
1036              
1037             sub add_address_to_blacklist {
1038 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1039 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1040             cli_p => $cli_p });
1041             }
1042              
1043             ###########################################################################
1044              
1045             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1046              
1047             Given a mail message, find addresses in the From headers and add them to the
1048             automatic whitelist database with a high score, effectively blacklisting them.
1049              
1050             Note that To and Cc addresses are not used.
1051              
1052             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1053              
1054             =cut
1055              
1056             sub add_all_addresses_to_blacklist {
1057 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1058              
1059 0         0 $self->init(1);
1060              
1061 0         0 my @addrlist;
1062 0         0 my @hdrs = $mail_obj->get_header('From');
1063 0 0       0 if ($#hdrs >= 0) {
1064 0         0 push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1065             }
1066              
1067 0         0 foreach my $addr (@addrlist) {
1068 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1069             cli_p => $cli_p });
1070             }
1071              
1072             }
1073              
1074             ###########################################################################
1075              
1076             =item $text = $f->remove_spamassassin_markup ($mail)
1077              
1078             Returns the text of the message, with any SpamAssassin-added text (such
1079             as the report, or X-Spam-Status headers) stripped.
1080              
1081             Note that the B<$mail> object is not modified.
1082              
1083             Warning: if the input message in B<$mail> contains a mixture of CR-LF
1084             (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1085             to use one or the other consistently throughout.
1086              
1087             =cut
1088              
1089             sub remove_spamassassin_markup {
1090 0     0 1 0 my ($self, $mail_obj) = @_;
1091 0         0 local ($_);
1092              
1093 0         0 my $timer = $self->time_method("remove_spamassassin_markup");
1094 0   0     0 my $mbox = $mail_obj->get_mbox_separator() || '';
1095              
1096 0         0 dbg("markup: removing markup");
1097              
1098             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1099             # if we have definitive proof it's not an encapsulated message.
1100 0   0     0 my $ct = $mail_obj->get_header("Content-Type") || '';
1101 0 0       0 if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1102              
1103             # Ok, this is a possible encapsulated message, search for the
1104             # appropriate mime part and deal with it if necessary.
1105 0         0 my $boundary = "\Q$1\E";
1106 0         0 my @msg = split(/^/,$mail_obj->get_pristine_body());
1107              
1108 0         0 my $flag = 0;
1109 0         0 $ct = '';
1110 0         0 my $cd = '';
1111 0         0 for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1112             # only look at mime part headers
1113 0 0 0     0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1114              
1115 0 0       0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1116              
1117             # Ok, we found the encapsulated piece ...
1118 0 0 0     0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
      0        
1119             ($ct eq "message/rfc822" &&
1120             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1121             {
1122 0         0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1123              
1124             # find the end and chop it off
1125 0         0 for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1126 0 0       0 if ( $msg[$i] =~ /^--$boundary/ ) {
1127 0 0       0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1128             # will remove the blank line (not sure it'll always be
1129             # there) and everything below. don't worry, the splice
1130             # guarantees the for will stop ...
1131             }
1132             }
1133              
1134             # Ok, we're done. Return the rewritten message.
1135 0         0 return join('', $mbox, @msg);
1136             }
1137              
1138 0         0 $flag = 0;
1139 0         0 $ct = '';
1140 0         0 $cd = '';
1141 0         0 next;
1142             }
1143              
1144             # Ok, we're in the mime header ... Capture the appropriate headers...
1145 0         0 $flag = 1;
1146 0 0       0 if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
    0          
1147 0         0 $ct = $1;
1148             }
1149             elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1150 0         0 $cd = $1;
1151             }
1152             }
1153             }
1154              
1155             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1156             # So treat it like a "report_safe 0" message.
1157 0         0 my $hdrs = $mail_obj->get_pristine_header();
1158 0         0 my $body = $mail_obj->get_pristine_body();
1159              
1160             # remove DOS line endings
1161 0         0 $hdrs =~ s/\r//gs;
1162              
1163             # unfold SA added headers, but not X-Spam-Prev headers ...
1164 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1165 0         0 1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1166 0         0 $hdrs =~ s/^\n//;
1167              
1168             ###########################################################################
1169             # Backward Compatibilty, pre 3.0.x.
1170              
1171             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1172 0         0 $self->init(1);
1173 0         0 foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
  0         0  
1174             # let the 3.0 decoding do it...
1175 0 0       0 next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1176              
1177 0         0 dbg("markup: removing markup in $header");
1178 0 0       0 if ($header eq 'Subject') {
1179 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1180 0         0 $tag = quotemeta($tag);
1181 0         0 $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1182 0         0 $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1183 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1184 0         0 1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1185             } else {
1186 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1187             }
1188             }
1189              
1190             # Now deal with report cleansing from 2.4x and previous.
1191             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1192             # followed by another "SPAM: ----.+" line, followed by a blank line.
1193 0         0 1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1194             ###########################################################################
1195              
1196             # 3.0 version -- support for previously-nonexistent Subject hdr.
1197             # ensure the Subject line didn't *really* contain "(nonexistent)" in
1198             # the original message!
1199 0 0 0     0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1200             && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1201             {
1202 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1203 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1204             }
1205              
1206             # 3.0 version -- revert from X-Spam-Prev to original ...
1207 0         0 while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1208 0         0 my($hdr, $name) = ($1,$2);
1209              
1210             # If the rewritten version doesn't exist, we should deal with it anyway...
1211 0 0       0 unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1212 0         0 $hdrs =~ s/\n\n/\n$hdr\n/;
1213             }
1214             }
1215              
1216             # remove any other X-Spam headers we added, will be unfolded
1217 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1218 0         0 1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1219 0         0 $hdrs =~ s/^\n//;
1220              
1221             # re-add DOS line endings
1222 0 0       0 if ($mail_obj->{line_ending} ne "\n") {
1223 0         0 $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1224             }
1225              
1226             # Put the whole thing back together ...
1227 0         0 return join ('', $mbox, $hdrs, $body);
1228             }
1229              
1230             ###########################################################################
1231              
1232             =item $f->read_scoreonly_config ($filename)
1233              
1234             Read a configuration file and parse user preferences from it.
1235              
1236             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1237             In other words, they include scoring options, scores, whitelists and
1238             blacklists, and so on, but do not include rule definitions, privileged
1239             settings, etc. unless C<allow_user_rules> is enabled; and they never include
1240             the administrator settings.
1241              
1242             =cut
1243              
1244             sub read_scoreonly_config {
1245 2     2 1 1888 my ($self, $filename) = @_;
1246              
1247 2         5 my $timer = $self->time_method("read_scoreonly_config");
1248 2         7 local *IN;
1249 2 50       61 if (!open(IN,"<$filename")) {
1250             # the file may not exist; this should not be verbose
1251 2         26 dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1252 2         10 return;
1253             }
1254              
1255 0         0 my($inbuf,$nread,$text); $text = '';
  0         0  
1256 0         0 while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
  0         0  
1257 0 0       0 defined $nread or die "error reading $filename: $!";
1258 0 0       0 close IN or die "error closing $filename: $!";
1259 0         0 undef $inbuf;
1260              
1261 0         0 $text = "file start $filename\n" . $text;
1262             # add an extra \n in case file did not end in one.
1263 0         0 $text .= "\nfile end $filename\n";
1264              
1265 0         0 $self->{conf}->{main} = $self;
1266 0         0 $self->{conf}->parse_scores_only ($text);
1267 0         0 $self->{conf}->finish_parsing(1);
1268              
1269 0         0 delete $self->{conf}->{main}; # to allow future GC'ing
1270             }
1271              
1272             ###########################################################################
1273              
1274             =item $f->load_scoreonly_sql ($username)
1275              
1276             Read configuration paramaters from SQL database and parse scores from it. This
1277             will only take effect if the perl C<DBI> module is installed, and the
1278             configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
1279             C<user_scores_sql_password> are set correctly.
1280              
1281             The username in C<$username> will also be used for the C<username> attribute of
1282             the Mail::SpamAssassin object.
1283              
1284             =cut
1285              
1286             sub load_scoreonly_sql {
1287 0     0 1 0 my ($self, $username) = @_;
1288              
1289 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1290 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1291 0         0 $self->{username} = $username;
1292 0 0       0 unless ($src->load($username)) {
1293 0         0 return 0;
1294             }
1295 0         0 return 1;
1296             }
1297              
1298             ###########################################################################
1299              
1300             =item $f->load_scoreonly_ldap ($username)
1301              
1302             Read configuration paramaters from an LDAP server and parse scores from it.
1303             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1304             installed, and the configuration parameters C<user_scores_dsn>,
1305             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1306             correctly.
1307              
1308             The username in C<$username> will also be used for the C<username> attribute of
1309             the Mail::SpamAssassin object.
1310              
1311             =cut
1312              
1313             sub load_scoreonly_ldap {
1314 0     0 1 0 my ($self, $username) = @_;
1315              
1316 0         0 dbg("config: load_scoreonly_ldap($username)");
1317 0         0 my $timer = $self->time_method("load_scoreonly_ldap");
1318 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1319 0         0 $self->{username} = $username;
1320 0         0 $src->load($username);
1321             }
1322              
1323             ###########################################################################
1324              
1325             =item $f->set_persistent_address_list_factory ($factoryobj)
1326              
1327             Set the persistent address list factory, used to create objects for the
1328             automatic whitelist algorithm's persistent-storage back-end. See
1329             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1330             must implement, and the API the objects they produce must implement.
1331              
1332             =cut
1333              
1334             sub set_persistent_address_list_factory {
1335 3     3 1 12 my ($self, $fac) = @_;
1336 3         12 $self->{pers_addr_list_factory} = $fac;
1337             }
1338              
1339             ###########################################################################
1340              
1341             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1342              
1343             Compile all patterns, load all configuration files, and load all
1344             possibly-required Perl modules.
1345              
1346             Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1347             plan to fork() or start a new perl interpreter thread to process a message,
1348             this is suboptimal, as each process/thread will have to perform these actions.
1349              
1350             Call this function in the master thread or process to perform the actions
1351             straightaway, so that the sub-processes will not have to.
1352              
1353             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1354             configuration without reading the per-user configuration file and it will
1355             assume that you will call C<read_scoreonly_config> at a later point.
1356              
1357             If C<$keep_userstate> is true, compile_now() will revert any configuration
1358             options which have a default with I<__userstate__> in it post-init(),
1359             and then re-change the option before returning. This lets you change
1360             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1361             files there as necessary without disturbing the actual files as changed
1362             by a configuration option. By default, this is disabled.
1363              
1364             =cut
1365              
1366             sub compile_now {
1367 1     1 1 20 my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1368              
1369 1         5 my $timer = $self->time_method("compile_now");
1370              
1371             # Backup default values which deal with userstate.
1372             # This is done so we can create any new files in, presumably, a temp dir.
1373             # see bug 2762 for more details.
1374 1         1 my %backup;
1375 1 50 33     6 if (defined $deal_with_userstate && $deal_with_userstate) {
1376 1         2 while(my($k,$v) = each %{$self->{conf}}) {
  116         265  
1377 115 100 100     395 $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
      100        
1378             }
1379             }
1380              
1381 1         5 $self->init($use_user_prefs);
1382              
1383             # if init() didn't change the value from default, forget about it.
1384             # if the value is different, remember the new version, and reset the default.
1385 1         10 while(my($k,$v) = each %backup) {
1386 1 50       5 if ($self->{conf}->{$k} eq $v) {
1387 0         0 delete $backup{$k};
1388             }
1389             else {
1390 1         2 my $backup = $backup{$k};
1391 1         3 $backup{$k} = $self->{conf}->{$k};
1392 1         6 $self->{conf}->{$k} = $backup;
1393             }
1394             }
1395              
1396 1         4 dbg("ignore: test message to precompile patterns and load modules");
1397              
1398             # tell plugins we are about to send a message for compiling purposes
1399 1         8 $self->call_plugins("compile_now_start",
1400             { use_user_prefs => $use_user_prefs,
1401             keep_userstate => $deal_with_userstate});
1402              
1403             # note: this may incur network access. Good. We want to make sure
1404             # as much as possible is preloaded!
1405 1         22 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1406             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1407             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1408              
1409 1         7 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1410 1         16 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1411             { disable_auto_learning => 1 } );
1412              
1413             # We want to turn off the bayes rules for this test msg
1414 1         4 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1415 1         3 $self->{conf}->{use_bayes_rules} = 0;
1416 1         5 $status->check();
1417 1         4 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1418 1         7 $status->finish();
1419 1         8 $mail->finish();
1420 1         4 $self->finish_learner();
1421              
1422 1         5 $self->{conf}->free_uncompiled_rule_source();
1423              
1424             # load SQL modules now as well
1425 1         3 my $dsn = $self->{conf}->{user_scores_dsn};
1426 1 50       4 if ($dsn ne '') {
1427 0 0       0 if ($dsn =~ /^ldap:/i) {
1428 0         0 Mail::SpamAssassin::Conf::LDAP::load_modules();
1429             } else {
1430 0         0 Mail::SpamAssassin::Conf::SQL::load_modules();
1431             }
1432             }
1433              
1434             # make sure things are ready for scanning
1435 1 50       6 $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1436 1         18 $self->call_plugins("compile_now_finish",
1437             { use_user_prefs => $use_user_prefs,
1438             keep_userstate => $deal_with_userstate});
1439              
1440             # Reset any non-default values to the post-init() version.
1441 1         12 while(my($k,$v) = each %backup) {
1442 1         6 $self->{conf}->{$k} = $v;
1443             }
1444              
1445             # clear sed_path_cache
1446 1         8 delete $self->{conf}->{sed_path_cache};
1447              
1448 1         7 1;
1449             }
1450              
1451             ###########################################################################
1452              
1453             =item $f->debug_diagnostics ()
1454              
1455             Output some diagnostic information, useful for debugging SpamAssassin
1456             problems.
1457              
1458             =cut
1459              
1460             sub debug_diagnostics {
1461 0     0 1 0 my ($self) = @_;
1462              
1463             # load this class lazily, to avoid overhead when this method isn't
1464             # called.
1465 0         0 eval {
1466 0         0 require Mail::SpamAssassin::Util::DependencyInfo;
1467 0         0 dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1468             };
1469             }
1470              
1471             ###########################################################################
1472              
1473             =item $failed = $f->lint_rules ()
1474              
1475             Syntax-check the current set of rules. Returns the number of
1476             syntax errors discovered, or 0 if the configuration is valid.
1477              
1478             =cut
1479              
1480             sub lint_rules {
1481 47     47 1 18172 my ($self) = @_;
1482              
1483 47         214 dbg("ignore: using a test message to lint rules");
1484 47         721 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1485             "Subject: \n",
1486             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1487             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1488              
1489 47         293 $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1490 47         133 $self->{syntax_errors} = 0;
1491              
1492 47         114 my $olddcp = $self->{dont_copy_prefs};
1493 47         144 $self->{dont_copy_prefs} = 1;
1494              
1495 47         311 $self->init(1);
1496 47         177 $self->{syntax_errors} += $self->{conf}->{errors};
1497              
1498 47         207 $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1499              
1500             # bug 5048: override settings to ensure a faster lint
1501 47         507 $self->{'conf'}->{'use_auto_whitelist'} = 0;
1502 47         197 $self->{'conf'}->{'bayes_auto_learn'} = 0;
1503              
1504 47         634 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1505 47         969 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1506             { disable_auto_learning => 1 } );
1507 47         304 $status->check();
1508              
1509 47         157 $self->{syntax_errors} += $status->{rule_errors};
1510 47         267 $status->finish();
1511 47         414 $mail->finish();
1512 47 50       179 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1513 47         290 return ($self->{syntax_errors});
1514             }
1515              
1516             ###########################################################################
1517              
1518             =item $f->finish()
1519              
1520             Destroy this object, so that it will be garbage-collected once it
1521             goes out of scope. The object will no longer be usable after this
1522             method is called.
1523              
1524             =cut
1525              
1526             sub finish {
1527 32     32 1 270 my ($self) = @_;
1528              
1529 32         175 $self->timer_start("finish");
1530             $self->call_plugins("finish_tests", { conf => $self->{conf},
1531 32         213 main => $self });
1532              
1533 32         297 $self->{conf}->finish(); delete $self->{conf};
  32         153  
1534 32         240 $self->{plugins}->finish(); delete $self->{plugins};
  32         93  
1535              
1536 32 50       139 if ($self->{bayes_scanner}) {
1537 32         217 $self->{bayes_scanner}->finish();
1538 32         81 delete $self->{bayes_scanner};
1539             }
1540              
1541 32 50       277 $self->{resolver}->finish() if $self->{resolver};
1542              
1543 32         216 $self->timer_end("finish");
1544 32         74 %{$self} = ();
  32         797  
1545             }
1546              
1547             ###########################################################################
1548             # timers: bug 5356
1549              
1550             sub timer_enable {
1551 0     0 0 0 my ($self) = @_;
1552 0 0       0 dbg("config: timing enabled") if !$self->{timer_enabled};
1553 0         0 $self->{timer_enabled} = 1;
1554             }
1555              
1556             sub timer_disable {
1557 0     0 0 0 my ($self) = @_;
1558 0 0       0 dbg("config: timing disabled") if $self->{timer_enabled};
1559 0         0 $self->{timer_enabled} = 0;
1560             }
1561              
1562             # discard all timers, start afresh
1563             sub timer_reset {
1564 0     0 0 0 my ($self) = @_;
1565 0         0 delete $self->{timers};
1566 0         0 delete $self->{timers_order};
1567             }
1568              
1569             sub timer_start {
1570 32     32 0 101 my ($self, $name) = @_;
1571              
1572 32 50       122 return unless $self->{timer_enabled};
1573             # dbg("timing: '$name' starting");
1574              
1575 0 0       0 if (!exists $self->{timers}->{$name}) {
1576 0         0 push @{$self->{timers_order}}, $name;
  0         0  
1577             }
1578            
1579 0         0 $self->{timers}->{$name}->{start} = Time::HiRes::time();
1580             # note that this will reset any existing, unstopped timer of that name;
1581             # that's ok
1582             }
1583              
1584             sub timer_end {
1585 32     32 0 115 my ($self, $name) = @_;
1586 32 50       136 return unless $self->{timer_enabled};
1587              
1588 0         0 my $t = $self->{timers}->{$name};
1589 0         0 $t->{end} = time;
1590              
1591 0 0       0 if (!$t->{start}) {
1592 0         0 warn "timer_end('$name') with no timer_start";
1593 0         0 return;
1594             }
1595              
1596             # add to any existing elapsed time for this event, since
1597             # we may call the same timer name multiple times -- this is ok,
1598             # as long as they are not nested
1599 0         0 my $dt = $t->{end} - $t->{start};
1600 0 0       0 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1601 0 0       0 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
  0         0  
1602 0         0 else { $t->{elapsed} = $dt }
1603             }
1604              
1605             sub time_method {
1606 885     885 0 2315 my ($self, $name) = @_;
1607 885 50       2957 return unless $self->{timer_enabled};
1608 0         0 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1609             }
1610              
1611             sub timer_report {
1612 0     0 0 0 my ($self) = @_;
1613              
1614 0         0 my $earliest;
1615             my $latest;
1616              
1617 0         0 while (my($name,$h) = each(%{$self->{timers}})) {
  0         0  
1618             # dbg("timing: %s - %s", $name, join(", ",
1619             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1620 0         0 my $start = $h->{start};
1621 0 0 0     0 if (defined $start && (!defined $earliest || $earliest > $start)) {
      0        
1622 0         0 $earliest = $start;
1623             }
1624 0         0 my $end = $h->{end};
1625 0 0 0     0 if (defined $end && (!defined $latest || $latest < $end)) {
      0        
1626 0         0 $latest = $end;
1627             }
1628 0 0 0     0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1629             }
1630 0 0 0     0 my $total =
1631             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1632 0         0 my @str;
1633 0         0 foreach my $name (@{$self->{timers_order}}) {
  0         0  
1634 0   0     0 my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1635 0 0 0     0 my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1636 0 0       0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
    0          
1637 0         0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1638             }
1639              
1640 0         0 return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1641             }
1642              
1643             ###########################################################################
1644             # non-public methods.
1645              
1646             sub init {
1647 205     205 0 13773 my ($self, $use_user_pref) = @_;
1648              
1649             # Allow init() to be called multiple times, but only run once.
1650 205 100       839 if (defined $self->{_initted}) {
1651             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1652 150 50       742 if ($self->{_initted} != $$) {
1653 0         0 $self->{_initted} = $$;
1654 0 0       0 srand if !$self->{skip_prng_reseeding};
1655 0         0 $self->{resolver}->reinit_post_fork();
1656             }
1657 150         339 return;
1658             }
1659              
1660 55         234 my $timer = $self->time_method("init");
1661             # Note that this PID has run init()
1662 55         305 $self->{_initted} = $$;
1663              
1664             #fix spamd reading root prefs file
1665 55 100       192 if (!defined $use_user_pref) {
1666 15         22 $use_user_pref = 1;
1667             }
1668              
1669 55 50       201 if (!defined $self->{config_text}) {
1670 55         220 $self->{config_text} = '';
1671              
1672             # read a file called "init.pre" in site rules dir *before* all others;
1673             # even the system config.
1674 55         187 my $siterules = $self->{site_rules_filename};
1675 55   33     169 $siterules ||= $self->first_existing_path (@site_rules_path);
1676              
1677 55         161 my $sysrules = $self->{rules_filename};
1678 55   33     161 $sysrules ||= $self->first_existing_path (@default_rules_path);
1679              
1680 55 50       204 if ($siterules) {
1681 55         284 $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1682             }
1683             else {
1684 0         0 warn "config: could not find site rules directory\n";
1685             }
1686              
1687 55 50       208 if ($sysrules) {
1688 55         245 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1689             }
1690             else {
1691 0         0 warn "config: could not find sys rules directory\n";
1692             }
1693              
1694 55 50       230 if ($sysrules) {
1695 55         259 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1696 55 50 66     327 if ($self->{require_rules} && $cftext !~ /\S/) {
1697 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1698             }
1699 55         2251 $self->{config_text} .= $cftext;
1700             }
1701              
1702 55 50       245 if (!$self->{languages_filename}) {
1703 55         314 $self->{languages_filename} = $self->find_rule_support_file("languages");
1704             }
1705              
1706 55 50 33     616 if ($siterules && !$self->{ignore_site_cf_files}) {
1707 55         245 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1708             }
1709              
1710 55 100       253 if ( $use_user_pref != 0 ) {
1711 50         247 $self->get_and_create_userstate_dir();
1712              
1713             # user prefs file
1714 50         143 my $fname = $self->{userprefs_filename};
1715 50   33     153 $fname ||= $self->first_existing_path (@default_userprefs_path);
1716              
1717 50 100       168 if (!$self->{dont_copy_prefs}) {
1718             # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1719             # just use the last entry in the array as the default path.
1720 1   33     4 $fname ||= $self->sed_path($default_userprefs_path[-1]);
1721              
1722 1 50       14 my $stat_errn = stat($fname) ? 0 : 0+$!;
1723 1 50 33     8 if ($stat_errn == 0 && -f _) {
    0          
    0          
    0          
1724             # exists and is a regular file, nothing to do
1725             } elsif ($stat_errn == 0) {
1726 0         0 warn "config: default user preference file $fname is not a regular file\n";
1727             } elsif ($stat_errn != ENOENT) {
1728 0         0 warn "config: default user preference file $fname not accessible: $!\n";
1729             } elsif (!$self->create_default_prefs($fname)) {
1730 0         0 warn "config: failed to create default user preference file $fname\n";
1731             }
1732             }
1733              
1734 50         213 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1735             }
1736             }
1737              
1738 55 50       376 if ($self->{pre_config_text}) {
1739 0         0 $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1740             }
1741 55 50       256 if ($self->{post_config_text}) {
1742 0         0 $self->{config_text} .= $self->{post_config_text};
1743             }
1744              
1745 55 50       462 if ($self->{config_text} !~ /\S/) {
1746 0         0 my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1747 0 0       0 if ($self->{require_rules}) {
1748 0         0 die $m;
1749             } else {
1750 0         0 warn $m;
1751             }
1752             }
1753              
1754             # Go and parse the config!
1755 55         176 $self->{conf}->{main} = $self;
1756 55 50       278 if (would_log('dbg', 'config_text') > 1) {
1757 0         0 dbg('config_text: '.$self->{config_text});
1758             }
1759 55         440 $self->{conf}->parse_rules ($self->{config_text});
1760 55         458 $self->{conf}->finish_parsing(0);
1761 55         413 delete $self->{conf}->{main}; # to allow future GC'ing
1762              
1763 55         346 undef $self->{config_text}; # ensure it's actually freed
1764 55         381 delete $self->{config_text};
1765              
1766 55 50 66     476 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1767 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1768             }
1769              
1770             # Initialize the Bayes subsystem
1771 55 100       392 if ($self->{conf}->{use_bayes}) {
1772 40         5677 require Mail::SpamAssassin::Bayes;
1773 40         468 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1774             }
1775 55         422 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1776              
1777             # Figure out/set our initial scoreset
1778 55         219 my $set = 0;
1779 55 50       235 $set |= 1 unless $self->{local_tests_only};
1780 55 0 66     421 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      33        
1781 55         677 $self->{conf}->set_score_set ($set);
1782              
1783 55 50       294 if ($self->{only_these_rules}) {
1784 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1785             }
1786              
1787 55 50       224 if (!$self->{timer_enabled}) {
1788             # enable timing implicitly if _TIMING_ is used in add_header templates
1789 55         115 foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
  55         251  
1790 55         327 @{$self->{conf}->{'headers_spam'}}) {
1791 438 50       1395 if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
  0         0  
  0         0  
1792             }
1793             }
1794              
1795             # should be called only after configuration has been parsed
1796 55         1158 $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1797              
1798             # TODO -- open DNS cache etc. if necessary
1799             }
1800              
1801             sub read_cf {
1802 160     160 0 373 my ($self, $allpaths, $desc) = @_;
1803 160         490 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1804             }
1805              
1806             sub read_pre {
1807 110     110 0 349 my ($self, $allpaths, $desc) = @_;
1808 110         474 return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1809             }
1810              
1811             sub _read_cf_pre {
1812 270     270   563 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1813              
1814 270 50       561 return '' unless defined ($allpaths);
1815              
1816 270         506 my $txt = '';
1817 270         993 foreach my $path (split("\000", $allpaths))
1818             {
1819 270         1149 dbg("config: using \"$path\" for $desc");
1820              
1821 270 100       4385 my $stat_errn = stat($path) ? 0 : 0+$!;
1822 270 100 66     1837 if ($stat_errn == ENOENT) {
    50 66        
    100          
    100          
1823             # no file or directory
1824             } elsif ($stat_errn != 0) {
1825 0         0 dbg("config: file or directory $path not accessible: $!");
1826             } elsif (-d _) {
1827 188         658 foreach my $file ($self->$filelistmethod($path)) {
1828 928         1893 $txt .= read_cf_file($file);
1829             }
1830             } elsif (-f _ && -s _ && -r _) {
1831 32         126 $txt .= read_cf_file($path);
1832             }
1833             }
1834              
1835 270         3375 return $txt;
1836             }
1837              
1838              
1839             sub read_cf_file {
1840 960     960 0 1719 my($path) = @_;
1841 960         1404 my $txt = '';
1842              
1843 960         2075 local *IN;
1844 960 50       29727 if (open (IN, "<".$path)) {
1845              
1846 960         2558 my($inbuf,$nread); $txt = '';
  960         1702  
1847 960         15868 while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
  1000         5504  
1848 960 50       1915 defined $nread or die "error reading $path: $!";
1849 960 50       7618 close IN or die "error closing $path: $!";
1850 960         1948 undef $inbuf;
1851              
1852 960         4374 $txt = "file start $path\n" . $txt;
1853             # add an extra \n in case file did not end in one.
1854 960         2159 $txt .= "\nfile end $path\n";
1855              
1856 960         3243 dbg("config: read file $path");
1857             }
1858             else {
1859 0         0 warn "config: cannot open \"$path\": $!\n";
1860             }
1861              
1862 960         6674 return $txt;
1863             }
1864              
1865             sub get_and_create_userstate_dir {
1866 50     50 0 158 my ($self, $dir) = @_;
1867              
1868 50         117 my $fname;
1869              
1870             # If vpopmail is enabled then set fname to virtual homedir
1871             # precedence: dir, userstate_dir, derive from user_dir, system default
1872 50 50       250 if (defined $dir) {
    100          
    50          
1873 0         0 $fname = File::Spec->catdir ($dir, ".spamassassin");
1874             }
1875             elsif (defined $self->{userstate_dir}) {
1876 34         89 $fname = $self->{userstate_dir};
1877             }
1878             elsif (defined $self->{user_dir}) {
1879 0         0 $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1880             }
1881              
1882 50   66     289 $fname ||= $self->first_existing_path (@default_userstate_dir);
1883              
1884             # bug 4932: use the last default_userstate_dir entry if none of the others
1885             # already exist
1886 50   33     187 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1887              
1888 50 100       188 if (!$self->{dont_copy_prefs}) {
1889 1         6 dbg("config: using \"$fname\" for user state dir");
1890             }
1891              
1892             # if this is not a dir, not readable, or we are unable to create the dir,
1893             # this is not (yet) a serious error; in fact, it's not even worth
1894             # a warning at all times, so use dbg(). see bug 6268
1895 50 50       779 my $stat_errn = stat($fname) ? 0 : 0+$!;
1896 50 50 33     581 if ($stat_errn == 0 && !-d _) {
    50 33        
1897 0         0 dbg("config: $fname exists but is not a directory");
1898             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1899 0         0 dbg("config: error accessing $fname: $!");
1900             } else { # does not exist, create it
1901             eval {
1902 50         2795 mkpath($fname, 0, 0700); 1;
  50         300  
1903 50 50       104 } or do {
1904 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
1905 0         0 dbg("config: mkdir $fname failed: $eval_stat");
1906             };
1907             }
1908              
1909 50         126 $fname;
1910             }
1911              
1912             =item $fullpath = $f->find_rule_support_file ($filename)
1913              
1914             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1915             in the system-wide rules directory, and return its full path if
1916             it exists, or undef if it doesn't exist.
1917              
1918             (This API was added in SpamAssassin 3.1.1.)
1919              
1920             =cut
1921              
1922             sub find_rule_support_file {
1923 55     55 1 219 my ($self, $filename) = @_;
1924              
1925             return $self->first_existing_path(
1926 55         214 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
  385         606  
  385         1602  
  385         1124  
1927             }
1928              
1929             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1930              
1931             Copy default preferences file into home directory for later use and
1932             modification, if it does not already exist and C<dont_copy_prefs> is
1933             not set.
1934              
1935             =cut
1936              
1937             sub create_default_prefs {
1938             # $userdir will only exist if vpopmail config is enabled thru spamd
1939             # Its value will be the virtual user's maildir
1940             #
1941 0     0 1 0 my ($self, $fname, $user, $userdir) = @_;
1942              
1943 0 0       0 if ($self->{dont_copy_prefs}) {
1944 0         0 return(0);
1945             }
1946              
1947             # if ($userdir && $userdir ne $self->{user_dir}) {
1948             # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1949             # }
1950              
1951 0 0       0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1952 0 0       0 if ($stat_errn == 0) {
    0          
1953             # fine, it already exists
1954             } elsif ($stat_errn != ENOENT) {
1955 0         0 dbg("config: cannot access user preferences file $fname: $!");
1956             } else {
1957             # Pass on the value of $userdir for virtual users in vpopmail
1958             # otherwise it is empty and the user's normal homedir is used
1959 0         0 $self->get_and_create_userstate_dir($userdir);
1960              
1961             # copy in the default one for later editing
1962 0         0 my $defprefs =
1963             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1964              
1965 0         0 local(*IN,*OUT);
1966 0         0 $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1967 0 0       0 if (!defined $defprefs) {
    0          
    0          
1968 0         0 warn "config: can not determine default prefs path\n";
1969             } elsif (!open(IN, "<$defprefs")) {
1970 0         0 warn "config: cannot open $defprefs: $!\n";
1971             } elsif (!open(OUT, ">$fname")) {
1972 0         0 warn "config: cannot create user preferences file $fname: $!\n";
1973             } else {
1974             # former code skipped lines beginning with '#* ', the following copy
1975             # procedure no longer does so, as it avoids reading line-by-line
1976 0         0 my($inbuf,$nread);
1977 0         0 while ( $nread=read(IN,$inbuf,16384) ) {
1978 0 0       0 print OUT $inbuf or die "cannot write to $fname: $!";
1979             }
1980 0 0       0 defined $nread or die "error reading $defprefs: $!";
1981 0         0 undef $inbuf;
1982 0 0       0 close OUT or die "error closing $fname: $!";
1983 0 0       0 close IN or die "error closing $defprefs: $!";
1984              
1985 0 0 0     0 if (($< == 0) && ($> == 0) && defined($user)) { # chown it
      0        
1986 0         0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1987 0 0       0 unless (chown($uid, $gid, $fname)) {
1988 0         0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1989             }
1990             }
1991 0         0 warn "config: created user preferences file: $fname\n";
1992 0         0 return(1);
1993             }
1994             }
1995              
1996 0         0 return(0);
1997             }
1998              
1999             ###########################################################################
2000              
2001             sub expand_name {
2002 16     16 0 75 my ($self, $name) = @_;
2003 16   50     233 my $home = $self->{user_dir} || $ENV{HOME} || '';
2004              
2005 16 50       73 if (am_running_on_windows()) {
2006 0   0     0 my $userprofile = $ENV{USERPROFILE} || '';
2007              
2008 0 0 0     0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
2009 0 0       0 return $userprofile if ($userprofile =~ m/^\\\\/o);
2010              
2011 0 0 0     0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
2012 0 0       0 return $home if ($home =~ m/^\\\\/o);
2013              
2014 0         0 return '';
2015             } else {
2016 16 50 33     314 return $home if ($home && $home =~ /\//o);
2017 0 0       0 return (getpwnam($name))[7] if ($name ne '');
2018 0         0 return (getpwuid($>))[7];
2019             }
2020             }
2021              
2022             sub sed_path {
2023 136     136 0 328 my ($self, $path) = @_;
2024 136 50       330 return if !defined $path;
2025              
2026 136 50       607 if (exists($self->{conf}->{sed_path_cache}->{$path})) {
2027 0         0 return $self->{conf}->{sed_path_cache}->{$path};
2028             }
2029              
2030 136         240 my $orig_path = $path;
2031              
2032 136 0       264 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
  0         0  
2033 136 50       215 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
  1         9  
2034 136 50       317 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
  1         6  
2035 136 0 33     272 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
  1         8  
2036 136 0       242 $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
  0         0  
2037 136         225 $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
  0         0  
2038 136         326 $path =~ s/__version__/${VERSION}/gs;
2039 136         343 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
  16         57  
2040              
2041 136         542 $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
2042 136         521 $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
2043 136         287 return $path;
2044             }
2045              
2046             sub get_perl_major_version {
2047 0     0 0 0 my $self = shift;
2048 0 0       0 $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2049 0         0 return $1;
2050             }
2051              
2052             sub first_existing_path {
2053 72     72 0 191 my $self = shift;
2054 72         159 my $path;
2055 72         298 foreach my $p (@_) {
2056 133         529 $path = $self->sed_path ($p);
2057 133 50       348 if (defined $path) {
2058 133 100       2825 my($errn) = stat($path) ? 0 : 0+$!;
2059 133 100       634 if ($errn == ENOENT) { } # does not exist
    50          
2060 0         0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2061 71         387 else { return $path }
2062             }
2063             }
2064 1         5 return;
2065             }
2066              
2067             ###########################################################################
2068              
2069             sub get_cf_files_in_dir {
2070 94     94 0 257 my ($self, $dir) = @_;
2071 94         257 return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2072             }
2073              
2074             sub get_pre_files_in_dir {
2075 94     94 0 249 my ($self, $dir) = @_;
2076 94         385 return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2077             }
2078              
2079             sub _get_cf_pre_files_in_dir {
2080 188     188   505 my ($self, $dir, $type) = @_;
2081              
2082 188 100       458 if ($self->{config_tree_recurse}) {
2083 4         5 my @cfs;
2084              
2085             # use "eval" to avoid loading File::Find unless this is specified
2086             eval ' use File::Find qw();
2087             File::Find::find(
2088             { untaint => 1,
2089             follow => 1,
2090             wanted =>
2091             sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
2092             }, $dir); 1;
2093 4 50   1   255 ' or do {
  1     1   7  
  1     1   2  
  1     1   122  
  1         7  
  1         2  
  1         67  
  1         7  
  1         2  
  1         68  
  1         7  
  1         2  
  1         101  
2094 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
2095 0         0 die "_get_cf_pre_files_in_dir error: $eval_stat";
2096             };
2097 4         19 @cfs = sort { $a cmp $b } @cfs;
  46         80  
2098 4         35 return @cfs;
2099             }
2100             else {
2101 184 50       4136 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2102 184 100 100     5128 my @cfs = grep { $_ ne '.' && $_ ne '..' &&
  2212   100     27461  
2103             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2104 184         2303 closedir SA_CF_DIR;
2105              
2106 184         1105 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
  905         2366  
  1777         2179  
2107             }
2108             }
2109              
2110             ###########################################################################
2111              
2112             sub have_plugin {
2113 415     415 0 797 my ($self, $subname) = @_;
2114              
2115             # We could potentially get called after a finish(), so just return.
2116 415 50       830 return unless $self->{plugins};
2117              
2118 415         985 return $self->{plugins}->have_callback ($subname);
2119             }
2120              
2121             sub call_plugins {
2122 3467     3467 0 5869 my $self = shift;
2123              
2124             # We could potentially get called after a finish(), so just return.
2125 3467 50       7733 return unless $self->{plugins};
2126              
2127             # safety net in case some plugin changes global settings, Bug 6218
2128 3467         13005 local $/ = $/; # prevent underlying modules from changing the global $/
2129              
2130 3467         5473 my $subname = shift;
2131 3467         10213 return $self->{plugins}->callback($subname, @_);
2132             }
2133              
2134             ###########################################################################
2135              
2136             sub find_all_addrs_in_mail {
2137 0     0 0 0 my ($self, $mail_obj) = @_;
2138              
2139 0         0 $self->init(1);
2140              
2141 0         0 my @addrlist;
2142 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2143             Errors-To Mail-Followup-To))
2144             {
2145 0         0 my @hdrs = $mail_obj->get_header($header);
2146 0 0       0 if ($#hdrs < 0) { next; }
  0         0  
2147 0         0 push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2148             }
2149              
2150             # find addrs in body, too
2151 0         0 foreach my $line (@{$mail_obj->get_body()}) {
  0         0  
2152 0         0 push (@addrlist, $self->find_all_addrs_in_line($line));
2153             }
2154              
2155 0         0 my @ret;
2156             my %done;
2157              
2158 0         0 foreach (@addrlist) {
2159 0         0 s/^mailto://; # from Outlook "forwarded" message
2160 0 0       0 next if defined ($done{$_}); $done{$_} = 1;
  0         0  
2161 0         0 push (@ret, $_);
2162             }
2163              
2164 0         0 @ret;
2165             }
2166              
2167             sub find_all_addrs_in_line {
2168 67     67 0 243 my ($self, $line) = @_;
2169              
2170             # a more permissive pattern based on "dot-atom" as per RFC2822
2171 67         169 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2172 67         165 my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2173              
2174 67         150 my @addrs;
2175             my %seen;
2176 67         603 while ($line =~ s/(?:mailto:)?\s*
2177             ($ID_PATTERN \@
2178             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2179             {
2180 2         11 my $addr = $1;
2181 2         8 $addr =~ s/^mailto://;
2182 2 50       10 next if (defined ($seen{$addr})); $seen{$addr} = 1;
  2         42  
2183 2         13 push (@addrs, $addr);
2184             }
2185              
2186 67         305 return @addrs;
2187             }
2188              
2189             ###########################################################################
2190              
2191             # sa_die -- used to die with a useful exit code.
2192              
2193             sub sa_die {
2194 0     0 0 0 my $exitcode = shift;
2195 0         0 warn @_;
2196 0         0 exit $exitcode;
2197             }
2198              
2199             ###########################################################################
2200              
2201             =item $f->copy_config ( [ $source ], [ $dest ] )
2202              
2203             Used for daemons to keep a persistent Mail::SpamAssassin object's
2204             configuration correct if switching between users. Pass an associative
2205             array reference as either $source or $dest, and set the other to 'undef'
2206             so that the object will use its current configuration. i.e.:
2207              
2208             # create object w/ configuration
2209             my $spamtest = Mail::SpamAssassin->new( ... );
2210              
2211             # backup configuration to %conf_backup
2212             my %conf_backup;
2213             $spamtest->copy_config(undef, \%conf_backup) ||
2214             die "config: error returned from copy_config!\n";
2215              
2216             ... do stuff, perhaps modify the config, etc ...
2217              
2218             # reset the configuration back to the original
2219             $spamtest->copy_config(\%conf_backup, undef) ||
2220             die "config: error returned from copy_config!\n";
2221              
2222             Note that the contents of the associative arrays should be considered
2223             opaque by calling code.
2224              
2225             =cut
2226              
2227             sub copy_config {
2228 3     3 1 3955 my ($self, $source, $dest) = @_;
2229              
2230             # At least one of either source or dest needs to be a hash reference ...
2231 3 50 66     26 unless ((defined $source && ref($source) eq 'HASH') ||
      33        
      66        
2232             (defined $dest && ref($dest) eq 'HASH'))
2233             {
2234 0         0 return 0;
2235             }
2236              
2237 3         9 my $timer = $self->time_method("copy_config");
2238              
2239             # let the Conf object itself do all the heavy lifting. It's better
2240             # than having this class know all about that class' internals...
2241 3 100       7 if (defined $source) {
2242 2         11 dbg ("config: copying current conf from backup");
2243             }
2244             else {
2245 1         4 dbg ("config: copying current conf to backup");
2246             }
2247 3         17 return $self->{conf}->clone($source, $dest);
2248             }
2249              
2250             ###########################################################################
2251              
2252             =item @plugins = $f->get_loaded_plugins_list ( )
2253              
2254             Return the list of plugins currently loaded by this SpamAssassin object's
2255             configuration; each entry in the list is an object of type
2256             C<Mail::SpamAssassin::Plugin>.
2257              
2258             (This API was added in SpamAssassin 3.2.0.)
2259              
2260             =cut
2261              
2262             sub get_loaded_plugins_list {
2263 0     0 1 0 my ($self) = @_;
2264 0         0 return $self->{plugins}->get_loaded_plugins_list();
2265             }
2266              
2267             1;
2268             __END__
2269              
2270             ###########################################################################
2271              
2272             =back
2273              
2274             =head1 PREREQUISITES
2275              
2276             C<HTML::Parser>
2277             C<Sys::Syslog>
2278              
2279             =head1 MORE DOCUMENTATION
2280              
2281             See also E<lt>http://spamassassin.apache.org/E<gt> and
2282             E<lt>http://wiki.apache.org/spamassassin/E<gt> for more information.
2283              
2284             =head1 SEE ALSO
2285              
2286             Mail::SpamAssassin::Conf(3)
2287             Mail::SpamAssassin::PerMsgStatus(3)
2288             spamassassin(1)
2289             sa-update(1)
2290              
2291             =head1 BUGS
2292              
2293             See E<lt>http://issues.apache.org/SpamAssassin/E<gt>
2294              
2295             =head1 AUTHORS
2296              
2297             The SpamAssassin(tm) Project E<lt>http://spamassassin.apache.org/E<gt>
2298              
2299             =head1 COPYRIGHT
2300              
2301             SpamAssassin is distributed under the Apache License, Version 2.0, as
2302             described in the file C<LICENSE> included with the distribution.
2303              
2304             =head1 AVAILABILITY
2305              
2306             The latest version of this library is likely to be available from CPAN
2307             as well as:
2308              
2309             E<lt>http://spamassassin.apache.org/E<gt>
2310              
2311             =cut