File Coverage

lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 440 763 57.6
branch 117 340 34.4
condition 61 176 34.6
subroutine 60 85 70.5
pod 31 56 55.3
total 709 1420 49.9


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             package Mail::SpamAssassin;
62 14     14   475218 use strict;
  14         112  
  14         739  
63 14     14   133 use warnings;
  14         63  
  14         1013  
64             # use bytes;
65 14     14   177 use re 'taint';
  14         48  
  14         1687  
66              
67             require 5.006_001;
68              
69 14     14   3395 use Mail::SpamAssassin::Logger;
  14         43  
  14         1297  
70 14     14   3801 use Mail::SpamAssassin::Constants;
  14         39  
  14         845  
71 14     14   3815 use Mail::SpamAssassin::Conf;
  14         48  
  14         687  
72 14     14   4439 use Mail::SpamAssassin::Conf::SQL;
  14         41  
  14         546  
73 14     14   3429 use Mail::SpamAssassin::Conf::LDAP;
  14         44  
  14         542  
74 14     14   3564 use Mail::SpamAssassin::PerMsgStatus;
  14         48  
  14         706  
75 14     14   4559 use Mail::SpamAssassin::Message;
  14         266  
  14         1415  
76 14     14   5430 use Mail::SpamAssassin::PluginHandler;
  14         62  
  14         583  
77 14     14   3530 use Mail::SpamAssassin::DnsResolver;
  14         50  
  14         617  
78 14     14   3900 use Mail::SpamAssassin::RegistryBoundaries;
  14         58  
  14         640  
79 14     14   123 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  14         44  
  14         862  
80 12     12   3810 use Mail::SpamAssassin::Util::ScopedTimer;
  12         41  
  12         423  
81              
82 12     12   87 use Errno qw(ENOENT EACCES);
  12         31  
  12         849  
83 12     12   86 use File::Basename;
  12         29  
  12         1472  
84 11     11   84 use File::Path;
  11         24  
  11         1086  
85 11     11   71 use File::Spec 0.8;
  11         353  
  11         328  
86 11     11   75 use Time::HiRes qw(time);
  11         23  
  11         82  
87 11     11   1172 use Cwd;
  11         23  
  11         1065  
88 11     11   79 use Config;
  11         20  
  11         106288  
89              
90             our $VERSION = "3.004003"; # update after release (same format as perl $])
91             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
92             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
93              
94              
95             # Used during the prerelease/release-candidate part of the official release
96             # process. If you hacked up your SA, you should add a version_tag to your .cf
97             # files; this variable should not be modified.
98             our @EXTRA_VERSION = qw();
99              
100             our @ISA = qw();
101              
102             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
103             our $SUB_VERSION = 'svnunknown';
104             if ('$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $' =~ ':') {
105             # Subversion keyword "$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $" has been successfully expanded.
106             # Doesn't happen with automated launchpad builds:
107             # https://bugs.launchpad.net/launchpad/+bug/780916
108             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2019-12-06 18:58:14 -0500 (Fri, 06 Dec 2019) $ updated by SVN'))[1];
109             }
110              
111              
112             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
113             if ('$LastChangedRevision: 1870940 $' =~ ':') {
114             # Subversion keyword "$LastChangedRevision: 1870940 $" has been successfully expanded.
115             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1870940 $ updated by SVN}[1]));
116             } else {
117             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
118             }
119             }
120              
121             sub Version {
122 125     125 0 879 $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
123 125         1564 return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
124             }
125              
126             our $HOME_URL = "https://spamassassin.apache.org/";
127              
128             # note that the CWD takes priority. This is required in case a user
129             # is testing a new version of SpamAssassin on a machine with an older
130             # version installed. Unless you can come up with a fix for this that
131             # allows "make test" to work, don't change this.
132              
133             our @default_rules_path = (
134             './rules', # REMOVEFORINST
135             '../rules', # REMOVEFORINST
136             '__local_state_dir__/__version__',
137             '__def_rules_dir__',
138             '__prefix__/share/spamassassin',
139             '/usr/local/share/spamassassin',
140             '/usr/share/spamassassin',
141             );
142              
143             # first 3 are BSDish, latter 2 Linuxish
144             our @site_rules_path = (
145             '__local_rules_dir__',
146             '__prefix__/etc/mail/spamassassin',
147             '__prefix__/etc/spamassassin',
148             '/usr/local/etc/spamassassin',
149             '/usr/pkg/etc/spamassassin',
150             '/usr/etc/spamassassin',
151             '/etc/mail/spamassassin',
152             '/etc/spamassassin',
153             );
154              
155             our @default_prefs_path = (
156             '__local_rules_dir__/user_prefs.template',
157             '__prefix__/etc/mail/spamassassin/user_prefs.template',
158             '__prefix__/share/spamassassin/user_prefs.template',
159             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
160             '__def_rules_dir__/user_prefs.template',
161             '/etc/spamassassin/user_prefs.template',
162             '/etc/mail/spamassassin/user_prefs.template',
163             '/usr/local/share/spamassassin/user_prefs.template',
164             '/usr/share/spamassassin/user_prefs.template',
165             );
166              
167             our @default_userprefs_path = (
168             '~/.spamassassin/user_prefs',
169             );
170              
171             our @default_userstate_dir = (
172             '~/.spamassassin',
173             );
174              
175             ###########################################################################
176              
177             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
178              
179             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
180             reference to the constructor which may contain the following attribute-
181             value pairs.
182              
183             =over 4
184              
185             =item debug
186              
187             This is the debug options used to determine logging level. It exists to
188             allow sections of debug messages (called "facilities") to be enabled or
189             disabled. If this is a string, it is treated as a comma-delimited list
190             of the debug facilities. If it's a hash reference, then the keys are
191             treated as the list of debug facilities and if it's a array reference,
192             then the elements are treated as the list of debug facilities.
193              
194             There are also two special cases: (1) if the special case of "info" is
195             passed as a debug facility, then all informational messages are enabled;
196             (2) if the special case of "all" is passed as a debug facility, then all
197             debugging facilities are enabled.
198              
199             =item rules_filename
200              
201             The filename/directory to load spam-identifying rules from. (optional)
202              
203             =item site_rules_filename
204              
205             The filename/directory to load site-specific spam-identifying rules from.
206             (optional)
207              
208             =item userprefs_filename
209              
210             The filename to load preferences from. (optional)
211              
212             =item userstate_dir
213              
214             The directory user state is stored in. (optional)
215              
216             =item config_tree_recurse
217              
218             Set to C<1> to recurse through directories when reading configuration
219             files, instead of just reading a single level. (optional, default 0)
220              
221             =item config_text
222              
223             The text of all rules and preferences. If you prefer not to load the rules
224             from files, read them in yourself and set this instead. As a result, this will
225             override the settings for C<rules_filename>, C<site_rules_filename>,
226             and C<userprefs_filename>.
227              
228             =item pre_config_text
229              
230             Similar to C<config_text>, this text is placed before config_text to allow an
231             override of config files.
232              
233             =item post_config_text
234              
235             Similar to C<config_text>, this text is placed after config_text to allow an
236             override of config files.
237              
238             =item force_ipv4
239              
240             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
241             to use IPv6. Use if the existing tests for IPv6 availability produce
242             incorrect results or crashes.
243              
244             =item force_ipv6
245              
246             For symmetry with force_ipv4: if set to 1, DNS or other network tests
247             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
248             this setting and use whatever protocol family they are comfortable with.
249              
250             =item require_rules
251              
252             If set to 1, init() will die if no valid rules could be loaded. This is the
253             default behaviour when called by C<spamassassin> or C<spamd>.
254              
255             =item languages_filename
256              
257             If you want to be able to use the language-guessing rule
258             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
259             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
260             need to set this. It should be the path to the B<languages> file normally
261             found in the SpamAssassin B<rules> directory.
262              
263             =item local_tests_only
264              
265             If set to 1, no tests that require internet access will be performed. (default:
266             0)
267              
268             =item need_tags
269              
270             The option provides a way to avoid more expensive processing when it is known
271             in advance that some information will not be needed by a caller.
272              
273             A value of the option can either be a string (a comma-delimited list of tag
274             names), or a reference to a list of individual tag names. A caller may provide
275             the list in advance, specifying his intention to later collect the information
276             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
277             insensitive), it shows that a caller will not be interested in such tag,
278             although there is no guarantee it would save any resources, nor that a tag
279             value will be empty. Currently no built-in tags start with 'NO'. A later
280             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
281             to TIMING,NOASN.
282              
283             For backward compatibility, all tags available as of version 3.2.4 will
284             be available by default (unless disabled by NOtag), even if not requested
285             through need_tags option. Future versions may provide new tags conditionally
286             available.
287              
288             Currently the only tag that needs to be explicitly requested is 'TIMING'.
289             Not requesting it can save a millisecond or two - it mostly serves to
290             illustrate the usage of need_tags.
291              
292             Example:
293             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
294             or:
295             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
296              
297             =item ignore_site_cf_files
298              
299             If set to 1, any rule files found in the C<site_rules_filename> directory will
300             be ignored. *.pre files (used for loading plugins) found in the
301             C<site_rules_filename> directory will still be used. (default: 0)
302              
303             =item dont_copy_prefs
304              
305             If set to 1, the user preferences file will not be created if it doesn't
306             already exist. (default: 0)
307              
308             =item save_pattern_hits
309              
310             If set to 1, the patterns hit can be retrieved from the
311             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
312              
313             =item home_dir_for_helpers
314              
315             If set, the B<HOME> environment variable will be set to this value
316             when using test applications that require their configuration data,
317             such as Razor, Pyzor and DCC.
318              
319             =item username
320              
321             If set, the C<username> attribute will use this as the current user's name.
322             Otherwise, the default is taken from the runtime environment (ie. this process'
323             effective UID under UNIX).
324              
325             =item skip_prng_reseeding
326              
327             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
328             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
329             Perl function should be called during initialization of each child process,
330             soon after forking.
331              
332             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
333             library.
334              
335             This setting requires the caller to decide when to call srand().
336             This choice may be desired to preserve the entropy of a PRNG. The default
337             value of skip_prng_reseeding is false to maintain backward compatibility.
338              
339             This option should only be set by a caller if it calls srand() upon spawning
340             child processes. Unless you are certain you need it, leave this setting as
341             false.
342              
343             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
344             which allows spamd to call srand() right after forking a child process.
345              
346             =back
347              
348             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
349             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
350             configuration files in the usual installed locations using the below variable
351             definitions which can be passed in.
352              
353             =over 4
354              
355             =item PREFIX
356              
357             Used as the root for certain directory paths such as:
358              
359             '__prefix__/etc/mail/spamassassin'
360             '__prefix__/etc/spamassassin'
361              
362             Defaults to "@@PREFIX@@".
363              
364             =item DEF_RULES_DIR
365              
366             Location where the default rules are installed. Defaults to
367             "@@DEF_RULES_DIR@@".
368              
369             =item LOCAL_RULES_DIR
370              
371             Location where the local site rules are installed. Defaults to
372             "@@LOCAL_RULES_DIR@@".
373              
374             =item LOCAL_STATE_DIR
375              
376             Location of the local state directory, mainly used for installing updates via
377             C<sa-update> and compiling rulesets to native code. Defaults to
378             "@@LOCAL_STATE_DIR@@".
379              
380             =back
381              
382              
383             =cut
384              
385             # undocumented ctor settings:
386             #
387             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
388              
389             sub new {
390 55     55 1 75582 my $class = shift;
391 55   33     597 $class = ref($class) || $class;
392              
393 55         139 my $self = shift;
394 55 50       224 if (!defined $self) { $self = { }; }
  0         0  
395 55         200 bless ($self, $class);
396              
397             # basic backward compatibility; debug used to be a boolean.
398             # translate that into 'all', which is what it meant before 3.1.0.
399 55 50 33     366 if ($self->{debug} && $self->{debug} eq '1') {
400 0         0 $self->{debug} = 'all';
401             }
402              
403             # enable or disable debugging
404 55         522 Mail::SpamAssassin::Logger::add_facilities($self->{debug});
405              
406             # first debugging information possibly printed should be the version
407 55         359 dbg("generic: SpamAssassin version " . Version());
408              
409             # if the libs are installed in an alternate location, and the caller
410             # didn't set PREFIX, we should have an estimated guess ready, values
411             # substituted at 'make' time
412 55   50     519 $self->{PREFIX} ||= '@@PREFIX@@';
413 55   50     461 $self->{DEF_RULES_DIR} ||= '@@DEF_RULES_DIR@@';
414 55   50     382 $self->{LOCAL_RULES_DIR} ||= '@@LOCAL_RULES_DIR@@';
415 55   50     345 $self->{LOCAL_STATE_DIR} ||= '@@LOCAL_STATE_DIR@@';
416 55         196 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
  220         952  
417             qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
418              
419 55         190 $self->{needed_tags} = {};
420 55         126 { my $ntags = $self->{need_tags};
  55         125  
421 55 50       275 if (defined $ntags) {
422 0 0       0 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
423 0 0       0 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
424             }
425             }
426             }
427 55 50 33     267 if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
428 0         0 $self->timer_enable();
429             }
430              
431 55   33     1008 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
432 55         702 $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
433              
434 55   50     356 $self->{save_pattern_hits} ||= 0;
435              
436             # Make sure that we clean $PATH if we're tainted
437 55         381 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
438              
439 55 50       238 if (!defined $self->{username}) {
440 55         471 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
441             }
442              
443 55         486 $self->create_locker();
444              
445 55         228 $self;
446             }
447              
448             sub create_locker {
449 55     55 0 185 my ($self) = @_;
450              
451 55         101 my $class;
452 55         148 my $m = $self->{conf}->{lock_method};
453              
454             # let people choose what they want -- even if they may not work on their
455             # OS. (they could be using cygwin!)
456 55 50       342 if ($m eq 'win32') { $class = 'Win32'; }
  0 50       0  
    50          
457 0         0 elsif ($m eq 'flock') { $class = 'Flock'; }
458 0         0 elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
459             else {
460             # OS-specific defaults
461 55 50       257 if (am_running_on_windows()) {
462 0         0 $class = 'Win32';
463             } else {
464 55         162 $class = 'UnixNFSSafe';
465             }
466             }
467              
468             # this could probably be made a little faster; for now I'm going
469             # for slow but safe, by keeping in quotes
470             eval '
471             use Mail::SpamAssassin::Locker::'.$class.';
472             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
473             1;
474 10 50   10   4465 ' or do {
  10         33  
  10         396  
  55         5761  
475 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
476 0         0 die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
477             };
478              
479 55 50       385 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
480             }
481              
482             ###########################################################################
483              
484             =item parse($message, $parse_now [, $suppl_attrib])
485              
486             Parse will return a Mail::SpamAssassin::Message object with just the
487             headers parsed. When calling this function, there are two optional
488             parameters that can be passed in: $message is either undef (which
489             will use STDIN), a scalar - a string containing an entire message,
490             a reference to such string, an array reference of the message with
491             one line per array element, or either a file glob or an IO::File object
492             which holds the entire contents of the message; and $parse_now, which
493             specifies whether or not to create a MIME tree at parse time or later
494             as necessary.
495              
496             The I<$parse_now> option, by default, is set to false (0). This
497             allows SpamAssassin to not have to generate the tree of internal
498             data nodes if the information is not going to be used. This is
499             handy, for instance, when running C<spamassassin -d>, which only
500             needs the pristine header and body which is always parsed and stored
501             by this function.
502              
503             The optional last argument I<$suppl_attrib> provides a way for a caller
504             to pass additional information about a message to SpamAssassin. It is
505             either undef, or a ref to a hash where each key/value pair provides some
506             supplementary attribute of the message, typically information that cannot
507             be deduced from the message itself, or is hard to do so reliably, or would
508             represent unnecessary work for SpamAssassin to obtain it. The argument will
509             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
510             made available to the rest of the code as well as to plugins. The exact list
511             of attributes will evolve through time, any unknown attribute should be
512             ignored. Possible examples are: SMTP envelope information, a flag indicating
513             that a message as supplied by a caller was truncated due to size limit, an
514             already verified list of DKIM signature objects, or perhaps a list of rule
515             hits predetermined by a caller, which makes another possible way for a
516             caller to provide meta information (instead of having to insert made-up
517             header fields in order to pass information), or maybe just plain rule hits.
518              
519             For more information, please see the C<Mail::SpamAssassin::Message>
520             and C<Mail::SpamAssassin::Message::Node> POD.
521              
522             =cut
523              
524             sub parse {
525 102     102 1 6349 my($self, $message, $parsenow, $suppl_attrib) = @_;
526              
527 102         430 my $start_time = time;
528 102         417 $self->init(1);
529 102         416 my $timer = $self->time_method("parse");
530              
531 102         178 my $master_deadline;
532             # passed in at a function call
533 102 50 66     523 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
534 48         139 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
535             }
536             # found in a config file - overrides passed-in number if lower
537 102 50       383 if ($self->{conf}->{time_limit}) { # defined and nonzero
538 102         385 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
539 102 50 33     485 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
540 102         219 $master_deadline = $time_limit_deadline;
541             }
542             }
543 102 50       317 if (defined $master_deadline) {
544 102         450 dbg("config: time limit %.1f s", $master_deadline - $start_time);
545             }
546              
547             my $msg = Mail::SpamAssassin::Message->new({
548             message=>$message, parsenow=>$parsenow,
549             normalize=>$self->{conf}->{normalize_charset},
550             body_part_scan_size=>$self->{conf}->{body_part_scan_size},
551             rawbody_part_scan_size=>$self->{conf}->{rawbody_part_scan_size},
552 102         2150 master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
553              
554             # bug 5069: The goal here is to get rendering plugins to do things
555             # like OCR, convert doc and pdf to text, etc, though it could be anything
556             # that wants to process the message after it's been parsed.
557 102         785 $self->call_plugins("post_message_parse", { message => $msg });
558              
559 102         427 return $msg;
560             }
561              
562              
563             ###########################################################################
564              
565             =item $status = $f->check ($mail)
566              
567             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
568             to determine if it is spam or not.
569              
570             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
571             used to test or manipulate the mail message.
572              
573             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
574             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
575             object is a "factory". However, if you do this, be sure to call the
576             C<finish()> method on the status objects when you're done with them.
577              
578             =cut
579              
580             sub check {
581 35     35 1 180 my ($self, $mail_obj) = @_;
582              
583 35         133 $self->init(1);
584 35         248 my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
585 35         165 $pms->check();
586 34 50       147 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
587 34         107 $pms;
588             }
589              
590             =item $status = $f->check_message_text ($mailtext)
591              
592             Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
593             is spam or not.
594              
595             Otherwise identical to C<check()> above.
596              
597             =cut
598              
599             sub check_message_text {
600 32     32 1 131 my ($self, $mailtext) = @_;
601 32         144 my $msg = $self->parse($mailtext, 1);
602 32         161 my $result = $self->check($msg);
603              
604             # Kill off the metadata ...
605             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
606             # the message. So killing it here will cause things like
607             # rewrite_message() to fail. <grrr>
608             #
609 32         171 $msg->finish_metadata();
610              
611 32         156 return $result;
612             }
613              
614             ###########################################################################
615              
616             =item $status = $f->learn ($mail, $id, $isspam, $forget)
617              
618             Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
619              
620             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
621             be learnt as non-spam.
622              
623             If C<$forget> is set, the attributes of the mail will be removed from
624             both the non-spam and spam learning databases.
625              
626             C<$id> is an optional message-identification string, used internally
627             to tag the message. If it is C<undef>, the Message-Id of the message
628             will be used. It should be unique to that message.
629              
630             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
631             manipulate the learning process for each mail.
632              
633             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
634             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
635             object is a "factory". However, if you do this, be sure to call the
636             C<finish()> method on the learner objects when you're done with them.
637              
638             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
639             must be called before using this method.
640              
641             =cut
642              
643             sub learn {
644 0     0 1 0 my ($self, $mail_obj, $id, $isspam, $forget) = @_;
645 0         0 local ($_);
646              
647 0         0 require Mail::SpamAssassin::PerMsgLearner;
648 0         0 $self->init(1);
649 0         0 my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
650              
651 0 0       0 if ($forget) {
    0          
652 0         0 dbg("learn: forgetting message");
653 0         0 $msg->forget($id);
654             } elsif ($isspam) {
655 0         0 dbg("learn: learning spam");
656 0         0 $msg->learn_spam($id);
657             } else {
658 0         0 dbg("learn: learning ham");
659 0         0 $msg->learn_ham($id);
660             }
661              
662 0         0 $msg;
663             }
664              
665             ###########################################################################
666              
667             =item $f->init_learner ( [ { opt => val, ... } ] )
668              
669             Initialise learning. You may pass the following attribute-value pairs to this
670             method.
671              
672             =over 4
673              
674             =item caller_will_untie
675              
676             Whether or not the code calling this method will take care of untie'ing
677             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
678              
679             =item force_expire
680              
681             Should an expiration run be forced to occur immediately? (optional, default 0).
682              
683             =item learn_to_journal
684              
685             Should learning data be written to the journal, instead of directly to the
686             databases? (optional, default 0).
687              
688             =item wait_for_lock
689              
690             Whether or not to wait a long time for locks to complete (optional, default 0).
691              
692             =item opportunistic_expire_check_only
693              
694             During the opportunistic journal sync and expire check, don't actually do the
695             expire but report back whether or not it should occur (optional, default 0).
696              
697             =item no_relearn
698              
699             If doing a learn operation, and the message has already been learned as
700             the opposite type, don't re-learn the message.
701              
702             =back
703              
704             =cut
705              
706             sub init_learner {
707 0     0 1 0 my $self = shift;
708 0         0 my $opts = shift;
709 0         0 dbg("learn: initializing learner");
710              
711             # Make sure we're already initialized ...
712 0         0 $self->init(1);
713              
714 0         0 my %kv = (
715             'force_expire' => 'learn_force_expire',
716             'learn_to_journal' => 'learn_to_journal',
717             'caller_will_untie' => 'learn_caller_will_untie',
718             'wait_for_lock' => 'learn_wait_for_lock',
719             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
720             'no_relearn' => 'learn_no_relearn',
721             );
722              
723 0         0 my %ret;
724              
725             # Set any other options that need setting ...
726 0         0 while( my($k,$v) = each %kv ) {
727 0         0 $ret{$k} = $self->{$v};
728 0 0       0 if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
  0         0  
729             }
730              
731 0         0 return \%ret;
732             }
733              
734             ###########################################################################
735              
736             =item $f->rebuild_learner_caches ({ opt => val })
737              
738             Rebuild any cache databases; should be called after the learning process.
739             Options include: C<verbose>, which will output diagnostics to C<stdout>
740             if set to 1.
741              
742             =cut
743              
744             sub rebuild_learner_caches {
745 0     0 1 0 my $self = shift;
746 0         0 my $opts = shift;
747 0 0       0 $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
748 0         0 1;
749             }
750              
751             =item $f->finish_learner ()
752              
753             Finish learning.
754              
755             =cut
756              
757             sub finish_learner {
758 1     1 1 3 my $self = shift;
759 1 50       11 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
760 1         2 1;
761             }
762              
763             =item $f->dump_bayes_db()
764              
765             Dump the contents of the Bayes DB
766              
767             =cut
768              
769             sub dump_bayes_db {
770 0     0 1 0 my($self,@opts) = @_;
771 0 0       0 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
772             }
773              
774             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
775              
776             Signals that the current user has changed (possibly using C<setuid>), meaning
777             that SpamAssassin should close any per-user databases it has open, and re-open
778             using ones appropriate for the new user.
779              
780             Note that this should be called I<after> reading any per-user configuration, as
781             that data may override some paths opened in this method. You may pass the
782             following attribute-value pairs:
783              
784             =over 4
785              
786             =item username
787              
788             The username of the user. This will be used for the C<username> attribute.
789              
790             =item user_dir
791              
792             A directory to use as a 'home directory' for the current user's data,
793             overriding the system default. This directory must be readable and writable by
794             the process. Note that the resulting C<userstate_dir> will be the
795             C<.spamassassin> subdirectory of this dir.
796              
797             =item userstate_dir
798              
799             A directory to use as a directory for the current user's data, overriding the
800             system default. This directory must be readable and writable by the process.
801             The default is C<user_dir/.spamassassin>.
802              
803             =back
804              
805             =cut
806              
807             sub signal_user_changed {
808 2     2 1 94 my $self = shift;
809 2         3 my $opts = shift;
810 2         4 my $set = 0;
811              
812 2         5 my $timer = $self->time_method("signal_user_changed");
813 2         6 dbg("info: user has changed");
814              
815 2 50 33     14 if (defined $opts && $opts->{username}) {
816 2         5 $self->{username} = $opts->{username};
817             } else {
818 0         0 undef $self->{username};
819             }
820 2 50 33     11 if (defined $opts && $opts->{user_dir}) {
821 2         4 $self->{user_dir} = $opts->{user_dir};
822             } else {
823 0         0 undef $self->{user_dir};
824             }
825 2 50 33     10 if (defined $opts && $opts->{userstate_dir}) {
826 0         0 $self->{userstate_dir} = $opts->{userstate_dir};
827             } else {
828 2         6 undef $self->{userstate_dir};
829             }
830              
831             # reopen bayes dbs for this user
832 2 100       10 $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
833 2 100       6 if ($self->{conf}->{use_bayes}) {
834 1         18 require Mail::SpamAssassin::Bayes;
835 1         9 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
836             } else {
837 1 50       23 delete $self->{bayes_scanner} if $self->{bayes_scanner};
838             }
839              
840             # this user may have a different learn_to_journal setting, so reset appropriately
841 2         8 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
842              
843 2 50       7 $set |= 1 unless $self->{local_tests_only};
844 2 0 66     11 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      33        
845              
846 2         13 $self->{conf}->set_score_set ($set);
847              
848             $self->call_plugins("signal_user_changed", {
849             username => $self->{username},
850             userstate_dir => $self->{userstate_dir},
851             user_dir => $self->{user_dir},
852 2         20 });
853              
854 2         10 1;
855             }
856              
857             ###########################################################################
858              
859             =item $f->report_as_spam ($mail, $options)
860              
861             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
862             human-verified spam. This will submit the mail message to live,
863             collaborative, spam-blocker databases, allowing other users to block this
864             message.
865              
866             It will also submit the mail to SpamAssassin's Bayesian learner.
867              
868             Options is an optional reference to a hash of options. Currently these
869             can be:
870              
871             =over 4
872              
873             =item dont_report_to_dcc
874              
875             Inhibits reporting of the spam to DCC.
876              
877             =item dont_report_to_pyzor
878              
879             Inhibits reporting of the spam to Pyzor.
880              
881             =item dont_report_to_razor
882              
883             Inhibits reporting of the spam to Razor.
884              
885             =item dont_report_to_spamcop
886              
887             Inhibits reporting of the spam to SpamCop.
888              
889             =back
890              
891             =cut
892              
893             sub report_as_spam {
894 0     0 1 0 my ($self, $mail, $options) = @_;
895 0         0 local ($_);
896              
897 0         0 $self->init(1);
898 0         0 my $timer = $self->time_method("report_as_spam");
899              
900             # learn as spam if enabled
901 0 0       0 if ( $self->{conf}->{bayes_learn_during_report} ) {
902 0         0 $self->learn ($mail, undef, 1, 0);
903             }
904              
905 0         0 require Mail::SpamAssassin::Reporter;
906 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
907 0         0 $mail->report();
908             }
909              
910             ###########################################################################
911              
912             =item $f->revoke_as_spam ($mail, $options)
913              
914             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
915             human-verified ham (non-spam). This will revoke the mail message from live,
916             collaborative, spam-blocker databases, allowing other users to block this
917             message.
918              
919             It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
920              
921             Options is an optional reference to a hash of options. Currently these
922             can be:
923              
924             =over 4
925              
926             =item dont_report_to_razor
927              
928             Inhibits revoking of the spam to Razor.
929              
930              
931             =back
932              
933             =cut
934              
935             sub revoke_as_spam {
936 0     0 1 0 my ($self, $mail, $options) = @_;
937 0         0 local ($_);
938              
939 0         0 $self->init(1);
940 0         0 my $timer = $self->time_method("revoke_as_spam");
941              
942             # learn as nonspam
943 0         0 $self->learn ($mail, undef, 0, 0);
944              
945 0         0 require Mail::SpamAssassin::Reporter;
946 0         0 $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
947 0         0 $mail->revoke ();
948             }
949              
950             ###########################################################################
951              
952             =item $f->add_address_to_whitelist ($addr, $cli_p)
953              
954             Given a string containing an email address, add it to the automatic
955             whitelist database.
956              
957             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
958              
959             =cut
960              
961             sub add_address_to_whitelist {
962 0     0 1 0 my ($self, $addr, $cli_p) = @_;
963              
964 0         0 $self->call_plugins("whitelist_address", { address => $addr,
965             cli_p => $cli_p });
966             }
967              
968             ###########################################################################
969              
970             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
971              
972             Given a mail message, find as many addresses in the usual headers (To, Cc, From
973             etc.), and the message body, and add them to the automatic whitelist database.
974              
975             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
976              
977             =cut
978              
979             sub add_all_addresses_to_whitelist {
980 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
981              
982 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
983 0         0 $self->call_plugins("whitelist_address", { address => $addr,
984             cli_p => $cli_p });
985             }
986             }
987              
988             ###########################################################################
989              
990             =item $f->remove_address_from_whitelist ($addr, $cli_p)
991              
992             Given a string containing an email address, remove it from the automatic
993             whitelist database.
994              
995             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
996              
997             =cut
998              
999             sub remove_address_from_whitelist {
1000 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1001              
1002 0         0 $self->call_plugins("remove_address", { address => $addr,
1003             cli_p => $cli_p });
1004             }
1005              
1006             ###########################################################################
1007              
1008             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
1009              
1010             Given a mail message, find as many addresses in the usual headers (To, Cc, From
1011             etc.), and the message body, and remove them from the automatic whitelist
1012             database.
1013              
1014             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1015              
1016             =cut
1017              
1018             sub remove_all_addresses_from_whitelist {
1019 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1020              
1021 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1022 0         0 $self->call_plugins("remove_address", { address => $addr,
1023             cli_p => $cli_p });
1024             }
1025             }
1026              
1027             ###########################################################################
1028              
1029             =item $f->add_address_to_blacklist ($addr, $cli_p)
1030              
1031             Given a string containing an email address, add it to the automatic
1032             whitelist database with a high score, effectively blacklisting them.
1033              
1034             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1035              
1036             =cut
1037              
1038             sub add_address_to_blacklist {
1039 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1040 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1041             cli_p => $cli_p });
1042             }
1043              
1044             ###########################################################################
1045              
1046             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1047              
1048             Given a mail message, find addresses in the From headers and add them to the
1049             automatic whitelist database with a high score, effectively blacklisting them.
1050              
1051             Note that To and Cc addresses are not used.
1052              
1053             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1054              
1055             =cut
1056              
1057             sub add_all_addresses_to_blacklist {
1058 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1059              
1060 0         0 $self->init(1);
1061              
1062 0         0 my @addrlist;
1063 0         0 my @hdrs = $mail_obj->get_header('From');
1064 0 0       0 if ($#hdrs >= 0) {
1065 0         0 push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1066             }
1067              
1068 0         0 foreach my $addr (@addrlist) {
1069 0         0 $self->call_plugins("blacklist_address", { address => $addr,
1070             cli_p => $cli_p });
1071             }
1072              
1073             }
1074              
1075             ###########################################################################
1076              
1077             =item $text = $f->remove_spamassassin_markup ($mail)
1078              
1079             Returns the text of the message, with any SpamAssassin-added text (such
1080             as the report, or X-Spam-Status headers) stripped.
1081              
1082             Note that the B<$mail> object is not modified.
1083              
1084             Warning: if the input message in B<$mail> contains a mixture of CR-LF
1085             (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1086             to use one or the other consistently throughout.
1087              
1088             =cut
1089              
1090             sub remove_spamassassin_markup {
1091 0     0 1 0 my ($self, $mail_obj) = @_;
1092 0         0 local ($_);
1093              
1094 0         0 my $timer = $self->time_method("remove_spamassassin_markup");
1095 0   0     0 my $mbox = $mail_obj->get_mbox_separator() || '';
1096              
1097 0         0 dbg("markup: removing markup");
1098              
1099             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1100             # if we have definitive proof it's not an encapsulated message.
1101 0   0     0 my $ct = $mail_obj->get_header("Content-Type") || '';
1102 0 0       0 if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1103              
1104             # Ok, this is a possible encapsulated message, search for the
1105             # appropriate mime part and deal with it if necessary.
1106 0         0 my $boundary = "\Q$1\E";
1107 0         0 my @msg = split(/^/,$mail_obj->get_pristine_body());
1108              
1109 0         0 my $flag = 0;
1110 0         0 $ct = '';
1111 0         0 my $cd = '';
1112 0         0 for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1113             # only look at mime part headers
1114 0 0 0     0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1115              
1116 0 0       0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1117              
1118             # Ok, we found the encapsulated piece ...
1119 0 0 0     0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
      0        
1120             ($ct eq "message/rfc822" &&
1121             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1122             {
1123 0         0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1124              
1125             # find the end and chop it off
1126 0         0 for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1127 0 0       0 if ( $msg[$i] =~ /^--$boundary/ ) {
1128 0 0       0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1129             # will remove the blank line (not sure it'll always be
1130             # there) and everything below. don't worry, the splice
1131             # guarantees the for will stop ...
1132             }
1133             }
1134              
1135             # Ok, we're done. Return the rewritten message.
1136 0         0 return join('', $mbox, @msg);
1137             }
1138              
1139 0         0 $flag = 0;
1140 0         0 $ct = '';
1141 0         0 $cd = '';
1142 0         0 next;
1143             }
1144              
1145             # Ok, we're in the mime header ... Capture the appropriate headers...
1146 0         0 $flag = 1;
1147 0 0       0 if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
    0          
1148 0         0 $ct = $1;
1149             }
1150             elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1151 0         0 $cd = $1;
1152             }
1153             }
1154             }
1155              
1156             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1157             # So treat it like a "report_safe 0" message.
1158 0         0 my $hdrs = $mail_obj->get_pristine_header();
1159 0         0 my $body = $mail_obj->get_pristine_body();
1160              
1161             # remove DOS line endings
1162 0         0 $hdrs =~ s/\r//gs;
1163              
1164             # unfold SA added headers, but not X-Spam-Prev headers ...
1165 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1166 0         0 1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1167 0         0 $hdrs =~ s/^\n//;
1168              
1169             ###########################################################################
1170             # Backward Compatibility, pre 3.0.x.
1171              
1172             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1173 0         0 $self->init(1);
1174 0         0 foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
  0         0  
1175             # let the 3.0 decoding do it...
1176 0 0       0 next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1177              
1178 0         0 dbg("markup: removing markup in $header");
1179 0 0       0 if ($header eq 'Subject') {
1180 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1181 0         0 $tag = quotemeta($tag);
1182 0         0 $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1183 0         0 $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1184 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1185 0         0 1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1186             } else {
1187 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1188             }
1189             }
1190              
1191             # Now deal with report cleansing from 2.4x and previous.
1192             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1193             # followed by another "SPAM: ----.+" line, followed by a blank line.
1194 0         0 1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1195             ###########################################################################
1196              
1197             # 3.0 version -- support for previously-nonexistent Subject hdr.
1198             # ensure the Subject line didn't *really* contain "(nonexistent)" in
1199             # the original message!
1200 0 0 0     0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1201             && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1202             {
1203 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1204 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1205             }
1206              
1207             # 3.0 version -- revert from X-Spam-Prev to original ...
1208 0         0 while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1209 0         0 my($hdr, $name) = ($1,$2);
1210              
1211             # If the rewritten version doesn't exist, we should deal with it anyway...
1212 0 0       0 unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1213 0         0 $hdrs =~ s/\n\n/\n$hdr\n/;
1214             }
1215             }
1216              
1217             # remove any other X-Spam headers we added, will be unfolded
1218 0         0 $hdrs = "\n".$hdrs; # simplifies regexp below
1219 0         0 1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1220 0         0 $hdrs =~ s/^\n//;
1221              
1222             # re-add DOS line endings
1223 0 0       0 if ($mail_obj->{line_ending} ne "\n") {
1224 0         0 $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1225             }
1226              
1227             # Put the whole thing back together ...
1228 0         0 return join ('', $mbox, $hdrs, $body);
1229             }
1230              
1231             ###########################################################################
1232              
1233             =item $f->read_scoreonly_config ($filename)
1234              
1235             Read a configuration file and parse user preferences from it.
1236              
1237             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1238             In other words, they include scoring options, scores, whitelists and
1239             blacklists, and so on, but do not include rule definitions, privileged
1240             settings, etc. unless C<allow_user_rules> is enabled; and they never include
1241             the administrator settings.
1242              
1243             =cut
1244              
1245             sub read_scoreonly_config {
1246 2     2 1 3568 my ($self, $filename) = @_;
1247              
1248 2         6 my $timer = $self->time_method("read_scoreonly_config");
1249 2         6 local *IN;
1250 2 50       102 if (!open(IN,"<$filename")) {
1251             # the file may not exist; this should not be verbose
1252 2         32 dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1253 2         12 return;
1254             }
1255              
1256 0         0 my($inbuf,$nread,$text); $text = '';
  0         0  
1257 0         0 while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
  0         0  
1258 0 0       0 defined $nread or die "error reading $filename: $!";
1259 0 0       0 close IN or die "error closing $filename: $!";
1260 0         0 undef $inbuf;
1261              
1262 0         0 $text = "file start $filename\n" . $text;
1263             # add an extra \n in case file did not end in one.
1264 0         0 $text .= "\nfile end $filename\n";
1265              
1266 0         0 $self->{conf}->{main} = $self;
1267 0         0 $self->{conf}->parse_scores_only ($text);
1268 0         0 $self->{conf}->finish_parsing(1);
1269              
1270 0         0 delete $self->{conf}->{main}; # to allow future GC'ing
1271             }
1272              
1273             ###########################################################################
1274              
1275             =item $f->load_scoreonly_sql ($username)
1276              
1277             Read configuration parameters from SQL database and parse scores from it. This
1278             will only take effect if the perl C<DBI> module is installed, and the
1279             configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
1280             C<user_scores_sql_password> are set correctly.
1281              
1282             The username in C<$username> will also be used for the C<username> attribute of
1283             the Mail::SpamAssassin object.
1284              
1285             =cut
1286              
1287             sub load_scoreonly_sql {
1288 0     0 1 0 my ($self, $username) = @_;
1289              
1290 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1291 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1292 0         0 $self->{username} = $username;
1293 0 0       0 unless ($src->load($username)) {
1294 0         0 return 0;
1295             }
1296 0         0 return 1;
1297             }
1298              
1299             ###########################################################################
1300              
1301             =item $f->load_scoreonly_ldap ($username)
1302              
1303             Read configuration parameters from an LDAP server and parse scores from it.
1304             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1305             installed, and the configuration parameters C<user_scores_dsn>,
1306             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1307             correctly.
1308              
1309             The username in C<$username> will also be used for the C<username> attribute of
1310             the Mail::SpamAssassin object.
1311              
1312             =cut
1313              
1314             sub load_scoreonly_ldap {
1315 0     0 1 0 my ($self, $username) = @_;
1316              
1317 0         0 dbg("config: load_scoreonly_ldap($username)");
1318 0         0 my $timer = $self->time_method("load_scoreonly_ldap");
1319 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1320 0         0 $self->{username} = $username;
1321 0         0 $src->load($username);
1322             }
1323              
1324             ###########################################################################
1325              
1326             =item $f->set_persistent_address_list_factory ($factoryobj)
1327              
1328             Set the persistent address list factory, used to create objects for the
1329             automatic whitelist algorithm's persistent-storage back-end. See
1330             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1331             must implement, and the API the objects they produce must implement.
1332              
1333             =cut
1334              
1335             sub set_persistent_address_list_factory {
1336 3     3 1 16 my ($self, $fac) = @_;
1337 3         14 $self->{pers_addr_list_factory} = $fac;
1338             }
1339              
1340             ###########################################################################
1341              
1342             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1343              
1344             Compile all patterns, load all configuration files, and load all
1345             possibly-required Perl modules.
1346              
1347             Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1348             plan to fork() or start a new perl interpreter thread to process a message,
1349             this is suboptimal, as each process/thread will have to perform these actions.
1350              
1351             Call this function in the master thread or process to perform the actions
1352             straight away, so that the sub-processes will not have to.
1353              
1354             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1355             configuration without reading the per-user configuration file and it will
1356             assume that you will call C<read_scoreonly_config> at a later point.
1357              
1358             If C<$keep_userstate> is true, compile_now() will revert any configuration
1359             options which have a default with I<__userstate__> in it post-init(),
1360             and then re-change the option before returning. This lets you change
1361             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1362             files there as necessary without disturbing the actual files as changed
1363             by a configuration option. By default, this is disabled.
1364              
1365             =cut
1366              
1367             sub compile_now {
1368 1     1 1 14 my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1369              
1370 1         4 my $timer = $self->time_method("compile_now");
1371              
1372             # Backup default values which deal with userstate.
1373             # This is done so we can create any new files in, presumably, a temp dir.
1374             # see bug 2762 for more details.
1375 1         2 my %backup;
1376 1 50 33     6 if (defined $deal_with_userstate && $deal_with_userstate) {
1377 1         3 while(my($k,$v) = each %{$self->{conf}}) {
  117         269  
1378 116 100 100     376 $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
      100        
1379             }
1380             }
1381              
1382 1         7 $self->init($use_user_prefs);
1383              
1384             # if init() didn't change the value from default, forget about it.
1385             # if the value is different, remember the new version, and reset the default.
1386 1         9 while(my($k,$v) = each %backup) {
1387 1 50       6 if ($self->{conf}->{$k} eq $v) {
1388 0         0 delete $backup{$k};
1389             }
1390             else {
1391 1         3 my $backup = $backup{$k};
1392 1         4 $backup{$k} = $self->{conf}->{$k};
1393 1         6 $self->{conf}->{$k} = $backup;
1394             }
1395             }
1396              
1397 1         4 dbg("ignore: test message to precompile patterns and load modules");
1398              
1399             # tell plugins we are about to send a message for compiling purposes
1400 1         6 $self->call_plugins("compile_now_start",
1401             { use_user_prefs => $use_user_prefs,
1402             keep_userstate => $deal_with_userstate});
1403              
1404             # note: this may incur network access. Good. We want to make sure
1405             # as much as possible is preloaded!
1406 1         25 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1407             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1408             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1409              
1410 1         10 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1411 1         73 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1412             { disable_auto_learning => 1 } );
1413              
1414             # We want to turn off the bayes rules for this test msg
1415 1         5 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1416 1         3 $self->{conf}->{use_bayes_rules} = 0;
1417 1         10 $status->check();
1418 1         4 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1419 1         6 $status->finish();
1420 1         10 $mail->finish();
1421 1         6 $self->finish_learner();
1422              
1423 1         10 $self->{conf}->free_uncompiled_rule_source();
1424              
1425             # load SQL modules now as well
1426 1         4 my $dsn = $self->{conf}->{user_scores_dsn};
1427 1 50       5 if ($dsn ne '') {
1428 0 0       0 if ($dsn =~ /^ldap:/i) {
1429 0         0 Mail::SpamAssassin::Conf::LDAP::load_modules();
1430             } else {
1431 0         0 Mail::SpamAssassin::Conf::SQL::load_modules();
1432             }
1433             }
1434              
1435             # make sure things are ready for scanning
1436 1 50       6 $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1437 1         7 $self->call_plugins("compile_now_finish",
1438             { use_user_prefs => $use_user_prefs,
1439             keep_userstate => $deal_with_userstate});
1440              
1441             # Reset any non-default values to the post-init() version.
1442 1         140 while(my($k,$v) = each %backup) {
1443 1         10 $self->{conf}->{$k} = $v;
1444             }
1445              
1446             # clear sed_path_cache
1447 1         10 delete $self->{conf}->{sed_path_cache};
1448              
1449 1         8 1;
1450             }
1451              
1452             ###########################################################################
1453              
1454             =item $f->debug_diagnostics ()
1455              
1456             Output some diagnostic information, useful for debugging SpamAssassin
1457             problems.
1458              
1459             =cut
1460              
1461             sub debug_diagnostics {
1462 0     0 1 0 my ($self) = @_;
1463              
1464             # load this class lazily, to avoid overhead when this method isn't
1465             # called.
1466 0         0 eval {
1467 0         0 require Mail::SpamAssassin::Util::DependencyInfo;
1468 0         0 dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1469             };
1470             }
1471              
1472             ###########################################################################
1473              
1474             =item $failed = $f->lint_rules ()
1475              
1476             Syntax-check the current set of rules. Returns the number of
1477             syntax errors discovered, or 0 if the configuration is valid.
1478              
1479             =cut
1480              
1481             sub lint_rules {
1482 47     47 1 14764 my ($self) = @_;
1483              
1484 47         337 dbg("ignore: using a test message to lint rules");
1485 47         654 my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1486             "Subject: \n",
1487             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1488             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1489              
1490 47         264 $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1491 47         194 $self->{syntax_errors} = 0;
1492              
1493 47         124 my $olddcp = $self->{dont_copy_prefs};
1494 47         118 $self->{dont_copy_prefs} = 1;
1495              
1496 47         272 $self->init(1);
1497 47         176 $self->{syntax_errors} += $self->{conf}->{errors};
1498              
1499 47         119 $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1500              
1501             # bug 5048: override settings to ensure a faster lint
1502 47         143 $self->{'conf'}->{'use_auto_whitelist'} = 0;
1503 47         148 $self->{'conf'}->{'bayes_auto_learn'} = 0;
1504              
1505 47         600 my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1506 47         736 my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1507             { disable_auto_learning => 1 } );
1508 47         310 $status->check();
1509              
1510 47         174 $self->{syntax_errors} += $status->{rule_errors};
1511 47         334 $status->finish();
1512 47         374 $mail->finish();
1513 47 50       155 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1514 47         305 return ($self->{syntax_errors});
1515             }
1516              
1517             ###########################################################################
1518              
1519             =item $f->finish()
1520              
1521             Destroy this object, so that it will be garbage-collected once it
1522             goes out of scope. The object will no longer be usable after this
1523             method is called.
1524              
1525             =cut
1526              
1527             sub finish {
1528 32     32 1 284 my ($self) = @_;
1529              
1530 32         163 $self->timer_start("finish");
1531             $self->call_plugins("finish_tests", { conf => $self->{conf},
1532 32         204 main => $self });
1533              
1534 32         346 $self->{conf}->finish(); delete $self->{conf};
  32         137  
1535 32         239 $self->{plugins}->finish(); delete $self->{plugins};
  32         99  
1536              
1537 32 50       146 if ($self->{bayes_scanner}) {
1538 32         233 $self->{bayes_scanner}->finish();
1539 32         90 delete $self->{bayes_scanner};
1540             }
1541              
1542 32 50       271 $self->{resolver}->finish() if $self->{resolver};
1543              
1544 32         188 $self->timer_end("finish");
1545 32         71 %{$self} = ();
  32         752  
1546             }
1547              
1548             ###########################################################################
1549             # timers: bug 5356
1550              
1551             sub timer_enable {
1552 0     0 0 0 my ($self) = @_;
1553 0 0       0 dbg("config: timing enabled") if !$self->{timer_enabled};
1554 0         0 $self->{timer_enabled} = 1;
1555             }
1556              
1557             sub timer_disable {
1558 0     0 0 0 my ($self) = @_;
1559 0 0       0 dbg("config: timing disabled") if $self->{timer_enabled};
1560 0         0 $self->{timer_enabled} = 0;
1561             }
1562              
1563             # discard all timers, start afresh
1564             sub timer_reset {
1565 0     0 0 0 my ($self) = @_;
1566 0         0 delete $self->{timers};
1567 0         0 delete $self->{timers_order};
1568             }
1569              
1570             sub timer_start {
1571 32     32 0 102 my ($self, $name) = @_;
1572              
1573 32 50       142 return unless $self->{timer_enabled};
1574             # dbg("timing: '$name' starting");
1575              
1576 0 0       0 if (!exists $self->{timers}->{$name}) {
1577 0         0 push @{$self->{timers_order}}, $name;
  0         0  
1578             }
1579            
1580 0         0 $self->{timers}->{$name}->{start} = Time::HiRes::time();
1581             # note that this will reset any existing, unstopped timer of that name;
1582             # that's ok
1583             }
1584              
1585             sub timer_end {
1586 32     32 0 132 my ($self, $name) = @_;
1587 32 50       163 return unless $self->{timer_enabled};
1588              
1589 0         0 my $t = $self->{timers}->{$name};
1590 0         0 $t->{end} = time;
1591              
1592 0 0       0 if (!$t->{start}) {
1593 0         0 warn "timer_end('$name') with no timer_start";
1594 0         0 return;
1595             }
1596              
1597             # add to any existing elapsed time for this event, since
1598             # we may call the same timer name multiple times -- this is ok,
1599             # as long as they are not nested
1600 0         0 my $dt = $t->{end} - $t->{start};
1601 0 0       0 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1602 0 0       0 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
  0         0  
1603 0         0 else { $t->{elapsed} = $dt }
1604             }
1605              
1606             sub time_method {
1607 850     850 0 2228 my ($self, $name) = @_;
1608 850 50       2817 return unless $self->{timer_enabled};
1609 0         0 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1610             }
1611              
1612             sub timer_report {
1613 0     0 0 0 my ($self) = @_;
1614              
1615 0         0 my $earliest;
1616             my $latest;
1617              
1618 0         0 while (my($name,$h) = each(%{$self->{timers}})) {
  0         0  
1619             # dbg("timing: %s - %s", $name, join(", ",
1620             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1621 0         0 my $start = $h->{start};
1622 0 0 0     0 if (defined $start && (!defined $earliest || $earliest > $start)) {
      0        
1623 0         0 $earliest = $start;
1624             }
1625 0         0 my $end = $h->{end};
1626 0 0 0     0 if (defined $end && (!defined $latest || $latest < $end)) {
      0        
1627 0         0 $latest = $end;
1628             }
1629 0 0 0     0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1630             }
1631 0 0 0     0 my $total =
1632             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1633 0         0 my @str;
1634 0         0 foreach my $name (@{$self->{timers_order}}) {
  0         0  
1635 0   0     0 my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1636 0 0 0     0 my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1637 0 0       0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
    0          
1638 0         0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1639             }
1640              
1641 0         0 return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1642             }
1643              
1644             ###########################################################################
1645             # non-public methods.
1646              
1647             sub init {
1648 205     205 0 257899 my ($self, $use_user_pref) = @_;
1649              
1650             # Allow init() to be called multiple times, but only run once.
1651 205 100       848 if (defined $self->{_initted}) {
1652             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1653 150 50       665 if ($self->{_initted} != $$) {
1654 0         0 $self->{_initted} = $$;
1655 0 0       0 srand if !$self->{skip_prng_reseeding};
1656 0         0 $self->{resolver}->reinit_post_fork();
1657             }
1658 150         387 return;
1659             }
1660              
1661 55         331 my $timer = $self->time_method("init");
1662             # Note that this PID has run init()
1663 55         416 $self->{_initted} = $$;
1664              
1665             #fix spamd reading root prefs file
1666 55 100       226 if (!defined $use_user_pref) {
1667 15         49 $use_user_pref = 1;
1668             }
1669              
1670 55 50       224 if (!defined $self->{config_text}) {
1671 55         294 $self->{config_text} = '';
1672              
1673             # read a file called "init.pre" in site rules dir *before* all others;
1674             # even the system config.
1675 55         198 my $siterules = $self->{site_rules_filename};
1676 55   33     188 $siterules ||= $self->first_existing_path (@site_rules_path);
1677              
1678 55         241 my $sysrules = $self->{rules_filename};
1679 55   33     179 $sysrules ||= $self->first_existing_path (@default_rules_path);
1680              
1681 55 50       211 if ($siterules) {
1682 55         303 $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1683             }
1684             else {
1685 0         0 warn "config: could not find site rules directory\n";
1686             }
1687              
1688 55 50       375 if ($sysrules) {
1689 55         242 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1690             }
1691             else {
1692 0         0 warn "config: could not find sys rules directory\n";
1693             }
1694              
1695 55 50       311 if ($sysrules) {
1696 55         295 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1697 55 50 66     386 if ($self->{require_rules} && $cftext !~ /\S/) {
1698 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1699             }
1700 55         3924 $self->{config_text} .= $cftext;
1701             }
1702              
1703 55 50       299 if (!$self->{languages_filename}) {
1704 55         333 $self->{languages_filename} = $self->find_rule_support_file("languages");
1705             }
1706              
1707 55 50 33     705 if ($siterules && !$self->{ignore_site_cf_files}) {
1708 55         395 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1709             }
1710              
1711 55 100       333 if ( $use_user_pref != 0 ) {
1712 50         257 $self->get_and_create_userstate_dir();
1713              
1714             # user prefs file
1715 50         179 my $fname = $self->{userprefs_filename};
1716 50   33     171 $fname ||= $self->first_existing_path (@default_userprefs_path);
1717              
1718 50 100       213 if (!$self->{dont_copy_prefs}) {
1719             # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1720             # just use the last entry in the array as the default path.
1721 1   33     4 $fname ||= $self->sed_path($default_userprefs_path[-1]);
1722              
1723 1 50       17 my $stat_errn = stat($fname) ? 0 : 0+$!;
1724 1 50 33     8 if ($stat_errn == 0 && -f _) {
    0          
    0          
    0          
1725             # exists and is a regular file, nothing to do
1726             } elsif ($stat_errn == 0) {
1727 0         0 warn "config: default user preference file $fname is not a regular file\n";
1728             } elsif ($stat_errn != ENOENT) {
1729 0         0 warn "config: default user preference file $fname not accessible: $!\n";
1730             } elsif (!$self->create_default_prefs($fname)) {
1731 0         0 warn "config: failed to create default user preference file $fname\n";
1732             }
1733             }
1734              
1735 50         177 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1736             }
1737             }
1738              
1739 55 50       356 if ($self->{pre_config_text}) {
1740 0         0 $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1741             }
1742 55 50       267 if ($self->{post_config_text}) {
1743 0         0 $self->{config_text} .= $self->{post_config_text};
1744             }
1745              
1746 55 50       621 if ($self->{config_text} !~ /\S/) {
1747 0         0 my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1748 0 0       0 if ($self->{require_rules}) {
1749 0         0 die $m;
1750             } else {
1751 0         0 warn $m;
1752             }
1753             }
1754              
1755             # Go and parse the config!
1756 55         195 $self->{conf}->{main} = $self;
1757 55 50       326 if (would_log('dbg', 'config_text') > 1) {
1758 0         0 dbg('config_text: '.$self->{config_text});
1759             }
1760 55         483 $self->{conf}->parse_rules ($self->{config_text});
1761 55         629 $self->{conf}->finish_parsing(0);
1762 55         319 delete $self->{conf}->{main}; # to allow future GC'ing
1763              
1764 55         350 undef $self->{config_text}; # ensure it's actually freed
1765 55         269 delete $self->{config_text};
1766              
1767 55 50 66     360 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1768 0         0 die "config: no rules were found! Do you need to run 'sa-update'?\n";
1769             }
1770              
1771             # Initialize the Bayes subsystem
1772 55 100       289 if ($self->{conf}->{use_bayes}) {
1773 40         4884 require Mail::SpamAssassin::Bayes;
1774 40         523 $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1775             }
1776 55         284 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1777              
1778             # Figure out/set our initial scoreset
1779 55         190 my $set = 0;
1780 55 50       256 $set |= 1 unless $self->{local_tests_only};
1781 55 0 66     455 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
      33        
1782 55         6815 $self->{conf}->set_score_set ($set);
1783              
1784 55 50       203 if ($self->{only_these_rules}) {
1785 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1786             }
1787              
1788 55 50       210 if (!$self->{timer_enabled}) {
1789             # enable timing implicitly if _TIMING_ is used in add_header templates
1790 55         117 foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
  55         223  
1791 55         273 @{$self->{conf}->{'headers_spam'}}) {
1792 433 50       1456 if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
  0         0  
  0         0  
1793             }
1794             }
1795              
1796             # should be called only after configuration has been parsed
1797 55         1138 $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1798              
1799             # TODO -- open DNS cache etc. if necessary
1800             }
1801              
1802             sub read_cf {
1803 160     160 0 453 my ($self, $allpaths, $desc) = @_;
1804 160         570 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1805             }
1806              
1807             sub read_pre {
1808 110     110 0 381 my ($self, $allpaths, $desc) = @_;
1809 110         564 return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1810             }
1811              
1812             sub _read_cf_pre {
1813 270     270   657 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1814              
1815 270 50       650 return '' unless defined ($allpaths);
1816              
1817 270         442 my $txt = '';
1818 270         1100 foreach my $path (split("\000", $allpaths))
1819             {
1820 270         1583 dbg("config: using \"$path\" for $desc");
1821              
1822 270 100       4362 my $stat_errn = stat($path) ? 0 : 0+$!;
1823 270 100 66     2152 if ($stat_errn == ENOENT) {
    50 66        
    100          
    100          
1824             # no file or directory
1825             } elsif ($stat_errn != 0) {
1826 0         0 dbg("config: file or directory $path not accessible: $!");
1827             } elsif (-d _) {
1828 188         666 foreach my $file ($self->$filelistmethod($path)) {
1829 1020         2093 $txt .= read_cf_file($file);
1830             }
1831             } elsif (-f _ && -s _ && -r _) {
1832 32         124 $txt .= read_cf_file($path);
1833             }
1834             }
1835              
1836 270         4093 return $txt;
1837             }
1838              
1839              
1840             sub read_cf_file {
1841 1052     1052 0 2397 my($path) = @_;
1842 1052         1641 my $txt = '';
1843              
1844 1052         2238 local *IN;
1845 1052 50       34160 if (open (IN, "<".$path)) {
1846              
1847 1052         2973 my($inbuf,$nread); $txt = '';
  1052         1847  
1848 1052         19975 while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
  1122         8765  
1849 1052 50       2604 defined $nread or die "error reading $path: $!";
1850 1052 50       8833 close IN or die "error closing $path: $!";
1851 1052         2455 undef $inbuf;
1852              
1853 1052         7032 $txt = "file start $path\n" . $txt;
1854             # add an extra \n in case file did not end in one.
1855 1052         3191 $txt .= "\nfile end $path\n";
1856              
1857 1052         4463 dbg("config: read file $path");
1858             }
1859             else {
1860 0         0 warn "config: cannot open \"$path\": $!\n";
1861             }
1862              
1863 1052         9350 return $txt;
1864             }
1865              
1866             sub get_and_create_userstate_dir {
1867 50     50 0 138 my ($self, $dir) = @_;
1868              
1869 50         98 my $fname;
1870              
1871             # If vpopmail is enabled then set fname to virtual homedir
1872             # precedence: dir, userstate_dir, derive from user_dir, system default
1873 50 50       351 if (defined $dir) {
    100          
    50          
1874 0         0 $fname = File::Spec->catdir ($dir, ".spamassassin");
1875             }
1876             elsif (defined $self->{userstate_dir}) {
1877 34         110 $fname = $self->{userstate_dir};
1878             }
1879             elsif (defined $self->{user_dir}) {
1880 0         0 $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1881             }
1882              
1883 50   66     358 $fname ||= $self->first_existing_path (@default_userstate_dir);
1884              
1885             # bug 4932: use the last default_userstate_dir entry if none of the others
1886             # already exist
1887 50   33     154 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1888              
1889 50 100       192 if (!$self->{dont_copy_prefs}) {
1890 1         8 dbg("config: using \"$fname\" for user state dir");
1891             }
1892              
1893             # if this is not a dir, not readable, or we are unable to create the dir,
1894             # this is not (yet) a serious error; in fact, it's not even worth
1895             # a warning at all times, so use dbg(). see bug 6268
1896 50 50       758 my $stat_errn = stat($fname) ? 0 : 0+$!;
1897 50 50 33     540 if ($stat_errn == 0 && !-d _) {
    50 33        
1898 0         0 dbg("config: $fname exists but is not a directory");
1899             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1900 0         0 dbg("config: error accessing $fname: $!");
1901             } else { # does not exist, create it
1902             eval {
1903 50         2873 mkpath($fname, 0, 0700); 1;
  50         270  
1904 50 50       133 } or do {
1905 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
1906 0         0 dbg("config: mkdir $fname failed: $eval_stat");
1907             };
1908             }
1909              
1910 50         139 $fname;
1911             }
1912              
1913             =item $fullpath = $f->find_rule_support_file ($filename)
1914              
1915             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1916             in the system-wide rules directory, and return its full path if
1917             it exists, or undef if it doesn't exist.
1918              
1919             (This API was added in SpamAssassin 3.1.1.)
1920              
1921             =cut
1922              
1923             sub find_rule_support_file {
1924 55     55 1 218 my ($self, $filename) = @_;
1925              
1926             return $self->first_existing_path(
1927 55         231 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
  385         673  
  385         1823  
  385         1184  
1928             }
1929              
1930             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1931              
1932             Copy default preferences file into home directory for later use and
1933             modification, if it does not already exist and C<dont_copy_prefs> is
1934             not set.
1935              
1936             =cut
1937              
1938             sub create_default_prefs {
1939             # $userdir will only exist if vpopmail config is enabled thru spamd
1940             # Its value will be the virtual user's maildir
1941             #
1942 0     0 1 0 my ($self, $fname, $user, $userdir) = @_;
1943              
1944 0 0       0 if ($self->{dont_copy_prefs}) {
1945 0         0 return(0);
1946             }
1947              
1948             # if ($userdir && $userdir ne $self->{user_dir}) {
1949             # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1950             # }
1951              
1952 0 0       0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1953 0 0       0 if ($stat_errn == 0) {
    0          
1954             # fine, it already exists
1955             } elsif ($stat_errn != ENOENT) {
1956 0         0 dbg("config: cannot access user preferences file $fname: $!");
1957             } else {
1958             # Pass on the value of $userdir for virtual users in vpopmail
1959             # otherwise it is empty and the user's normal homedir is used
1960 0         0 $self->get_and_create_userstate_dir($userdir);
1961              
1962             # copy in the default one for later editing
1963 0         0 my $defprefs =
1964             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1965              
1966 0         0 local(*IN,*OUT);
1967 0         0 $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1968 0 0       0 if (!defined $defprefs) {
    0          
    0          
1969 0         0 warn "config: can not determine default prefs path\n";
1970             } elsif (!open(IN, "<$defprefs")) {
1971 0         0 warn "config: cannot open $defprefs: $!\n";
1972             } elsif (!open(OUT, ">$fname")) {
1973 0         0 warn "config: cannot create user preferences file $fname: $!\n";
1974             } else {
1975             # former code skipped lines beginning with '#* ', the following copy
1976             # procedure no longer does so, as it avoids reading line-by-line
1977 0         0 my($inbuf,$nread);
1978 0         0 while ( $nread=read(IN,$inbuf,16384) ) {
1979 0 0       0 print OUT $inbuf or die "cannot write to $fname: $!";
1980             }
1981 0 0       0 defined $nread or die "error reading $defprefs: $!";
1982 0         0 undef $inbuf;
1983 0 0       0 close OUT or die "error closing $fname: $!";
1984 0 0       0 close IN or die "error closing $defprefs: $!";
1985              
1986 0 0 0     0 if (($< == 0) && ($> == 0) && defined($user)) { # chown it
      0        
1987 0         0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1988 0 0       0 unless (chown($uid, $gid, $fname)) {
1989 0         0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1990             }
1991             }
1992 0         0 warn "config: created user preferences file: $fname\n";
1993 0         0 return(1);
1994             }
1995             }
1996              
1997 0         0 return(0);
1998             }
1999              
2000             ###########################################################################
2001              
2002             sub expand_name {
2003 16     16 0 172 my ($self, $name) = @_;
2004 16   50     282 my $home = $self->{user_dir} || $ENV{HOME} || '';
2005              
2006 16 50       103 if (am_running_on_windows()) {
2007 0   0     0 my $userprofile = $ENV{USERPROFILE} || '';
2008              
2009 0 0 0     0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
2010 0 0       0 return $userprofile if ($userprofile =~ m/^\\\\/o);
2011              
2012 0 0 0     0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
2013 0 0       0 return $home if ($home =~ m/^\\\\/o);
2014              
2015 0         0 return '';
2016             } else {
2017 16 50 33     318 return $home if ($home && $home =~ /\//o);
2018 0 0       0 return (getpwnam($name))[7] if ($name ne '');
2019 0         0 return (getpwuid($>))[7];
2020             }
2021             }
2022              
2023             sub sed_path {
2024 136     136 0 348 my ($self, $path) = @_;
2025 136 50       322 return if !defined $path;
2026              
2027 136 50       639 if (exists($self->{conf}->{sed_path_cache}->{$path})) {
2028 0         0 return $self->{conf}->{sed_path_cache}->{$path};
2029             }
2030              
2031 136         257 my $orig_path = $path;
2032              
2033 136 0       224 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
  0         0  
2034 136 50       232 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
  1         8  
2035 136 50       336 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
  1         6  
2036 136 0 33     248 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
  1         17  
2037 136 0       286 $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
  0         0  
2038 136         250 $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
  0         0  
2039 136         251 $path =~ s/__version__/${VERSION}/gs;
2040 136         343 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
  16         77  
2041              
2042 136         651 $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
2043 136         514 $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
2044 136         333 return $path;
2045             }
2046              
2047             sub get_perl_major_version {
2048 0     0 0 0 my $self = shift;
2049 0 0       0 $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2050 0         0 return $1;
2051             }
2052              
2053             sub first_existing_path {
2054 72     72 0 229 my $self = shift;
2055 72         140 my $path;
2056 72         259 foreach my $p (@_) {
2057 133         528 $path = $self->sed_path ($p);
2058 133 50       390 if (defined $path) {
2059 133 100       3199 my($errn) = stat($path) ? 0 : 0+$!;
2060 133 100       656 if ($errn == ENOENT) { } # does not exist
    50          
2061 0         0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2062 71         393 else { return $path }
2063             }
2064             }
2065 1         5 return;
2066             }
2067              
2068             ###########################################################################
2069              
2070             sub get_cf_files_in_dir {
2071 94     94 0 274 my ($self, $dir) = @_;
2072 94         253 return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2073             }
2074              
2075             sub get_pre_files_in_dir {
2076 94     94 0 273 my ($self, $dir) = @_;
2077 94         366 return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2078             }
2079              
2080             sub _get_cf_pre_files_in_dir {
2081 188     188   490 my ($self, $dir, $type) = @_;
2082              
2083 188 100       470 if ($self->{config_tree_recurse}) {
2084 4         7 my @cfs;
2085              
2086             # use "eval" to avoid loading File::Find unless this is specified
2087             eval ' use File::Find qw();
2088             File::Find::find(
2089             { untaint => 1,
2090             follow => 1,
2091             wanted =>
2092             sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
2093             }, $dir); 1;
2094 4 50   1   235 ' or do {
  1     1   7  
  1     1   2  
  1     1   108  
  1         8  
  1         2  
  1         68  
  1         8  
  1         3  
  1         69  
  1         8  
  1         2  
  1         83  
2095 0 0       0 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
  0         0  
2096 0         0 die "_get_cf_pre_files_in_dir error: $eval_stat";
2097             };
2098 4         19 @cfs = sort { $a cmp $b } @cfs;
  52         87  
2099 4         36 return @cfs;
2100             }
2101             else {
2102 184 50       4261 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2103 184 100 100     6579 my @cfs = grep { $_ ne '.' && $_ ne '..' &&
  2404   100     38960  
2104             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2105 184         2687 closedir SA_CF_DIR;
2106              
2107 184         1410 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
  995         4427  
  2013         3862  
2108             }
2109             }
2110              
2111             ###########################################################################
2112              
2113             sub have_plugin {
2114 479     479 0 972 my ($self, $subname) = @_;
2115              
2116             # We could potentially get called after a finish(), so just return.
2117 479 50       1075 return unless $self->{plugins};
2118              
2119 479         1178 return $self->{plugins}->have_callback ($subname);
2120             }
2121              
2122             sub call_plugins {
2123 3462     3462 0 6915 my $self = shift;
2124 3462         5360 my $subname = shift;
2125              
2126             # We could potentially get called after a finish(), so just return.
2127 3462 50       8660 return unless $self->{plugins};
2128              
2129             # Use some calls ourself too
2130 3462 100       7217 if ($subname eq 'finish_parsing_end') {
2131             # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
2132             # read. Plugins can also now use {valid_tlds_re} to one time compile
2133             # regexes in finish_parsing_end.
2134 55         1141 $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
2135             }
2136              
2137             # safety net in case some plugin changes global settings, Bug 6218
2138 3462         13340 local $/ = $/; # prevent underlying modules from changing the global $/
2139              
2140 3462         12327 return $self->{plugins}->callback($subname, @_);
2141             }
2142              
2143             ###########################################################################
2144              
2145             sub find_all_addrs_in_mail {
2146 0     0 0 0 my ($self, $mail_obj) = @_;
2147              
2148 0         0 $self->init(1);
2149              
2150 0         0 my @addrlist;
2151 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2152             Errors-To Mail-Followup-To))
2153             {
2154 0         0 my @hdrs = $mail_obj->get_header($header);
2155 0 0       0 if ($#hdrs < 0) { next; }
  0         0  
2156 0         0 push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2157             }
2158              
2159             # find addrs in body, too
2160 0         0 foreach my $line (@{$mail_obj->get_body()}) {
  0         0  
2161 0         0 push (@addrlist, $self->find_all_addrs_in_line($line));
2162             }
2163              
2164 0         0 my @ret;
2165             my %done;
2166              
2167 0         0 foreach (@addrlist) {
2168 0         0 s/^mailto://; # from Outlook "forwarded" message
2169 0 0       0 next if defined ($done{$_}); $done{$_} = 1;
  0         0  
2170 0         0 push (@ret, $_);
2171             }
2172              
2173 0         0 @ret;
2174             }
2175              
2176             sub find_all_addrs_in_line {
2177 67     67 0 273 my ($self, $line) = @_;
2178              
2179             # a more permissive pattern based on "dot-atom" as per RFC2822
2180 67         150 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2181 67         170 my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2182              
2183 67         140 my @addrs;
2184             my %seen;
2185 67         922 while ($line =~ s/(?:mailto:)?\s*
2186             ($ID_PATTERN \@
2187             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2188             {
2189 2         12 my $addr = $1;
2190 2         7 $addr =~ s/^mailto://;
2191 2 50       12 next if (defined ($seen{$addr})); $seen{$addr} = 1;
  2         8  
2192 2         11 push (@addrs, $addr);
2193             }
2194              
2195 67         312 return @addrs;
2196             }
2197              
2198             ###########################################################################
2199              
2200             # sa_die -- used to die with a useful exit code.
2201              
2202             sub sa_die {
2203 0     0 0 0 my $exitcode = shift;
2204 0         0 warn @_;
2205 0         0 exit $exitcode;
2206             }
2207              
2208             ###########################################################################
2209              
2210             =item $f->copy_config ( [ $source ], [ $dest ] )
2211              
2212             Used for daemons to keep a persistent Mail::SpamAssassin object's
2213             configuration correct if switching between users. Pass an associative
2214             array reference as either $source or $dest, and set the other to 'undef'
2215             so that the object will use its current configuration. i.e.:
2216              
2217             # create object w/ configuration
2218             my $spamtest = Mail::SpamAssassin->new( ... );
2219              
2220             # backup configuration to %conf_backup
2221             my %conf_backup;
2222             $spamtest->copy_config(undef, \%conf_backup) ||
2223             die "config: error returned from copy_config!\n";
2224              
2225             ... do stuff, perhaps modify the config, etc ...
2226              
2227             # reset the configuration back to the original
2228             $spamtest->copy_config(\%conf_backup, undef) ||
2229             die "config: error returned from copy_config!\n";
2230              
2231             Note that the contents of the associative arrays should be considered
2232             opaque by calling code.
2233              
2234             =cut
2235              
2236             sub copy_config {
2237 3     3 1 6410 my ($self, $source, $dest) = @_;
2238              
2239             # At least one of either source or dest needs to be a hash reference ...
2240 3 50 66     30 unless ((defined $source && ref($source) eq 'HASH') ||
      33        
      66        
2241             (defined $dest && ref($dest) eq 'HASH'))
2242             {
2243 0         0 return 0;
2244             }
2245              
2246 3         9 my $timer = $self->time_method("copy_config");
2247              
2248             # let the Conf object itself do all the heavy lifting. It's better
2249             # than having this class know all about that class' internals...
2250 3 100       12 if (defined $source) {
2251 2         10 dbg ("config: copying current conf from backup");
2252             }
2253             else {
2254 1         5 dbg ("config: copying current conf to backup");
2255             }
2256 3         15 return $self->{conf}->clone($source, $dest);
2257             }
2258              
2259             ###########################################################################
2260              
2261             =item @plugins = $f->get_loaded_plugins_list ( )
2262              
2263             Return the list of plugins currently loaded by this SpamAssassin object's
2264             configuration; each entry in the list is an object of type
2265             C<Mail::SpamAssassin::Plugin>.
2266              
2267             (This API was added in SpamAssassin 3.2.0.)
2268              
2269             =cut
2270              
2271             sub get_loaded_plugins_list {
2272 0     0 1 0 my ($self) = @_;
2273 0         0 return $self->{plugins}->get_loaded_plugins_list();
2274             }
2275              
2276             1;
2277             __END__
2278              
2279             ###########################################################################
2280              
2281             =back
2282              
2283             =head1 PREREQUISITES
2284              
2285             C<HTML::Parser>
2286             C<Sys::Syslog>
2287              
2288             =head1 MORE DOCUMENTATION
2289              
2290             See also E<lt>https://spamassassin.apache.org/E<gt> and
2291             E<lt>https://wiki.apache.org/spamassassin/E<gt> for more information.
2292              
2293             =head1 SEE ALSO
2294              
2295             Mail::SpamAssassin::Conf(3)
2296             Mail::SpamAssassin::PerMsgStatus(3)
2297             spamassassin(1)
2298             sa-update(1)
2299              
2300             =head1 BUGS
2301              
2302             See E<lt>https://issues.apache.org/SpamAssassin/E<gt>
2303              
2304             =head1 AUTHORS
2305              
2306             The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>
2307              
2308             =head1 COPYRIGHT
2309              
2310             SpamAssassin is distributed under the Apache License, Version 2.0, as
2311             described in the file C<LICENSE> included with the distribution.
2312              
2313             =head1 AVAILABILITY
2314              
2315             The latest version of this library is likely to be available from CPAN
2316             as well as:
2317              
2318             E<lt>https://spamassassin.apache.org/E<gt>
2319              
2320             =cut