File Coverage

blib/lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 385 757 50.8
branch 109 340 32.0
condition 44 176 25.0
subroutine 56 83 67.4
pod 31 56 55.3
total 625 1412 44.2


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             use strict;
62 32     32   3307546 use warnings;
  32         413  
  32         1054  
63 32     32   193 # use bytes;
  32         66  
  32         1108  
64             use re 'taint';
65 32     32   196  
  32         73  
  32         1718  
66             require 5.006_001;
67              
68             use Mail::SpamAssassin::Logger;
69 31     31   7829 use Mail::SpamAssassin::Constants;
  31         66  
  31         1946  
70 31     31   8648 use Mail::SpamAssassin::Conf;
  31         68  
  31         1521  
71 31     31   8615 use Mail::SpamAssassin::Conf::SQL;
  31         175  
  31         1198  
72 31     31   10188 use Mail::SpamAssassin::Conf::LDAP;
  31         92  
  31         864  
73 31     31   7952 use Mail::SpamAssassin::PerMsgStatus;
  31         73  
  31         1645  
74 31     31   8241 use Mail::SpamAssassin::Message;
  31         1061  
  31         2141  
75 30     30   9514 use Mail::SpamAssassin::PluginHandler;
  30         462  
  30         2752  
76 30     30   13952 use Mail::SpamAssassin::DnsResolver;
  30         90  
  30         921  
77 30     30   9098 use Mail::SpamAssassin::RegistryBoundaries;
  30         84  
  30         1101  
78 30     30   10762 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  30         78  
  30         1051  
79 30     30   215 use Mail::SpamAssassin::Util::ScopedTimer;
  30         77  
  30         1708  
80 30     30   10247  
  30         77  
  30         889  
81             use Errno qw(ENOENT EACCES);
82 30     30   169 use File::Basename;
  30         57  
  30         1733  
83 30     30   198 use File::Path;
  30         68  
  30         2810  
84 30     30   203 use File::Spec 0.8;
  30         55  
  30         2251  
85 30     30   203 use Time::HiRes qw(time);
  30         961  
  30         777  
86 30     30   203 use Cwd;
  30         70  
  30         239  
87 30     30   2802 use Config;
  30         78  
  30         2421  
88 30     30   190  
  30         68  
  30         257584  
89             our $VERSION = "3.004006"; # update after release (same format as perl $])
90             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
91             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
92              
93              
94             # Used during the prerelease/release-candidate part of the official release
95             # process. If you hacked up your SA, you should add a version_tag to your .cf
96             # files; this variable should not be modified.
97             our @EXTRA_VERSION = qw();
98              
99             our @ISA = qw();
100              
101             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
102             our $SUB_VERSION = 'svnunknown';
103             if ('$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $' =~ ':') {
104             # Subversion keyword "$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $" has been successfully expanded.
105             # Doesn't happen with automated launchpad builds:
106             # https://bugs.launchpad.net/launchpad/+bug/780916
107             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $ updated by SVN'))[1];
108             }
109              
110              
111             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
112             if ('$LastChangedRevision: 1888548 $' =~ ':') {
113             # Subversion keyword "$LastChangedRevision: 1888548 $" has been successfully expanded.
114             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1888548 $ updated by SVN}[1]));
115             } else {
116             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
117             }
118             }
119              
120             $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
121             return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
122 37     37 0 496 }
123 37         1242  
124             our $HOME_URL = "https://spamassassin.apache.org/";
125              
126             # note that the CWD takes priority. This is required in case a user
127             # is testing a new version of SpamAssassin on a machine with an older
128             # version installed. Unless you can come up with a fix for this that
129             # allows "make test" to work, don't change this.
130              
131             our @default_rules_path = (
132             '__local_state_dir__/__version__',
133             '__def_rules_dir__',
134             '__prefix__/share/spamassassin',
135             '/usr/local/share/spamassassin',
136             '/usr/share/spamassassin',
137             );
138              
139             # first 3 are BSDish, latter 2 Linuxish
140             our @site_rules_path = (
141             '__local_rules_dir__',
142             '__prefix__/etc/mail/spamassassin',
143             '__prefix__/etc/spamassassin',
144             '/usr/local/etc/spamassassin',
145             '/usr/pkg/etc/spamassassin',
146             '/usr/etc/spamassassin',
147             '/etc/mail/spamassassin',
148             '/etc/spamassassin',
149             );
150              
151             our @default_prefs_path = (
152             '__local_rules_dir__/user_prefs.template',
153             '__prefix__/etc/mail/spamassassin/user_prefs.template',
154             '__prefix__/share/spamassassin/user_prefs.template',
155             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
156             '__def_rules_dir__/user_prefs.template',
157             '/etc/spamassassin/user_prefs.template',
158             '/etc/mail/spamassassin/user_prefs.template',
159             '/usr/local/share/spamassassin/user_prefs.template',
160             '/usr/share/spamassassin/user_prefs.template',
161             );
162              
163             our @default_userprefs_path = (
164             '~/.spamassassin/user_prefs',
165             );
166              
167             our @default_userstate_dir = (
168             '~/.spamassassin',
169             );
170              
171             ###########################################################################
172              
173             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
174              
175             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
176             reference to the constructor which may contain the following attribute-
177             value pairs.
178              
179             =over 4
180              
181             =item debug
182              
183             This is the debug options used to determine logging level. It exists to
184             allow sections of debug messages (called "facilities") to be enabled or
185             disabled. If this is a string, it is treated as a comma-delimited list
186             of the debug facilities. If it's a hash reference, then the keys are
187             treated as the list of debug facilities and if it's a array reference,
188             then the elements are treated as the list of debug facilities.
189              
190             There are also two special cases: (1) if the special case of "info" is
191             passed as a debug facility, then all informational messages are enabled;
192             (2) if the special case of "all" is passed as a debug facility, then all
193             debugging facilities are enabled.
194              
195             =item rules_filename
196              
197             The filename/directory to load spam-identifying rules from. (optional)
198              
199             =item site_rules_filename
200              
201             The filename/directory to load site-specific spam-identifying rules from.
202             (optional)
203              
204             =item userprefs_filename
205              
206             The filename to load preferences from. (optional)
207              
208             =item userstate_dir
209              
210             The directory user state is stored in. (optional)
211              
212             =item config_tree_recurse
213              
214             Set to C<1> to recurse through directories when reading configuration
215             files, instead of just reading a single level. (optional, default 0)
216              
217             =item config_text
218              
219             The text of all rules and preferences. If you prefer not to load the rules
220             from files, read them in yourself and set this instead. As a result, this will
221             override the settings for C<rules_filename>, C<site_rules_filename>,
222             and C<userprefs_filename>.
223              
224             =item pre_config_text
225              
226             Similar to C<config_text>, this text is placed before config_text to allow an
227             override of config files.
228              
229             =item post_config_text
230              
231             Similar to C<config_text>, this text is placed after config_text to allow an
232             override of config files.
233              
234             =item force_ipv4
235              
236             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
237             to use IPv6. Use if the existing tests for IPv6 availability produce
238             incorrect results or crashes.
239              
240             =item force_ipv6
241              
242             For symmetry with force_ipv4: if set to 1, DNS or other network tests
243             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
244             this setting and use whatever protocol family they are comfortable with.
245              
246             =item require_rules
247              
248             If set to 1, init() will die if no valid rules could be loaded. This is the
249             default behaviour when called by C<spamassassin> or C<spamd>.
250              
251             =item languages_filename
252              
253             If you want to be able to use the language-guessing rule
254             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
255             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
256             need to set this. It should be the path to the B<languages> file normally
257             found in the SpamAssassin B<rules> directory.
258              
259             =item local_tests_only
260              
261             If set to 1, no tests that require internet access will be performed. (default:
262             0)
263              
264             =item need_tags
265              
266             The option provides a way to avoid more expensive processing when it is known
267             in advance that some information will not be needed by a caller.
268              
269             A value of the option can either be a string (a comma-delimited list of tag
270             names), or a reference to a list of individual tag names. A caller may provide
271             the list in advance, specifying his intention to later collect the information
272             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
273             insensitive), it shows that a caller will not be interested in such tag,
274             although there is no guarantee it would save any resources, nor that a tag
275             value will be empty. Currently no built-in tags start with 'NO'. A later
276             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
277             to TIMING,NOASN.
278              
279             For backward compatibility, all tags available as of version 3.2.4 will
280             be available by default (unless disabled by NOtag), even if not requested
281             through need_tags option. Future versions may provide new tags conditionally
282             available.
283              
284             Currently the only tag that needs to be explicitly requested is 'TIMING'.
285             Not requesting it can save a millisecond or two - it mostly serves to
286             illustrate the usage of need_tags.
287              
288             Example:
289             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
290             or:
291             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
292              
293             =item ignore_site_cf_files
294              
295             If set to 1, any rule files found in the C<site_rules_filename> directory will
296             be ignored. *.pre files (used for loading plugins) found in the
297             C<site_rules_filename> directory will still be used. (default: 0)
298              
299             =item dont_copy_prefs
300              
301             If set to 1, the user preferences file will not be created if it doesn't
302             already exist. (default: 0)
303              
304             =item save_pattern_hits
305              
306             If set to 1, the patterns hit can be retrieved from the
307             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
308              
309             =item home_dir_for_helpers
310              
311             If set, the B<HOME> environment variable will be set to this value
312             when using test applications that require their configuration data,
313             such as Razor, Pyzor and DCC.
314              
315             =item username
316              
317             If set, the C<username> attribute will use this as the current user's name.
318             Otherwise, the default is taken from the runtime environment (ie. this process'
319             effective UID under UNIX).
320              
321             =item skip_prng_reseeding
322              
323             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
324             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
325             Perl function should be called during initialization of each child process,
326             soon after forking.
327              
328             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
329             library.
330              
331             This setting requires the caller to decide when to call srand().
332             This choice may be desired to preserve the entropy of a PRNG. The default
333             value of skip_prng_reseeding is false to maintain backward compatibility.
334              
335             This option should only be set by a caller if it calls srand() upon spawning
336             child processes. Unless you are certain you need it, leave this setting as
337             false.
338              
339             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
340             which allows spamd to call srand() right after forking a child process.
341              
342             =back
343              
344             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
345             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
346             configuration files in the usual installed locations using the below variable
347             definitions which can be passed in.
348              
349             =over 4
350              
351             =item PREFIX
352              
353             Used as the root for certain directory paths such as:
354              
355             '__prefix__/etc/mail/spamassassin'
356             '__prefix__/etc/spamassassin'
357              
358             Defaults to "/usr/local".
359              
360             =item DEF_RULES_DIR
361              
362             Location where the default rules are installed. Defaults to
363             "/usr/local/share/spamassassin".
364              
365             =item LOCAL_RULES_DIR
366              
367             Location where the local site rules are installed. Defaults to
368             "/etc/mail/spamassassin".
369              
370             =item LOCAL_STATE_DIR
371              
372             Location of the local state directory, mainly used for installing updates via
373             C<sa-update> and compiling rulesets to native code. Defaults to
374             "/var/lib/spamassassin".
375              
376             =back
377              
378              
379             =cut
380              
381             # undocumented ctor settings:
382             #
383             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
384              
385             my $class = shift;
386             $class = ref($class) || $class;
387              
388 37     37 1 11733463 my $self = shift;
389 37   33     722 if (!defined $self) { $self = { }; }
390             bless ($self, $class);
391 37         186  
392 37 50       286 # basic backward compatibility; debug used to be a boolean.
  0         0  
393 37         176 # translate that into 'all', which is what it meant before 3.1.0.
394             if ($self->{debug} && $self->{debug} eq '1') {
395             $self->{debug} = 'all';
396             }
397 37 50 33     535  
398 0         0 # enable or disable debugging
399             Mail::SpamAssassin::Logger::add_facilities($self->{debug});
400              
401             # first debugging information possibly printed should be the version
402 37         647 dbg("generic: SpamAssassin version " . Version());
403              
404             # if the libs are installed in an alternate location, and the caller
405 37         475 # didn't set PREFIX, we should have an estimated guess ready, values
406             # substituted at 'make' time
407             $self->{PREFIX} ||= '/usr/local';
408             $self->{DEF_RULES_DIR} ||= '/usr/local/share/spamassassin';
409             $self->{LOCAL_RULES_DIR} ||= '/etc/mail/spamassassin';
410 37   50     597 $self->{LOCAL_STATE_DIR} ||= '/var/lib/spamassassin';
411 37   50     462 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
412 37   50     352 qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
413 37   50     488  
414 37         146 $self->{needed_tags} = {};
  148         845  
415             { my $ntags = $self->{need_tags};
416             if (defined $ntags) {
417 37         220 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
418 37         141 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
  37         115  
419 37 50       376 }
420 0 0       0 }
421 0 0       0 }
422             if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
423             $self->timer_enable();
424             }
425 37 50 33     357  
426 0         0 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
427             $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
428              
429 37   33     1075 $self->{save_pattern_hits} ||= 0;
430 37         707  
431             # Make sure that we clean $PATH if we're tainted
432 37   50     311 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
433              
434             if (!defined $self->{username}) {
435 37         465 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
436             }
437 37 50       197  
438 37         334 $self->create_locker();
439              
440             $self;
441 37         303 }
442              
443 37         162 my ($self) = @_;
444              
445             my $class;
446             my $m = $self->{conf}->{lock_method};
447 37     37 0 108  
448             # let people choose what they want -- even if they may not work on their
449 37         62 # OS. (they could be using cygwin!)
450 37         99 if ($m eq 'win32') { $class = 'Win32'; }
451             elsif ($m eq 'flock') { $class = 'Flock'; }
452             elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
453             else {
454 37 50       240 # OS-specific defaults
  0 50       0  
    50          
455 0         0 if (am_running_on_windows()) {
456 0         0 $class = 'Win32';
457             } else {
458             $class = 'UnixNFSSafe';
459 37 50       185 }
460 0         0 }
461              
462 37         162 # this could probably be made a little faster; for now I'm going
463             # for slow but safe, by keeping in quotes
464             eval '
465             use Mail::SpamAssassin::Locker::'.$class.';
466             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
467             1;
468             ' or do {
469             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
470             die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
471             };
472 37 50   16   3628  
  16     5   8933  
  16     4   51  
  16         646  
  5         89  
  5         36  
  5         342  
  4         111  
  4         73  
  4         319  
473 0 0       0 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
474 0         0 }
475              
476             ###########################################################################
477 37 50       289  
  0         0  
478             =item parse($message, $parse_now [, $suppl_attrib])
479              
480             Parse will return a Mail::SpamAssassin::Message object with just the
481             headers parsed. When calling this function, there are two optional
482             parameters that can be passed in: $message is either undef (which
483             will use STDIN), a scalar - a string containing an entire message,
484             a reference to such string, an array reference of the message with
485             one line per array element, or either a file glob or an IO::File object
486             which holds the entire contents of the message; and $parse_now, which
487             specifies whether or not to create a MIME tree at parse time or later
488             as necessary.
489              
490             The I<$parse_now> option, by default, is set to false (0). This
491             allows SpamAssassin to not have to generate the tree of internal
492             data nodes if the information is not going to be used. This is
493             handy, for instance, when running C<spamassassin -d>, which only
494             needs the pristine header and body which is always parsed and stored
495             by this function.
496              
497             The optional last argument I<$suppl_attrib> provides a way for a caller
498             to pass additional information about a message to SpamAssassin. It is
499             either undef, or a ref to a hash where each key/value pair provides some
500             supplementary attribute of the message, typically information that cannot
501             be deduced from the message itself, or is hard to do so reliably, or would
502             represent unnecessary work for SpamAssassin to obtain it. The argument will
503             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
504             made available to the rest of the code as well as to plugins. The exact list
505             of attributes will evolve through time, any unknown attribute should be
506             ignored. Possible examples are: SMTP envelope information, a flag indicating
507             that a message as supplied by a caller was truncated due to size limit, an
508             already verified list of DKIM signature objects, or perhaps a list of rule
509             hits predetermined by a caller, which makes another possible way for a
510             caller to provide meta information (instead of having to insert made-up
511             header fields in order to pass information), or maybe just plain rule hits.
512              
513             For more information, please see the C<Mail::SpamAssassin::Message>
514             and C<Mail::SpamAssassin::Message::Node> POD.
515              
516             =cut
517              
518             my($self, $message, $parsenow, $suppl_attrib) = @_;
519              
520             my $start_time = time;
521             $self->init(1);
522             my $timer = $self->time_method("parse");
523 44     44 1 10703  
524             my $master_deadline;
525 44         199 # passed in at a function call
526 44         200 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
527 44         151 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
528             }
529 44         68 # found in a config file - overrides passed-in number if lower
530             if ($self->{conf}->{time_limit}) { # defined and nonzero
531 44 50 66     141 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
532 1         2 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
533             $master_deadline = $time_limit_deadline;
534             }
535 44 50       154 }
536 44         138 if (defined $master_deadline) {
537 44 50 33     179 dbg("config: time limit %.1f s", $master_deadline - $start_time);
538 44         78 }
539              
540             my $msg = Mail::SpamAssassin::Message->new({
541 44 50       129 message=>$message, parsenow=>$parsenow,
542 44         170 normalize=>$self->{conf}->{normalize_charset},
543             body_part_scan_size=>$self->{conf}->{body_part_scan_size},
544             rawbody_part_scan_size=>$self->{conf}->{rawbody_part_scan_size},
545             master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
546              
547             # bug 5069: The goal here is to get rendering plugins to do things
548             # like OCR, convert doc and pdf to text, etc, though it could be anything
549             # that wants to process the message after it's been parsed.
550 44         1018 $self->call_plugins("post_message_parse", { message => $msg });
551              
552             return $msg;
553             }
554              
555 44         294  
556             ###########################################################################
557 44         215  
558             =item $status = $f->check ($mail)
559              
560             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
561             to determine if it is spam or not.
562              
563             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
564             used to test or manipulate the mail message.
565              
566             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
567             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
568             object is a "factory". However, if you do this, be sure to call the
569             C<finish()> method on the status objects when you're done with them.
570              
571             =cut
572              
573             my ($self, $mail_obj) = @_;
574              
575             $self->init(1);
576             my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
577             $pms->check();
578             dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
579 12     12 1 893 $pms;
580             }
581 12         35  
582 12         108 =item $status = $f->check_message_text ($mailtext)
583 12         59  
584 12 50       44 Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
585 12         86 is spam or not.
586              
587             Otherwise identical to C<check()> above.
588              
589             =cut
590              
591             my ($self, $mailtext) = @_;
592             my $msg = $self->parse($mailtext, 1);
593             my $result = $self->check($msg);
594              
595             # Kill off the metadata ...
596             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
597             # the message. So killing it here will cause things like
598 0     0 1 0 # rewrite_message() to fail. <grrr>
599 0         0 #
600 0         0 $msg->finish_metadata();
601              
602             return $result;
603             }
604              
605             ###########################################################################
606              
607 0         0 =item $status = $f->learn ($mail, $id, $isspam, $forget)
608              
609 0         0 Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
610              
611             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
612             be learnt as non-spam.
613              
614             If C<$forget> is set, the attributes of the mail will be removed from
615             both the non-spam and spam learning databases.
616              
617             C<$id> is an optional message-identification string, used internally
618             to tag the message. If it is C<undef>, the Message-Id of the message
619             will be used. It should be unique to that message.
620              
621             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
622             manipulate the learning process for each mail.
623              
624             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
625             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
626             object is a "factory". However, if you do this, be sure to call the
627             C<finish()> method on the learner objects when you're done with them.
628              
629             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
630             must be called before using this method.
631              
632             =cut
633              
634             my ($self, $mail_obj, $id, $isspam, $forget) = @_;
635             local ($_);
636              
637             require Mail::SpamAssassin::PerMsgLearner;
638             $self->init(1);
639             my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
640              
641             if ($forget) {
642 2     2 1 7 dbg("learn: forgetting message");
643 2         5 $msg->forget($id);
644             } elsif ($isspam) {
645 2         511 dbg("learn: learning spam");
646 2         8 $msg->learn_spam($id);
647 2         14 } else {
648             dbg("learn: learning ham");
649 2 50       9 $msg->learn_ham($id);
    50          
650 0         0 }
651 0         0  
652             $msg;
653 0         0 }
654 0         0  
655             ###########################################################################
656 2         9  
657 2         6 =item $f->init_learner ( [ { opt => val, ... } ] )
658              
659             Initialise learning. You may pass the following attribute-value pairs to this
660 2         9 method.
661              
662             =over 4
663              
664             =item caller_will_untie
665              
666             Whether or not the code calling this method will take care of untie'ing
667             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
668              
669             =item force_expire
670              
671             Should an expiration run be forced to occur immediately? (optional, default 0).
672              
673             =item learn_to_journal
674              
675             Should learning data be written to the journal, instead of directly to the
676             databases? (optional, default 0).
677              
678             =item wait_for_lock
679              
680             Whether or not to wait a long time for locks to complete (optional, default 0).
681              
682             =item opportunistic_expire_check_only
683              
684             During the opportunistic journal sync and expire check, don't actually do the
685             expire but report back whether or not it should occur (optional, default 0).
686              
687             =item no_relearn
688              
689             If doing a learn operation, and the message has already been learned as
690             the opposite type, don't re-learn the message.
691              
692             =back
693              
694             =cut
695              
696             my $self = shift;
697             my $opts = shift;
698             dbg("learn: initializing learner");
699              
700             # Make sure we're already initialized ...
701             $self->init(1);
702              
703             my %kv = (
704             'force_expire' => 'learn_force_expire',
705 4     4 1 7 'learn_to_journal' => 'learn_to_journal',
706 4         7 'caller_will_untie' => 'learn_caller_will_untie',
707 4         17 'wait_for_lock' => 'learn_wait_for_lock',
708             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
709             'no_relearn' => 'learn_no_relearn',
710 4         17 );
711              
712 4         34 my %ret;
713              
714             # Set any other options that need setting ...
715             while( my($k,$v) = each %kv ) {
716             $ret{$k} = $self->{$v};
717             if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
718             }
719              
720             return \%ret;
721 4         8 }
722              
723             ###########################################################################
724 4         18  
725 24         56 =item $f->rebuild_learner_caches ({ opt => val })
726 24 100       51  
  14         36  
727             Rebuild any cache databases; should be called after the learning process.
728             Options include: C<verbose>, which will output diagnostics to C<stdout>
729 4         13 if set to 1.
730              
731             =cut
732              
733             my $self = shift;
734             my $opts = shift;
735             $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
736             1;
737             }
738              
739             =item $f->finish_learner ()
740              
741             Finish learning.
742              
743 0     0 1 0 =cut
744 0         0  
745 0 0       0 my $self = shift;
746 0         0 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
747             1;
748             }
749              
750             =item $f->dump_bayes_db()
751              
752             Dump the contents of the Bayes DB
753              
754             =cut
755              
756 3     3 1 6 my($self,@opts) = @_;
757 3 100       17 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
758 3         7 }
759              
760             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
761              
762             Signals that the current user has changed (possibly using C<setuid>), meaning
763             that SpamAssassin should close any per-user databases it has open, and re-open
764             using ones appropriate for the new user.
765              
766             Note that this should be called I<after> reading any per-user configuration, as
767             that data may override some paths opened in this method. You may pass the
768 0     0 1 0 following attribute-value pairs:
769 0 0       0  
770             =over 4
771              
772             =item username
773              
774             The username of the user. This will be used for the C<username> attribute.
775              
776             =item user_dir
777              
778             A directory to use as a 'home directory' for the current user's data,
779             overriding the system default. This directory must be readable and writable by
780             the process. Note that the resulting C<userstate_dir> will be the
781             C<.spamassassin> subdirectory of this dir.
782              
783             =item userstate_dir
784              
785             A directory to use as a directory for the current user's data, overriding the
786             system default. This directory must be readable and writable by the process.
787             The default is C<user_dir/.spamassassin>.
788              
789             =back
790              
791             =cut
792              
793             my $self = shift;
794             my $opts = shift;
795             my $set = 0;
796              
797             my $timer = $self->time_method("signal_user_changed");
798             dbg("info: user has changed");
799              
800             if (defined $opts && $opts->{username}) {
801             $self->{username} = $opts->{username};
802             } else {
803             undef $self->{username};
804             }
805             if (defined $opts && $opts->{user_dir}) {
806 0     0 1 0 $self->{user_dir} = $opts->{user_dir};
807 0         0 } else {
808 0         0 undef $self->{user_dir};
809             }
810 0         0 if (defined $opts && $opts->{userstate_dir}) {
811 0         0 $self->{userstate_dir} = $opts->{userstate_dir};
812             } else {
813 0 0 0     0 undef $self->{userstate_dir};
814 0         0 }
815              
816 0         0 # reopen bayes dbs for this user
817             $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
818 0 0 0     0 if ($self->{conf}->{use_bayes}) {
819 0         0 require Mail::SpamAssassin::Bayes;
820             $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
821 0         0 } else {
822             delete $self->{bayes_scanner} if $self->{bayes_scanner};
823 0 0 0     0 }
824 0         0  
825             # this user may have a different learn_to_journal setting, so reset appropriately
826 0         0 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
827              
828             $set |= 1 unless $self->{local_tests_only};
829             $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
830 0 0       0  
831 0 0       0 $self->{conf}->set_score_set ($set);
832 0         0  
833 0         0 $self->call_plugins("signal_user_changed", {
834             username => $self->{username},
835 0 0       0 userstate_dir => $self->{userstate_dir},
836             user_dir => $self->{user_dir},
837             });
838              
839 0         0 1;
840             }
841 0 0       0  
842 0 0 0     0 ###########################################################################
      0        
843              
844 0         0 =item $f->report_as_spam ($mail, $options)
845              
846             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
847             human-verified spam. This will submit the mail message to live,
848             collaborative, spam-blocker databases, allowing other users to block this
849             message.
850 0         0  
851             It will also submit the mail to SpamAssassin's Bayesian learner.
852 0         0  
853             Options is an optional reference to a hash of options. Currently these
854             can be:
855              
856             =over 4
857              
858             =item dont_report_to_dcc
859              
860             Inhibits reporting of the spam to DCC.
861              
862             =item dont_report_to_pyzor
863              
864             Inhibits reporting of the spam to Pyzor.
865              
866             =item dont_report_to_razor
867              
868             Inhibits reporting of the spam to Razor.
869              
870             =item dont_report_to_spamcop
871              
872             Inhibits reporting of the spam to SpamCop.
873              
874             =back
875              
876             =cut
877              
878             my ($self, $mail, $options) = @_;
879             local ($_);
880              
881             $self->init(1);
882             my $timer = $self->time_method("report_as_spam");
883              
884             # learn as spam if enabled
885             if ( $self->{conf}->{bayes_learn_during_report} ) {
886             $self->learn ($mail, undef, 1, 0);
887             }
888              
889             require Mail::SpamAssassin::Reporter;
890             $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
891             $mail->report();
892 0     0 1 0 }
893 0         0  
894             ###########################################################################
895 0         0  
896 0         0 =item $f->revoke_as_spam ($mail, $options)
897              
898             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
899 0 0       0 human-verified ham (non-spam). This will revoke the mail message from live,
900 0         0 collaborative, spam-blocker databases, allowing other users to block this
901             message.
902              
903 0         0 It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
904 0         0  
905 0         0 Options is an optional reference to a hash of options. Currently these
906             can be:
907              
908             =over 4
909              
910             =item dont_report_to_razor
911              
912             Inhibits revoking of the spam to Razor.
913              
914              
915             =back
916              
917             =cut
918              
919             my ($self, $mail, $options) = @_;
920             local ($_);
921              
922             $self->init(1);
923             my $timer = $self->time_method("revoke_as_spam");
924              
925             # learn as nonspam
926             $self->learn ($mail, undef, 0, 0);
927              
928             require Mail::SpamAssassin::Reporter;
929             $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
930             $mail->revoke ();
931             }
932              
933             ###########################################################################
934 0     0 1 0  
935 0         0 =item $f->add_address_to_whitelist ($addr, $cli_p)
936              
937 0         0 Given a string containing an email address, add it to the automatic
938 0         0 whitelist database.
939              
940             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
941 0         0  
942             =cut
943 0         0  
944 0         0 my ($self, $addr, $cli_p) = @_;
945 0         0  
946             $self->call_plugins("whitelist_address", { address => $addr,
947             cli_p => $cli_p });
948             }
949              
950             ###########################################################################
951              
952             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
953              
954             Given a mail message, find as many addresses in the usual headers (To, Cc, From
955             etc.), and the message body, and add them to the automatic whitelist database.
956              
957             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
958              
959             =cut
960 0     0 1 0  
961             my ($self, $mail_obj, $cli_p) = @_;
962 0         0  
963             foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
964             $self->call_plugins("whitelist_address", { address => $addr,
965             cli_p => $cli_p });
966             }
967             }
968              
969             ###########################################################################
970              
971             =item $f->remove_address_from_whitelist ($addr, $cli_p)
972              
973             Given a string containing an email address, remove it from the automatic
974             whitelist database.
975              
976             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
977              
978 0     0 1 0 =cut
979              
980 0         0 my ($self, $addr, $cli_p) = @_;
981 0         0  
982             $self->call_plugins("remove_address", { address => $addr,
983             cli_p => $cli_p });
984             }
985              
986             ###########################################################################
987              
988             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
989              
990             Given a mail message, find as many addresses in the usual headers (To, Cc, From
991             etc.), and the message body, and remove them from the automatic whitelist
992             database.
993              
994             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
995              
996             =cut
997              
998 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
999              
1000 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1001             $self->call_plugins("remove_address", { address => $addr,
1002             cli_p => $cli_p });
1003             }
1004             }
1005              
1006             ###########################################################################
1007              
1008             =item $f->add_address_to_blacklist ($addr, $cli_p)
1009              
1010             Given a string containing an email address, add it to the automatic
1011             whitelist database with a high score, effectively blacklisting them.
1012              
1013             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1014              
1015             =cut
1016              
1017 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1018             $self->call_plugins("blacklist_address", { address => $addr,
1019 0         0 cli_p => $cli_p });
1020 0         0 }
1021              
1022             ###########################################################################
1023              
1024             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1025              
1026             Given a mail message, find addresses in the From headers and add them to the
1027             automatic whitelist database with a high score, effectively blacklisting them.
1028              
1029             Note that To and Cc addresses are not used.
1030              
1031             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1032              
1033             =cut
1034              
1035             my ($self, $mail_obj, $cli_p) = @_;
1036              
1037 0     0 1 0 $self->init(1);
1038 0         0  
1039             my @addrlist;
1040             my @hdrs = $mail_obj->get_header('From');
1041             if ($#hdrs >= 0) {
1042             push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1043             }
1044              
1045             foreach my $addr (@addrlist) {
1046             $self->call_plugins("blacklist_address", { address => $addr,
1047             cli_p => $cli_p });
1048             }
1049              
1050             }
1051              
1052             ###########################################################################
1053              
1054             =item $text = $f->remove_spamassassin_markup ($mail)
1055              
1056 0     0 1 0 Returns the text of the message, with any SpamAssassin-added text (such
1057             as the report, or X-Spam-Status headers) stripped.
1058 0         0  
1059             Note that the B<$mail> object is not modified.
1060 0         0  
1061 0         0 Warning: if the input message in B<$mail> contains a mixture of CR-LF
1062 0 0       0 (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1063 0         0 to use one or the other consistently throughout.
1064              
1065             =cut
1066 0         0  
1067 0         0 my ($self, $mail_obj) = @_;
1068             local ($_);
1069              
1070             my $timer = $self->time_method("remove_spamassassin_markup");
1071             my $mbox = $mail_obj->get_mbox_separator() || '';
1072              
1073             dbg("markup: removing markup");
1074              
1075             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1076             # if we have definitive proof it's not an encapsulated message.
1077             my $ct = $mail_obj->get_header("Content-Type") || '';
1078             if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1079              
1080             # Ok, this is a possible encapsulated message, search for the
1081             # appropriate mime part and deal with it if necessary.
1082             my $boundary = "\Q$1\E";
1083             my @msg = split(/^/,$mail_obj->get_pristine_body());
1084              
1085             my $flag = 0;
1086             $ct = '';
1087             my $cd = '';
1088             for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1089 0     0 1 0 # only look at mime part headers
1090 0         0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1091              
1092 0         0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1093 0   0     0  
1094             # Ok, we found the encapsulated piece ...
1095 0         0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
1096             ($ct eq "message/rfc822" &&
1097             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1098             {
1099 0   0     0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1100 0 0       0  
1101             # find the end and chop it off
1102             for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1103             if ( $msg[$i] =~ /^--$boundary/ ) {
1104 0         0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1105 0         0 # will remove the blank line (not sure it'll always be
1106             # there) and everything below. don't worry, the splice
1107 0         0 # guarantees the for will stop ...
1108 0         0 }
1109 0         0 }
1110 0         0  
1111             # Ok, we're done. Return the rewritten message.
1112 0 0 0     0 return join('', $mbox, @msg);
1113             }
1114 0 0       0  
1115             $flag = 0;
1116             $ct = '';
1117 0 0 0     0 $cd = '';
      0        
1118             next;
1119             }
1120              
1121 0         0 # Ok, we're in the mime header ... Capture the appropriate headers...
1122             $flag = 1;
1123             if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
1124 0         0 $ct = $1;
1125 0 0       0 }
1126 0 0       0 elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1127             $cd = $1;
1128             }
1129             }
1130             }
1131              
1132             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1133             # So treat it like a "report_safe 0" message.
1134 0         0 my $hdrs = $mail_obj->get_pristine_header();
1135             my $body = $mail_obj->get_pristine_body();
1136              
1137 0         0 # remove DOS line endings
1138 0         0 $hdrs =~ s/\r//gs;
1139 0         0  
1140 0         0 # unfold SA added headers, but not X-Spam-Prev headers ...
1141             $hdrs = "\n".$hdrs; # simplifies regexp below
1142             1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1143             $hdrs =~ s/^\n//;
1144 0         0  
1145 0 0       0 ###########################################################################
    0          
1146 0         0 # Backward Compatibility, pre 3.0.x.
1147              
1148             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1149 0         0 $self->init(1);
1150             foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
1151             # let the 3.0 decoding do it...
1152             next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1153              
1154             dbg("markup: removing markup in $header");
1155             if ($header eq 'Subject') {
1156 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1157 0         0 $tag = quotemeta($tag);
1158             $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1159             $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1160 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1161             1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1162             } else {
1163 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1164 0         0 }
1165 0         0 }
1166              
1167             # Now deal with report cleansing from 2.4x and previous.
1168             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1169             # followed by another "SPAM: ----.+" line, followed by a blank line.
1170             1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1171 0         0 ###########################################################################
1172 0         0  
  0         0  
1173             # 3.0 version -- support for previously-nonexistent Subject hdr.
1174 0 0       0 # ensure the Subject line didn't *really* contain "(nonexistent)" in
1175             # the original message!
1176 0         0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1177 0 0       0 && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1178 0         0 {
1179 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1180 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1181 0         0 }
1182 0         0  
1183 0         0 # 3.0 version -- revert from X-Spam-Prev to original ...
1184             while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1185 0         0 my($hdr, $name) = ($1,$2);
1186              
1187             # If the rewritten version doesn't exist, we should deal with it anyway...
1188             unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1189             $hdrs =~ s/\n\n/\n$hdr\n/;
1190             }
1191             }
1192 0         0  
1193             # remove any other X-Spam headers we added, will be unfolded
1194             $hdrs = "\n".$hdrs; # simplifies regexp below
1195             1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1196             $hdrs =~ s/^\n//;
1197              
1198 0 0 0     0 # re-add DOS line endings
1199             if ($mail_obj->{line_ending} ne "\n") {
1200             $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1201 0         0 }
1202 0         0  
1203             # Put the whole thing back together ...
1204             return join ('', $mbox, $hdrs, $body);
1205             }
1206 0         0  
1207 0         0 ###########################################################################
1208              
1209             =item $f->read_scoreonly_config ($filename)
1210 0 0       0  
1211 0         0 Read a configuration file and parse user preferences from it.
1212              
1213             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1214             In other words, they include scoring options, scores, whitelists and
1215             blacklists, and so on, but do not include rule definitions, privileged
1216 0         0 settings, etc. unless C<allow_user_rules> is enabled; and they never include
1217 0         0 the administrator settings.
1218 0         0  
1219             =cut
1220              
1221 0 0       0 my ($self, $filename) = @_;
1222 0         0  
1223             my $timer = $self->time_method("read_scoreonly_config");
1224             local *IN;
1225             if (!open(IN,"<$filename")) {
1226 0         0 # the file may not exist; this should not be verbose
1227             dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1228             return;
1229             }
1230              
1231             my($inbuf,$nread,$text); $text = '';
1232             while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
1233             defined $nread or die "error reading $filename: $!";
1234             close IN or die "error closing $filename: $!";
1235             undef $inbuf;
1236              
1237             $text = "file start $filename\n" . $text;
1238             # add an extra \n in case file did not end in one.
1239             $text .= "\nfile end $filename\n";
1240              
1241             $self->{conf}->{main} = $self;
1242             $self->{conf}->parse_scores_only ($text);
1243             $self->{conf}->finish_parsing(1);
1244 0     0 1 0  
1245             delete $self->{conf}->{main}; # to allow future GC'ing
1246 0         0 }
1247 0         0  
1248 0 0       0 ###########################################################################
1249              
1250 0         0 =item $f->load_scoreonly_sql ($username)
1251 0         0  
1252             Read configuration parameters from SQL database and parse scores from it. This
1253             will only take effect if the perl C<DBI> module is installed, and the
1254 0         0 configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
  0         0  
1255 0         0 C<user_scores_sql_password> are set correctly.
  0         0  
1256 0 0       0  
1257 0 0       0 The username in C<$username> will also be used for the C<username> attribute of
1258 0         0 the Mail::SpamAssassin object.
1259              
1260 0         0 =cut
1261              
1262 0         0 my ($self, $username) = @_;
1263              
1264 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1265 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1266 0         0 $self->{username} = $username;
1267             unless ($src->load($username)) {
1268 0         0 return 0;
1269             }
1270             return 1;
1271             }
1272              
1273             ###########################################################################
1274              
1275             =item $f->load_scoreonly_ldap ($username)
1276              
1277             Read configuration parameters from an LDAP server and parse scores from it.
1278             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1279             installed, and the configuration parameters C<user_scores_dsn>,
1280             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1281             correctly.
1282              
1283             The username in C<$username> will also be used for the C<username> attribute of
1284             the Mail::SpamAssassin object.
1285              
1286 0     0 1 0 =cut
1287              
1288 0         0 my ($self, $username) = @_;
1289 0         0  
1290 0         0 dbg("config: load_scoreonly_ldap($username)");
1291 0 0       0 my $timer = $self->time_method("load_scoreonly_ldap");
1292 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1293             $self->{username} = $username;
1294 0         0 $src->load($username);
1295             }
1296              
1297             ###########################################################################
1298              
1299             =item $f->set_persistent_address_list_factory ($factoryobj)
1300              
1301             Set the persistent address list factory, used to create objects for the
1302             automatic whitelist algorithm's persistent-storage back-end. See
1303             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1304             must implement, and the API the objects they produce must implement.
1305              
1306             =cut
1307              
1308             my ($self, $fac) = @_;
1309             $self->{pers_addr_list_factory} = $fac;
1310             }
1311              
1312             ###########################################################################
1313 0     0 1 0  
1314             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1315 0         0  
1316 0         0 Compile all patterns, load all configuration files, and load all
1317 0         0 possibly-required Perl modules.
1318 0         0  
1319 0         0 Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1320             plan to fork() or start a new perl interpreter thread to process a message,
1321             this is suboptimal, as each process/thread will have to perform these actions.
1322              
1323             Call this function in the master thread or process to perform the actions
1324             straight away, so that the sub-processes will not have to.
1325              
1326             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1327             configuration without reading the per-user configuration file and it will
1328             assume that you will call C<read_scoreonly_config> at a later point.
1329              
1330             If C<$keep_userstate> is true, compile_now() will revert any configuration
1331             options which have a default with I<__userstate__> in it post-init(),
1332             and then re-change the option before returning. This lets you change
1333             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1334 4     4 1 15 files there as necessary without disturbing the actual files as changed
1335 4         16 by a configuration option. By default, this is disabled.
1336              
1337             =cut
1338              
1339             my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1340              
1341             my $timer = $self->time_method("compile_now");
1342              
1343             # Backup default values which deal with userstate.
1344             # This is done so we can create any new files in, presumably, a temp dir.
1345             # see bug 2762 for more details.
1346             my %backup;
1347             if (defined $deal_with_userstate && $deal_with_userstate) {
1348             while(my($k,$v) = each %{$self->{conf}}) {
1349             $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
1350             }
1351             }
1352              
1353             $self->init($use_user_prefs);
1354              
1355             # if init() didn't change the value from default, forget about it.
1356             # if the value is different, remember the new version, and reset the default.
1357             while(my($k,$v) = each %backup) {
1358             if ($self->{conf}->{$k} eq $v) {
1359             delete $backup{$k};
1360             }
1361             else {
1362             my $backup = $backup{$k};
1363             $backup{$k} = $self->{conf}->{$k};
1364             $self->{conf}->{$k} = $backup;
1365             }
1366 1     1 1 1279 }
1367              
1368 1         16 dbg("ignore: test message to precompile patterns and load modules");
1369              
1370             # tell plugins we are about to send a message for compiling purposes
1371             $self->call_plugins("compile_now_start",
1372             { use_user_prefs => $use_user_prefs,
1373 1         2 keep_userstate => $deal_with_userstate});
1374 1 50 33     4  
1375 0         0 # note: this may incur network access. Good. We want to make sure
  0         0  
1376 0 0 0     0 # as much as possible is preloaded!
      0        
1377             my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1378             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1379             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1380 1         10  
1381             my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1382             my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1383             { disable_auto_learning => 1 } );
1384 1         5  
1385 0 0       0 # We want to turn off the bayes rules for this test msg
1386 0         0 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1387             $self->{conf}->{use_bayes_rules} = 0;
1388             $status->check();
1389 0         0 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1390 0         0 $status->finish();
1391 0         0 $mail->finish();
1392             $self->finish_learner();
1393              
1394             $self->{conf}->free_uncompiled_rule_source();
1395 1         4  
1396             # load SQL modules now as well
1397             my $dsn = $self->{conf}->{user_scores_dsn};
1398 1         4 if ($dsn ne '') {
1399             if ($dsn =~ /^ldap:/i) {
1400             Mail::SpamAssassin::Conf::LDAP::load_modules();
1401             } else {
1402             Mail::SpamAssassin::Conf::SQL::load_modules();
1403             }
1404 1         29 }
1405              
1406             # make sure things are ready for scanning
1407             $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1408 1         7 $self->call_plugins("compile_now_finish",
1409 1         35 { use_user_prefs => $use_user_prefs,
1410             keep_userstate => $deal_with_userstate});
1411              
1412             # Reset any non-default values to the post-init() version.
1413 1         3 while(my($k,$v) = each %backup) {
1414 1         2 $self->{conf}->{$k} = $v;
1415 1         4 }
1416 1         3  
1417 1         4 # clear sed_path_cache
1418 1         14 delete $self->{conf}->{sed_path_cache};
1419 1         10  
1420             1;
1421 1         5 }
1422              
1423             ###########################################################################
1424 1         2  
1425 1 50       4 =item $f->debug_diagnostics ()
1426 0 0       0  
1427 0         0 Output some diagnostic information, useful for debugging SpamAssassin
1428             problems.
1429 0         0  
1430             =cut
1431              
1432             my ($self) = @_;
1433              
1434 1 50       3 # load this class lazily, to avoid overhead when this method isn't
1435 1         5 # called.
1436             eval {
1437             require Mail::SpamAssassin::Util::DependencyInfo;
1438             dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1439             };
1440 1         5 }
1441 0         0  
1442             ###########################################################################
1443              
1444             =item $failed = $f->lint_rules ()
1445 1         6  
1446             Syntax-check the current set of rules. Returns the number of
1447 1         6 syntax errors discovered, or 0 if the configuration is valid.
1448              
1449             =cut
1450              
1451             my ($self) = @_;
1452              
1453             dbg("ignore: using a test message to lint rules");
1454             my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1455             "Subject: \n",
1456             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1457             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1458              
1459             $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1460 0     0 1 0 $self->{syntax_errors} = 0;
1461              
1462             my $olddcp = $self->{dont_copy_prefs};
1463             $self->{dont_copy_prefs} = 1;
1464 0         0  
1465 0         0 $self->init(1);
1466 0         0 $self->{syntax_errors} += $self->{conf}->{errors};
1467              
1468             $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1469              
1470             # bug 5048: override settings to ensure a faster lint
1471             $self->{'conf'}->{'use_auto_whitelist'} = 0;
1472             $self->{'conf'}->{'bayes_auto_learn'} = 0;
1473              
1474             my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1475             my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1476             { disable_auto_learning => 1 } );
1477             $status->check();
1478              
1479             $self->{syntax_errors} += $status->{rule_errors};
1480 0     0 1 0 $status->finish();
1481             $mail->finish();
1482 0         0 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1483 0         0 return ($self->{syntax_errors});
1484             }
1485              
1486             ###########################################################################
1487              
1488 0         0 =item $f->finish()
1489 0         0  
1490             Destroy this object, so that it will be garbage-collected once it
1491 0         0 goes out of scope. The object will no longer be usable after this
1492 0         0 method is called.
1493              
1494 0         0 =cut
1495 0         0  
1496             my ($self) = @_;
1497 0         0  
1498             $self->timer_start("finish");
1499             $self->call_plugins("finish_tests", { conf => $self->{conf},
1500 0         0 main => $self });
1501 0         0  
1502             $self->{conf}->finish(); delete $self->{conf};
1503 0         0 $self->{plugins}->finish(); delete $self->{plugins};
1504 0         0  
1505             if ($self->{bayes_scanner}) {
1506 0         0 $self->{bayes_scanner}->finish();
1507             delete $self->{bayes_scanner};
1508 0         0 }
1509 0         0  
1510 0         0 $self->{resolver}->finish() if $self->{resolver};
1511 0 0       0  
1512 0         0 $self->timer_end("finish");
1513             %{$self} = ();
1514             }
1515              
1516             ###########################################################################
1517             # timers: bug 5356
1518              
1519             my ($self) = @_;
1520             dbg("config: timing enabled") if !$self->{timer_enabled};
1521             $self->{timer_enabled} = 1;
1522             }
1523              
1524             my ($self) = @_;
1525             dbg("config: timing disabled") if $self->{timer_enabled};
1526 20     20 1 18271 $self->{timer_enabled} = 0;
1527             }
1528 20         106  
1529             # discard all timers, start afresh
1530 20         99 my ($self) = @_;
1531             delete $self->{timers};
1532 20         112 delete $self->{timers_order};
  20         62  
1533 20         115 }
  20         58  
1534              
1535 20 100       67 my ($self, $name) = @_;
1536 7         39  
1537 7         26 return unless $self->{timer_enabled};
1538             # dbg("timing: '$name' starting");
1539              
1540 20 50       169 if (!exists $self->{timers}->{$name}) {
1541             push @{$self->{timers_order}}, $name;
1542 20         72 }
1543 20         34
  20         275  
1544             $self->{timers}->{$name}->{start} = Time::HiRes::time();
1545             # note that this will reset any existing, unstopped timer of that name;
1546             # that's ok
1547             }
1548              
1549             my ($self, $name) = @_;
1550 0     0 0 0 return unless $self->{timer_enabled};
1551 0 0       0  
1552 0         0 my $t = $self->{timers}->{$name};
1553             $t->{end} = time;
1554              
1555             if (!$t->{start}) {
1556 0     0 0 0 warn "timer_end('$name') with no timer_start";
1557 0 0       0 return;
1558 0         0 }
1559              
1560             # add to any existing elapsed time for this event, since
1561             # we may call the same timer name multiple times -- this is ok,
1562             # as long as they are not nested
1563 3     3 0 15442 my $dt = $t->{end} - $t->{start};
1564 3         6 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1565 3         7 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
1566             else { $t->{elapsed} = $dt }
1567             }
1568              
1569 20     20 0 70 my ($self, $name) = @_;
1570             return unless $self->{timer_enabled};
1571 20 50       70 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1572             }
1573              
1574 0 0       0 my ($self) = @_;
1575 0         0  
  0         0  
1576             my $earliest;
1577             my $latest;
1578 0         0  
1579             while (my($name,$h) = each(%{$self->{timers}})) {
1580             # dbg("timing: %s - %s", $name, join(", ",
1581             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1582             my $start = $h->{start};
1583             if (defined $start && (!defined $earliest || $earliest > $start)) {
1584 20     20 0 87 $earliest = $start;
1585 20 50       69 }
1586             my $end = $h->{end};
1587 0         0 if (defined $end && (!defined $latest || $latest < $end)) {
1588 0         0 $latest = $end;
1589             }
1590 0 0       0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1591 0         0 }
1592 0         0 my $total =
1593             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1594             my @str;
1595             foreach my $name (@{$self->{timers_order}}) {
1596             my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1597             my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1598 0         0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
1599 0 0       0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1600 0 0       0 }
  0         0  
1601 0         0  
1602             return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1603             }
1604              
1605 450     450 0 1163 ###########################################################################
1606 450 50       1496 # non-public methods.
1607 0         0  
1608             my ($self, $use_user_pref) = @_;
1609              
1610             # Allow init() to be called multiple times, but only run once.
1611 0     0 0 0 if (defined $self->{_initted}) {
1612             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1613 0         0 if ($self->{_initted} != $$) {
1614             $self->{_initted} = $$;
1615             srand if !$self->{skip_prng_reseeding};
1616 0         0 $self->{resolver}->reinit_post_fork();
  0         0  
1617             }
1618             return;
1619 0         0 }
1620 0 0 0     0  
      0        
1621 0         0 my $timer = $self->time_method("init");
1622             # Note that this PID has run init()
1623 0         0 $self->{_initted} = $$;
1624 0 0 0     0  
      0        
1625 0         0 #fix spamd reading root prefs file
1626             if (!defined $use_user_pref) {
1627 0 0 0     0 $use_user_pref = 1;
1628             }
1629 0 0 0     0  
1630             if (!defined $self->{config_text}) {
1631 0         0 $self->{config_text} = '';
1632 0         0  
  0         0  
1633 0   0     0 # read a file called "init.pre" in site rules dir *before* all others;
1634 0 0 0     0 # even the system config.
1635 0 0       0 my $siterules = $self->{site_rules_filename};
    0          
1636 0         0 $siterules ||= $self->first_existing_path (@site_rules_path);
1637              
1638             my $sysrules = $self->{rules_filename};
1639 0         0 $sysrules ||= $self->first_existing_path (@default_rules_path);
1640              
1641             if ($siterules) {
1642             $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1643             }
1644             else {
1645             warn "config: could not find site rules directory\n";
1646 98     98 0 846 }
1647              
1648             if ($sysrules) {
1649 98 100       350 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1650             }
1651 62 50       269 else {
1652 0         0 warn "config: could not find sys rules directory\n";
1653 0 0       0 }
1654 0         0  
1655             if ($sysrules) {
1656 62         135 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1657             if ($self->{require_rules} && $cftext !~ /\S/) {
1658             die "config: no rules were found! Do you need to run 'sa-update'?\n";
1659 36         245 }
1660             $self->{config_text} .= $cftext;
1661 36         229 }
1662              
1663             if (!$self->{languages_filename}) {
1664 36 100       134 $self->{languages_filename} = $self->find_rule_support_file("languages");
1665 7         13 }
1666              
1667             if ($siterules && !$self->{ignore_site_cf_files}) {
1668 36 100       116 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1669 24         157 }
1670              
1671             if ( $use_user_pref != 0 ) {
1672             $self->get_and_create_userstate_dir();
1673 24         107  
1674 24   33     75 # user prefs file
1675             my $fname = $self->{userprefs_filename};
1676 24         73 $fname ||= $self->first_existing_path (@default_userprefs_path);
1677 24   33     76  
1678             if (!$self->{dont_copy_prefs}) {
1679 24 50       71 # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1680 24         153 # just use the last entry in the array as the default path.
1681             $fname ||= $self->sed_path($default_userprefs_path[-1]);
1682              
1683 0         0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1684             if ($stat_errn == 0 && -f _) {
1685             # exists and is a regular file, nothing to do
1686 24 50       138 } elsif ($stat_errn == 0) {
1687 24         117 warn "config: default user preference file $fname is not a regular file\n";
1688             } elsif ($stat_errn != ENOENT) {
1689             warn "config: default user preference file $fname not accessible: $!\n";
1690 0         0 } elsif (!$self->create_default_prefs($fname)) {
1691             warn "config: failed to create default user preference file $fname\n";
1692             }
1693 24 50       170 }
1694 24         161  
1695 24 50 66     243 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1696 0         0 }
1697             }
1698 24         2335  
1699             if ($self->{pre_config_text}) {
1700             $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1701 24 50       155 }
1702 24         220 if ($self->{post_config_text}) {
1703             $self->{config_text} .= $self->{post_config_text};
1704             }
1705 24 50 33     377  
1706 24         137 if ($self->{config_text} !~ /\S/) {
1707             my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1708             if ($self->{require_rules}) {
1709 24 100       174 die $m;
1710 7         38 } else {
1711             warn $m;
1712             }
1713 7         17 }
1714 7   33     19  
1715             # Go and parse the config!
1716 7 100       32 $self->{conf}->{main} = $self;
1717             if (would_log('dbg', 'config_text') > 1) {
1718             dbg('config_text: '.$self->{config_text});
1719 6   33     17 }
1720             $self->{conf}->parse_rules ($self->{config_text});
1721 6 50       75 $self->{conf}->finish_parsing(0);
1722 6 50 33     74 delete $self->{conf}->{main}; # to allow future GC'ing
    0          
    0          
    0          
1723              
1724             undef $self->{config_text}; # ensure it's actually freed
1725 0         0 delete $self->{config_text};
1726              
1727 0         0 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1728             die "config: no rules were found! Do you need to run 'sa-update'?\n";
1729 0         0 }
1730              
1731             # Initialize the Bayes subsystem
1732             if ($self->{conf}->{use_bayes}) {
1733 7         30 require Mail::SpamAssassin::Bayes;
1734             $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1735             }
1736             $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1737 36 50       122  
1738 0         0 # Figure out/set our initial scoreset
1739             my $set = 0;
1740 36 100       105 $set |= 1 unless $self->{local_tests_only};
1741 2         24 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
1742             $self->{conf}->set_score_set ($set);
1743              
1744 36 50       371 if ($self->{only_these_rules}) {
1745 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1746 0 0       0 }
1747 0         0  
1748             if (!$self->{timer_enabled}) {
1749 0         0 # enable timing implicitly if _TIMING_ is used in add_header templates
1750             foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
1751             @{$self->{conf}->{'headers_spam'}}) {
1752             if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
1753             }
1754 36         113 }
1755 36 50       155  
1756 0         0 # should be called only after configuration has been parsed
1757             $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1758 36         207  
1759 36         398 # TODO -- open DNS cache etc. if necessary
1760 36         90 }
1761              
1762 36         107 my ($self, $allpaths, $desc) = @_;
1763 36         90 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1764             }
1765 36 50 66     219  
1766 0         0 my ($self, $allpaths, $desc) = @_;
1767             return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1768             }
1769              
1770 36 100       139 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1771 23         6850  
1772 23         313 return '' unless defined ($allpaths);
1773              
1774 36         144 my $txt = '';
1775             foreach my $path (split("\000", $allpaths))
1776             {
1777 36         62 dbg("config: using \"$path\" for $desc");
1778 36 100       123  
1779 36 50 100     302 my $stat_errn = stat($path) ? 0 : 0+$!;
      66        
1780 36         227 if ($stat_errn == ENOENT) {
1781             # no file or directory
1782 36 50       130 } elsif ($stat_errn != 0) {
1783 0         0 dbg("config: file or directory $path not accessible: $!");
1784             } elsif (-d _) {
1785             foreach my $file ($self->$filelistmethod($path)) {
1786 36 50       112 $txt .= read_cf_file($file);
1787             }
1788 36         61 } elsif (-f _ && -s _ && -r _) {
  36         104  
1789 36         155 $txt .= read_cf_file($path);
1790 187 50       588 }
  0         0  
  0         0  
1791             }
1792              
1793             return $txt;
1794             }
1795 36         513  
1796              
1797             my($path) = @_;
1798             my $txt = '';
1799              
1800             local *IN;
1801 55     55 0 198 if (open (IN, "<".$path)) {
1802 55         211  
1803             my($inbuf,$nread); $txt = '';
1804             while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
1805             defined $nread or die "error reading $path: $!";
1806 48     48 0 192 close IN or die "error closing $path: $!";
1807 48         360 undef $inbuf;
1808              
1809             $txt = "file start $path\n" . $txt;
1810             # add an extra \n in case file did not end in one.
1811 103     103   237 $txt .= "\nfile end $path\n";
1812              
1813 103 50       224 dbg("config: read file $path");
1814             }
1815 103         216 else {
1816 103         438 warn "config: cannot open \"$path\": $!\n";
1817             }
1818 103         534  
1819             return $txt;
1820 103 100       1569 }
1821 103 100 33     578  
    50 33        
    100          
    50          
1822             my ($self, $dir) = @_;
1823              
1824 0         0 my $fname;
1825              
1826 96         316 # If vpopmail is enabled then set fname to virtual homedir
1827 605         1256 # precedence: dir, userstate_dir, derive from user_dir, system default
1828             if (defined $dir) {
1829             $fname = File::Spec->catdir ($dir, ".spamassassin");
1830 0         0 }
1831             elsif (defined $self->{userstate_dir}) {
1832             $fname = $self->{userstate_dir};
1833             }
1834 103         2993 elsif (defined $self->{user_dir}) {
1835             $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1836             }
1837              
1838             $fname ||= $self->first_existing_path (@default_userstate_dir);
1839 605     605 0 1190  
1840 605         822 # bug 4932: use the last default_userstate_dir entry if none of the others
1841             # already exist
1842 605         1170 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1843 605 50       17335  
1844             if (!$self->{dont_copy_prefs}) {
1845 605         1470 dbg("config: using \"$fname\" for user state dir");
  605         882  
1846 605         11720 }
  629         4849  
1847 605 50       1401  
1848 605 50       4958 # if this is not a dir, not readable, or we are unable to create the dir,
1849 605         1281 # this is not (yet) a serious error; in fact, it's not even worth
1850             # a warning at all times, so use dbg(). see bug 6268
1851 605         3737 my $stat_errn = stat($fname) ? 0 : 0+$!;
1852             if ($stat_errn == 0 && !-d _) {
1853 605         1717 dbg("config: $fname exists but is not a directory");
1854             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1855 605         2552 dbg("config: error accessing $fname: $!");
1856             } else { # does not exist, create it
1857             eval {
1858 0         0 mkpath($fname, 0, 0700); 1;
1859             } or do {
1860             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1861 605         5816 dbg("config: mkdir $fname failed: $eval_stat");
1862             };
1863             }
1864              
1865 7     7 0 22 $fname;
1866             }
1867 7         9  
1868             =item $fullpath = $f->find_rule_support_file ($filename)
1869              
1870             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1871 7 50       42 in the system-wide rules directory, and return its full path if
    100          
    50          
1872 0         0 it exists, or undef if it doesn't exist.
1873              
1874             (This API was added in SpamAssassin 3.1.1.)
1875 6         13  
1876             =cut
1877              
1878 0         0 my ($self, $filename) = @_;
1879              
1880             return $self->first_existing_path(
1881 7   66     35 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
1882             }
1883              
1884             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1885 7   33     22  
1886             Copy default preferences file into home directory for later use and
1887 7 100       27 modification, if it does not already exist and C<dont_copy_prefs> is
1888 6         47 not set.
1889              
1890             =cut
1891              
1892             # $userdir will only exist if vpopmail config is enabled thru spamd
1893             # Its value will be the virtual user's maildir
1894 7 50       97 #
1895 7 50 33     114 my ($self, $fname, $user, $userdir) = @_;
    50 33        
1896 0         0  
1897             if ($self->{dont_copy_prefs}) {
1898 0         0 return(0);
1899             }
1900              
1901 7         384 # if ($userdir && $userdir ne $self->{user_dir}) {
  7         34  
1902 7 50       21 # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1903 0 0       0 # }
  0         0  
1904 0         0  
1905             my $stat_errn = stat($fname) ? 0 : 0+$!;
1906             if ($stat_errn == 0) {
1907             # fine, it already exists
1908 7         20 } elsif ($stat_errn != ENOENT) {
1909             dbg("config: cannot access user preferences file $fname: $!");
1910             } else {
1911             # Pass on the value of $userdir for virtual users in vpopmail
1912             # otherwise it is empty and the user's normal homedir is used
1913             $self->get_and_create_userstate_dir($userdir);
1914              
1915             # copy in the default one for later editing
1916             my $defprefs =
1917             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1918              
1919             local(*IN,*OUT);
1920             $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1921             if (!defined $defprefs) {
1922 24     24 1 120 warn "config: can not determine default prefs path\n";
1923             } elsif (!open(IN, "<$defprefs")) {
1924             warn "config: cannot open $defprefs: $!\n";
1925 24         128 } elsif (!open(OUT, ">$fname")) {
  120         244  
  120         651  
  120         455  
1926             warn "config: cannot create user preferences file $fname: $!\n";
1927             } else {
1928             # former code skipped lines beginning with '#* ', the following copy
1929             # procedure no longer does so, as it avoids reading line-by-line
1930             my($inbuf,$nread);
1931             while ( $nread=read(IN,$inbuf,16384) ) {
1932             print OUT $inbuf or die "cannot write to $fname: $!";
1933             }
1934             defined $nread or die "error reading $defprefs: $!";
1935             undef $inbuf;
1936             close OUT or die "error closing $fname: $!";
1937             close IN or die "error closing $defprefs: $!";
1938              
1939             if (($< == 0) && ($> == 0) && defined($user)) { # chown it
1940 0     0 1 0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1941             unless (chown($uid, $gid, $fname)) {
1942 0 0       0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1943 0         0 }
1944             }
1945             warn "config: created user preferences file: $fname\n";
1946             return(1);
1947             }
1948             }
1949              
1950 0 0       0 return(0);
1951 0 0       0 }
    0          
1952              
1953             ###########################################################################
1954 0         0  
1955             my ($self, $name) = @_;
1956             my $home = $self->{user_dir} || $ENV{HOME} || '';
1957              
1958 0         0 if (am_running_on_windows()) {
1959             my $userprofile = $ENV{USERPROFILE} || '';
1960              
1961 0         0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
1962             return $userprofile if ($userprofile =~ m/^\\\\/o);
1963              
1964 0         0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
1965 0         0 return $home if ($home =~ m/^\\\\/o);
1966 0 0       0  
    0          
    0          
1967 0         0 return '';
1968             } else {
1969 0         0 return $home if ($home && $home =~ /\//o);
1970             return (getpwnam($name))[7] if ($name ne '');
1971 0         0 return (getpwuid($>))[7];
1972             }
1973             }
1974              
1975 0         0 my ($self, $path) = @_;
1976 0         0 return if !defined $path;
1977 0 0       0  
1978             if (exists($self->{conf}->{sed_path_cache}->{$path})) {
1979 0 0       0 return $self->{conf}->{sed_path_cache}->{$path};
1980 0         0 }
1981 0 0       0  
1982 0 0       0 my $orig_path = $path;
1983              
1984 0 0 0     0 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
      0        
1985 0         0 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
1986 0 0       0 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
1987 0         0 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
1988             $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
1989             $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
1990 0         0 $path =~ s/__version__/${VERSION}/gs;
1991 0         0 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
1992              
1993             $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
1994             $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
1995 0         0 return $path;
1996             }
1997              
1998             my $self = shift;
1999             $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2000             return $1;
2001 1     1 0 4 }
2002 1   50     16  
2003             my $self = shift;
2004 1 50       5 my $path;
2005 0   0     0 foreach my $p (@_) {
2006             $path = $self->sed_path ($p);
2007 0 0 0     0 if (defined $path) {
2008 0 0       0 my($errn) = stat($path) ? 0 : 0+$!;
2009             if ($errn == ENOENT) { } # does not exist
2010 0 0 0     0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2011 0 0       0 else { return $path }
2012             }
2013 0         0 }
2014             return;
2015 1 50 33     33 }
2016 0 0       0  
2017 0         0 ###########################################################################
2018              
2019             my ($self, $dir) = @_;
2020             return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2021             }
2022 119     119 0 367  
2023 119 50       403 my ($self, $dir) = @_;
2024             return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2025 119 100       702 }
2026 50         406  
2027             my ($self, $dir, $type) = @_;
2028              
2029 69         163 if ($self->{config_tree_recurse}) {
2030             my @cfs;
2031 69 0       287  
  0         0  
2032 69 50       238 # use "eval" to avoid loading File::Find unless this is specified
  25         166  
2033 69 50       305 eval ' use File::Find qw();
  25         149  
2034 69 0 33     165 File::Find::find(
  1         5  
2035 69 0       137 { untaint => 1,
  0         0  
2036 69         124 follow => 1,
  0         0  
2037 69         283 wanted =>
2038 69         163 sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
  1         11  
2039             }, $dir); 1;
2040 69         304 ' or do {
2041 69         311 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
2042 69         232 die "_get_cf_pre_files_in_dir error: $eval_stat";
2043             };
2044             @cfs = sort { $a cmp $b } @cfs;
2045             return @cfs;
2046 0     0 0 0 }
2047 0 0       0 else {
2048 0         0 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2049             my @cfs = grep { $_ ne '.' && $_ ne '..' &&
2050             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2051             closedir SA_CF_DIR;
2052 26     26 0 92  
2053 26         46 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
2054 26         180 }
2055 54         243 }
2056 54 50       141  
2057 54 100       4832 ###########################################################################
2058 54 100       305  
    50          
2059 0         0 my ($self, $subname) = @_;
2060 25         150  
2061             # We could potentially get called after a finish(), so just return.
2062             return unless $self->{plugins};
2063 1         4  
2064             return $self->{plugins}->have_callback ($subname);
2065             }
2066              
2067             my $self = shift;
2068             my $subname = shift;
2069 48     48 0 161  
2070 48         163 # We could potentially get called after a finish(), so just return.
2071             return unless $self->{plugins};
2072              
2073             # Use some calls ourself too
2074 48     48 0 128 if ($subname eq 'finish_parsing_end') {
2075 48         191 # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
2076             # read. Plugins can also now use {valid_tlds_re} to one time compile
2077             # regexes in finish_parsing_end.
2078             $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
2079 96     96   271 }
2080              
2081 96 50       252 # safety net in case some plugin changes global settings, Bug 6218
2082 0         0 local $/ = $/; # prevent underlying modules from changing the global $/
2083              
2084             return $self->{plugins}->callback($subname, @_);
2085             }
2086              
2087             ###########################################################################
2088              
2089             my ($self, $mail_obj) = @_;
2090              
2091             $self->init(1);
2092 0 0       0  
2093 0 0       0 my @addrlist;
  0         0  
2094 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2095             Errors-To Mail-Followup-To))
2096 0         0 {
  0         0  
2097 0         0 my @hdrs = $mail_obj->get_header($header);
2098             if ($#hdrs < 0) { next; }
2099             push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2100 96 50       2331 }
2101 96 100 100     3343  
  1410   100     19795  
2102             # find addrs in body, too
2103 96         1257 foreach my $line (@{$mail_obj->get_body()}) {
2104             push (@addrlist, $self->find_all_addrs_in_line($line));
2105 96         682 }
  605         2498  
  1236         1910  
2106              
2107             my @ret;
2108             my %done;
2109              
2110             foreach (@addrlist) {
2111             s/^mailto://; # from Outlook "forwarded" message
2112 176     176 0 263 next if defined ($done{$_}); $done{$_} = 1;
2113             push (@ret, $_);
2114             }
2115 176 50       326  
2116             @ret;
2117 176         394 }
2118              
2119             my ($self, $line) = @_;
2120              
2121 2119     2119 0 22145 # a more permissive pattern based on "dot-atom" as per RFC2822
2122 2119         2831 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2123             my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2124              
2125 2119 50       4413 my @addrs;
2126             my %seen;
2127             while ($line =~ s/(?:mailto:)?\s*
2128 2119 100       3657 ($ID_PATTERN \@
2129             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2130             {
2131             my $addr = $1;
2132 36         640 $addr =~ s/^mailto://;
2133             next if (defined ($seen{$addr})); $seen{$addr} = 1;
2134             push (@addrs, $addr);
2135             }
2136 2119         6257  
2137             return @addrs;
2138 2119         6099 }
2139              
2140             ###########################################################################
2141              
2142             # sa_die -- used to die with a useful exit code.
2143              
2144 0     0 0 0 my $exitcode = shift;
2145             warn @_;
2146 0         0 exit $exitcode;
2147             }
2148 0         0  
2149 0         0 ###########################################################################
2150              
2151             =item $f->copy_config ( [ $source ], [ $dest ] )
2152 0         0  
2153 0 0       0 Used for daemons to keep a persistent Mail::SpamAssassin object's
  0         0  
2154 0         0 configuration correct if switching between users. Pass an associative
2155             array reference as either $source or $dest, and set the other to 'undef'
2156             so that the object will use its current configuration. i.e.:
2157              
2158 0         0 # create object w/ configuration
  0         0  
2159 0         0 my $spamtest = Mail::SpamAssassin->new( ... );
2160              
2161             # backup configuration to %conf_backup
2162 0         0 my %conf_backup;
2163             $spamtest->copy_config(undef, \%conf_backup) ||
2164             die "config: error returned from copy_config!\n";
2165 0         0  
2166 0         0 ... do stuff, perhaps modify the config, etc ...
2167 0 0       0  
  0         0  
2168 0         0 # reset the configuration back to the original
2169             $spamtest->copy_config(\%conf_backup, undef) ||
2170             die "config: error returned from copy_config!\n";
2171 0         0  
2172             Note that the contents of the associative arrays should be considered
2173             opaque by calling code.
2174              
2175 88     88 0 280 =cut
2176              
2177             my ($self, $source, $dest) = @_;
2178 88         195  
2179 88         140 # At least one of either source or dest needs to be a hash reference ...
2180             unless ((defined $source && ref($source) eq 'HASH') ||
2181 88         133 (defined $dest && ref($dest) eq 'HASH'))
2182             {
2183 88         977 return 0;
2184             }
2185              
2186             my $timer = $self->time_method("copy_config");
2187 137         377  
2188 137         257 # let the Conf object itself do all the heavy lifting. It's better
2189 137 100       518 # than having this class know all about that class' internals...
  127         296  
2190 127         1566 if (defined $source) {
2191             dbg ("config: copying current conf from backup");
2192             }
2193 88         446 else {
2194             dbg ("config: copying current conf to backup");
2195             }
2196             return $self->{conf}->clone($source, $dest);
2197             }
2198              
2199             ###########################################################################
2200              
2201 0     0 0 0 =item @plugins = $f->get_loaded_plugins_list ( )
2202 0         0  
2203 0         0 Return the list of plugins currently loaded by this SpamAssassin object's
2204             configuration; each entry in the list is an object of type
2205             C<Mail::SpamAssassin::Plugin>.
2206              
2207             (This API was added in SpamAssassin 3.2.0.)
2208              
2209             =cut
2210              
2211             my ($self) = @_;
2212             return $self->{plugins}->get_loaded_plugins_list();
2213             }
2214              
2215             1;
2216              
2217             ###########################################################################
2218              
2219             =back
2220              
2221             =head1 PREREQUISITES
2222              
2223             C<HTML::Parser>
2224             C<Sys::Syslog>
2225              
2226             =head1 MORE DOCUMENTATION
2227              
2228             See also E<lt>https://spamassassin.apache.org/E<gt> and
2229             E<lt>https://wiki.apache.org/spamassassin/E<gt> for more information.
2230              
2231             =head1 SEE ALSO
2232              
2233             Mail::SpamAssassin::Conf(3)
2234             Mail::SpamAssassin::PerMsgStatus(3)
2235 0     0 1 0 spamassassin(1)
2236             sa-update(1)
2237              
2238 0 0 0     0 =head1 BUGS
      0        
      0        
2239              
2240             See E<lt>https://issues.apache.org/SpamAssassin/E<gt>
2241 0         0  
2242             =head1 AUTHORS
2243              
2244 0         0 The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>
2245              
2246             =head1 COPYRIGHT
2247              
2248 0 0       0 SpamAssassin is distributed under the Apache License, Version 2.0, as
2249 0         0 described in the file C<LICENSE> included with the distribution.
2250              
2251             =head1 AVAILABILITY
2252 0         0  
2253             The latest version of this library is likely to be available from CPAN
2254 0         0 as well as:
2255              
2256             E<lt>https://spamassassin.apache.org/E<gt>
2257              
2258             =cut