File Coverage

lib/Mail/SpamAssassin.pm
Criterion Covered Total %
statement 440 763 57.6
branch 117 340 34.4
condition 61 176 34.6
subroutine 60 85 70.5
pod 31 56 55.3
total 709 1420 49.9


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin - Spam detector and markup engine
21              
22             =head1 SYNOPSIS
23              
24             my $spamtest = Mail::SpamAssassin->new();
25             my $mail = $spamtest->parse($message);
26             my $status = $spamtest->check($mail);
27              
28             if ($status->is_spam()) {
29             $message = $status->rewrite_mail();
30             }
31             else {
32             ...
33             }
34             ...
35              
36             $status->finish();
37             $mail->finish();
38             $spamtest->finish();
39              
40             =head1 DESCRIPTION
41              
42             Mail::SpamAssassin is a module to identify spam using several methods
43             including text analysis, internet-based realtime blacklists, statistical
44             analysis, and internet-based hashing algorithms.
45              
46             Using its rule base, it uses a wide range of heuristic tests on mail
47             headers and body text to identify "spam", also known as unsolicited bulk
48             email. Once identified as spam, the mail can then be tagged as spam for
49             later filtering using the user's own mail user agent application or at
50             the mail transfer agent.
51              
52             If you wish to use a command-line filter tool, try the C<spamassassin>
53             or the C<spamd>/C<spamc> tools provided.
54              
55             =head1 METHODS
56              
57             =over 4
58              
59             =cut
60              
61             use strict;
62 14     14   568496 use warnings;
  14         111  
  14         643  
63 14     14   119 # use bytes;
  14         47  
  14         866  
64             use re 'taint';
65 14     14   118  
  14         52  
  14         892  
66             require 5.006_001;
67              
68             use Mail::SpamAssassin::Logger;
69 14     14   3006 use Mail::SpamAssassin::Constants;
  14         40  
  14         1272  
70 14     14   3485 use Mail::SpamAssassin::Conf;
  14         46  
  14         822  
71 14     14   3486 use Mail::SpamAssassin::Conf::SQL;
  14         61  
  14         594  
72 14     14   4233 use Mail::SpamAssassin::Conf::LDAP;
  14         41  
  14         484  
73 14     14   3176 use Mail::SpamAssassin::PerMsgStatus;
  14         53  
  14         804  
74 14     14   3220 use Mail::SpamAssassin::Message;
  14         60  
  14         634  
75 14     14   3883 use Mail::SpamAssassin::PluginHandler;
  14         196  
  14         1162  
76 14     14   5099 use Mail::SpamAssassin::DnsResolver;
  14         47  
  14         508  
77 14     14   3357 use Mail::SpamAssassin::RegistryBoundaries;
  14         52  
  14         546  
78 14     14   3891 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
  14         36  
  14         556  
79 14     14   97 use Mail::SpamAssassin::Util::ScopedTimer;
  14         27  
  14         780  
80 12     12   3511  
  12         30  
  12         367  
81             use Errno qw(ENOENT EACCES);
82 12     12   70 use File::Basename;
  12         26  
  12         688  
83 12     12   77 use File::Path;
  12         21  
  12         1036  
84 11     11   67 use File::Spec 0.8;
  11         37  
  11         857  
85 11     11   69 use Time::HiRes qw(time);
  11         337  
  11         277  
86 11     11   56 use Cwd;
  11         28  
  11         86  
87 11     11   998 use Config;
  11         22  
  11         866  
88 11     11   72  
  11         17  
  11         95194  
89             our $VERSION = "3.004006"; # update after release (same format as perl $])
90             #our $IS_DEVEL_BUILD = 1; # 1 for devel build
91             our $IS_DEVEL_BUILD = 0; # 0 for release versions including rc & pre releases
92              
93              
94             # Used during the prerelease/release-candidate part of the official release
95             # process. If you hacked up your SA, you should add a version_tag to your .cf
96             # files; this variable should not be modified.
97             our @EXTRA_VERSION = qw();
98              
99             our @ISA = qw();
100              
101             # SUB_VERSION is now just <yyyy>-<mm>-<dd>
102             our $SUB_VERSION = 'svnunknown';
103             if ('$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $' =~ ':') {
104             # Subversion keyword "$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $" has been successfully expanded.
105             # Doesn't happen with automated launchpad builds:
106             # https://bugs.launchpad.net/launchpad/+bug/780916
107             $SUB_VERSION = (split(/\s+/,'$LastChangedDate: 2021-04-09 19:54:52 +1200 (Fri, 09 Apr 2021) $ updated by SVN'))[1];
108             }
109              
110              
111             if (defined $IS_DEVEL_BUILD && $IS_DEVEL_BUILD) {
112             if ('$LastChangedRevision: 1888548 $' =~ ':') {
113             # Subversion keyword "$LastChangedRevision: 1888548 $" has been successfully expanded.
114             push(@EXTRA_VERSION, ('r' . qw{$LastChangedRevision: 1888548 $ updated by SVN}[1]));
115             } else {
116             push(@EXTRA_VERSION, ('r' . 'svnunknown'));
117             }
118             }
119              
120             $VERSION =~ /^(\d+)\.(\d\d\d)(\d\d\d)$/;
121             return join('-', sprintf("%d.%d.%d", $1, $2, $3), @EXTRA_VERSION);
122 125     125 0 775 }
123 125         1416  
124             our $HOME_URL = "https://spamassassin.apache.org/";
125              
126             # note that the CWD takes priority. This is required in case a user
127             # is testing a new version of SpamAssassin on a machine with an older
128             # version installed. Unless you can come up with a fix for this that
129             # allows "make test" to work, don't change this.
130              
131             our @default_rules_path = (
132             './rules', # REMOVEFORINST
133             '../rules', # REMOVEFORINST
134             '__local_state_dir__/__version__',
135             '__def_rules_dir__',
136             '__prefix__/share/spamassassin',
137             '/usr/local/share/spamassassin',
138             '/usr/share/spamassassin',
139             );
140              
141             # first 3 are BSDish, latter 2 Linuxish
142             our @site_rules_path = (
143             '__local_rules_dir__',
144             '__prefix__/etc/mail/spamassassin',
145             '__prefix__/etc/spamassassin',
146             '/usr/local/etc/spamassassin',
147             '/usr/pkg/etc/spamassassin',
148             '/usr/etc/spamassassin',
149             '/etc/mail/spamassassin',
150             '/etc/spamassassin',
151             );
152              
153             our @default_prefs_path = (
154             '__local_rules_dir__/user_prefs.template',
155             '__prefix__/etc/mail/spamassassin/user_prefs.template',
156             '__prefix__/share/spamassassin/user_prefs.template',
157             '__local_state_dir__/__version__/updates_spamassassin_org/user_prefs.template',
158             '__def_rules_dir__/user_prefs.template',
159             '/etc/spamassassin/user_prefs.template',
160             '/etc/mail/spamassassin/user_prefs.template',
161             '/usr/local/share/spamassassin/user_prefs.template',
162             '/usr/share/spamassassin/user_prefs.template',
163             );
164              
165             our @default_userprefs_path = (
166             '~/.spamassassin/user_prefs',
167             );
168              
169             our @default_userstate_dir = (
170             '~/.spamassassin',
171             );
172              
173             ###########################################################################
174              
175             =item $t = Mail::SpamAssassin->new( { opt => val, ... } )
176              
177             Constructs a new C<Mail::SpamAssassin> object. You may pass a hash
178             reference to the constructor which may contain the following attribute-
179             value pairs.
180              
181             =over 4
182              
183             =item debug
184              
185             This is the debug options used to determine logging level. It exists to
186             allow sections of debug messages (called "facilities") to be enabled or
187             disabled. If this is a string, it is treated as a comma-delimited list
188             of the debug facilities. If it's a hash reference, then the keys are
189             treated as the list of debug facilities and if it's a array reference,
190             then the elements are treated as the list of debug facilities.
191              
192             There are also two special cases: (1) if the special case of "info" is
193             passed as a debug facility, then all informational messages are enabled;
194             (2) if the special case of "all" is passed as a debug facility, then all
195             debugging facilities are enabled.
196              
197             =item rules_filename
198              
199             The filename/directory to load spam-identifying rules from. (optional)
200              
201             =item site_rules_filename
202              
203             The filename/directory to load site-specific spam-identifying rules from.
204             (optional)
205              
206             =item userprefs_filename
207              
208             The filename to load preferences from. (optional)
209              
210             =item userstate_dir
211              
212             The directory user state is stored in. (optional)
213              
214             =item config_tree_recurse
215              
216             Set to C<1> to recurse through directories when reading configuration
217             files, instead of just reading a single level. (optional, default 0)
218              
219             =item config_text
220              
221             The text of all rules and preferences. If you prefer not to load the rules
222             from files, read them in yourself and set this instead. As a result, this will
223             override the settings for C<rules_filename>, C<site_rules_filename>,
224             and C<userprefs_filename>.
225              
226             =item pre_config_text
227              
228             Similar to C<config_text>, this text is placed before config_text to allow an
229             override of config files.
230              
231             =item post_config_text
232              
233             Similar to C<config_text>, this text is placed after config_text to allow an
234             override of config files.
235              
236             =item force_ipv4
237              
238             If set to 1, DNS or other network tests will prefer IPv4 and not attempt
239             to use IPv6. Use if the existing tests for IPv6 availability produce
240             incorrect results or crashes.
241              
242             =item force_ipv6
243              
244             For symmetry with force_ipv4: if set to 1, DNS or other network tests
245             will prefer IPv6 and not attempt to use IPv4. Some plugins may disregard
246             this setting and use whatever protocol family they are comfortable with.
247              
248             =item require_rules
249              
250             If set to 1, init() will die if no valid rules could be loaded. This is the
251             default behaviour when called by C<spamassassin> or C<spamd>.
252              
253             =item languages_filename
254              
255             If you want to be able to use the language-guessing rule
256             C<UNWANTED_LANGUAGE_BODY>, and are using C<config_text> instead of
257             C<rules_filename>, C<site_rules_filename>, and C<userprefs_filename>, you will
258             need to set this. It should be the path to the B<languages> file normally
259             found in the SpamAssassin B<rules> directory.
260              
261             =item local_tests_only
262              
263             If set to 1, no tests that require internet access will be performed. (default:
264             0)
265              
266             =item need_tags
267              
268             The option provides a way to avoid more expensive processing when it is known
269             in advance that some information will not be needed by a caller.
270              
271             A value of the option can either be a string (a comma-delimited list of tag
272             names), or a reference to a list of individual tag names. A caller may provide
273             the list in advance, specifying his intention to later collect the information
274             through $pms->get_tag() calls. If a name of a tag starts with a 'NO' (case
275             insensitive), it shows that a caller will not be interested in such tag,
276             although there is no guarantee it would save any resources, nor that a tag
277             value will be empty. Currently no built-in tags start with 'NO'. A later
278             entry overrides previous one, e.g. ASN,NOASN,ASN,TIMING,NOASN is equivalent
279             to TIMING,NOASN.
280              
281             For backward compatibility, all tags available as of version 3.2.4 will
282             be available by default (unless disabled by NOtag), even if not requested
283             through need_tags option. Future versions may provide new tags conditionally
284             available.
285              
286             Currently the only tag that needs to be explicitly requested is 'TIMING'.
287             Not requesting it can save a millisecond or two - it mostly serves to
288             illustrate the usage of need_tags.
289              
290             Example:
291             need_tags => 'TIMING,noLANGUAGES,RELAYCOUNTRY,ASN,noASNCIDR',
292             or:
293             need_tags => [qw(TIMING noLANGUAGES RELAYCOUNTRY ASN noASNCIDR)],
294              
295             =item ignore_site_cf_files
296              
297             If set to 1, any rule files found in the C<site_rules_filename> directory will
298             be ignored. *.pre files (used for loading plugins) found in the
299             C<site_rules_filename> directory will still be used. (default: 0)
300              
301             =item dont_copy_prefs
302              
303             If set to 1, the user preferences file will not be created if it doesn't
304             already exist. (default: 0)
305              
306             =item save_pattern_hits
307              
308             If set to 1, the patterns hit can be retrieved from the
309             C<Mail::SpamAssassin::PerMsgStatus> object. Used for debugging.
310              
311             =item home_dir_for_helpers
312              
313             If set, the B<HOME> environment variable will be set to this value
314             when using test applications that require their configuration data,
315             such as Razor, Pyzor and DCC.
316              
317             =item username
318              
319             If set, the C<username> attribute will use this as the current user's name.
320             Otherwise, the default is taken from the runtime environment (ie. this process'
321             effective UID under UNIX).
322              
323             =item skip_prng_reseeding
324              
325             If skip_prng_reseeding is set to true, the SpamAssassin library will B<not>
326             call srand() to reseed a pseudo-random number generator (PRNG). The srand()
327             Perl function should be called during initialization of each child process,
328             soon after forking.
329              
330             Prior to version 3.4.0, calling srand() was handled by the SpamAssassin
331             library.
332              
333             This setting requires the caller to decide when to call srand().
334             This choice may be desired to preserve the entropy of a PRNG. The default
335             value of skip_prng_reseeding is false to maintain backward compatibility.
336              
337             This option should only be set by a caller if it calls srand() upon spawning
338             child processes. Unless you are certain you need it, leave this setting as
339             false.
340              
341             NOTE: The skip_prng_reseeding feature is implemented in spamd as of 3.4.0
342             which allows spamd to call srand() right after forking a child process.
343              
344             =back
345              
346             If none of C<rules_filename>, C<site_rules_filename>, C<userprefs_filename>, or
347             C<config_text> is set, the C<Mail::SpamAssassin> module will search for the
348             configuration files in the usual installed locations using the below variable
349             definitions which can be passed in.
350              
351             =over 4
352              
353             =item PREFIX
354              
355             Used as the root for certain directory paths such as:
356              
357             '__prefix__/etc/mail/spamassassin'
358             '__prefix__/etc/spamassassin'
359              
360             Defaults to "@@PREFIX@@".
361              
362             =item DEF_RULES_DIR
363              
364             Location where the default rules are installed. Defaults to
365             "@@DEF_RULES_DIR@@".
366              
367             =item LOCAL_RULES_DIR
368              
369             Location where the local site rules are installed. Defaults to
370             "@@LOCAL_RULES_DIR@@".
371              
372             =item LOCAL_STATE_DIR
373              
374             Location of the local state directory, mainly used for installing updates via
375             C<sa-update> and compiling rulesets to native code. Defaults to
376             "@@LOCAL_STATE_DIR@@".
377              
378             =back
379              
380              
381             =cut
382              
383             # undocumented ctor settings:
384             #
385             # - keep_config_parsing_metadata: used by build/listpromotable, default 0
386              
387             my $class = shift;
388             $class = ref($class) || $class;
389              
390 55     55 1 90850 my $self = shift;
391 55   33     549 if (!defined $self) { $self = { }; }
392             bless ($self, $class);
393 55         142  
394 55 50       195 # basic backward compatibility; debug used to be a boolean.
  0         0  
395 55         180 # translate that into 'all', which is what it meant before 3.1.0.
396             if ($self->{debug} && $self->{debug} eq '1') {
397             $self->{debug} = 'all';
398             }
399 55 50 33     335  
400 0         0 # enable or disable debugging
401             Mail::SpamAssassin::Logger::add_facilities($self->{debug});
402              
403             # first debugging information possibly printed should be the version
404 55         523 dbg("generic: SpamAssassin version " . Version());
405              
406             # if the libs are installed in an alternate location, and the caller
407 55         341 # didn't set PREFIX, we should have an estimated guess ready, values
408             # substituted at 'make' time
409             $self->{PREFIX} ||= '@@PREFIX@@';
410             $self->{DEF_RULES_DIR} ||= '@@DEF_RULES_DIR@@';
411             $self->{LOCAL_RULES_DIR} ||= '@@LOCAL_RULES_DIR@@';
412 55   50     528 $self->{LOCAL_STATE_DIR} ||= '@@LOCAL_STATE_DIR@@';
413 55   50     378 dbg("generic: Perl %s, %s", $], join(", ", map { $_ . '=' . $self->{$_} }
414 55   50     361 qw(PREFIX DEF_RULES_DIR LOCAL_RULES_DIR LOCAL_STATE_DIR)));
415 55   50     475  
416 55         164 $self->{needed_tags} = {};
  220         932  
417             { my $ntags = $self->{need_tags};
418             if (defined $ntags) {
419 55         193 for my $t (ref $ntags ? @$ntags : split(/[, \s]+/,$ntags)) {
420 55         103 $self->{needed_tags}->{$2} = !defined($1) if $t =~ /^(NO)?(.+)\z/si;
  55         117  
421 55 50       254 }
422 0 0       0 }
423 0 0       0 }
424             if (would_log('dbg','timing') || $self->{needed_tags}->{TIMING}) {
425             $self->timer_enable();
426             }
427 55 50 33     244  
428 0         0 $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
429             $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
430              
431 55   33     851 $self->{save_pattern_hits} ||= 0;
432 55         718  
433             # Make sure that we clean $PATH if we're tainted
434 55   50     278 Mail::SpamAssassin::Util::clean_path_in_taint_mode();
435              
436             if (!defined $self->{username}) {
437 55         370 $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
438             }
439 55 50       185  
440 55         452 $self->create_locker();
441              
442             $self;
443 55         406 }
444              
445 55         220 my ($self) = @_;
446              
447             my $class;
448             my $m = $self->{conf}->{lock_method};
449 55     55 0 185  
450             # let people choose what they want -- even if they may not work on their
451 55         96 # OS. (they could be using cygwin!)
452 55         151 if ($m eq 'win32') { $class = 'Win32'; }
453             elsif ($m eq 'flock') { $class = 'Flock'; }
454             elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
455             else {
456 55 50       313 # OS-specific defaults
  0 50       0  
    50          
457 0         0 if (am_running_on_windows()) {
458 0         0 $class = 'Win32';
459             } else {
460             $class = 'UnixNFSSafe';
461 55 50       259 }
462 0         0 }
463              
464 55         180 # this could probably be made a little faster; for now I'm going
465             # for slow but safe, by keeping in quotes
466             eval '
467             use Mail::SpamAssassin::Locker::'.$class.';
468             $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
469             1;
470             ' or do {
471             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
472             die "Mail::SpamAssassin::Locker::$class error: $eval_stat\n";
473             };
474 10 50   10   4241  
  10         29  
  10         367  
  55         4660  
475 0 0       0 if (!defined $self->{locker}) { die "locker: oops! no locker"; }
  0         0  
476 0         0 }
477              
478             ###########################################################################
479 55 50       339  
  0         0  
480             =item parse($message, $parse_now [, $suppl_attrib])
481              
482             Parse will return a Mail::SpamAssassin::Message object with just the
483             headers parsed. When calling this function, there are two optional
484             parameters that can be passed in: $message is either undef (which
485             will use STDIN), a scalar - a string containing an entire message,
486             a reference to such string, an array reference of the message with
487             one line per array element, or either a file glob or an IO::File object
488             which holds the entire contents of the message; and $parse_now, which
489             specifies whether or not to create a MIME tree at parse time or later
490             as necessary.
491              
492             The I<$parse_now> option, by default, is set to false (0). This
493             allows SpamAssassin to not have to generate the tree of internal
494             data nodes if the information is not going to be used. This is
495             handy, for instance, when running C<spamassassin -d>, which only
496             needs the pristine header and body which is always parsed and stored
497             by this function.
498              
499             The optional last argument I<$suppl_attrib> provides a way for a caller
500             to pass additional information about a message to SpamAssassin. It is
501             either undef, or a ref to a hash where each key/value pair provides some
502             supplementary attribute of the message, typically information that cannot
503             be deduced from the message itself, or is hard to do so reliably, or would
504             represent unnecessary work for SpamAssassin to obtain it. The argument will
505             be stored to a Mail::SpamAssassin::Message object as 'suppl_attrib', thus
506             made available to the rest of the code as well as to plugins. The exact list
507             of attributes will evolve through time, any unknown attribute should be
508             ignored. Possible examples are: SMTP envelope information, a flag indicating
509             that a message as supplied by a caller was truncated due to size limit, an
510             already verified list of DKIM signature objects, or perhaps a list of rule
511             hits predetermined by a caller, which makes another possible way for a
512             caller to provide meta information (instead of having to insert made-up
513             header fields in order to pass information), or maybe just plain rule hits.
514              
515             For more information, please see the C<Mail::SpamAssassin::Message>
516             and C<Mail::SpamAssassin::Message::Node> POD.
517              
518             =cut
519              
520             my($self, $message, $parsenow, $suppl_attrib) = @_;
521              
522             my $start_time = time;
523             $self->init(1);
524             my $timer = $self->time_method("parse");
525 102     102 1 6695  
526             my $master_deadline;
527 102         393 # passed in at a function call
528 102         350 if (ref $suppl_attrib && exists $suppl_attrib->{master_deadline}) {
529 102         378 $master_deadline = $suppl_attrib->{master_deadline}; # may be undef
530             }
531 102         131 # found in a config file - overrides passed-in number if lower
532             if ($self->{conf}->{time_limit}) { # defined and nonzero
533 102 50 66     654 my $time_limit_deadline = $start_time + $self->{conf}->{time_limit};
534 48         102 if (!defined $master_deadline || $time_limit_deadline < $master_deadline) {
535             $master_deadline = $time_limit_deadline;
536             }
537 102 50       318 }
538 102         282 if (defined $master_deadline) {
539 102 50 33     320 dbg("config: time limit %.1f s", $master_deadline - $start_time);
540 102         154 }
541              
542             my $msg = Mail::SpamAssassin::Message->new({
543 102 50       264 message=>$message, parsenow=>$parsenow,
544 102         349 normalize=>$self->{conf}->{normalize_charset},
545             body_part_scan_size=>$self->{conf}->{body_part_scan_size},
546             rawbody_part_scan_size=>$self->{conf}->{rawbody_part_scan_size},
547             master_deadline=>$master_deadline, suppl_attrib=>$suppl_attrib });
548              
549             # bug 5069: The goal here is to get rendering plugins to do things
550             # like OCR, convert doc and pdf to text, etc, though it could be anything
551             # that wants to process the message after it's been parsed.
552 102         1997 $self->call_plugins("post_message_parse", { message => $msg });
553              
554             return $msg;
555             }
556              
557 102         632  
558             ###########################################################################
559 102         338  
560             =item $status = $f->check ($mail)
561              
562             Check a mail, encapsulated in a C<Mail::SpamAssassin::Message> object,
563             to determine if it is spam or not.
564              
565             Returns a C<Mail::SpamAssassin::PerMsgStatus> object which can be
566             used to test or manipulate the mail message.
567              
568             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
569             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
570             object is a "factory". However, if you do this, be sure to call the
571             C<finish()> method on the status objects when you're done with them.
572              
573             =cut
574              
575             my ($self, $mail_obj) = @_;
576              
577             $self->init(1);
578             my $pms = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
579             $pms->check();
580             dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
581 35     35 1 171 $pms;
582             }
583 35         106  
584 35         293 =item $status = $f->check_message_text ($mailtext)
585 35         134  
586 34 50       115 Check a mail, encapsulated in a plain string C<$mailtext>, to determine if it
587 34         97 is spam or not.
588              
589             Otherwise identical to C<check()> above.
590              
591             =cut
592              
593             my ($self, $mailtext) = @_;
594             my $msg = $self->parse($mailtext, 1);
595             my $result = $self->check($msg);
596              
597             # Kill off the metadata ...
598             # Do _NOT_ call normal finish() here. PerMsgStatus has a copy of
599             # the message. So killing it here will cause things like
600 32     32 1 95 # rewrite_message() to fail. <grrr>
601 32         121 #
602 32         120 $msg->finish_metadata();
603              
604             return $result;
605             }
606              
607             ###########################################################################
608              
609 32         132 =item $status = $f->learn ($mail, $id, $isspam, $forget)
610              
611 32         133 Learn from a mail, encapsulated in a C<Mail::SpamAssassin::Message> object.
612              
613             If C<$isspam> is set, the mail is assumed to be spam, otherwise it will
614             be learnt as non-spam.
615              
616             If C<$forget> is set, the attributes of the mail will be removed from
617             both the non-spam and spam learning databases.
618              
619             C<$id> is an optional message-identification string, used internally
620             to tag the message. If it is C<undef>, the Message-Id of the message
621             will be used. It should be unique to that message.
622              
623             Returns a C<Mail::SpamAssassin::PerMsgLearner> object which can be used to
624             manipulate the learning process for each mail.
625              
626             Note that the C<Mail::SpamAssassin> object can be re-used for further messages
627             without affecting this check; in OO terminology, the C<Mail::SpamAssassin>
628             object is a "factory". However, if you do this, be sure to call the
629             C<finish()> method on the learner objects when you're done with them.
630              
631             C<learn()> and C<check()> can be run using the same factory. C<init_learner()>
632             must be called before using this method.
633              
634             =cut
635              
636             my ($self, $mail_obj, $id, $isspam, $forget) = @_;
637             local ($_);
638              
639             require Mail::SpamAssassin::PerMsgLearner;
640             $self->init(1);
641             my $msg = Mail::SpamAssassin::PerMsgLearner->new($self, $mail_obj);
642              
643             if ($forget) {
644 0     0 1 0 dbg("learn: forgetting message");
645 0         0 $msg->forget($id);
646             } elsif ($isspam) {
647 0         0 dbg("learn: learning spam");
648 0         0 $msg->learn_spam($id);
649 0         0 } else {
650             dbg("learn: learning ham");
651 0 0       0 $msg->learn_ham($id);
    0          
652 0         0 }
653 0         0  
654             $msg;
655 0         0 }
656 0         0  
657             ###########################################################################
658 0         0  
659 0         0 =item $f->init_learner ( [ { opt => val, ... } ] )
660              
661             Initialise learning. You may pass the following attribute-value pairs to this
662 0         0 method.
663              
664             =over 4
665              
666             =item caller_will_untie
667              
668             Whether or not the code calling this method will take care of untie'ing
669             from the Bayes databases (by calling C<finish_learner()>) (optional, default 0).
670              
671             =item force_expire
672              
673             Should an expiration run be forced to occur immediately? (optional, default 0).
674              
675             =item learn_to_journal
676              
677             Should learning data be written to the journal, instead of directly to the
678             databases? (optional, default 0).
679              
680             =item wait_for_lock
681              
682             Whether or not to wait a long time for locks to complete (optional, default 0).
683              
684             =item opportunistic_expire_check_only
685              
686             During the opportunistic journal sync and expire check, don't actually do the
687             expire but report back whether or not it should occur (optional, default 0).
688              
689             =item no_relearn
690              
691             If doing a learn operation, and the message has already been learned as
692             the opposite type, don't re-learn the message.
693              
694             =back
695              
696             =cut
697              
698             my $self = shift;
699             my $opts = shift;
700             dbg("learn: initializing learner");
701              
702             # Make sure we're already initialized ...
703             $self->init(1);
704              
705             my %kv = (
706             'force_expire' => 'learn_force_expire',
707 0     0 1 0 'learn_to_journal' => 'learn_to_journal',
708 0         0 'caller_will_untie' => 'learn_caller_will_untie',
709 0         0 'wait_for_lock' => 'learn_wait_for_lock',
710             'opportunistic_expire_check_only' => 'opportunistic_expire_check_only',
711             'no_relearn' => 'learn_no_relearn',
712 0         0 );
713              
714 0         0 my %ret;
715              
716             # Set any other options that need setting ...
717             while( my($k,$v) = each %kv ) {
718             $ret{$k} = $self->{$v};
719             if (exists $opts->{$k}) { $self->{$v} = $opts->{$k}; }
720             }
721              
722             return \%ret;
723 0         0 }
724              
725             ###########################################################################
726 0         0  
727 0         0 =item $f->rebuild_learner_caches ({ opt => val })
728 0 0       0  
  0         0  
729             Rebuild any cache databases; should be called after the learning process.
730             Options include: C<verbose>, which will output diagnostics to C<stdout>
731 0         0 if set to 1.
732              
733             =cut
734              
735             my $self = shift;
736             my $opts = shift;
737             $self->{bayes_scanner}->sync(1,1,$opts) if $self->{bayes_scanner};
738             1;
739             }
740              
741             =item $f->finish_learner ()
742              
743             Finish learning.
744              
745 0     0 1 0 =cut
746 0         0  
747 0 0       0 my $self = shift;
748 0         0 $self->{bayes_scanner}->force_close(1) if $self->{bayes_scanner};
749             1;
750             }
751              
752             =item $f->dump_bayes_db()
753              
754             Dump the contents of the Bayes DB
755              
756             =cut
757              
758 1     1 1 2 my($self,@opts) = @_;
759 1 50       9 $self->{bayes_scanner}->dump_bayes_db(@opts) if $self->{bayes_scanner};
760 1         5 }
761              
762             =item $f->signal_user_changed ( [ { opt => val, ... } ] )
763              
764             Signals that the current user has changed (possibly using C<setuid>), meaning
765             that SpamAssassin should close any per-user databases it has open, and re-open
766             using ones appropriate for the new user.
767              
768             Note that this should be called I<after> reading any per-user configuration, as
769             that data may override some paths opened in this method. You may pass the
770 0     0 1 0 following attribute-value pairs:
771 0 0       0  
772             =over 4
773              
774             =item username
775              
776             The username of the user. This will be used for the C<username> attribute.
777              
778             =item user_dir
779              
780             A directory to use as a 'home directory' for the current user's data,
781             overriding the system default. This directory must be readable and writable by
782             the process. Note that the resulting C<userstate_dir> will be the
783             C<.spamassassin> subdirectory of this dir.
784              
785             =item userstate_dir
786              
787             A directory to use as a directory for the current user's data, overriding the
788             system default. This directory must be readable and writable by the process.
789             The default is C<user_dir/.spamassassin>.
790              
791             =back
792              
793             =cut
794              
795             my $self = shift;
796             my $opts = shift;
797             my $set = 0;
798              
799             my $timer = $self->time_method("signal_user_changed");
800             dbg("info: user has changed");
801              
802             if (defined $opts && $opts->{username}) {
803             $self->{username} = $opts->{username};
804             } else {
805             undef $self->{username};
806             }
807             if (defined $opts && $opts->{user_dir}) {
808 2     2 1 75 $self->{user_dir} = $opts->{user_dir};
809 2         10 } else {
810 2         3 undef $self->{user_dir};
811             }
812 2         4 if (defined $opts && $opts->{userstate_dir}) {
813 2         6 $self->{userstate_dir} = $opts->{userstate_dir};
814             } else {
815 2 50 33     11 undef $self->{userstate_dir};
816 2         4 }
817              
818 0         0 # reopen bayes dbs for this user
819             $self->{bayes_scanner}->finish() if $self->{bayes_scanner};
820 2 50 33     8 if ($self->{conf}->{use_bayes}) {
821 2         3 require Mail::SpamAssassin::Bayes;
822             $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
823 0         0 } else {
824             delete $self->{bayes_scanner} if $self->{bayes_scanner};
825 2 50 33     8 }
826 0         0  
827             # this user may have a different learn_to_journal setting, so reset appropriately
828 2         4 $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
829              
830             $set |= 1 unless $self->{local_tests_only};
831             $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
832 2 100       6  
833 2 100       5 $self->{conf}->set_score_set ($set);
834 1         7  
835 1         7 $self->call_plugins("signal_user_changed", {
836             username => $self->{username},
837 1 50       9 userstate_dir => $self->{userstate_dir},
838             user_dir => $self->{user_dir},
839             });
840              
841 2         11 1;
842             }
843 2 50       7  
844 2 0 66     14 ###########################################################################
      33        
845              
846 2         9 =item $f->report_as_spam ($mail, $options)
847              
848             Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
849             human-verified spam. This will submit the mail message to live,
850             collaborative, spam-blocker databases, allowing other users to block this
851             message.
852 2         10  
853             It will also submit the mail to SpamAssassin's Bayesian learner.
854 2         7  
855             Options is an optional reference to a hash of options. Currently these
856             can be:
857              
858             =over 4
859              
860             =item dont_report_to_dcc
861              
862             Inhibits reporting of the spam to DCC.
863              
864             =item dont_report_to_pyzor
865              
866             Inhibits reporting of the spam to Pyzor.
867              
868             =item dont_report_to_razor
869              
870             Inhibits reporting of the spam to Razor.
871              
872             =item dont_report_to_spamcop
873              
874             Inhibits reporting of the spam to SpamCop.
875              
876             =back
877              
878             =cut
879              
880             my ($self, $mail, $options) = @_;
881             local ($_);
882              
883             $self->init(1);
884             my $timer = $self->time_method("report_as_spam");
885              
886             # learn as spam if enabled
887             if ( $self->{conf}->{bayes_learn_during_report} ) {
888             $self->learn ($mail, undef, 1, 0);
889             }
890              
891             require Mail::SpamAssassin::Reporter;
892             $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
893             $mail->report();
894 0     0 1 0 }
895 0         0  
896             ###########################################################################
897 0         0  
898 0         0 =item $f->revoke_as_spam ($mail, $options)
899              
900             Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
901 0 0       0 human-verified ham (non-spam). This will revoke the mail message from live,
902 0         0 collaborative, spam-blocker databases, allowing other users to block this
903             message.
904              
905 0         0 It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
906 0         0  
907 0         0 Options is an optional reference to a hash of options. Currently these
908             can be:
909              
910             =over 4
911              
912             =item dont_report_to_razor
913              
914             Inhibits revoking of the spam to Razor.
915              
916              
917             =back
918              
919             =cut
920              
921             my ($self, $mail, $options) = @_;
922             local ($_);
923              
924             $self->init(1);
925             my $timer = $self->time_method("revoke_as_spam");
926              
927             # learn as nonspam
928             $self->learn ($mail, undef, 0, 0);
929              
930             require Mail::SpamAssassin::Reporter;
931             $mail = Mail::SpamAssassin::Reporter->new($self, $mail, $options);
932             $mail->revoke ();
933             }
934              
935             ###########################################################################
936 0     0 1 0  
937 0         0 =item $f->add_address_to_whitelist ($addr, $cli_p)
938              
939 0         0 Given a string containing an email address, add it to the automatic
940 0         0 whitelist database.
941              
942             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
943 0         0  
944             =cut
945 0         0  
946 0         0 my ($self, $addr, $cli_p) = @_;
947 0         0  
948             $self->call_plugins("whitelist_address", { address => $addr,
949             cli_p => $cli_p });
950             }
951              
952             ###########################################################################
953              
954             =item $f->add_all_addresses_to_whitelist ($mail, $cli_p)
955              
956             Given a mail message, find as many addresses in the usual headers (To, Cc, From
957             etc.), and the message body, and add them to the automatic whitelist database.
958              
959             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
960              
961             =cut
962 0     0 1 0  
963             my ($self, $mail_obj, $cli_p) = @_;
964 0         0  
965             foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
966             $self->call_plugins("whitelist_address", { address => $addr,
967             cli_p => $cli_p });
968             }
969             }
970              
971             ###########################################################################
972              
973             =item $f->remove_address_from_whitelist ($addr, $cli_p)
974              
975             Given a string containing an email address, remove it from the automatic
976             whitelist database.
977              
978             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
979              
980 0     0 1 0 =cut
981              
982 0         0 my ($self, $addr, $cli_p) = @_;
983 0         0  
984             $self->call_plugins("remove_address", { address => $addr,
985             cli_p => $cli_p });
986             }
987              
988             ###########################################################################
989              
990             =item $f->remove_all_addresses_from_whitelist ($mail, $cli_p)
991              
992             Given a mail message, find as many addresses in the usual headers (To, Cc, From
993             etc.), and the message body, and remove them from the automatic whitelist
994             database.
995              
996             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
997              
998             =cut
999              
1000 0     0 1 0 my ($self, $mail_obj, $cli_p) = @_;
1001              
1002 0         0 foreach my $addr ($self->find_all_addrs_in_mail ($mail_obj)) {
1003             $self->call_plugins("remove_address", { address => $addr,
1004             cli_p => $cli_p });
1005             }
1006             }
1007              
1008             ###########################################################################
1009              
1010             =item $f->add_address_to_blacklist ($addr, $cli_p)
1011              
1012             Given a string containing an email address, add it to the automatic
1013             whitelist database with a high score, effectively blacklisting them.
1014              
1015             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1016              
1017             =cut
1018              
1019 0     0 1 0 my ($self, $addr, $cli_p) = @_;
1020             $self->call_plugins("blacklist_address", { address => $addr,
1021 0         0 cli_p => $cli_p });
1022 0         0 }
1023              
1024             ###########################################################################
1025              
1026             =item $f->add_all_addresses_to_blacklist ($mail, $cli_p)
1027              
1028             Given a mail message, find addresses in the From headers and add them to the
1029             automatic whitelist database with a high score, effectively blacklisting them.
1030              
1031             Note that To and Cc addresses are not used.
1032              
1033             If $cli_p is set then underlying plugin may give visual feedback on additions/failures.
1034              
1035             =cut
1036              
1037             my ($self, $mail_obj, $cli_p) = @_;
1038              
1039 0     0 1 0 $self->init(1);
1040 0         0  
1041             my @addrlist;
1042             my @hdrs = $mail_obj->get_header('From');
1043             if ($#hdrs >= 0) {
1044             push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
1045             }
1046              
1047             foreach my $addr (@addrlist) {
1048             $self->call_plugins("blacklist_address", { address => $addr,
1049             cli_p => $cli_p });
1050             }
1051              
1052             }
1053              
1054             ###########################################################################
1055              
1056             =item $text = $f->remove_spamassassin_markup ($mail)
1057              
1058 0     0 1 0 Returns the text of the message, with any SpamAssassin-added text (such
1059             as the report, or X-Spam-Status headers) stripped.
1060 0         0  
1061             Note that the B<$mail> object is not modified.
1062 0         0  
1063 0         0 Warning: if the input message in B<$mail> contains a mixture of CR-LF
1064 0 0       0 (Windows-style) and LF (UNIX-style) line endings, it will be "canonicalized"
1065 0         0 to use one or the other consistently throughout.
1066              
1067             =cut
1068 0         0  
1069 0         0 my ($self, $mail_obj) = @_;
1070             local ($_);
1071              
1072             my $timer = $self->time_method("remove_spamassassin_markup");
1073             my $mbox = $mail_obj->get_mbox_separator() || '';
1074              
1075             dbg("markup: removing markup");
1076              
1077             # Go looking for a "report_safe" encapsulated message. Abort out ASAP
1078             # if we have definitive proof it's not an encapsulated message.
1079             my $ct = $mail_obj->get_header("Content-Type") || '';
1080             if ( $ct =~ m!^\s*multipart/mixed;\s+boundary\s*=\s*["']?(.+?)["']?(?:;|$)!i ) {
1081              
1082             # Ok, this is a possible encapsulated message, search for the
1083             # appropriate mime part and deal with it if necessary.
1084             my $boundary = "\Q$1\E";
1085             my @msg = split(/^/,$mail_obj->get_pristine_body());
1086              
1087             my $flag = 0;
1088             $ct = '';
1089             my $cd = '';
1090             for ( my $i = 0 ; $i <= $#msg ; $i++ ) {
1091 0     0 1 0 # only look at mime part headers
1092 0         0 next unless ( $msg[$i] =~ /^--$boundary\r?$/ || $flag );
1093              
1094 0         0 if ( $msg[$i] =~ /^\s*$/ ) { # end of mime header
1095 0   0     0  
1096             # Ok, we found the encapsulated piece ...
1097 0         0 if ($ct =~ m@^(?:message/rfc822|text/plain);\s+x-spam-type=original@ ||
1098             ($ct eq "message/rfc822" &&
1099             $cd eq $self->{conf}->{'encapsulated_content_description'}))
1100             {
1101 0   0     0 splice @msg, 0, $i+1; # remove the front part, including the blank line
1102 0 0       0  
1103             # find the end and chop it off
1104             for ( $i = 0 ; $i <= $#msg ; $i++ ) {
1105             if ( $msg[$i] =~ /^--$boundary/ ) {
1106 0         0 splice @msg, ($msg[$i-1] =~ /\S/ ? $i : $i-1);
1107 0         0 # will remove the blank line (not sure it'll always be
1108             # there) and everything below. don't worry, the splice
1109 0         0 # guarantees the for will stop ...
1110 0         0 }
1111 0         0 }
1112 0         0  
1113             # Ok, we're done. Return the rewritten message.
1114 0 0 0     0 return join('', $mbox, @msg);
1115             }
1116 0 0       0  
1117             $flag = 0;
1118             $ct = '';
1119 0 0 0     0 $cd = '';
      0        
1120             next;
1121             }
1122              
1123 0         0 # Ok, we're in the mime header ... Capture the appropriate headers...
1124             $flag = 1;
1125             if ( $msg[$i] =~ /^Content-Type:\s+(.+?)\s*$/i ) {
1126 0         0 $ct = $1;
1127 0 0       0 }
1128 0 0       0 elsif ( $msg[$i] =~ /^Content-Description:\s+(.+?)\s*$/i ) {
1129             $cd = $1;
1130             }
1131             }
1132             }
1133              
1134             # Ok, if we got here, the message wasn't a report_safe encapsulated message.
1135             # So treat it like a "report_safe 0" message.
1136 0         0 my $hdrs = $mail_obj->get_pristine_header();
1137             my $body = $mail_obj->get_pristine_body();
1138              
1139 0         0 # remove DOS line endings
1140 0         0 $hdrs =~ s/\r//gs;
1141 0         0  
1142 0         0 # unfold SA added headers, but not X-Spam-Prev headers ...
1143             $hdrs = "\n".$hdrs; # simplifies regexp below
1144             1 while $hdrs =~ s/(\nX-Spam-(?!Prev).+?)\n[ \t]+(\S.*\n)/$1 $2/g;
1145             $hdrs =~ s/^\n//;
1146 0         0  
1147 0 0       0 ###########################################################################
    0          
1148 0         0 # Backward Compatibility, pre 3.0.x.
1149              
1150             # deal with rewritten headers w/out X-Spam-Prev- versions ...
1151 0         0 $self->init(1);
1152             foreach my $header ( keys %{$self->{conf}->{rewrite_header}} ) {
1153             # let the 3.0 decoding do it...
1154             next if ($hdrs =~ /^X-Spam-Prev-$header:/im);
1155              
1156             dbg("markup: removing markup in $header");
1157             if ($header eq 'Subject') {
1158 0         0 my $tag = $self->{conf}->{rewrite_header}->{'Subject'};
1159 0         0 $tag = quotemeta($tag);
1160             $tag =~ s/_HITS_/\\d{2}\\.\\d{2}/g;
1161             $tag =~ s/_SCORE_/\\d{2}\\.\\d{2}/g;
1162 0         0 $tag =~ s/_REQD_/\\d{2}\\.\\d{2}/g;
1163             1 while $hdrs =~ s/^Subject: ${tag} /Subject: /gm;
1164             } else {
1165 0         0 $hdrs =~ s/^(${header}:[ \t].*?)\t\([^)]*\)$/$1/gm;
1166 0         0 }
1167 0         0 }
1168              
1169             # Now deal with report cleansing from 2.4x and previous.
1170             # possibly a blank line, "SPAM: ----.+", followed by "SPAM: stuff" lines,
1171             # followed by another "SPAM: ----.+" line, followed by a blank line.
1172             1 while ($body =~ s/^\n?SPAM: ----.+\n(?:SPAM:.*\n)*SPAM: ----.+\n\n//);
1173 0         0 ###########################################################################
1174 0         0  
  0         0  
1175             # 3.0 version -- support for previously-nonexistent Subject hdr.
1176 0 0       0 # ensure the Subject line didn't *really* contain "(nonexistent)" in
1177             # the original message!
1178 0         0 if ($hdrs =~ /^X-Spam-Prev-Subject:\s*\(nonexistent\)$/m
1179 0 0       0 && $hdrs !~ /^Subject:.*\(nonexistent\).*$/m)
1180 0         0 {
1181 0         0 $hdrs =~ s/(^|\n)X-Spam-Prev-Subject:\s*\(nonexistent\)\n/$1\n/s;
1182 0         0 $hdrs =~ s/(^|\n)Subject:\s*[ \t]*.*\n(?:\s+\S.*\n)*/$1\n/s;
1183 0         0 }
1184 0         0  
1185 0         0 # 3.0 version -- revert from X-Spam-Prev to original ...
1186             while ($hdrs =~ s/^X-Spam-Prev-(([^:]+:)[ \t]*.*\n(?:\s+\S.*\n)*)//m) {
1187 0         0 my($hdr, $name) = ($1,$2);
1188              
1189             # If the rewritten version doesn't exist, we should deal with it anyway...
1190             unless ($hdrs =~ s/^$name[ \t]*.*\n(?:\s+\S.*\n)*/$hdr/m) {
1191             $hdrs =~ s/\n\n/\n$hdr\n/;
1192             }
1193             }
1194 0         0  
1195             # remove any other X-Spam headers we added, will be unfolded
1196             $hdrs = "\n".$hdrs; # simplifies regexp below
1197             1 while $hdrs =~ s/\nX-Spam-.*\n/\n/g;
1198             $hdrs =~ s/^\n//;
1199              
1200 0 0 0     0 # re-add DOS line endings
1201             if ($mail_obj->{line_ending} ne "\n") {
1202             $hdrs =~ s/\r?\n/$mail_obj->{line_ending}/gs;
1203 0         0 }
1204 0         0  
1205             # Put the whole thing back together ...
1206             return join ('', $mbox, $hdrs, $body);
1207             }
1208 0         0  
1209 0         0 ###########################################################################
1210              
1211             =item $f->read_scoreonly_config ($filename)
1212 0 0       0  
1213 0         0 Read a configuration file and parse user preferences from it.
1214              
1215             User preferences are as defined in the C<Mail::SpamAssassin::Conf> manual page.
1216             In other words, they include scoring options, scores, whitelists and
1217             blacklists, and so on, but do not include rule definitions, privileged
1218 0         0 settings, etc. unless C<allow_user_rules> is enabled; and they never include
1219 0         0 the administrator settings.
1220 0         0  
1221             =cut
1222              
1223 0 0       0 my ($self, $filename) = @_;
1224 0         0  
1225             my $timer = $self->time_method("read_scoreonly_config");
1226             local *IN;
1227             if (!open(IN,"<$filename")) {
1228 0         0 # the file may not exist; this should not be verbose
1229             dbg("config: read_scoreonly_config: cannot open \"$filename\": $!");
1230             return;
1231             }
1232              
1233             my($inbuf,$nread,$text); $text = '';
1234             while ( $nread=read(IN,$inbuf,16384) ) { $text .= $inbuf }
1235             defined $nread or die "error reading $filename: $!";
1236             close IN or die "error closing $filename: $!";
1237             undef $inbuf;
1238              
1239             $text = "file start $filename\n" . $text;
1240             # add an extra \n in case file did not end in one.
1241             $text .= "\nfile end $filename\n";
1242              
1243             $self->{conf}->{main} = $self;
1244             $self->{conf}->parse_scores_only ($text);
1245             $self->{conf}->finish_parsing(1);
1246 2     2 1 3432  
1247             delete $self->{conf}->{main}; # to allow future GC'ing
1248 2         6 }
1249 2         6  
1250 2 50       59 ###########################################################################
1251              
1252 2         20 =item $f->load_scoreonly_sql ($username)
1253 2         10  
1254             Read configuration parameters from SQL database and parse scores from it. This
1255             will only take effect if the perl C<DBI> module is installed, and the
1256 0         0 configuration parameters C<user_scores_dsn>, C<user_scores_sql_username>, and
  0         0  
1257 0         0 C<user_scores_sql_password> are set correctly.
  0         0  
1258 0 0       0  
1259 0 0       0 The username in C<$username> will also be used for the C<username> attribute of
1260 0         0 the Mail::SpamAssassin object.
1261              
1262 0         0 =cut
1263              
1264 0         0 my ($self, $username) = @_;
1265              
1266 0         0 my $timer = $self->time_method("load_scoreonly_sql");
1267 0         0 my $src = Mail::SpamAssassin::Conf::SQL->new ($self);
1268 0         0 $self->{username} = $username;
1269             unless ($src->load($username)) {
1270 0         0 return 0;
1271             }
1272             return 1;
1273             }
1274              
1275             ###########################################################################
1276              
1277             =item $f->load_scoreonly_ldap ($username)
1278              
1279             Read configuration parameters from an LDAP server and parse scores from it.
1280             This will only take effect if the perl C<Net::LDAP> and C<URI> modules are
1281             installed, and the configuration parameters C<user_scores_dsn>,
1282             C<user_scores_ldap_username>, and C<user_scores_ldap_password> are set
1283             correctly.
1284              
1285             The username in C<$username> will also be used for the C<username> attribute of
1286             the Mail::SpamAssassin object.
1287              
1288 0     0 1 0 =cut
1289              
1290 0         0 my ($self, $username) = @_;
1291 0         0  
1292 0         0 dbg("config: load_scoreonly_ldap($username)");
1293 0 0       0 my $timer = $self->time_method("load_scoreonly_ldap");
1294 0         0 my $src = Mail::SpamAssassin::Conf::LDAP->new ($self);
1295             $self->{username} = $username;
1296 0         0 $src->load($username);
1297             }
1298              
1299             ###########################################################################
1300              
1301             =item $f->set_persistent_address_list_factory ($factoryobj)
1302              
1303             Set the persistent address list factory, used to create objects for the
1304             automatic whitelist algorithm's persistent-storage back-end. See
1305             C<Mail::SpamAssassin::PersistentAddrList> for the API these factory objects
1306             must implement, and the API the objects they produce must implement.
1307              
1308             =cut
1309              
1310             my ($self, $fac) = @_;
1311             $self->{pers_addr_list_factory} = $fac;
1312             }
1313              
1314             ###########################################################################
1315 0     0 1 0  
1316             =item $f->compile_now ($use_user_prefs, $keep_userstate)
1317 0         0  
1318 0         0 Compile all patterns, load all configuration files, and load all
1319 0         0 possibly-required Perl modules.
1320 0         0  
1321 0         0 Normally, Mail::SpamAssassin uses lazy evaluation where possible, but if you
1322             plan to fork() or start a new perl interpreter thread to process a message,
1323             this is suboptimal, as each process/thread will have to perform these actions.
1324              
1325             Call this function in the master thread or process to perform the actions
1326             straight away, so that the sub-processes will not have to.
1327              
1328             If C<$use_user_prefs> is 0, this will initialise the SpamAssassin
1329             configuration without reading the per-user configuration file and it will
1330             assume that you will call C<read_scoreonly_config> at a later point.
1331              
1332             If C<$keep_userstate> is true, compile_now() will revert any configuration
1333             options which have a default with I<__userstate__> in it post-init(),
1334             and then re-change the option before returning. This lets you change
1335             I<$ENV{'HOME'}> to a temp directory, have compile_now() and create any
1336 3     3 1 9 files there as necessary without disturbing the actual files as changed
1337 3         12 by a configuration option. By default, this is disabled.
1338              
1339             =cut
1340              
1341             my ($self, $use_user_prefs, $deal_with_userstate) = @_;
1342              
1343             my $timer = $self->time_method("compile_now");
1344              
1345             # Backup default values which deal with userstate.
1346             # This is done so we can create any new files in, presumably, a temp dir.
1347             # see bug 2762 for more details.
1348             my %backup;
1349             if (defined $deal_with_userstate && $deal_with_userstate) {
1350             while(my($k,$v) = each %{$self->{conf}}) {
1351             $backup{$k} = $v if (defined $v && !ref($v) && $v =~/__userstate__/);
1352             }
1353             }
1354              
1355             $self->init($use_user_prefs);
1356              
1357             # if init() didn't change the value from default, forget about it.
1358             # if the value is different, remember the new version, and reset the default.
1359             while(my($k,$v) = each %backup) {
1360             if ($self->{conf}->{$k} eq $v) {
1361             delete $backup{$k};
1362             }
1363             else {
1364             my $backup = $backup{$k};
1365             $backup{$k} = $self->{conf}->{$k};
1366             $self->{conf}->{$k} = $backup;
1367             }
1368 1     1 1 19 }
1369              
1370 1         3 dbg("ignore: test message to precompile patterns and load modules");
1371              
1372             # tell plugins we are about to send a message for compiling purposes
1373             $self->call_plugins("compile_now_start",
1374             { use_user_prefs => $use_user_prefs,
1375 1         2 keep_userstate => $deal_with_userstate});
1376 1 50 33     5  
1377 1         2 # note: this may incur network access. Good. We want to make sure
  117         201  
1378 116 100 100     280 # as much as possible is preloaded!
      100        
1379             my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1380             "Message-Id: <".time."\@spamassassin_spamd_init>\n", "\n",
1381             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1382 1         2  
1383             my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1384             my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1385             { disable_auto_learning => 1 } );
1386 1         9  
1387 1 50       5 # We want to turn off the bayes rules for this test msg
1388 0         0 my $use_bayes_rules_value = $self->{conf}->{use_bayes_rules};
1389             $self->{conf}->{use_bayes_rules} = 0;
1390             $status->check();
1391 1         11 $self->{conf}->{use_bayes_rules} = $use_bayes_rules_value;
1392 1         4 $status->finish();
1393 1         6 $mail->finish();
1394             $self->finish_learner();
1395              
1396             $self->{conf}->free_uncompiled_rule_source();
1397 1         4  
1398             # load SQL modules now as well
1399             my $dsn = $self->{conf}->{user_scores_dsn};
1400 1         6 if ($dsn ne '') {
1401             if ($dsn =~ /^ldap:/i) {
1402             Mail::SpamAssassin::Conf::LDAP::load_modules();
1403             } else {
1404             Mail::SpamAssassin::Conf::SQL::load_modules();
1405             }
1406 1         19 }
1407              
1408             # make sure things are ready for scanning
1409             $self->{bayes_scanner}->force_close() if $self->{bayes_scanner};
1410 1         7 $self->call_plugins("compile_now_finish",
1411 1         17 { use_user_prefs => $use_user_prefs,
1412             keep_userstate => $deal_with_userstate});
1413              
1414             # Reset any non-default values to the post-init() version.
1415 1         3 while(my($k,$v) = each %backup) {
1416 1         2 $self->{conf}->{$k} = $v;
1417 1         10 }
1418 1         4  
1419 1         5 # clear sed_path_cache
1420 1         8 delete $self->{conf}->{sed_path_cache};
1421 1         5  
1422             1;
1423 1         7 }
1424              
1425             ###########################################################################
1426 1         3  
1427 1 50       3 =item $f->debug_diagnostics ()
1428 0 0       0  
1429 0         0 Output some diagnostic information, useful for debugging SpamAssassin
1430             problems.
1431 0         0  
1432             =cut
1433              
1434             my ($self) = @_;
1435              
1436 1 50       5 # load this class lazily, to avoid overhead when this method isn't
1437 1         7 # called.
1438             eval {
1439             require Mail::SpamAssassin::Util::DependencyInfo;
1440             dbg(Mail::SpamAssassin::Util::DependencyInfo::debug_diagnostics($self));
1441             };
1442 1         7 }
1443 1         5  
1444             ###########################################################################
1445              
1446             =item $failed = $f->lint_rules ()
1447 1         8  
1448             Syntax-check the current set of rules. Returns the number of
1449 1         6 syntax errors discovered, or 0 if the configuration is valid.
1450              
1451             =cut
1452              
1453             my ($self) = @_;
1454              
1455             dbg("ignore: using a test message to lint rules");
1456             my @testmsg = ("From: ignore\@compiling.spamassassin.taint.org\n",
1457             "Subject: \n",
1458             "Message-Id: <".CORE::time()."\@lint_rules>\n", "\n",
1459             "I need to make this message body somewhat long so TextCat preloads\n"x20);
1460              
1461             $self->{lint_rules} = $self->{conf}->{lint_rules} = 1;
1462 0     0 1 0 $self->{syntax_errors} = 0;
1463              
1464             my $olddcp = $self->{dont_copy_prefs};
1465             $self->{dont_copy_prefs} = 1;
1466 0         0  
1467 0         0 $self->init(1);
1468 0         0 $self->{syntax_errors} += $self->{conf}->{errors};
1469              
1470             $self->{dont_copy_prefs} = $olddcp; # revert back to previous
1471              
1472             # bug 5048: override settings to ensure a faster lint
1473             $self->{'conf'}->{'use_auto_whitelist'} = 0;
1474             $self->{'conf'}->{'bayes_auto_learn'} = 0;
1475              
1476             my $mail = $self->parse(\@testmsg, 1, { master_deadline => undef });
1477             my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
1478             { disable_auto_learning => 1 } );
1479             $status->check();
1480              
1481             $self->{syntax_errors} += $status->{rule_errors};
1482 47     47 1 14341 $status->finish();
1483             $mail->finish();
1484 47         162 dbg("timing: " . $self->timer_report()) if $self->{timer_enabled};
1485 47         580 return ($self->{syntax_errors});
1486             }
1487              
1488             ###########################################################################
1489              
1490 47         223 =item $f->finish()
1491 47         171  
1492             Destroy this object, so that it will be garbage-collected once it
1493 47         104 goes out of scope. The object will no longer be usable after this
1494 47         95 method is called.
1495              
1496 47         272 =cut
1497 47         149  
1498             my ($self) = @_;
1499 47         87  
1500             $self->timer_start("finish");
1501             $self->call_plugins("finish_tests", { conf => $self->{conf},
1502 47         132 main => $self });
1503 47         118  
1504             $self->{conf}->finish(); delete $self->{conf};
1505 47         400 $self->{plugins}->finish(); delete $self->{plugins};
1506 47         848  
1507             if ($self->{bayes_scanner}) {
1508 47         353 $self->{bayes_scanner}->finish();
1509             delete $self->{bayes_scanner};
1510 47         148 }
1511 47         234  
1512 47         284 $self->{resolver}->finish() if $self->{resolver};
1513 47 50       167  
1514 47         245 $self->timer_end("finish");
1515             %{$self} = ();
1516             }
1517              
1518             ###########################################################################
1519             # timers: bug 5356
1520              
1521             my ($self) = @_;
1522             dbg("config: timing enabled") if !$self->{timer_enabled};
1523             $self->{timer_enabled} = 1;
1524             }
1525              
1526             my ($self) = @_;
1527             dbg("config: timing disabled") if $self->{timer_enabled};
1528 32     32 1 227 $self->{timer_enabled} = 0;
1529             }
1530 32         149  
1531             # discard all timers, start afresh
1532 32         145 my ($self) = @_;
1533             delete $self->{timers};
1534 32         249 delete $self->{timers_order};
  32         137  
1535 32         184 }
  32         90  
1536              
1537 32 50       116 my ($self, $name) = @_;
1538 32         159  
1539 32         75 return unless $self->{timer_enabled};
1540             # dbg("timing: '$name' starting");
1541              
1542 32 50       256 if (!exists $self->{timers}->{$name}) {
1543             push @{$self->{timers_order}}, $name;
1544 32         129 }
1545 32         55
  32         702  
1546             $self->{timers}->{$name}->{start} = Time::HiRes::time();
1547             # note that this will reset any existing, unstopped timer of that name;
1548             # that's ok
1549             }
1550              
1551             my ($self, $name) = @_;
1552 0     0 0 0 return unless $self->{timer_enabled};
1553 0 0       0  
1554 0         0 my $t = $self->{timers}->{$name};
1555             $t->{end} = time;
1556              
1557             if (!$t->{start}) {
1558 0     0 0 0 warn "timer_end('$name') with no timer_start";
1559 0 0       0 return;
1560 0         0 }
1561              
1562             # add to any existing elapsed time for this event, since
1563             # we may call the same timer name multiple times -- this is ok,
1564             # as long as they are not nested
1565 0     0 0 0 my $dt = $t->{end} - $t->{start};
1566 0         0 $dt = 0 if $dt < 0; # tolerate clock jumps, just in case
1567 0         0 if (defined $t->{elapsed}) { $t->{elapsed} += $dt }
1568             else { $t->{elapsed} = $dt }
1569             }
1570              
1571 32     32 0 93 my ($self, $name) = @_;
1572             return unless $self->{timer_enabled};
1573 32 50       130 return Mail::SpamAssassin::Util::ScopedTimer->new($self, $name);
1574             }
1575              
1576 0 0       0 my ($self) = @_;
1577 0         0  
  0         0  
1578             my $earliest;
1579             my $latest;
1580 0         0  
1581             while (my($name,$h) = each(%{$self->{timers}})) {
1582             # dbg("timing: %s - %s", $name, join(", ",
1583             # map { sprintf("%s => %s", $_, $h->{$_}) } keys(%$h)));
1584             my $start = $h->{start};
1585             if (defined $start && (!defined $earliest || $earliest > $start)) {
1586 32     32 0 93 $earliest = $start;
1587 32 50       124 }
1588             my $end = $h->{end};
1589 0         0 if (defined $end && (!defined $latest || $latest < $end)) {
1590 0         0 $latest = $end;
1591             }
1592 0 0       0 dbg("timing: start but no end: $name") if defined $start && !defined $end;
1593 0         0 }
1594 0         0 my $total =
1595             (!defined $latest || !defined $earliest) ? 0 : $latest - $earliest;
1596             my @str;
1597             foreach my $name (@{$self->{timers_order}}) {
1598             my $elapsed = $self->{timers}->{$name}->{elapsed} || 0;
1599             my $pc = $total <= 0 || $elapsed >= $total ? 100 : ($elapsed/$total)*100;
1600 0         0 my $fmt = $elapsed >= 0.005 ? "%.0f" : $elapsed >= 0.002 ? "%.1f" : "%.2f";
1601 0 0       0 push @str, sprintf("%s: $fmt (%.1f%%)", $name, $elapsed*1000, $pc);
1602 0 0       0 }
  0         0  
1603 0         0  
1604             return sprintf("total %.0f ms - %s", $total*1000, join(", ", @str));
1605             }
1606              
1607 850     850 0 1941 ###########################################################################
1608 850 50       2603 # non-public methods.
1609 0         0  
1610             my ($self, $use_user_pref) = @_;
1611              
1612             # Allow init() to be called multiple times, but only run once.
1613 0     0 0 0 if (defined $self->{_initted}) {
1614             # If the PID changes, reseed the PRNG (if permitted) and the DNS ID counter
1615 0         0 if ($self->{_initted} != $$) {
1616             $self->{_initted} = $$;
1617             srand if !$self->{skip_prng_reseeding};
1618 0         0 $self->{resolver}->reinit_post_fork();
  0         0  
1619             }
1620             return;
1621 0         0 }
1622 0 0 0     0  
      0        
1623 0         0 my $timer = $self->time_method("init");
1624             # Note that this PID has run init()
1625 0         0 $self->{_initted} = $$;
1626 0 0 0     0  
      0        
1627 0         0 #fix spamd reading root prefs file
1628             if (!defined $use_user_pref) {
1629 0 0 0     0 $use_user_pref = 1;
1630             }
1631 0 0 0     0  
1632             if (!defined $self->{config_text}) {
1633 0         0 $self->{config_text} = '';
1634 0         0  
  0         0  
1635 0   0     0 # read a file called "init.pre" in site rules dir *before* all others;
1636 0 0 0     0 # even the system config.
1637 0 0       0 my $siterules = $self->{site_rules_filename};
    0          
1638 0         0 $siterules ||= $self->first_existing_path (@site_rules_path);
1639              
1640             my $sysrules = $self->{rules_filename};
1641 0         0 $sysrules ||= $self->first_existing_path (@default_rules_path);
1642              
1643             if ($siterules) {
1644             $self->{config_text} .= $self->read_pre($siterules, 'site rules pre files');
1645             }
1646             else {
1647             warn "config: could not find site rules directory\n";
1648 205     205 0 318306 }
1649              
1650             if ($sysrules) {
1651 205 100       689 $self->{config_text} .= $self->read_pre($sysrules, 'sys rules pre files');
1652             }
1653 150 50       683 else {
1654 0         0 warn "config: could not find sys rules directory\n";
1655 0 0       0 }
1656 0         0  
1657             if ($sysrules) {
1658 150         286 my $cftext = $self->read_cf($sysrules, 'default rules dir');
1659             if ($self->{require_rules} && $cftext !~ /\S/) {
1660             die "config: no rules were found! Do you need to run 'sa-update'?\n";
1661 55         358 }
1662             $self->{config_text} .= $cftext;
1663 55         434 }
1664              
1665             if (!$self->{languages_filename}) {
1666 55 100       216 $self->{languages_filename} = $self->find_rule_support_file("languages");
1667 15         42 }
1668              
1669             if ($siterules && !$self->{ignore_site_cf_files}) {
1670 55 50       234 $self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
1671 55         256 }
1672              
1673             if ( $use_user_pref != 0 ) {
1674             $self->get_and_create_userstate_dir();
1675 55         201  
1676 55   33     167 # user prefs file
1677             my $fname = $self->{userprefs_filename};
1678 55         220 $fname ||= $self->first_existing_path (@default_userprefs_path);
1679 55   33     167  
1680             if (!$self->{dont_copy_prefs}) {
1681 55 50       168 # bug 4932: if the userprefs path doesn't exist, we need to make it, so
1682 55         353 # just use the last entry in the array as the default path.
1683             $fname ||= $self->sed_path($default_userprefs_path[-1]);
1684              
1685 0         0 my $stat_errn = stat($fname) ? 0 : 0+$!;
1686             if ($stat_errn == 0 && -f _) {
1687             # exists and is a regular file, nothing to do
1688 55 50       388 } elsif ($stat_errn == 0) {
1689 55         254 warn "config: default user preference file $fname is not a regular file\n";
1690             } elsif ($stat_errn != ENOENT) {
1691             warn "config: default user preference file $fname not accessible: $!\n";
1692 0         0 } elsif (!$self->create_default_prefs($fname)) {
1693             warn "config: failed to create default user preference file $fname\n";
1694             }
1695 55 50       378 }
1696 55         274  
1697 55 50 66     336 $self->{config_text} .= $self->read_cf($fname, 'user prefs file');
1698 0         0 }
1699             }
1700 55         3092  
1701             if ($self->{pre_config_text}) {
1702             $self->{config_text} = $self->{pre_config_text} . $self->{config_text};
1703 55 50       295 }
1704 55         304 if ($self->{post_config_text}) {
1705             $self->{config_text} .= $self->{post_config_text};
1706             }
1707 55 50 33     541  
1708 55         228 if ($self->{config_text} !~ /\S/) {
1709             my $m = "config: no configuration text or files found! do you need to run 'sa-update'?\n";
1710             if ($self->{require_rules}) {
1711 55 100       233 die $m;
1712 50         203 } else {
1713             warn $m;
1714             }
1715 50         246 }
1716 50   33     135  
1717             # Go and parse the config!
1718 50 100       156 $self->{conf}->{main} = $self;
1719             if (would_log('dbg', 'config_text') > 1) {
1720             dbg('config_text: '.$self->{config_text});
1721 1   33     4 }
1722             $self->{conf}->parse_rules ($self->{config_text});
1723 1 50       15 $self->{conf}->finish_parsing(0);
1724 1 50 33     8 delete $self->{conf}->{main}; # to allow future GC'ing
    0          
    0          
    0          
1725              
1726             undef $self->{config_text}; # ensure it's actually freed
1727 0         0 delete $self->{config_text};
1728              
1729 0         0 if ($self->{require_rules} && !$self->{conf}->found_any_rules()) {
1730             die "config: no rules were found! Do you need to run 'sa-update'?\n";
1731 0         0 }
1732              
1733             # Initialize the Bayes subsystem
1734             if ($self->{conf}->{use_bayes}) {
1735 50         199 require Mail::SpamAssassin::Bayes;
1736             $self->{bayes_scanner} = new Mail::SpamAssassin::Bayes ($self);
1737             }
1738             $self->{'learn_to_journal'} = $self->{conf}->{bayes_learn_to_journal};
1739 55 50       397  
1740 0         0 # Figure out/set our initial scoreset
1741             my $set = 0;
1742 55 50       204 $set |= 1 unless $self->{local_tests_only};
1743 0         0 $set |= 2 if $self->{bayes_scanner} && $self->{bayes_scanner}->is_scan_available() && $self->{conf}->{use_bayes_rules};
1744             $self->{conf}->set_score_set ($set);
1745              
1746 55 50       574 if ($self->{only_these_rules}) {
1747 0         0 $self->{conf}->trim_rules($self->{only_these_rules});
1748 0 0       0 }
1749 0         0  
1750             if (!$self->{timer_enabled}) {
1751 0         0 # enable timing implicitly if _TIMING_ is used in add_header templates
1752             foreach my $hf_ref (@{$self->{conf}->{'headers_ham'}},
1753             @{$self->{conf}->{'headers_spam'}}) {
1754             if ($hf_ref->[1] =~ /_TIMING_/) { $self->timer_enable(); last }
1755             }
1756 55         229 }
1757 55 50       319  
1758 0         0 # should be called only after configuration has been parsed
1759             $self->{resolver} = Mail::SpamAssassin::DnsResolver->new($self);
1760 55         600  
1761 55         488 # TODO -- open DNS cache etc. if necessary
1762 55         257 }
1763              
1764 55         350 my ($self, $allpaths, $desc) = @_;
1765 55         285 return $self->_read_cf_pre($allpaths,$desc,\&get_cf_files_in_dir);
1766             }
1767 55 50 66     344  
1768 0         0 my ($self, $allpaths, $desc) = @_;
1769             return $self->_read_cf_pre($allpaths,$desc,\&get_pre_files_in_dir);
1770             }
1771              
1772 55 100       263 my ($self, $allpaths, $desc, $filelistmethod) = @_;
1773 40         4577  
1774 40         372 return '' unless defined ($allpaths);
1775              
1776 55         249 my $txt = '';
1777             foreach my $path (split("\000", $allpaths))
1778             {
1779 55         168 dbg("config: using \"$path\" for $desc");
1780 55 50       202  
1781 55 0 66     355 my $stat_errn = stat($path) ? 0 : 0+$!;
      33        
1782 55         530 if ($stat_errn == ENOENT) {
1783             # no file or directory
1784 55 50       202 } elsif ($stat_errn != 0) {
1785 0         0 dbg("config: file or directory $path not accessible: $!");
1786             } elsif (-d _) {
1787             foreach my $file ($self->$filelistmethod($path)) {
1788 55 50       217 $txt .= read_cf_file($file);
1789             }
1790 55         105 } elsif (-f _ && -s _ && -r _) {
  55         225  
1791 55         282 $txt .= read_cf_file($path);
1792 433 50       1262 }
  0         0  
  0         0  
1793             }
1794              
1795             return $txt;
1796             }
1797 55         1006  
1798              
1799             my($path) = @_;
1800             my $txt = '';
1801              
1802             local *IN;
1803 160     160 0 405 if (open (IN, "<".$path)) {
1804 160         542  
1805             my($inbuf,$nread); $txt = '';
1806             while ( $nread=read(IN,$inbuf,16384) ) { $txt .= $inbuf }
1807             defined $nread or die "error reading $path: $!";
1808 110     110 0 289 close IN or die "error closing $path: $!";
1809 110         531 undef $inbuf;
1810              
1811             $txt = "file start $path\n" . $txt;
1812             # add an extra \n in case file did not end in one.
1813 270     270   635 $txt .= "\nfile end $path\n";
1814              
1815 270 50       568 dbg("config: read file $path");
1816             }
1817 270         373 else {
1818 270         1029 warn "config: cannot open \"$path\": $!\n";
1819             }
1820 270         1305  
1821             return $txt;
1822 270 100       3867 }
1823 270 100 66     1818  
    50 66        
    100          
    100          
1824             my ($self, $dir) = @_;
1825              
1826 0         0 my $fname;
1827              
1828 188         591 # If vpopmail is enabled then set fname to virtual homedir
1829 1020         1910 # precedence: dir, userstate_dir, derive from user_dir, system default
1830             if (defined $dir) {
1831             $fname = File::Spec->catdir ($dir, ".spamassassin");
1832 32         99 }
1833             elsif (defined $self->{userstate_dir}) {
1834             $fname = $self->{userstate_dir};
1835             }
1836 270         3655 elsif (defined $self->{user_dir}) {
1837             $fname = File::Spec->catdir ($self->{user_dir}, ".spamassassin");
1838             }
1839              
1840             $fname ||= $self->first_existing_path (@default_userstate_dir);
1841 1052     1052 0 1932  
1842 1052         1286 # bug 4932: use the last default_userstate_dir entry if none of the others
1843             # already exist
1844 1052         2159 $fname ||= $self->sed_path($default_userstate_dir[-1]);
1845 1052 50       30834  
1846             if (!$self->{dont_copy_prefs}) {
1847 1052         2714 dbg("config: using \"$fname\" for user state dir");
  1052         1541  
1848 1052         19068 }
  1122         8240  
1849 1052 50       2285  
1850 1052 50       8966 # if this is not a dir, not readable, or we are unable to create the dir,
1851 1052         2125 # this is not (yet) a serious error; in fact, it's not even worth
1852             # a warning at all times, so use dbg(). see bug 6268
1853 1052         6156 my $stat_errn = stat($fname) ? 0 : 0+$!;
1854             if ($stat_errn == 0 && !-d _) {
1855 1052         2876 dbg("config: $fname exists but is not a directory");
1856             } elsif ($stat_errn != 0 && $stat_errn != ENOENT) {
1857 1052         3775 dbg("config: error accessing $fname: $!");
1858             } else { # does not exist, create it
1859             eval {
1860 0         0 mkpath($fname, 0, 0700); 1;
1861             } or do {
1862             my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
1863 1052         8613 dbg("config: mkdir $fname failed: $eval_stat");
1864             };
1865             }
1866              
1867 50     50 0 137 $fname;
1868             }
1869 50         95  
1870             =item $fullpath = $f->find_rule_support_file ($filename)
1871              
1872             Find a rule-support file, such as C<languages> or C<triplets.txt>,
1873 50 50       327 in the system-wide rules directory, and return its full path if
    100          
    50          
1874 0         0 it exists, or undef if it doesn't exist.
1875              
1876             (This API was added in SpamAssassin 3.1.1.)
1877 34         86  
1878             =cut
1879              
1880 0         0 my ($self, $filename) = @_;
1881              
1882             return $self->first_existing_path(
1883 50   66     376 map { my $p = $_; $p =~ s{$}{/$filename}; $p } @default_rules_path );
1884             }
1885              
1886             =item $f->create_default_prefs ($filename, $username [ , $userdir ] )
1887 50   33     164  
1888             Copy default preferences file into home directory for later use and
1889 50 100       166 modification, if it does not already exist and C<dont_copy_prefs> is
1890 1         6 not set.
1891              
1892             =cut
1893              
1894             # $userdir will only exist if vpopmail config is enabled thru spamd
1895             # Its value will be the virtual user's maildir
1896 50 50       632 #
1897 50 50 33     505 my ($self, $fname, $user, $userdir) = @_;
    50 33        
1898 0         0  
1899             if ($self->{dont_copy_prefs}) {
1900 0         0 return(0);
1901             }
1902              
1903 50         2608 # if ($userdir && $userdir ne $self->{user_dir}) {
  50         252  
1904 50 50       134 # warn "config: hooray! user_dirs don't match! '$userdir' vs '$self->{user_dir}'\n";
1905 0 0       0 # }
  0         0  
1906 0         0  
1907             my $stat_errn = stat($fname) ? 0 : 0+$!;
1908             if ($stat_errn == 0) {
1909             # fine, it already exists
1910 50         115 } elsif ($stat_errn != ENOENT) {
1911             dbg("config: cannot access user preferences file $fname: $!");
1912             } else {
1913             # Pass on the value of $userdir for virtual users in vpopmail
1914             # otherwise it is empty and the user's normal homedir is used
1915             $self->get_and_create_userstate_dir($userdir);
1916              
1917             # copy in the default one for later editing
1918             my $defprefs =
1919             $self->first_existing_path(@Mail::SpamAssassin::default_prefs_path);
1920              
1921             local(*IN,*OUT);
1922             $fname = Mail::SpamAssassin::Util::untaint_file_path($fname);
1923             if (!defined $defprefs) {
1924 55     55 1 181 warn "config: can not determine default prefs path\n";
1925             } elsif (!open(IN, "<$defprefs")) {
1926             warn "config: cannot open $defprefs: $!\n";
1927 55         230 } elsif (!open(OUT, ">$fname")) {
  385         640  
  385         1643  
  385         1064  
1928             warn "config: cannot create user preferences file $fname: $!\n";
1929             } else {
1930             # former code skipped lines beginning with '#* ', the following copy
1931             # procedure no longer does so, as it avoids reading line-by-line
1932             my($inbuf,$nread);
1933             while ( $nread=read(IN,$inbuf,16384) ) {
1934             print OUT $inbuf or die "cannot write to $fname: $!";
1935             }
1936             defined $nread or die "error reading $defprefs: $!";
1937             undef $inbuf;
1938             close OUT or die "error closing $fname: $!";
1939             close IN or die "error closing $defprefs: $!";
1940              
1941             if (($< == 0) && ($> == 0) && defined($user)) { # chown it
1942 0     0 1 0 my ($uid,$gid) = (getpwnam(untaint_var($user)))[2,3];
1943             unless (chown($uid, $gid, $fname)) {
1944 0 0       0 warn "config: couldn't chown $fname to $uid:$gid for $user: $!\n";
1945 0         0 }
1946             }
1947             warn "config: created user preferences file: $fname\n";
1948             return(1);
1949             }
1950             }
1951              
1952 0 0       0 return(0);
1953 0 0       0 }
    0          
1954              
1955             ###########################################################################
1956 0         0  
1957             my ($self, $name) = @_;
1958             my $home = $self->{user_dir} || $ENV{HOME} || '';
1959              
1960 0         0 if (am_running_on_windows()) {
1961             my $userprofile = $ENV{USERPROFILE} || '';
1962              
1963 0         0 return $userprofile if ($userprofile && $userprofile =~ m/^[a-z]\:[\/\\]/oi);
1964             return $userprofile if ($userprofile =~ m/^\\\\/o);
1965              
1966 0         0 return $home if ($home && $home =~ m/^[a-z]\:[\/\\]/oi);
1967 0         0 return $home if ($home =~ m/^\\\\/o);
1968 0 0       0  
    0          
    0          
1969 0         0 return '';
1970             } else {
1971 0         0 return $home if ($home && $home =~ /\//o);
1972             return (getpwnam($name))[7] if ($name ne '');
1973 0         0 return (getpwuid($>))[7];
1974             }
1975             }
1976              
1977 0         0 my ($self, $path) = @_;
1978 0         0 return if !defined $path;
1979 0 0       0  
1980             if (exists($self->{conf}->{sed_path_cache}->{$path})) {
1981 0 0       0 return $self->{conf}->{sed_path_cache}->{$path};
1982 0         0 }
1983 0 0       0  
1984 0 0       0 my $orig_path = $path;
1985              
1986 0 0 0     0 $path =~ s/__local_rules_dir__/$self->{LOCAL_RULES_DIR} || ''/ges;
      0        
1987 0         0 $path =~ s/__local_state_dir__/$self->{LOCAL_STATE_DIR} || ''/ges;
1988 0 0       0 $path =~ s/__def_rules_dir__/$self->{DEF_RULES_DIR} || ''/ges;
1989 0         0 $path =~ s{__prefix__}{$self->{PREFIX} || $Config{prefix} || '/usr'}ges;
1990             $path =~ s{__userstate__}{$self->get_and_create_userstate_dir() || ''}ges;
1991             $path =~ s{__perl_major_ver__}{$self->get_perl_major_version()}ges;
1992 0         0 $path =~ s/__version__/${VERSION}/gs;
1993 0         0 $path =~ s/^\~([^\/]*)/$self->expand_name($1)/es;
1994              
1995             $path = Mail::SpamAssassin::Util::untaint_file_path ($path);
1996             $self->{conf}->{sed_path_cache}->{$orig_path} = $path;
1997 0         0 return $path;
1998             }
1999              
2000             my $self = shift;
2001             $] =~ /^(\d\.\d\d\d)/ or die "bad perl ver $]";
2002             return $1;
2003 16     16 0 210 }
2004 16   50     290  
2005             my $self = shift;
2006 16 50       99 my $path;
2007 0   0     0 foreach my $p (@_) {
2008             $path = $self->sed_path ($p);
2009 0 0 0     0 if (defined $path) {
2010 0 0       0 my($errn) = stat($path) ? 0 : 0+$!;
2011             if ($errn == ENOENT) { } # does not exist
2012 0 0 0     0 elsif ($errn) { warn "config: path \"$path\" is inaccessible: $!\n" }
2013 0 0       0 else { return $path }
2014             }
2015 0         0 }
2016             return;
2017 16 50 33     357 }
2018 0 0       0  
2019 0         0 ###########################################################################
2020              
2021             my ($self, $dir) = @_;
2022             return $self->_get_cf_pre_files_in_dir($dir, 'cf');
2023             }
2024 136     136 0 365  
2025 136 50       294 my ($self, $dir) = @_;
2026             return $self->_get_cf_pre_files_in_dir($dir, 'pre');
2027 136 50       570 }
2028 0         0  
2029             my ($self, $dir, $type) = @_;
2030              
2031 136         233 if ($self->{config_tree_recurse}) {
2032             my @cfs;
2033 136 0       212  
  0         0  
2034 136 50       193 # use "eval" to avoid loading File::Find unless this is specified
  1         8  
2035 136 50       388 eval ' use File::Find qw();
  1         14  
2036 136 0 33     296 File::Find::find(
  1         6  
2037 136 0       186 { untaint => 1,
  0         0  
2038 136         202 follow => 1,
  0         0  
2039 136         239 wanted =>
2040 136         324 sub { push(@cfs, $File::Find::name) if /\.\Q$type\E$/i && -f $_ }
  16         81  
2041             }, $dir); 1;
2042 136         607 ' or do {
2043 136         466 my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
2044 136         330 die "_get_cf_pre_files_in_dir error: $eval_stat";
2045             };
2046             @cfs = sort { $a cmp $b } @cfs;
2047             return @cfs;
2048 0     0 0 0 }
2049 0 0       0 else {
2050 0         0 opendir(SA_CF_DIR, $dir) or warn "config: cannot opendir $dir: $!\n";
2051             my @cfs = grep { $_ ne '.' && $_ ne '..' &&
2052             /\.${type}$/i && -f "$dir/$_" } readdir(SA_CF_DIR);
2053             closedir SA_CF_DIR;
2054 72     72 0 257  
2055 72         119 return map { "$dir/$_" } sort { $a cmp $b } @cfs;
2056 72         246 }
2057 133         489 }
2058 133 50       327  
2059 133 100       2649 ###########################################################################
2060 133 100       617  
    50          
2061 0         0 my ($self, $subname) = @_;
2062 71         419  
2063             # We could potentially get called after a finish(), so just return.
2064             return unless $self->{plugins};
2065 1         5  
2066             return $self->{plugins}->have_callback ($subname);
2067             }
2068              
2069             my $self = shift;
2070             my $subname = shift;
2071 94     94 0 297  
2072 94         291 # We could potentially get called after a finish(), so just return.
2073             return unless $self->{plugins};
2074              
2075             # Use some calls ourself too
2076 94     94 0 215 if ($subname eq 'finish_parsing_end') {
2077 94         320 # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
2078             # read. Plugins can also now use {valid_tlds_re} to one time compile
2079             # regexes in finish_parsing_end.
2080             $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
2081 188     188   499 }
2082              
2083 188 100       515 # safety net in case some plugin changes global settings, Bug 6218
2084 4         6 local $/ = $/; # prevent underlying modules from changing the global $/
2085              
2086             return $self->{plugins}->callback($subname, @_);
2087             }
2088              
2089             ###########################################################################
2090              
2091             my ($self, $mail_obj) = @_;
2092              
2093             $self->init(1);
2094 4 50   1   204  
  1     1   6  
  1     1   2  
  1     1   87  
  1         6  
  1         2  
  1         56  
  1         6  
  1         1  
  1         57  
  1         7  
  1         1  
  1         58  
2095 0 0       0 my @addrlist;
  0         0  
2096 0         0 foreach my $header (qw(To From Cc Reply-To Sender
2097             Errors-To Mail-Followup-To))
2098 4         16 {
  52         76  
2099 4         32 my @hdrs = $mail_obj->get_header($header);
2100             if ($#hdrs < 0) { next; }
2101             push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
2102 184 50       3723 }
2103 184 100 100     6031  
  2404   100     32577  
2104             # find addrs in body, too
2105 184         2224 foreach my $line (@{$mail_obj->get_body()}) {
2106             push (@addrlist, $self->find_all_addrs_in_line($line));
2107 184         1230 }
  995         3904  
  2013         3121  
2108              
2109             my @ret;
2110             my %done;
2111              
2112             foreach (@addrlist) {
2113             s/^mailto://; # from Outlook "forwarded" message
2114 479     479 0 750 next if defined ($done{$_}); $done{$_} = 1;
2115             push (@ret, $_);
2116             }
2117 479 50       911  
2118             @ret;
2119 479         1030 }
2120              
2121             my ($self, $line) = @_;
2122              
2123 3462     3462 0 5535 # a more permissive pattern based on "dot-atom" as per RFC2822
2124 3462         4164 my $ID_PATTERN = '[-a-z0-9_\+\:\=\!\#\$\%\&\*\^\?\{\}\|\~\/\.]+';
2125             my $HOST_PATTERN = '[-a-z0-9_\+\:\/]+';
2126              
2127 3462 50       7550 my @addrs;
2128             my %seen;
2129             while ($line =~ s/(?:mailto:)?\s*
2130 3462 100       5919 ($ID_PATTERN \@
2131             $HOST_PATTERN(?:\.$HOST_PATTERN)+)//oix)
2132             {
2133             my $addr = $1;
2134 55         900 $addr =~ s/^mailto://;
2135             next if (defined ($seen{$addr})); $seen{$addr} = 1;
2136             push (@addrs, $addr);
2137             }
2138 3462         11010  
2139             return @addrs;
2140 3462         10273 }
2141              
2142             ###########################################################################
2143              
2144             # sa_die -- used to die with a useful exit code.
2145              
2146 0     0 0 0 my $exitcode = shift;
2147             warn @_;
2148 0         0 exit $exitcode;
2149             }
2150 0         0  
2151 0         0 ###########################################################################
2152              
2153             =item $f->copy_config ( [ $source ], [ $dest ] )
2154 0         0  
2155 0 0       0 Used for daemons to keep a persistent Mail::SpamAssassin object's
  0         0  
2156 0         0 configuration correct if switching between users. Pass an associative
2157             array reference as either $source or $dest, and set the other to 'undef'
2158             so that the object will use its current configuration. i.e.:
2159              
2160 0         0 # create object w/ configuration
  0         0  
2161 0         0 my $spamtest = Mail::SpamAssassin->new( ... );
2162              
2163             # backup configuration to %conf_backup
2164 0         0 my %conf_backup;
2165             $spamtest->copy_config(undef, \%conf_backup) ||
2166             die "config: error returned from copy_config!\n";
2167 0         0  
2168 0         0 ... do stuff, perhaps modify the config, etc ...
2169 0 0       0  
  0         0  
2170 0         0 # reset the configuration back to the original
2171             $spamtest->copy_config(\%conf_backup, undef) ||
2172             die "config: error returned from copy_config!\n";
2173 0         0  
2174             Note that the contents of the associative arrays should be considered
2175             opaque by calling code.
2176              
2177 67     67 0 194 =cut
2178              
2179             my ($self, $source, $dest) = @_;
2180 67         104  
2181 67         98 # At least one of either source or dest needs to be a hash reference ...
2182             unless ((defined $source && ref($source) eq 'HASH') ||
2183 67         130 (defined $dest && ref($dest) eq 'HASH'))
2184             {
2185 67         462 return 0;
2186             }
2187              
2188             my $timer = $self->time_method("copy_config");
2189 2         11  
2190 2         17 # let the Conf object itself do all the heavy lifting. It's better
2191 2 50       14 # than having this class know all about that class' internals...
  2         7  
2192 2         11 if (defined $source) {
2193             dbg ("config: copying current conf from backup");
2194             }
2195 67         253 else {
2196             dbg ("config: copying current conf to backup");
2197             }
2198             return $self->{conf}->clone($source, $dest);
2199             }
2200              
2201             ###########################################################################
2202              
2203 0     0 0 0 =item @plugins = $f->get_loaded_plugins_list ( )
2204 0         0  
2205 0         0 Return the list of plugins currently loaded by this SpamAssassin object's
2206             configuration; each entry in the list is an object of type
2207             C<Mail::SpamAssassin::Plugin>.
2208              
2209             (This API was added in SpamAssassin 3.2.0.)
2210              
2211             =cut
2212              
2213             my ($self) = @_;
2214             return $self->{plugins}->get_loaded_plugins_list();
2215             }
2216              
2217             1;
2218              
2219             ###########################################################################
2220              
2221             =back
2222              
2223             =head1 PREREQUISITES
2224              
2225             C<HTML::Parser>
2226             C<Sys::Syslog>
2227              
2228             =head1 MORE DOCUMENTATION
2229              
2230             See also E<lt>https://spamassassin.apache.org/E<gt> and
2231             E<lt>https://wiki.apache.org/spamassassin/E<gt> for more information.
2232              
2233             =head1 SEE ALSO
2234              
2235             Mail::SpamAssassin::Conf(3)
2236             Mail::SpamAssassin::PerMsgStatus(3)
2237 3     3 1 5948 spamassassin(1)
2238             sa-update(1)
2239              
2240 3 50 66     23 =head1 BUGS
      33        
      66        
2241              
2242             See E<lt>https://issues.apache.org/SpamAssassin/E<gt>
2243 0         0  
2244             =head1 AUTHORS
2245              
2246 3         6 The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>
2247              
2248             =head1 COPYRIGHT
2249              
2250 3 100       6 SpamAssassin is distributed under the Apache License, Version 2.0, as
2251 2         8 described in the file C<LICENSE> included with the distribution.
2252              
2253             =head1 AVAILABILITY
2254 1         4  
2255             The latest version of this library is likely to be available from CPAN
2256 3         11 as well as:
2257              
2258             E<lt>https://spamassassin.apache.org/E<gt>
2259              
2260             =cut