| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# <@LICENSE> |
|
2
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
|
3
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
|
4
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
|
5
|
|
|
|
|
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0 |
|
6
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
|
7
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at: |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
|
12
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
13
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
|
15
|
|
|
|
|
|
|
# limitations under the License. |
|
16
|
|
|
|
|
|
|
# </@LICENSE> |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package Mail::SpamAssassin::BayesStore::DBM; |
|
19
|
|
|
|
|
|
|
|
|
20
|
20
|
|
|
20
|
|
209
|
use strict; |
|
|
20
|
|
|
|
|
48
|
|
|
|
20
|
|
|
|
|
572
|
|
|
21
|
20
|
|
|
20
|
|
116
|
use warnings; |
|
|
20
|
|
|
|
|
50
|
|
|
|
20
|
|
|
|
|
525
|
|
|
22
|
|
|
|
|
|
|
# use bytes; |
|
23
|
20
|
|
|
20
|
|
118
|
use re 'taint'; |
|
|
20
|
|
|
|
|
54
|
|
|
|
20
|
|
|
|
|
544
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
20
|
|
|
20
|
|
125
|
use Fcntl; |
|
|
20
|
|
|
|
|
40
|
|
|
|
20
|
|
|
|
|
5226
|
|
|
26
|
20
|
|
|
20
|
|
153
|
use Errno qw(EBADF); |
|
|
20
|
|
|
|
|
45
|
|
|
|
20
|
|
|
|
|
830
|
|
|
27
|
20
|
|
|
20
|
|
133
|
use File::Basename; |
|
|
20
|
|
|
|
|
43
|
|
|
|
20
|
|
|
|
|
1057
|
|
|
28
|
20
|
|
|
20
|
|
120
|
use File::Spec; |
|
|
20
|
|
|
|
|
38
|
|
|
|
20
|
|
|
|
|
549
|
|
|
29
|
20
|
|
|
20
|
|
127
|
use File::Path; |
|
|
20
|
|
|
|
|
50
|
|
|
|
20
|
|
|
|
|
1690
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
BEGIN { |
|
32
|
20
|
|
|
|
|
112
|
eval { require Digest::SHA; import Digest::SHA qw(sha1); 1 } |
|
|
20
|
|
|
|
|
501
|
|
|
|
20
|
|
|
|
|
431
|
|
|
33
|
20
|
50
|
|
20
|
|
73
|
or do { require Digest::SHA1; import Digest::SHA1 qw(sha1) } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
20
|
|
|
20
|
|
123
|
use Mail::SpamAssassin; |
|
|
20
|
|
|
|
|
48
|
|
|
|
20
|
|
|
|
|
422
|
|
|
37
|
20
|
|
|
20
|
|
112
|
use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows); |
|
|
20
|
|
|
|
|
56
|
|
|
|
20
|
|
|
|
|
883
|
|
|
38
|
20
|
|
|
20
|
|
6469
|
use Mail::SpamAssassin::BayesStore; |
|
|
20
|
|
|
|
|
950
|
|
|
|
20
|
|
|
|
|
939
|
|
|
39
|
20
|
|
|
20
|
|
159
|
use Mail::SpamAssassin::Logger; |
|
|
20
|
|
|
|
|
50
|
|
|
|
20
|
|
|
|
|
1583
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
20
|
|
|
20
|
|
155
|
use constant MAGIC_RE => qr/^\015\001\007\011\003/; |
|
|
20
|
|
|
|
|
57
|
|
|
|
20
|
|
|
|
|
153151
|
|
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
our ( @DBNAMES, |
|
44
|
|
|
|
|
|
|
$NSPAM_MAGIC_TOKEN, $NHAM_MAGIC_TOKEN, $LAST_EXPIRE_MAGIC_TOKEN, $LAST_JOURNAL_SYNC_MAGIC_TOKEN, |
|
45
|
|
|
|
|
|
|
$NTOKENS_MAGIC_TOKEN, $OLDEST_TOKEN_AGE_MAGIC_TOKEN, $LAST_EXPIRE_REDUCE_MAGIC_TOKEN, |
|
46
|
|
|
|
|
|
|
$RUNNING_EXPIRE_MAGIC_TOKEN, $DB_VERSION_MAGIC_TOKEN, $LAST_ATIME_DELTA_MAGIC_TOKEN, |
|
47
|
|
|
|
|
|
|
$NEWEST_TOKEN_AGE_MAGIC_TOKEN |
|
48
|
|
|
|
|
|
|
); |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
our @ISA = qw( Mail::SpamAssassin::BayesStore ); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# db layout (quoting Matt): |
|
53
|
|
|
|
|
|
|
# |
|
54
|
|
|
|
|
|
|
# > need five db files though to make it real fast: |
|
55
|
|
|
|
|
|
|
# [probs] 1. ngood and nbad (two entries, so could be a flat file rather |
|
56
|
|
|
|
|
|
|
# than a db file). (now 2 entries in db_toks) |
|
57
|
|
|
|
|
|
|
# [toks] 2. good token -> number seen |
|
58
|
|
|
|
|
|
|
# [toks] 3. bad token -> number seen (both are packed into 1 entry in 1 db) |
|
59
|
|
|
|
|
|
|
# [probs] 4. Consolidated good token -> probability |
|
60
|
|
|
|
|
|
|
# [probs] 5. Consolidated bad token -> probability |
|
61
|
|
|
|
|
|
|
# > As you add new mails, you update the entry in 2 or 3, then regenerate |
|
62
|
|
|
|
|
|
|
# > the entry for that token in 4 or 5. |
|
63
|
|
|
|
|
|
|
# > Then as you test a new mail, you just need to pull the probability |
|
64
|
|
|
|
|
|
|
# > direct from 4 and 5, and generate the overall probability. A simple and |
|
65
|
|
|
|
|
|
|
# > very fast operation. |
|
66
|
|
|
|
|
|
|
# |
|
67
|
|
|
|
|
|
|
# jm: we use probs as overall probability. <0.5 = ham, >0.5 = spam |
|
68
|
|
|
|
|
|
|
# |
|
69
|
|
|
|
|
|
|
# update: probs is no longer maintained as a db, to keep on-disk and in-core |
|
70
|
|
|
|
|
|
|
# usage down. |
|
71
|
|
|
|
|
|
|
# |
|
72
|
|
|
|
|
|
|
# also, added a new one to support forgetting, auto-learning, and |
|
73
|
|
|
|
|
|
|
# auto-forgetting for refiled mails: |
|
74
|
|
|
|
|
|
|
# [seen] 6. a list of Message-IDs of messages already learnt from. values |
|
75
|
|
|
|
|
|
|
# are 's' for learnt-as-spam, 'h' for learnt-as-ham. |
|
76
|
|
|
|
|
|
|
# |
|
77
|
|
|
|
|
|
|
# and another, called [scancount] to model the scan-count for expiry. |
|
78
|
|
|
|
|
|
|
# This is not a database. Instead it increases by one byte for each |
|
79
|
|
|
|
|
|
|
# message scanned (note: scanned, not learned). |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
@DBNAMES = qw(toks seen); |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# These are the magic tokens we use to track stuff in the DB. |
|
84
|
|
|
|
|
|
|
# The format is '^M^A^G^I^C' followed by any string you want. |
|
85
|
|
|
|
|
|
|
# None of the control chars will be in a real token. |
|
86
|
|
|
|
|
|
|
$DB_VERSION_MAGIC_TOKEN = "\015\001\007\011\003DBVERSION"; |
|
87
|
|
|
|
|
|
|
$LAST_ATIME_DELTA_MAGIC_TOKEN = "\015\001\007\011\003LASTATIMEDELTA"; |
|
88
|
|
|
|
|
|
|
$LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE"; |
|
89
|
|
|
|
|
|
|
$LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE"; |
|
90
|
|
|
|
|
|
|
$LAST_JOURNAL_SYNC_MAGIC_TOKEN = "\015\001\007\011\003LASTJOURNALSYNC"; |
|
91
|
|
|
|
|
|
|
$NEWEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003NEWESTAGE"; |
|
92
|
|
|
|
|
|
|
$NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM"; |
|
93
|
|
|
|
|
|
|
$NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM"; |
|
94
|
|
|
|
|
|
|
$NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS"; |
|
95
|
|
|
|
|
|
|
$OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE"; |
|
96
|
|
|
|
|
|
|
$RUNNING_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003RUNNINGEXPIRE"; |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub HAS_DBM_MODULE { |
|
99
|
135
|
|
|
135
|
0
|
320
|
my ($self) = @_; |
|
100
|
135
|
100
|
|
|
|
454
|
if (exists($self->{has_dbm_module})) { |
|
101
|
79
|
|
|
|
|
326
|
return $self->{has_dbm_module}; |
|
102
|
|
|
|
|
|
|
} |
|
103
|
56
|
|
|
|
|
107
|
$self->{has_dbm_module} = eval { require DB_File; }; |
|
|
56
|
|
|
|
|
11686
|
|
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub DBM_MODULE { |
|
107
|
135
|
|
|
135
|
0
|
618
|
return "DB_File"; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Possible file extensions used by the kinds of database files DB_File |
|
111
|
|
|
|
|
|
|
# might create. We need these so we can create a new file and rename |
|
112
|
|
|
|
|
|
|
# it into place. |
|
113
|
|
|
|
|
|
|
sub DB_EXTENSIONS { |
|
114
|
0
|
|
|
0
|
0
|
0
|
return ('', '.db'); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
########################################################################### |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub new { |
|
120
|
62
|
|
|
62
|
1
|
232
|
my $class = shift; |
|
121
|
62
|
|
33
|
|
|
1075
|
$class = ref($class) || $class; |
|
122
|
|
|
|
|
|
|
|
|
123
|
62
|
|
|
|
|
568
|
my $self = $class->SUPER::new(@_); |
|
124
|
|
|
|
|
|
|
|
|
125
|
62
|
|
|
|
|
574
|
$self->{supported_db_version} = 3; |
|
126
|
|
|
|
|
|
|
|
|
127
|
62
|
|
|
|
|
2257
|
$self->{already_tied} = 0; |
|
128
|
62
|
|
|
|
|
193
|
$self->{is_locked} = 0; |
|
129
|
62
|
|
|
|
|
503
|
$self->{string_to_journal} = ''; |
|
130
|
|
|
|
|
|
|
|
|
131
|
62
|
|
|
|
|
1333
|
$self; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
########################################################################### |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub tie_db_readonly { |
|
137
|
147
|
|
|
147
|
1
|
377
|
my ($self) = @_; |
|
138
|
|
|
|
|
|
|
|
|
139
|
147
|
100
|
|
|
|
624
|
if (!$self->HAS_DBM_MODULE) { |
|
140
|
133
|
|
|
|
|
530
|
dbg("bayes: %s module not installed, cannot use bayes", $self->DBM_MODULE); |
|
141
|
133
|
|
|
|
|
813
|
return 0; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# return if we've already tied to the db's, using the same mode |
|
145
|
|
|
|
|
|
|
# (locked/unlocked) as before. |
|
146
|
14
|
100
|
66
|
|
|
101
|
return 1 if ($self->{already_tied} && $self->{is_locked} == 0); |
|
147
|
|
|
|
|
|
|
|
|
148
|
12
|
|
|
|
|
28
|
my $main = $self->{bayes}->{main}; |
|
149
|
12
|
50
|
|
|
|
70
|
if (!defined($main->{conf}->{bayes_path})) { |
|
150
|
0
|
|
|
|
|
0
|
dbg("bayes: bayes_path not defined"); |
|
151
|
0
|
|
|
|
|
0
|
return 0; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
|
|
154
|
12
|
|
|
|
|
82
|
$self->read_db_configs(); |
|
155
|
|
|
|
|
|
|
|
|
156
|
12
|
|
|
|
|
61
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
157
|
|
|
|
|
|
|
|
|
158
|
12
|
|
|
|
|
36
|
my $found = 0; |
|
159
|
12
|
|
|
|
|
51
|
for my $ext ($self->DB_EXTENSIONS) { |
|
160
|
20
|
100
|
|
|
|
428
|
if (-f $path.'_toks'.$ext) { |
|
161
|
4
|
|
|
|
|
16
|
$found = 1; |
|
162
|
4
|
|
|
|
|
18
|
last; |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
12
|
100
|
|
|
|
52
|
if (!$found) { |
|
167
|
8
|
|
|
|
|
54
|
dbg("bayes: no dbs present, cannot tie DB R/O: %s", $path.'_toks'); |
|
168
|
8
|
|
|
|
|
89
|
return 0; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
4
|
|
|
|
|
29
|
foreach my $dbname (@DBNAMES) { |
|
172
|
8
|
|
|
|
|
37
|
my $name = $path.'_'.$dbname; |
|
173
|
8
|
|
|
|
|
24
|
my $db_var = 'db_'.$dbname; |
|
174
|
8
|
|
|
|
|
42
|
dbg("bayes: tie-ing to DB file R/O $name"); |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Bug 6901, [rt.cpan.org #83060] |
|
177
|
|
|
|
|
|
|
# DB_File: Repeated tie to the same hash with no untie causes corruption |
|
178
|
8
|
|
|
|
|
26
|
untie %{$self->{$db_var}}; # has no effect if the variable is not tied |
|
|
8
|
|
|
|
|
52
|
|
|
179
|
|
|
|
|
|
|
|
|
180
|
8
|
100
|
|
|
|
20
|
if (!tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDONLY, |
|
|
8
|
|
|
|
|
42
|
|
|
181
|
|
|
|
|
|
|
(oct($main->{conf}->{bayes_file_mode}) & 0666)) |
|
182
|
|
|
|
|
|
|
{ |
|
183
|
|
|
|
|
|
|
# bug 2975: it's acceptable for the db_seen to not be present, |
|
184
|
|
|
|
|
|
|
# to allow it to be recycled. if that's the case, just create |
|
185
|
|
|
|
|
|
|
# a new, empty one. we don't need to lock it, since we won't |
|
186
|
|
|
|
|
|
|
# be writing to it; let the R/W api deal with that case. |
|
187
|
|
|
|
|
|
|
|
|
188
|
1
|
50
|
|
|
|
19
|
if ($dbname eq 'seen') { |
|
189
|
|
|
|
|
|
|
# Bug 6901, [rt.cpan.org #83060] |
|
190
|
1
|
|
|
|
|
2
|
untie %{$self->{$db_var}}; # has no effect if the variable is not tied |
|
|
1
|
|
|
|
|
4
|
|
|
191
|
1
|
|
|
|
|
13
|
tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT, |
|
192
|
1
|
50
|
|
|
|
7
|
(oct($main->{conf}->{bayes_file_mode}) & 0666) |
|
193
|
|
|
|
|
|
|
or goto failed_to_tie; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
else { |
|
196
|
0
|
|
|
|
|
0
|
goto failed_to_tie; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
4
|
|
|
|
|
48
|
$self->{db_version} = ($self->get_storage_variables())[6]; |
|
202
|
4
|
|
|
|
|
22
|
dbg("bayes: found bayes db version %s", $self->{db_version}); |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# If the DB version is one we don't understand, abort! |
|
205
|
4
|
50
|
|
|
|
43
|
if ($self->_check_db_version() != 0) { |
|
206
|
0
|
|
|
|
|
0
|
warn("bayes: bayes db version ".$self->{db_version}." is not able to be used, aborting!"); |
|
207
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
208
|
0
|
|
|
|
|
0
|
return 0; |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
|
|
211
|
4
|
|
|
|
|
17
|
$self->{already_tied} = 1; |
|
212
|
4
|
|
|
|
|
25
|
return 1; |
|
213
|
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
0
|
failed_to_tie: |
|
215
|
|
|
|
|
|
|
warn "bayes: cannot open bayes databases ${path}_* R/O: tie failed: $!\n"; |
|
216
|
0
|
|
|
|
|
0
|
foreach my $dbname (@DBNAMES) { |
|
217
|
0
|
|
|
|
|
0
|
my $db_var = 'db_'.$dbname; |
|
218
|
0
|
0
|
|
|
|
0
|
next unless exists $self->{$db_var}; |
|
219
|
0
|
|
|
|
|
0
|
dbg("bayes: untie-ing DB file $dbname"); |
|
220
|
0
|
|
|
|
|
0
|
untie %{$self->{$db_var}}; |
|
|
0
|
|
|
|
|
0
|
|
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
0
|
return 0; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# tie() to the databases, read-write and locked. Any callers of |
|
227
|
|
|
|
|
|
|
# this should ensure they call untie_db() afterwards! |
|
228
|
|
|
|
|
|
|
# |
|
229
|
|
|
|
|
|
|
sub tie_db_writable { |
|
230
|
30
|
|
|
30
|
1
|
90
|
my ($self) = @_; |
|
231
|
|
|
|
|
|
|
|
|
232
|
30
|
100
|
|
|
|
117
|
if (!$self->HAS_DBM_MODULE) { |
|
233
|
2
|
|
|
|
|
9
|
dbg("bayes: %s module not installed, cannot use bayes", $self->DBM_MODULE); |
|
234
|
2
|
|
|
|
|
7
|
return 0; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# Useful shortcut ... |
|
238
|
28
|
|
|
|
|
94
|
my $main = $self->{bayes}->{main}; |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# if we've already tied the db's using the same mode |
|
241
|
|
|
|
|
|
|
# (locked/unlocked) as we want now, freshen the lock and return. |
|
242
|
28
|
100
|
66
|
|
|
123
|
if ($self->{already_tied} && $self->{is_locked} == 1) { |
|
243
|
2
|
|
|
|
|
14
|
$main->{locker}->refresh_lock($self->{locked_file}); |
|
244
|
2
|
|
|
|
|
9
|
return 1; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
26
|
50
|
|
|
|
124
|
if (!defined($main->{conf}->{bayes_path})) { |
|
248
|
0
|
|
|
|
|
0
|
dbg("bayes: bayes_path not defined"); |
|
249
|
0
|
|
|
|
|
0
|
return 0; |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
|
|
252
|
26
|
|
|
|
|
155
|
$self->read_db_configs(); |
|
253
|
|
|
|
|
|
|
|
|
254
|
26
|
|
|
|
|
97
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
255
|
|
|
|
|
|
|
|
|
256
|
26
|
|
|
|
|
71
|
my $found = 0; |
|
257
|
26
|
|
|
|
|
91
|
for my $ext ($self->DB_EXTENSIONS) { |
|
258
|
30
|
100
|
|
|
|
592
|
if (-f $path.'_toks'.$ext) { |
|
259
|
22
|
|
|
|
|
74
|
$found = 1; |
|
260
|
22
|
|
|
|
|
53
|
last; |
|
261
|
|
|
|
|
|
|
} |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
26
|
|
|
|
|
1338
|
my $parentdir = dirname($path); |
|
265
|
26
|
50
|
|
|
|
475
|
if (!-d $parentdir) { |
|
266
|
|
|
|
|
|
|
# run in an eval(); if mkpath has no perms, it calls die() |
|
267
|
0
|
|
|
|
|
0
|
eval { |
|
268
|
0
|
|
|
|
|
0
|
mkpath($parentdir, 0, (oct($main->{conf}->{bayes_file_mode}) & 0777)); |
|
269
|
|
|
|
|
|
|
}; |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
|
|
272
|
26
|
|
|
|
|
71
|
my $tout; |
|
273
|
26
|
50
|
|
|
|
112
|
if ($main->{learn_wait_for_lock}) { |
|
274
|
0
|
|
|
|
|
0
|
$tout = 300; # TODO: Dan to write better lock code |
|
275
|
|
|
|
|
|
|
} else { |
|
276
|
26
|
|
|
|
|
64
|
$tout = 10; |
|
277
|
|
|
|
|
|
|
} |
|
278
|
26
|
50
|
|
|
|
191
|
if ($main->{locker}->safe_lock($path, $tout, $main->{conf}->{bayes_file_mode})) |
|
279
|
|
|
|
|
|
|
{ |
|
280
|
26
|
|
|
|
|
78
|
$self->{locked_file} = $path; |
|
281
|
26
|
|
|
|
|
60
|
$self->{is_locked} = 1; |
|
282
|
|
|
|
|
|
|
} else { |
|
283
|
0
|
|
|
|
|
0
|
warn "bayes: cannot open bayes databases ${path}_* R/W: lock failed: $!\n"; |
|
284
|
0
|
|
|
|
|
0
|
return 0; |
|
285
|
|
|
|
|
|
|
} |
|
286
|
|
|
|
|
|
|
|
|
287
|
26
|
|
|
|
|
156
|
my $umask = umask 0; |
|
288
|
26
|
|
|
|
|
106
|
foreach my $dbname (@DBNAMES) { |
|
289
|
52
|
|
|
|
|
175
|
my $name = $path.'_'.$dbname; |
|
290
|
52
|
|
|
|
|
96
|
my $db_var = 'db_'.$dbname; |
|
291
|
52
|
|
|
|
|
208
|
dbg("bayes: tie-ing to DB file R/W $name"); |
|
292
|
|
|
|
|
|
|
|
|
293
|
52
|
50
|
|
|
|
163
|
($self->DBM_MODULE eq 'DB_File') and |
|
294
|
|
|
|
|
|
|
Mail::SpamAssassin::Util::avoid_db_file_locking_bug ($name); |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# Bug 6901, [rt.cpan.org #83060] |
|
297
|
52
|
|
|
|
|
91
|
untie %{$self->{$db_var}}; # has no effect if the variable is not tied |
|
|
52
|
|
|
|
|
241
|
|
|
298
|
52
|
|
|
|
|
149
|
tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT, |
|
299
|
52
|
50
|
|
|
|
81
|
(oct($main->{conf}->{bayes_file_mode}) & 0666) |
|
300
|
|
|
|
|
|
|
or goto failed_to_tie; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
26
|
|
|
|
|
172
|
umask $umask; |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# set our cache to what version DB we're using |
|
305
|
26
|
|
|
|
|
135
|
$self->{db_version} = ($self->get_storage_variables())[6]; |
|
306
|
|
|
|
|
|
|
# don't bother printing this unless found since it would be bogus anyway |
|
307
|
26
|
100
|
|
|
|
171
|
dbg("bayes: found bayes db version %s", $self->{db_version}) if $found; |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# figure out if we can read the current DB and if we need to do a |
|
310
|
|
|
|
|
|
|
# DB version update and do it if necessary if either has a problem, |
|
311
|
|
|
|
|
|
|
# fail immediately |
|
312
|
|
|
|
|
|
|
# |
|
313
|
26
|
50
|
66
|
|
|
150
|
if ($found && !$self->_upgrade_db()) { |
|
|
|
100
|
|
|
|
|
|
|
314
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
315
|
0
|
|
|
|
|
0
|
return 0; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
elsif (!$found) { # new DB, make sure we know that ... |
|
318
|
4
|
|
|
|
|
91
|
$self->{db_version} = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION; |
|
319
|
4
|
|
|
|
|
80
|
$self->{db_toks}->{$NTOKENS_MAGIC_TOKEN} = 0; # no tokens in the db ... |
|
320
|
4
|
|
|
|
|
25
|
dbg("bayes: new db, set db version %s and 0 tokens", $self->{db_version}); |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
26
|
|
|
|
|
61
|
$self->{already_tied} = 1; |
|
324
|
26
|
|
|
|
|
177
|
return 1; |
|
325
|
|
|
|
|
|
|
|
|
326
|
0
|
|
|
|
|
0
|
failed_to_tie: |
|
327
|
|
|
|
|
|
|
my $err = $!; |
|
328
|
0
|
|
|
|
|
0
|
umask $umask; |
|
329
|
|
|
|
|
|
|
|
|
330
|
0
|
|
|
|
|
0
|
foreach my $dbname (@DBNAMES) { |
|
331
|
0
|
|
|
|
|
0
|
my $db_var = 'db_'.$dbname; |
|
332
|
0
|
0
|
|
|
|
0
|
next unless exists $self->{$db_var}; |
|
333
|
0
|
|
|
|
|
0
|
dbg("bayes: untie-ing DB file $dbname"); |
|
334
|
0
|
|
|
|
|
0
|
untie %{$self->{$db_var}}; |
|
|
0
|
|
|
|
|
0
|
|
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
0
|
0
|
|
|
|
0
|
if ($self->{is_locked}) { |
|
338
|
0
|
|
|
|
|
0
|
$self->{bayes}->{main}->{locker}->safe_unlock($self->{locked_file}); |
|
339
|
0
|
|
|
|
|
0
|
$self->{is_locked} = 0; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
0
|
|
|
|
|
0
|
warn "bayes: cannot open bayes databases ${path}_* R/W: tie failed: $err\n"; |
|
342
|
0
|
|
|
|
|
0
|
return 0; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# Do we understand how to deal with this DB version? |
|
346
|
|
|
|
|
|
|
sub _check_db_version { |
|
347
|
26
|
|
|
26
|
|
61
|
my ($self) = @_; |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# return -1 if older, 0 if current, 1 if newer |
|
350
|
26
|
|
|
|
|
112
|
return $self->{db_version} <=> $self->DB_VERSION; |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# Check to see if we need to upgrade the DB, and do so if necessary |
|
354
|
|
|
|
|
|
|
sub _upgrade_db { |
|
355
|
22
|
|
|
22
|
|
61
|
my ($self) = @_; |
|
356
|
|
|
|
|
|
|
|
|
357
|
22
|
|
|
|
|
135
|
my $verschk = $self->_check_db_version(); |
|
358
|
22
|
|
|
|
|
50
|
my $res = 0; # used later on for tie() checks |
|
359
|
22
|
|
|
|
|
39
|
my $umask; # used later for umask modifications |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# If the DB is the latest version, no problem. |
|
362
|
22
|
50
|
|
|
|
164
|
return 1 if ($verschk == 0); |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
# If the DB is a newer version that we know what to do with ... abort! |
|
365
|
0
|
0
|
|
|
|
0
|
if ($verschk == 1) { |
|
366
|
0
|
|
|
|
|
0
|
warn("bayes: bayes db version ".$self->{db_version}." is newer than we understand, aborting!"); |
|
367
|
0
|
|
|
|
|
0
|
return 0; |
|
368
|
|
|
|
|
|
|
} |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# If the current DB version is lower than the new version, upgrade! |
|
371
|
|
|
|
|
|
|
# Do conversions in order so we can go 1 -> 3, make sure to update |
|
372
|
|
|
|
|
|
|
# $self->{db_version} along the way |
|
373
|
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
0
|
dbg("bayes: detected bayes db format %s, upgrading", $self->{db_version}); |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# since DB_File will not shrink a database (!!), we need to *create* |
|
377
|
|
|
|
|
|
|
# a new one instead. |
|
378
|
0
|
|
|
|
|
0
|
my $main = $self->{bayes}->{main}; |
|
379
|
0
|
|
|
|
|
0
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
380
|
0
|
|
|
|
|
0
|
my $name = $path.'_toks'; |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# older version's journal files are likely not in the same format as the new ones, so remove it. |
|
383
|
0
|
|
|
|
|
0
|
my $jpath = $self->_get_journal_filename(); |
|
384
|
0
|
0
|
|
|
|
0
|
if (-f $jpath) { |
|
385
|
0
|
|
|
|
|
0
|
dbg("bayes: old journal file found, removing"); |
|
386
|
0
|
0
|
|
|
|
0
|
warn "bayes: couldn't remove $jpath: $!" if (!unlink $jpath); |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
|
|
389
|
0
|
0
|
|
|
|
0
|
if ($self->{db_version} < 2) { |
|
390
|
0
|
|
|
|
|
0
|
dbg("bayes: upgrading database format from v%s to v2", $self->{db_version}); |
|
391
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
392
|
|
|
|
|
|
|
|
|
393
|
0
|
|
|
|
|
0
|
my ($DB_NSPAM_MAGIC_TOKEN, $DB_NHAM_MAGIC_TOKEN, $DB_NTOKENS_MAGIC_TOKEN); |
|
394
|
0
|
|
|
|
|
0
|
my ($DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN, $DB_LAST_EXPIRE_MAGIC_TOKEN); |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# Magic tokens for version 0, defined as '**[A-Z]+' |
|
397
|
0
|
0
|
|
|
|
0
|
if ($self->{db_version} == 0) { |
|
398
|
0
|
|
|
|
|
0
|
$DB_NSPAM_MAGIC_TOKEN = '**NSPAM'; |
|
399
|
0
|
|
|
|
|
0
|
$DB_NHAM_MAGIC_TOKEN = '**NHAM'; |
|
400
|
0
|
|
|
|
|
0
|
$DB_NTOKENS_MAGIC_TOKEN = '**NTOKENS'; |
|
401
|
|
|
|
|
|
|
#$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN = '**OLDESTAGE'; |
|
402
|
|
|
|
|
|
|
#$DB_LAST_EXPIRE_MAGIC_TOKEN = '**LASTEXPIRE'; |
|
403
|
|
|
|
|
|
|
#$DB_SCANCOUNT_BASE_MAGIC_TOKEN = '**SCANBASE'; |
|
404
|
|
|
|
|
|
|
#$DB_RUNNING_EXPIRE_MAGIC_TOKEN = '**RUNNINGEXPIRE'; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
else { |
|
407
|
0
|
|
|
|
|
0
|
$DB_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM"; |
|
408
|
0
|
|
|
|
|
0
|
$DB_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM"; |
|
409
|
0
|
|
|
|
|
0
|
$DB_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS"; |
|
410
|
|
|
|
|
|
|
#$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE"; |
|
411
|
|
|
|
|
|
|
#$DB_LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE"; |
|
412
|
|
|
|
|
|
|
#$DB_SCANCOUNT_BASE_MAGIC_TOKEN = "\015\001\007\011\003SCANBASE"; |
|
413
|
|
|
|
|
|
|
#$DB_RUNNING_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003RUNNINGEXPIRE"; |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
# remember when we started ... |
|
417
|
0
|
|
|
|
|
0
|
my $started = time; |
|
418
|
0
|
|
|
|
|
0
|
my $newatime = $started; |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# use O_EXCL to avoid races (bonus paranoia, since we should be locked |
|
421
|
|
|
|
|
|
|
# anyway) |
|
422
|
0
|
|
|
|
|
0
|
my %new_toks; |
|
423
|
0
|
|
|
|
|
0
|
$umask = umask 0; |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
$res = tie %new_toks, $self->DBM_MODULE, "${name}.new", |
|
426
|
|
|
|
|
|
|
O_RDWR|O_CREAT|O_EXCL, |
|
427
|
0
|
|
|
|
|
0
|
(oct($main->{conf}->{bayes_file_mode}) & 0666); |
|
428
|
0
|
|
|
|
|
0
|
umask $umask; |
|
429
|
0
|
0
|
|
|
|
0
|
return 0 unless $res; |
|
430
|
0
|
|
|
|
|
0
|
undef $res; |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# add the magic tokens to the new db. |
|
433
|
0
|
|
|
|
|
0
|
$new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN}; |
|
434
|
0
|
|
|
|
|
0
|
$new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN}; |
|
435
|
0
|
|
|
|
|
0
|
$new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN}; |
|
436
|
0
|
|
|
|
|
0
|
$new_toks{$DB_VERSION_MAGIC_TOKEN} = 2; # we're now a DB version 2 file |
|
437
|
0
|
|
|
|
|
0
|
$new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime; |
|
438
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $newatime; |
|
439
|
0
|
|
|
|
|
0
|
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime; |
|
440
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $newatime; |
|
441
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0; |
|
442
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0; |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# deal with the data tokens |
|
445
|
0
|
|
|
|
|
0
|
my ($tok, $packed); |
|
446
|
0
|
|
|
|
|
0
|
my $count = 0; |
|
447
|
0
|
|
|
|
|
0
|
while (($tok, $packed) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
448
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ /^(?:\*\*[A-Z]+$|\015\001\007\011\003)/); # skip magic tokens |
|
449
|
|
|
|
|
|
|
|
|
450
|
0
|
|
|
|
|
0
|
my ($ts, $th, $atime) = $self->tok_unpack($packed); |
|
451
|
0
|
|
|
|
|
0
|
$new_toks{$tok} = $self->tok_pack($ts, $th, $newatime); |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# Refresh the lock every so often... |
|
454
|
0
|
0
|
|
|
|
0
|
if (($count++ % 1000) == 0) { |
|
455
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
456
|
|
|
|
|
|
|
} |
|
457
|
|
|
|
|
|
|
} |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# now untie so we can do renames |
|
461
|
0
|
|
|
|
|
0
|
untie %{$self->{db_toks}}; |
|
|
0
|
|
|
|
|
0
|
|
|
462
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# This is the critical phase (moving files around), so don't allow |
|
465
|
|
|
|
|
|
|
# it to be interrupted. |
|
466
|
0
|
|
|
|
|
0
|
local $SIG{'INT'} = 'IGNORE'; |
|
467
|
0
|
|
|
|
|
0
|
local $SIG{'TERM'} = 'IGNORE'; |
|
468
|
0
|
0
|
|
|
|
0
|
local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows(); |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# older versions used scancount, so kill the stupid little file ... |
|
471
|
0
|
|
|
|
|
0
|
my $msgc = $path.'_msgcount'; |
|
472
|
0
|
0
|
|
|
|
0
|
if (-f $msgc) { |
|
473
|
0
|
|
|
|
|
0
|
dbg("bayes: old msgcount file found, removing"); |
|
474
|
0
|
0
|
|
|
|
0
|
if (!unlink $msgc) { |
|
475
|
0
|
|
|
|
|
0
|
warn "bayes: couldn't remove $msgc: $!"; |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# now rename in the new one. Try several extensions |
|
480
|
0
|
|
|
|
|
0
|
for my $ext ($self->DB_EXTENSIONS) { |
|
481
|
0
|
|
|
|
|
0
|
my $newf = $name.'.new'.$ext; |
|
482
|
0
|
|
|
|
|
0
|
my $oldf = $name.$ext; |
|
483
|
0
|
0
|
|
|
|
0
|
next unless (-f $newf); |
|
484
|
0
|
0
|
|
|
|
0
|
if (!rename ($newf, $oldf)) { |
|
485
|
0
|
|
|
|
|
0
|
warn "bayes: rename $newf to $oldf failed: $!\n"; |
|
486
|
0
|
|
|
|
|
0
|
return 0; |
|
487
|
|
|
|
|
|
|
} |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# re-tie to the new db in read-write mode ... |
|
491
|
0
|
|
|
|
|
0
|
$umask = umask 0; |
|
492
|
|
|
|
|
|
|
# Bug 6901, [rt.cpan.org #83060] |
|
493
|
0
|
|
|
|
|
0
|
untie %{$self->{db_toks}}; # has no effect if the variable is not tied |
|
|
0
|
|
|
|
|
0
|
|
|
494
|
0
|
|
|
|
|
0
|
$res = tie %{$self->{db_toks}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT, |
|
495
|
0
|
|
|
|
|
0
|
(oct($main->{conf}->{bayes_file_mode}) & 0666); |
|
496
|
0
|
|
|
|
|
0
|
umask $umask; |
|
497
|
0
|
0
|
|
|
|
0
|
return 0 unless $res; |
|
498
|
0
|
|
|
|
|
0
|
undef $res; |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
dbg("bayes: upgraded database format from v%s to v2 in %d seconds", |
|
501
|
0
|
|
|
|
|
0
|
$self->{db_version}, time - $started); |
|
502
|
0
|
|
|
|
|
0
|
$self->{db_version} = 2; # need this for other functions which check |
|
503
|
|
|
|
|
|
|
} |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# Version 3 of the database converts all existing tokens to SHA1 hashes |
|
506
|
0
|
0
|
|
|
|
0
|
if ($self->{db_version} == 2) { |
|
507
|
0
|
|
|
|
|
0
|
dbg("bayes: upgrading database format from v%s to v3", $self->{db_version}); |
|
508
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
509
|
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
0
|
my $DB_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM"; |
|
511
|
0
|
|
|
|
|
0
|
my $DB_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM"; |
|
512
|
0
|
|
|
|
|
0
|
my $DB_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS"; |
|
513
|
0
|
|
|
|
|
0
|
my $DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE"; |
|
514
|
0
|
|
|
|
|
0
|
my $DB_LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE"; |
|
515
|
0
|
|
|
|
|
0
|
my $DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003NEWESTAGE"; |
|
516
|
0
|
|
|
|
|
0
|
my $DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN = "\015\001\007\011\003LASTJOURNALSYNC"; |
|
517
|
0
|
|
|
|
|
0
|
my $DB_LAST_ATIME_DELTA_MAGIC_TOKEN = "\015\001\007\011\003LASTATIMEDELTA"; |
|
518
|
0
|
|
|
|
|
0
|
my $DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE"; |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# remember when we started ... |
|
521
|
0
|
|
|
|
|
0
|
my $started = time; |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# use O_EXCL to avoid races (bonus paranoia, since we should be locked |
|
524
|
|
|
|
|
|
|
# anyway) |
|
525
|
0
|
|
|
|
|
0
|
my %new_toks; |
|
526
|
0
|
|
|
|
|
0
|
$umask = umask 0; |
|
527
|
|
|
|
|
|
|
$res = tie %new_toks, $self->DBM_MODULE, "${name}.new", O_RDWR|O_CREAT|O_EXCL, |
|
528
|
0
|
|
|
|
|
0
|
(oct($main->{conf}->{bayes_file_mode}) & 0666); |
|
529
|
0
|
|
|
|
|
0
|
umask $umask; |
|
530
|
0
|
0
|
|
|
|
0
|
return 0 unless $res; |
|
531
|
0
|
|
|
|
|
0
|
undef $res; |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# add the magic tokens to the new db. |
|
534
|
0
|
|
|
|
|
0
|
$new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN}; |
|
535
|
0
|
|
|
|
|
0
|
$new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN}; |
|
536
|
0
|
|
|
|
|
0
|
$new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN}; |
|
537
|
0
|
|
|
|
|
0
|
$new_toks{$DB_VERSION_MAGIC_TOKEN} = 3; # we're now a DB version 3 file |
|
538
|
0
|
|
|
|
|
0
|
$new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN}; |
|
539
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_EXPIRE_MAGIC_TOKEN}; |
|
540
|
0
|
|
|
|
|
0
|
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN}; |
|
541
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN}; |
|
542
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_ATIME_DELTA_MAGIC_TOKEN}; |
|
543
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} =$self->{db_toks}->{$DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN}; |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# deal with the data tokens |
|
546
|
0
|
|
|
|
|
0
|
my $count = 0; |
|
547
|
0
|
|
|
|
|
0
|
while (my ($tok, $packed) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
548
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ /^\015\001\007\011\003/); # skip magic tokens |
|
549
|
0
|
|
|
|
|
0
|
my $tok_hash = substr(sha1($tok), -5); |
|
550
|
0
|
|
|
|
|
0
|
$new_toks{$tok_hash} = $packed; |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# Refresh the lock every so often... |
|
553
|
0
|
0
|
|
|
|
0
|
if (($count++ % 1000) == 0) { |
|
554
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
555
|
|
|
|
|
|
|
} |
|
556
|
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
# now untie so we can do renames |
|
559
|
0
|
|
|
|
|
0
|
untie %{$self->{db_toks}}; |
|
|
0
|
|
|
|
|
0
|
|
|
560
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# This is the critical phase (moving files around), so don't allow |
|
563
|
|
|
|
|
|
|
# it to be interrupted. |
|
564
|
0
|
|
|
|
|
0
|
local $SIG{'INT'} = 'IGNORE'; |
|
565
|
0
|
|
|
|
|
0
|
local $SIG{'TERM'} = 'IGNORE'; |
|
566
|
0
|
0
|
|
|
|
0
|
local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows(); |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
# now rename in the new one. Try several extensions |
|
569
|
0
|
|
|
|
|
0
|
for my $ext ($self->DB_EXTENSIONS) { |
|
570
|
0
|
|
|
|
|
0
|
my $newf = $name.'.new'.$ext; |
|
571
|
0
|
|
|
|
|
0
|
my $oldf = $name.$ext; |
|
572
|
0
|
0
|
|
|
|
0
|
next unless (-f $newf); |
|
573
|
0
|
0
|
|
|
|
0
|
if (!rename($newf, $oldf)) { |
|
574
|
0
|
|
|
|
|
0
|
warn "bayes: rename $newf to $oldf failed: $!\n"; |
|
575
|
0
|
|
|
|
|
0
|
return 0; |
|
576
|
|
|
|
|
|
|
} |
|
577
|
|
|
|
|
|
|
} |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
# re-tie to the new db in read-write mode ... |
|
580
|
0
|
|
|
|
|
0
|
$umask = umask 0; |
|
581
|
|
|
|
|
|
|
# Bug 6901, [rt.cpan.org #83060] |
|
582
|
0
|
|
|
|
|
0
|
untie %{$self->{db_toks}}; # has no effect if the variable is not tied |
|
|
0
|
|
|
|
|
0
|
|
|
583
|
0
|
|
|
|
|
0
|
$res = tie %{$self->{db_toks}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT, |
|
584
|
0
|
|
|
|
|
0
|
(oct ($main->{conf}->{bayes_file_mode}) & 0666); |
|
585
|
0
|
|
|
|
|
0
|
umask $umask; |
|
586
|
0
|
0
|
|
|
|
0
|
return 0 unless $res; |
|
587
|
0
|
|
|
|
|
0
|
undef $res; |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
dbg("bayes: upgraded database format from v%s to v3 in %d seconds", |
|
590
|
0
|
|
|
|
|
0
|
$self->{db_version}, time - $started); |
|
591
|
|
|
|
|
|
|
|
|
592
|
0
|
|
|
|
|
0
|
$self->{db_version} = 3; # need this for other functions which check |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# if ($self->{db_version} == 3) { |
|
596
|
|
|
|
|
|
|
# ... |
|
597
|
|
|
|
|
|
|
# $self->{db_version} = 4; # need this for other functions which check |
|
598
|
|
|
|
|
|
|
# } |
|
599
|
|
|
|
|
|
|
# ... and so on. |
|
600
|
|
|
|
|
|
|
|
|
601
|
0
|
|
|
|
|
0
|
return 1; |
|
602
|
|
|
|
|
|
|
} |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
########################################################################### |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
sub untie_db { |
|
607
|
69
|
|
|
69
|
1
|
185
|
my $self = shift; |
|
608
|
|
|
|
|
|
|
|
|
609
|
69
|
100
|
|
|
|
347
|
return if (!$self->{already_tied}); |
|
610
|
|
|
|
|
|
|
|
|
611
|
30
|
|
|
|
|
130
|
dbg("bayes: untie-ing"); |
|
612
|
|
|
|
|
|
|
|
|
613
|
30
|
|
|
|
|
108
|
foreach my $dbname (@DBNAMES) { |
|
614
|
60
|
|
|
|
|
160
|
my $db_var = 'db_'.$dbname; |
|
615
|
|
|
|
|
|
|
|
|
616
|
60
|
100
|
|
|
|
176
|
if (exists $self->{$db_var}) { |
|
617
|
|
|
|
|
|
|
# dbg("bayes: untie-ing $db_var"); |
|
618
|
56
|
|
|
|
|
80
|
untie %{$self->{$db_var}}; |
|
|
56
|
|
|
|
|
939
|
|
|
619
|
56
|
|
|
|
|
237
|
delete $self->{$db_var}; |
|
620
|
|
|
|
|
|
|
} |
|
621
|
|
|
|
|
|
|
} |
|
622
|
|
|
|
|
|
|
|
|
623
|
30
|
100
|
|
|
|
95
|
if ($self->{is_locked}) { |
|
624
|
26
|
|
|
|
|
98
|
dbg("bayes: files locked, now unlocking lock"); |
|
625
|
26
|
|
|
|
|
167
|
$self->{bayes}->{main}->{locker}->safe_unlock ($self->{locked_file}); |
|
626
|
26
|
|
|
|
|
80
|
$self->{is_locked} = 0; |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
30
|
|
|
|
|
109
|
$self->{already_tied} = 0; |
|
630
|
30
|
|
|
|
|
99
|
$self->{db_version} = undef; |
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
########################################################################### |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub calculate_expire_delta { |
|
636
|
0
|
|
|
0
|
1
|
0
|
my ($self, $newest_atime, $start, $max_expire_mult) = @_; |
|
637
|
|
|
|
|
|
|
|
|
638
|
0
|
|
|
|
|
0
|
my %delta; # use a hash since an array is going to be very sparse |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
# do the first pass, figure out atime delta |
|
641
|
0
|
|
|
|
|
0
|
my ($tok, $packed); |
|
642
|
0
|
|
|
|
|
0
|
while (($tok, $packed) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
643
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ MAGIC_RE); # skip magic tokens |
|
644
|
|
|
|
|
|
|
|
|
645
|
0
|
|
|
|
|
0
|
my ($ts, $th, $atime) = $self->tok_unpack ($packed); |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# Go through from $start * 1 to $start * 512, mark how many tokens |
|
648
|
|
|
|
|
|
|
# we would expire |
|
649
|
0
|
|
|
|
|
0
|
my $token_age = $newest_atime - $atime; |
|
650
|
0
|
|
|
|
|
0
|
for (my $i = 1; $i <= $max_expire_mult; $i<<=1) { |
|
651
|
0
|
0
|
|
|
|
0
|
if ($token_age >= $start * $i) { |
|
652
|
0
|
|
|
|
|
0
|
$delta{$i}++; |
|
653
|
|
|
|
|
|
|
} |
|
654
|
|
|
|
|
|
|
else { |
|
655
|
|
|
|
|
|
|
# If the token age is less than the expire delta, it'll be |
|
656
|
|
|
|
|
|
|
# less for all upcoming checks too, so abort early. |
|
657
|
0
|
|
|
|
|
0
|
last; |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
} |
|
661
|
0
|
|
|
|
|
0
|
return %delta; |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
########################################################################### |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
sub token_expiration { |
|
667
|
0
|
|
|
0
|
1
|
0
|
my ($self, $opts, $newdelta, @vars) = @_; |
|
668
|
|
|
|
|
|
|
|
|
669
|
0
|
|
|
|
|
0
|
my $deleted = 0; |
|
670
|
0
|
|
|
|
|
0
|
my $kept = 0; |
|
671
|
0
|
|
|
|
|
0
|
my $num_hapaxes = 0; |
|
672
|
0
|
|
|
|
|
0
|
my $num_lowfreq = 0; |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# since DB_File will not shrink a database (!!), we need to *create* |
|
675
|
|
|
|
|
|
|
# a new one instead. |
|
676
|
0
|
|
|
|
|
0
|
my $main = $self->{bayes}->{main}; |
|
677
|
0
|
|
|
|
|
0
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
# use a temporary PID-based suffix just in case another one was |
|
680
|
|
|
|
|
|
|
# created previously by an interrupted expire |
|
681
|
0
|
|
|
|
|
0
|
my $tmpsuffix = "expire$$"; |
|
682
|
0
|
|
|
|
|
0
|
my $tmpdbname = $path.'_toks.'.$tmpsuffix; |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# clean out any leftover db copies from previous runs |
|
685
|
0
|
|
|
|
|
0
|
for my $ext ($self->DB_EXTENSIONS) { unlink ($tmpdbname.$ext); } |
|
|
0
|
|
|
|
|
0
|
|
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# use O_EXCL to avoid races (bonus paranoia, since we should be locked |
|
688
|
|
|
|
|
|
|
# anyway) |
|
689
|
0
|
|
|
|
|
0
|
my %new_toks; |
|
690
|
0
|
|
|
|
|
0
|
my $umask = umask 0; |
|
691
|
|
|
|
|
|
|
tie %new_toks, $self->DBM_MODULE, $tmpdbname, O_RDWR|O_CREAT|O_EXCL, |
|
692
|
0
|
|
|
|
|
0
|
(oct ($main->{conf}->{bayes_file_mode}) & 0666); |
|
693
|
0
|
|
|
|
|
0
|
umask $umask; |
|
694
|
0
|
|
|
|
|
0
|
my $oldest; |
|
695
|
|
|
|
|
|
|
|
|
696
|
0
|
|
|
|
|
0
|
my $showdots = $opts->{showdots}; |
|
697
|
0
|
0
|
|
|
|
0
|
if ($showdots) { print STDERR "\n"; } |
|
|
0
|
|
|
|
|
0
|
|
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# We've chosen a new atime delta if we've gotten here, so record it |
|
700
|
|
|
|
|
|
|
# for posterity. |
|
701
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $newdelta; |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# Figure out how old is too old... |
|
704
|
0
|
|
|
|
|
0
|
my $too_old = $vars[10] - $newdelta; # tooold = newest - delta |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# Go ahead and do the move to new db/expire run now ... |
|
707
|
0
|
|
|
|
|
0
|
my ($tok, $packed); |
|
708
|
0
|
|
|
|
|
0
|
while (($tok, $packed) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
709
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ MAGIC_RE); # skip magic tokens |
|
710
|
|
|
|
|
|
|
|
|
711
|
0
|
|
|
|
|
0
|
my ($ts, $th, $atime) = $self->tok_unpack ($packed); |
|
712
|
|
|
|
|
|
|
|
|
713
|
0
|
0
|
|
|
|
0
|
if ($atime < $too_old) { |
|
714
|
0
|
|
|
|
|
0
|
$deleted++; |
|
715
|
|
|
|
|
|
|
} |
|
716
|
|
|
|
|
|
|
else { |
|
717
|
|
|
|
|
|
|
# if token atime > newest, reset to newest ... |
|
718
|
0
|
0
|
|
|
|
0
|
if ($atime > $vars[10]) { |
|
719
|
0
|
|
|
|
|
0
|
$atime = $vars[10]; |
|
720
|
|
|
|
|
|
|
} |
|
721
|
|
|
|
|
|
|
|
|
722
|
0
|
|
|
|
|
0
|
$new_toks{$tok} = $self->tok_pack ($ts, $th, $atime); $kept++; |
|
|
0
|
|
|
|
|
0
|
|
|
723
|
0
|
0
|
0
|
|
|
0
|
if (!defined($oldest) || $atime < $oldest) { $oldest = $atime; } |
|
|
0
|
|
|
|
|
0
|
|
|
724
|
0
|
0
|
0
|
|
|
0
|
if ($ts + $th == 1) { |
|
|
|
0
|
|
|
|
|
|
|
725
|
0
|
|
|
|
|
0
|
$num_hapaxes++; |
|
726
|
|
|
|
|
|
|
} elsif ($ts < 8 && $th < 8) { |
|
727
|
0
|
|
|
|
|
0
|
$num_lowfreq++; |
|
728
|
|
|
|
|
|
|
} |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
|
|
731
|
0
|
0
|
|
|
|
0
|
if ((($kept + $deleted) % 1000) == 0) { |
|
732
|
0
|
0
|
|
|
|
0
|
if ($showdots) { print STDERR "."; } |
|
|
0
|
|
|
|
|
0
|
|
|
733
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
734
|
|
|
|
|
|
|
} |
|
735
|
|
|
|
|
|
|
} |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# and add the magic tokens. don't add the expire_running token. |
|
738
|
0
|
|
|
|
|
0
|
$new_toks{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION; |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
# We haven't changed messages of each type seen, so just copy over. |
|
741
|
0
|
|
|
|
|
0
|
$new_toks{$NSPAM_MAGIC_TOKEN} = $vars[1]; |
|
742
|
0
|
|
|
|
|
0
|
$new_toks{$NHAM_MAGIC_TOKEN} = $vars[2]; |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# We magically haven't removed the newest token, so just copy that value over. |
|
745
|
0
|
|
|
|
|
0
|
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $vars[10]; |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# The rest of these have been modified, so replace as necessary. |
|
748
|
0
|
|
|
|
|
0
|
$new_toks{$NTOKENS_MAGIC_TOKEN} = $kept; |
|
749
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = time(); |
|
750
|
0
|
|
|
|
|
0
|
$new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest; |
|
751
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = $deleted; |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
# Sanity check: if we expired too many tokens, abort! |
|
754
|
0
|
0
|
|
|
|
0
|
if ($kept < 100000) { |
|
755
|
0
|
|
|
|
|
0
|
dbg("bayes: token expiration would expire too many tokens, aborting"); |
|
756
|
|
|
|
|
|
|
# set the magic tokens appropriately |
|
757
|
|
|
|
|
|
|
# make sure the next expire run does a first pass |
|
758
|
0
|
|
|
|
|
0
|
$self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN} = time(); |
|
759
|
0
|
|
|
|
|
0
|
$self->{db_toks}->{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0; |
|
760
|
0
|
|
|
|
|
0
|
$self->{db_toks}->{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0; |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# remove the new DB |
|
763
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
764
|
0
|
|
|
|
|
0
|
for my $ext ($self->DB_EXTENSIONS) { unlink ($tmpdbname.$ext); } |
|
|
0
|
|
|
|
|
0
|
|
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# reset the results for the return |
|
767
|
0
|
|
|
|
|
0
|
$kept = $vars[3]; |
|
768
|
0
|
|
|
|
|
0
|
$deleted = 0; |
|
769
|
0
|
|
|
|
|
0
|
$num_hapaxes = 0; |
|
770
|
0
|
|
|
|
|
0
|
$num_lowfreq = 0; |
|
771
|
|
|
|
|
|
|
} |
|
772
|
|
|
|
|
|
|
else { |
|
773
|
|
|
|
|
|
|
# now untie so we can do renames |
|
774
|
0
|
|
|
|
|
0
|
untie %{$self->{db_toks}}; |
|
|
0
|
|
|
|
|
0
|
|
|
775
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
# This is the critical phase (moving files around), so don't allow |
|
778
|
|
|
|
|
|
|
# it to be interrupted. Scope the signal changes. |
|
779
|
|
|
|
|
|
|
{ |
|
780
|
0
|
|
|
|
|
0
|
local $SIG{'INT'} = 'IGNORE'; |
|
|
0
|
|
|
|
|
0
|
|
|
781
|
0
|
|
|
|
|
0
|
local $SIG{'TERM'} = 'IGNORE'; |
|
782
|
0
|
0
|
|
|
|
0
|
local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows(); |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
# now rename in the new one. Try several extensions |
|
785
|
0
|
|
|
|
|
0
|
for my $ext ($self->DB_EXTENSIONS) { |
|
786
|
0
|
|
|
|
|
0
|
my $newf = $tmpdbname.$ext; |
|
787
|
0
|
|
|
|
|
0
|
my $oldf = $path.'_toks'.$ext; |
|
788
|
0
|
0
|
|
|
|
0
|
next unless (-f $newf); |
|
789
|
0
|
0
|
|
|
|
0
|
if (!rename ($newf, $oldf)) { |
|
790
|
0
|
|
|
|
|
0
|
warn "bayes: rename $newf to $oldf failed: $!\n"; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
} |
|
793
|
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
} |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
# Call untie_db() so we unlock correctly. |
|
797
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
798
|
|
|
|
|
|
|
|
|
799
|
0
|
|
|
|
|
0
|
return ($kept, $deleted, $num_hapaxes, $num_lowfreq); |
|
800
|
|
|
|
|
|
|
} |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
########################################################################### |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Is a sync due? |
|
805
|
|
|
|
|
|
|
sub sync_due { |
|
806
|
10
|
|
|
10
|
1
|
31
|
my ($self) = @_; |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
# don't bother doing old db versions |
|
809
|
10
|
50
|
|
|
|
53
|
return 0 if ($self->{db_version} < $self->DB_VERSION); |
|
810
|
|
|
|
|
|
|
|
|
811
|
10
|
|
|
|
|
30
|
my $conf = $self->{bayes}->{main}->{conf}; |
|
812
|
10
|
50
|
|
|
|
37
|
return 0 if ($conf->{bayes_journal_max_size} == 0); |
|
813
|
|
|
|
|
|
|
|
|
814
|
10
|
|
|
|
|
34
|
my @vars = $self->get_storage_variables(); |
|
815
|
10
|
|
|
|
|
53
|
dbg("bayes: DB journal sync: last sync: %s", $vars[7]); |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
## Ok, should we do a sync? |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
# Not if the journal file doesn't exist, it's not a file, or it's 0 |
|
820
|
|
|
|
|
|
|
# bytes long. |
|
821
|
10
|
100
|
66
|
|
|
50
|
return 0 unless (stat($self->_get_journal_filename()) && -f _); |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
# Yes if the file size is larger than the specified maximum size. |
|
824
|
4
|
50
|
|
|
|
291
|
return 1 if (-s _ > $conf->{bayes_journal_max_size}); |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
# Yes there has been a sync before, and if it's been at least a day |
|
827
|
|
|
|
|
|
|
# since that sync. |
|
828
|
4
|
50
|
33
|
|
|
208
|
return 1 if (($vars[7] > 0) && (time - $vars[7] > 86400)); |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
# No, I guess not. |
|
831
|
4
|
|
|
|
|
26
|
return 0; |
|
832
|
|
|
|
|
|
|
} |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
########################################################################### |
|
835
|
|
|
|
|
|
|
# db_seen reading APIs |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
sub seen_get { |
|
838
|
24
|
|
|
24
|
1
|
86
|
my ($self, $msgid) = @_; |
|
839
|
24
|
|
|
|
|
487
|
$self->{db_seen}->{$msgid}; |
|
840
|
|
|
|
|
|
|
} |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
sub seen_put { |
|
843
|
6
|
|
|
6
|
1
|
39
|
my ($self, $msgid, $seen) = @_; |
|
844
|
|
|
|
|
|
|
|
|
845
|
6
|
100
|
|
|
|
33
|
if ($self->{bayes}->{main}->{learn_to_journal}) { |
|
846
|
2
|
|
|
|
|
16
|
$self->defer_update ("m $seen $msgid"); |
|
847
|
|
|
|
|
|
|
} |
|
848
|
|
|
|
|
|
|
else { |
|
849
|
4
|
|
|
|
|
25
|
$self->_seen_put_direct($msgid, $seen); |
|
850
|
|
|
|
|
|
|
} |
|
851
|
|
|
|
|
|
|
} |
|
852
|
|
|
|
|
|
|
sub _seen_put_direct { |
|
853
|
6
|
|
|
6
|
|
32
|
my ($self, $msgid, $seen) = @_; |
|
854
|
6
|
|
|
|
|
213
|
$self->{db_seen}->{$msgid} = $seen; |
|
855
|
|
|
|
|
|
|
} |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
sub seen_delete { |
|
858
|
4
|
|
|
4
|
1
|
24
|
my ($self, $msgid) = @_; |
|
859
|
|
|
|
|
|
|
|
|
860
|
4
|
50
|
|
|
|
24
|
if ($self->{bayes}->{main}->{learn_to_journal}) { |
|
861
|
0
|
|
|
|
|
0
|
$self->defer_update ("m f $msgid"); |
|
862
|
|
|
|
|
|
|
} |
|
863
|
|
|
|
|
|
|
else { |
|
864
|
4
|
|
|
|
|
24
|
$self->_seen_delete_direct($msgid); |
|
865
|
|
|
|
|
|
|
} |
|
866
|
|
|
|
|
|
|
} |
|
867
|
|
|
|
|
|
|
sub _seen_delete_direct { |
|
868
|
4
|
|
|
4
|
|
19
|
my ($self, $msgid) = @_; |
|
869
|
4
|
|
|
|
|
114
|
delete $self->{db_seen}->{$msgid}; |
|
870
|
|
|
|
|
|
|
} |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
########################################################################### |
|
873
|
|
|
|
|
|
|
# db reading APIs |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
sub tok_get { |
|
876
|
3878
|
|
|
3878
|
1
|
7084
|
my ($self, $tok) = @_; |
|
877
|
3878
|
|
|
|
|
11517
|
$self->tok_unpack ($self->{db_toks}->{$tok}); |
|
878
|
|
|
|
|
|
|
} |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
sub tok_get_all { |
|
881
|
6
|
|
|
6
|
1
|
269
|
my ($self, @tokens) = @_; |
|
882
|
|
|
|
|
|
|
|
|
883
|
6
|
|
|
|
|
35
|
my @tokensdata; |
|
884
|
6
|
|
|
|
|
30
|
foreach my $token (@tokens) { |
|
885
|
1658
|
|
|
|
|
5056
|
my ($tok_spam, $tok_ham, $atime) = $self->tok_unpack($self->{db_toks}->{$token}); |
|
886
|
1658
|
|
|
|
|
6471
|
push(@tokensdata, [$token, $tok_spam, $tok_ham, $atime]); |
|
887
|
|
|
|
|
|
|
} |
|
888
|
6
|
|
|
|
|
88
|
return \@tokensdata; |
|
889
|
|
|
|
|
|
|
} |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
# return the magic tokens in a specific order: |
|
892
|
|
|
|
|
|
|
# 0: scan count base |
|
893
|
|
|
|
|
|
|
# 1: number of spam |
|
894
|
|
|
|
|
|
|
# 2: number of ham |
|
895
|
|
|
|
|
|
|
# 3: number of tokens in db |
|
896
|
|
|
|
|
|
|
# 4: last expire atime |
|
897
|
|
|
|
|
|
|
# 5: oldest token in db atime |
|
898
|
|
|
|
|
|
|
# 6: db version value |
|
899
|
|
|
|
|
|
|
# 7: last journal sync |
|
900
|
|
|
|
|
|
|
# 8: last atime delta |
|
901
|
|
|
|
|
|
|
# 9: last expire reduction count |
|
902
|
|
|
|
|
|
|
# 10: newest token in db atime |
|
903
|
|
|
|
|
|
|
# |
|
904
|
|
|
|
|
|
|
sub get_storage_variables { |
|
905
|
64
|
|
|
64
|
1
|
151
|
my ($self) = @_; |
|
906
|
64
|
|
|
|
|
110
|
my @values; |
|
907
|
|
|
|
|
|
|
|
|
908
|
64
|
|
|
|
|
1219
|
my $db_ver = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN}; |
|
909
|
|
|
|
|
|
|
|
|
910
|
64
|
100
|
66
|
|
|
602
|
if (!$db_ver || $db_ver =~ /\D/) { $db_ver = 0; } |
|
|
4
|
|
|
|
|
12
|
|
|
911
|
|
|
|
|
|
|
|
|
912
|
64
|
100
|
|
|
|
221
|
if ($db_ver >= 2) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
913
|
60
|
|
|
|
|
139
|
my $DB2_LAST_ATIME_DELTA_MAGIC_TOKEN = "\015\001\007\011\003LASTATIMEDELTA"; |
|
914
|
60
|
|
|
|
|
131
|
my $DB2_LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE"; |
|
915
|
60
|
|
|
|
|
131
|
my $DB2_LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE"; |
|
916
|
60
|
|
|
|
|
106
|
my $DB2_LAST_JOURNAL_SYNC_MAGIC_TOKEN = "\015\001\007\011\003LASTJOURNALSYNC"; |
|
917
|
60
|
|
|
|
|
99
|
my $DB2_NEWEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003NEWESTAGE"; |
|
918
|
60
|
|
|
|
|
108
|
my $DB2_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM"; |
|
919
|
60
|
|
|
|
|
93
|
my $DB2_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM"; |
|
920
|
60
|
|
|
|
|
97
|
my $DB2_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS"; |
|
921
|
60
|
|
|
|
|
110
|
my $DB2_OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE"; |
|
922
|
60
|
|
|
|
|
102
|
my $DB2_RUNNING_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003RUNNINGEXPIRE"; |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
@values = ( |
|
925
|
|
|
|
|
|
|
0, |
|
926
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_NSPAM_MAGIC_TOKEN}, |
|
927
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_NHAM_MAGIC_TOKEN}, |
|
928
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_NTOKENS_MAGIC_TOKEN}, |
|
929
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_LAST_EXPIRE_MAGIC_TOKEN}, |
|
930
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_OLDEST_TOKEN_AGE_MAGIC_TOKEN}, |
|
931
|
|
|
|
|
|
|
$db_ver, |
|
932
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_LAST_JOURNAL_SYNC_MAGIC_TOKEN}, |
|
933
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_LAST_ATIME_DELTA_MAGIC_TOKEN}, |
|
934
|
|
|
|
|
|
|
$self->{db_toks}->{$DB2_LAST_EXPIRE_REDUCE_MAGIC_TOKEN}, |
|
935
|
60
|
|
|
|
|
4474
|
$self->{db_toks}->{$DB2_NEWEST_TOKEN_AGE_MAGIC_TOKEN}, |
|
936
|
|
|
|
|
|
|
); |
|
937
|
|
|
|
|
|
|
} |
|
938
|
|
|
|
|
|
|
elsif ($db_ver == 0) { |
|
939
|
4
|
|
|
|
|
20
|
my $DB0_NSPAM_MAGIC_TOKEN = '**NSPAM'; |
|
940
|
4
|
|
|
|
|
18
|
my $DB0_NHAM_MAGIC_TOKEN = '**NHAM'; |
|
941
|
4
|
|
|
|
|
17
|
my $DB0_OLDEST_TOKEN_AGE_MAGIC_TOKEN = '**OLDESTAGE'; |
|
942
|
4
|
|
|
|
|
7
|
my $DB0_LAST_EXPIRE_MAGIC_TOKEN = '**LASTEXPIRE'; |
|
943
|
4
|
|
|
|
|
9
|
my $DB0_NTOKENS_MAGIC_TOKEN = '**NTOKENS'; |
|
944
|
4
|
|
|
|
|
17
|
my $DB0_SCANCOUNT_BASE_MAGIC_TOKEN = '**SCANBASE'; |
|
945
|
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
@values = ( |
|
947
|
|
|
|
|
|
|
$self->{db_toks}->{$DB0_SCANCOUNT_BASE_MAGIC_TOKEN}, |
|
948
|
|
|
|
|
|
|
$self->{db_toks}->{$DB0_NSPAM_MAGIC_TOKEN}, |
|
949
|
|
|
|
|
|
|
$self->{db_toks}->{$DB0_NHAM_MAGIC_TOKEN}, |
|
950
|
|
|
|
|
|
|
$self->{db_toks}->{$DB0_NTOKENS_MAGIC_TOKEN}, |
|
951
|
|
|
|
|
|
|
$self->{db_toks}->{$DB0_LAST_EXPIRE_MAGIC_TOKEN}, |
|
952
|
4
|
|
|
|
|
75
|
$self->{db_toks}->{$DB0_OLDEST_TOKEN_AGE_MAGIC_TOKEN}, |
|
953
|
|
|
|
|
|
|
0, |
|
954
|
|
|
|
|
|
|
0, |
|
955
|
|
|
|
|
|
|
0, |
|
956
|
|
|
|
|
|
|
0, |
|
957
|
|
|
|
|
|
|
0, |
|
958
|
|
|
|
|
|
|
); |
|
959
|
|
|
|
|
|
|
} |
|
960
|
|
|
|
|
|
|
elsif ($db_ver == 1) { |
|
961
|
0
|
|
|
|
|
0
|
my $DB1_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM"; |
|
962
|
0
|
|
|
|
|
0
|
my $DB1_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM"; |
|
963
|
0
|
|
|
|
|
0
|
my $DB1_OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE"; |
|
964
|
0
|
|
|
|
|
0
|
my $DB1_LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE"; |
|
965
|
0
|
|
|
|
|
0
|
my $DB1_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS"; |
|
966
|
0
|
|
|
|
|
0
|
my $DB1_SCANCOUNT_BASE_MAGIC_TOKEN = "\015\001\007\011\003SCANBASE"; |
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
@values = ( |
|
969
|
|
|
|
|
|
|
$self->{db_toks}->{$DB1_SCANCOUNT_BASE_MAGIC_TOKEN}, |
|
970
|
|
|
|
|
|
|
$self->{db_toks}->{$DB1_NSPAM_MAGIC_TOKEN}, |
|
971
|
|
|
|
|
|
|
$self->{db_toks}->{$DB1_NHAM_MAGIC_TOKEN}, |
|
972
|
|
|
|
|
|
|
$self->{db_toks}->{$DB1_NTOKENS_MAGIC_TOKEN}, |
|
973
|
|
|
|
|
|
|
$self->{db_toks}->{$DB1_LAST_EXPIRE_MAGIC_TOKEN}, |
|
974
|
0
|
|
|
|
|
0
|
$self->{db_toks}->{$DB1_OLDEST_TOKEN_AGE_MAGIC_TOKEN}, |
|
975
|
|
|
|
|
|
|
1, |
|
976
|
|
|
|
|
|
|
0, |
|
977
|
|
|
|
|
|
|
0, |
|
978
|
|
|
|
|
|
|
0, |
|
979
|
|
|
|
|
|
|
0, |
|
980
|
|
|
|
|
|
|
); |
|
981
|
|
|
|
|
|
|
} |
|
982
|
|
|
|
|
|
|
|
|
983
|
64
|
|
|
|
|
401
|
foreach (@values) { |
|
984
|
704
|
100
|
66
|
|
|
1966
|
if (!$_ || $_ =~ /\D/) { |
|
985
|
414
|
|
|
|
|
739
|
$_ = 0; |
|
986
|
|
|
|
|
|
|
} |
|
987
|
|
|
|
|
|
|
} |
|
988
|
|
|
|
|
|
|
|
|
989
|
64
|
|
|
|
|
420
|
return @values; |
|
990
|
|
|
|
|
|
|
} |
|
991
|
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
sub dump_db_toks { |
|
993
|
0
|
|
|
0
|
1
|
0
|
my ($self, $template, $regex, @vars) = @_; |
|
994
|
|
|
|
|
|
|
|
|
995
|
0
|
|
|
|
|
0
|
while (my ($tok, $tokvalue) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
996
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ MAGIC_RE); # skip magic tokens |
|
997
|
0
|
0
|
0
|
|
|
0
|
next if (defined $regex && ($tok !~ /$regex/o)); |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
# We have the value already, so just unpack it. |
|
1000
|
0
|
|
|
|
|
0
|
my ($ts, $th, $atime) = $self->tok_unpack ($tokvalue); |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
0
|
|
|
|
|
0
|
my $prob = $self->{bayes}->_compute_prob_for_token($tok, $vars[1], $vars[2], $ts, $th); |
|
1003
|
0
|
|
0
|
|
|
0
|
$prob ||= 0.5; |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
0
|
|
|
|
|
0
|
my $encoded_tok = unpack("H*",$tok); |
|
1006
|
0
|
|
|
|
|
0
|
printf $template,$prob,$ts,$th,$atime,$encoded_tok; |
|
1007
|
|
|
|
|
|
|
} |
|
1008
|
|
|
|
|
|
|
} |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
sub set_last_expire { |
|
1011
|
0
|
|
|
0
|
1
|
0
|
my ($self, $time) = @_; |
|
1012
|
0
|
|
|
|
|
0
|
$self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN} = time(); |
|
1013
|
|
|
|
|
|
|
} |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
## Don't bother using get_magic_tokens here. This token should only |
|
1016
|
|
|
|
|
|
|
## ever exist when we're running expire, so we don't want to convert it if |
|
1017
|
|
|
|
|
|
|
## it's there and we're not expiring ... |
|
1018
|
|
|
|
|
|
|
sub get_running_expire_tok { |
|
1019
|
10
|
|
|
10
|
1
|
31
|
my ($self) = @_; |
|
1020
|
10
|
|
|
|
|
201
|
my $running = $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN}; |
|
1021
|
10
|
50
|
33
|
|
|
77
|
if (!$running || $running =~ /\D/) { return; } |
|
|
10
|
|
|
|
|
41
|
|
|
1022
|
0
|
|
|
|
|
0
|
return $running; |
|
1023
|
|
|
|
|
|
|
} |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
sub set_running_expire_tok { |
|
1026
|
2
|
|
|
2
|
1
|
8
|
my ($self) = @_; |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
# update the lock and running expire magic token |
|
1029
|
2
|
|
|
|
|
17
|
$self->{bayes}->{main}->{locker}->refresh_lock ($self->{locked_file}); |
|
1030
|
2
|
|
|
|
|
55
|
$self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN} = time(); |
|
1031
|
|
|
|
|
|
|
} |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
sub remove_running_expire_tok { |
|
1034
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
1035
|
0
|
|
|
|
|
0
|
delete $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN}; |
|
1036
|
|
|
|
|
|
|
} |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
########################################################################### |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# db abstraction: allow deferred writes, since we will be frequently |
|
1041
|
|
|
|
|
|
|
# writing while checking. |
|
1042
|
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub tok_count_change { |
|
1044
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ds, $dh, $tok, $atime) = @_; |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
0
|
0
|
|
|
|
0
|
$atime = 0 unless defined $atime; |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
0
|
0
|
|
|
|
0
|
if ($self->{bayes}->{main}->{learn_to_journal}) { |
|
1049
|
|
|
|
|
|
|
# we can't store the SHA1 binary value in the journal, so convert it |
|
1050
|
|
|
|
|
|
|
# to a printable value that can be converted back later |
|
1051
|
0
|
|
|
|
|
0
|
my $encoded_tok = unpack("H*",$tok); |
|
1052
|
0
|
|
|
|
|
0
|
$self->defer_update ("c $ds $dh $atime $encoded_tok"); |
|
1053
|
|
|
|
|
|
|
} else { |
|
1054
|
0
|
|
|
|
|
0
|
$self->tok_sync_counters ($ds, $dh, $atime, $tok); |
|
1055
|
|
|
|
|
|
|
} |
|
1056
|
|
|
|
|
|
|
} |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
sub multi_tok_count_change { |
|
1059
|
10
|
|
|
10
|
1
|
49
|
my ($self, $ds, $dh, $tokens, $atime) = @_; |
|
1060
|
|
|
|
|
|
|
|
|
1061
|
10
|
100
|
|
|
|
49
|
$atime = 0 unless defined $atime; |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
10
|
|
|
|
|
23
|
foreach my $tok (keys %{$tokens}) { |
|
|
10
|
|
|
|
|
343
|
|
|
1064
|
2770
|
100
|
|
|
|
9010
|
if ($self->{bayes}->{main}->{learn_to_journal}) { |
|
1065
|
|
|
|
|
|
|
# we can't store the SHA1 binary value in the journal, so convert it |
|
1066
|
|
|
|
|
|
|
# to a printable value that can be converted back later |
|
1067
|
554
|
|
|
|
|
1018
|
my $encoded_tok = unpack("H*",$tok); |
|
1068
|
554
|
|
|
|
|
1856
|
$self->defer_update ("c $ds $dh $atime $encoded_tok"); |
|
1069
|
|
|
|
|
|
|
} else { |
|
1070
|
2216
|
|
|
|
|
5421
|
$self->tok_sync_counters ($ds, $dh, $atime, $tok); |
|
1071
|
|
|
|
|
|
|
} |
|
1072
|
|
|
|
|
|
|
} |
|
1073
|
|
|
|
|
|
|
} |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
sub nspam_nham_get { |
|
1076
|
10
|
|
|
10
|
1
|
36
|
my ($self) = @_; |
|
1077
|
10
|
|
|
|
|
33
|
my @vars = $self->get_storage_variables(); |
|
1078
|
10
|
|
|
|
|
60
|
($vars[1], $vars[2]); |
|
1079
|
|
|
|
|
|
|
} |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
sub nspam_nham_change { |
|
1082
|
10
|
|
|
10
|
1
|
35
|
my ($self, $ds, $dh) = @_; |
|
1083
|
|
|
|
|
|
|
|
|
1084
|
10
|
100
|
|
|
|
43
|
if ($self->{bayes}->{main}->{learn_to_journal}) { |
|
1085
|
2
|
|
|
|
|
73
|
$self->defer_update ("n $ds $dh"); |
|
1086
|
|
|
|
|
|
|
} else { |
|
1087
|
8
|
|
|
|
|
39
|
$self->tok_sync_nspam_nham ($ds, $dh); |
|
1088
|
|
|
|
|
|
|
} |
|
1089
|
|
|
|
|
|
|
} |
|
1090
|
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
sub tok_touch { |
|
1092
|
0
|
|
|
0
|
1
|
0
|
my ($self, $tok, $atime) = @_; |
|
1093
|
|
|
|
|
|
|
# we can't store the SHA1 binary value in the journal, so convert it |
|
1094
|
|
|
|
|
|
|
# to a printable value that can be converted back later |
|
1095
|
0
|
|
|
|
|
0
|
my $encoded_tok = unpack("H*", $tok); |
|
1096
|
0
|
|
|
|
|
0
|
$self->defer_update ("t $atime $encoded_tok"); |
|
1097
|
|
|
|
|
|
|
} |
|
1098
|
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
sub tok_touch_all { |
|
1100
|
4
|
|
|
4
|
1
|
20
|
my ($self, $tokens, $atime) = @_; |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
4
|
|
|
|
|
18
|
foreach my $token (@{$tokens}) { |
|
|
4
|
|
|
|
|
23
|
|
|
1103
|
|
|
|
|
|
|
# we can't store the SHA1 binary value in the journal, so convert it |
|
1104
|
|
|
|
|
|
|
# to a printable value that can be converted back later |
|
1105
|
282
|
|
|
|
|
590
|
my $encoded_tok = unpack("H*", $token); |
|
1106
|
282
|
|
|
|
|
776
|
$self->defer_update ("t $atime $encoded_tok"); |
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
} |
|
1109
|
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
sub defer_update { |
|
1111
|
840
|
|
|
840
|
0
|
1922
|
my ($self, $str) = @_; |
|
1112
|
840
|
|
|
|
|
2628
|
$self->{string_to_journal} .= "$str\n"; |
|
1113
|
|
|
|
|
|
|
} |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
########################################################################### |
|
1116
|
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
sub cleanup { |
|
1118
|
14
|
|
|
14
|
1
|
51
|
my ($self) = @_; |
|
1119
|
|
|
|
|
|
|
|
|
1120
|
14
|
|
|
|
|
68
|
my $nbytes = length ($self->{string_to_journal}); |
|
1121
|
14
|
100
|
|
|
|
75
|
return if ($nbytes == 0); |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
6
|
|
|
|
|
50
|
my $path = $self->_get_journal_filename(); |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
# use append mode, write atomically, then close, so simultaneous updates are |
|
1126
|
|
|
|
|
|
|
# not lost |
|
1127
|
6
|
|
|
|
|
32
|
my $conf = $self->{bayes}->{main}->{conf}; |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
# set the umask to the inverse of what we want ... |
|
1130
|
6
|
|
|
|
|
58
|
my $umask = umask(0777 - (oct ($conf->{bayes_file_mode}) & 0666)); |
|
1131
|
|
|
|
|
|
|
|
|
1132
|
6
|
50
|
|
|
|
581
|
if (!open (OUT, ">>".$path)) { |
|
1133
|
0
|
|
|
|
|
0
|
warn "bayes: cannot write to $path, bayes db update ignored: $!\n"; |
|
1134
|
0
|
|
|
|
|
0
|
umask $umask; # reset umask |
|
1135
|
0
|
|
|
|
|
0
|
return; |
|
1136
|
|
|
|
|
|
|
} |
|
1137
|
6
|
|
|
|
|
51
|
umask $umask; # reset umask |
|
1138
|
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
# do not use print() here, it will break up the buffer if it's >8192 bytes, |
|
1140
|
|
|
|
|
|
|
# which could result in two sets of tokens getting mixed up and their |
|
1141
|
|
|
|
|
|
|
# touches missed. |
|
1142
|
6
|
|
|
|
|
28
|
my $write_failure = 0; |
|
1143
|
6
|
|
|
|
|
26
|
my $original_point = tell OUT; |
|
1144
|
6
|
50
|
|
|
|
32
|
$original_point >= 0 or die "Can't obtain file position: $!"; |
|
1145
|
6
|
|
|
|
|
16
|
my $len; |
|
1146
|
6
|
|
|
|
|
12
|
do { |
|
1147
|
6
|
|
|
|
|
244
|
$len = syswrite (OUT, $self->{string_to_journal}, $nbytes); |
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
# argh, write failure, give up |
|
1150
|
6
|
50
|
33
|
|
|
107
|
if (!defined $len || $len < 0) { |
|
1151
|
0
|
|
|
|
|
0
|
my $err = ''; |
|
1152
|
0
|
0
|
|
|
|
0
|
if (!defined $len) { |
|
1153
|
0
|
|
|
|
|
0
|
$len = 0; |
|
1154
|
0
|
|
|
|
|
0
|
$err = " ($!)"; |
|
1155
|
|
|
|
|
|
|
} |
|
1156
|
0
|
|
|
|
|
0
|
warn "bayes: write failed to Bayes journal $path ($len of $nbytes)!$err\n"; |
|
1157
|
0
|
|
|
|
|
0
|
last; |
|
1158
|
|
|
|
|
|
|
} |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
# This shouldn't happen, but could if the fs is full... |
|
1161
|
6
|
50
|
|
|
|
94
|
if ($len != $nbytes) { |
|
1162
|
0
|
|
|
|
|
0
|
warn "bayes: partial write to bayes journal $path ($len of $nbytes), recovering\n"; |
|
1163
|
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
# we want to be atomic, so revert the journal file back to where |
|
1165
|
|
|
|
|
|
|
# we know it's "good". if we can't truncate the journal, or we've |
|
1166
|
|
|
|
|
|
|
# tried 5 times to do the write, abort! |
|
1167
|
0
|
0
|
0
|
|
|
0
|
if (!truncate(OUT, $original_point) || ($write_failure++ > 4)) { |
|
1168
|
0
|
|
|
|
|
0
|
warn "bayes: cannot write to bayes journal $path, aborting!\n"; |
|
1169
|
0
|
|
|
|
|
0
|
last; |
|
1170
|
|
|
|
|
|
|
} |
|
1171
|
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
# if the fs is full, let's give the system a break |
|
1173
|
0
|
|
|
|
|
0
|
sleep 1; |
|
1174
|
|
|
|
|
|
|
} |
|
1175
|
|
|
|
|
|
|
} while ($len != $nbytes); |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
6
|
50
|
|
|
|
103
|
if (!close OUT) { |
|
1178
|
0
|
|
|
|
|
0
|
warn "bayes: cannot write to $path, bayes db update ignored\n"; |
|
1179
|
|
|
|
|
|
|
} |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
6
|
|
|
|
|
46
|
$self->{string_to_journal} = ''; |
|
1182
|
|
|
|
|
|
|
} |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
# Return a qr'd RE to match a token with the correct format's magic token |
|
1185
|
|
|
|
|
|
|
sub get_magic_re { |
|
1186
|
532
|
|
|
532
|
1
|
1011
|
my ($self) = @_; |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
532
|
50
|
66
|
|
|
2029
|
if (!defined $self->{db_version} || $self->{db_version} >= 1) { |
|
1189
|
532
|
|
|
|
|
1410
|
return MAGIC_RE; |
|
1190
|
|
|
|
|
|
|
} |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
# When in doubt, assume v0 |
|
1193
|
0
|
|
|
|
|
0
|
return qr/^\*\*[A-Z]+$/; |
|
1194
|
|
|
|
|
|
|
} |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# provide a more generalized public interface into the journal sync |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
sub sync { |
|
1199
|
2
|
|
|
2
|
1
|
7
|
my ($self, $opts) = @_; |
|
1200
|
|
|
|
|
|
|
|
|
1201
|
2
|
|
|
|
|
36
|
return $self->_sync_journal($opts); |
|
1202
|
|
|
|
|
|
|
} |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
########################################################################### |
|
1205
|
|
|
|
|
|
|
# And this method reads the journal and applies the changes in one |
|
1206
|
|
|
|
|
|
|
# (locked) transaction. |
|
1207
|
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
sub _sync_journal { |
|
1209
|
2
|
|
|
2
|
|
11
|
my ($self, $opts) = @_; |
|
1210
|
2
|
|
|
|
|
10
|
my $ret = 0; |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
2
|
|
|
|
|
11
|
my $path = $self->_get_journal_filename(); |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# if $path doesn't exist, or it's not a file, or is 0 bytes in length, return |
|
1215
|
2
|
50
|
33
|
|
|
85
|
if (!stat($path) || !-f _ || -z _) { |
|
|
|
|
33
|
|
|
|
|
|
1216
|
0
|
|
|
|
|
0
|
return 0; |
|
1217
|
|
|
|
|
|
|
} |
|
1218
|
|
|
|
|
|
|
|
|
1219
|
2
|
|
|
|
|
12
|
my $eval_stat; |
|
1220
|
|
|
|
|
|
|
eval { |
|
1221
|
2
|
|
|
|
|
13
|
local $SIG{'__DIE__'}; # do not run user die() traps in here |
|
1222
|
2
|
50
|
|
|
|
18
|
if ($self->tie_db_writable()) { |
|
1223
|
2
|
|
|
|
|
32
|
$ret = $self->_sync_journal_trapped($opts, $path); |
|
1224
|
|
|
|
|
|
|
} |
|
1225
|
2
|
|
|
|
|
17
|
1; |
|
1226
|
2
|
50
|
|
|
|
6
|
} or do { |
|
1227
|
0
|
0
|
|
|
|
0
|
$eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; |
|
|
0
|
|
|
|
|
0
|
|
|
1228
|
|
|
|
|
|
|
}; |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
# ok, untie from write-mode if we can |
|
1231
|
2
|
50
|
|
|
|
18
|
if (!$self->{bayes}->{main}->{learn_caller_will_untie}) { |
|
1232
|
2
|
|
|
|
|
15
|
$self->untie_db(); |
|
1233
|
|
|
|
|
|
|
} |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
# handle any errors that may have occurred |
|
1236
|
2
|
50
|
|
|
|
19
|
if (defined $eval_stat) { |
|
1237
|
0
|
|
|
|
|
0
|
warn "bayes: $eval_stat\n"; |
|
1238
|
0
|
|
|
|
|
0
|
return 0; |
|
1239
|
|
|
|
|
|
|
} |
|
1240
|
|
|
|
|
|
|
|
|
1241
|
2
|
|
|
|
|
12
|
$ret; |
|
1242
|
|
|
|
|
|
|
} |
|
1243
|
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
sub _sync_journal_trapped { |
|
1245
|
2
|
|
|
2
|
|
17
|
my ($self, $opts, $path) = @_; |
|
1246
|
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
# Flag that we're doing work |
|
1248
|
2
|
|
|
|
|
21
|
$self->set_running_expire_tok(); |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
2
|
|
|
|
|
12
|
my $started = time(); |
|
1251
|
2
|
|
|
|
|
7
|
my $count = 0; |
|
1252
|
2
|
|
|
|
|
6
|
my $total_count = 0; |
|
1253
|
2
|
|
|
|
|
9
|
my %tokens; |
|
1254
|
2
|
|
|
|
|
9
|
my $showdots = $opts->{showdots}; |
|
1255
|
2
|
|
|
|
|
9
|
my $retirepath = $path.".old"; |
|
1256
|
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
# if $path doesn't exist, or it's not a file, or is 0 bytes in length, |
|
1258
|
|
|
|
|
|
|
# return we have to check again since the file may have been removed |
|
1259
|
|
|
|
|
|
|
# by a recent bayes db upgrade ... |
|
1260
|
2
|
50
|
33
|
|
|
94
|
if (!stat($path) || !-f _ || -z _) { |
|
|
|
|
33
|
|
|
|
|
|
1261
|
0
|
|
|
|
|
0
|
return 0; |
|
1262
|
|
|
|
|
|
|
} |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
2
|
50
|
|
|
|
39
|
if (!-r $path) { # will we be able to read the file? |
|
1265
|
0
|
|
|
|
|
0
|
warn "bayes: bad permissions on journal, can't read: $path\n"; |
|
1266
|
0
|
|
|
|
|
0
|
return 0; |
|
1267
|
|
|
|
|
|
|
} |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# This is the critical phase (moving files around), so don't allow |
|
1270
|
|
|
|
|
|
|
# it to be interrupted. |
|
1271
|
|
|
|
|
|
|
{ |
|
1272
|
2
|
|
|
|
|
5
|
local $SIG{'INT'} = 'IGNORE'; |
|
|
2
|
|
|
|
|
62
|
|
|
1273
|
2
|
|
|
|
|
57
|
local $SIG{'TERM'} = 'IGNORE'; |
|
1274
|
2
|
50
|
|
|
|
17
|
local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows(); |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
# retire the journal, so we can update the db files from it in peace. |
|
1277
|
|
|
|
|
|
|
# TODO: use locking here |
|
1278
|
2
|
50
|
|
|
|
125
|
if (!rename ($path, $retirepath)) { |
|
1279
|
0
|
|
|
|
|
0
|
warn "bayes: failed rename $path to $retirepath\n"; |
|
1280
|
0
|
|
|
|
|
0
|
return 0; |
|
1281
|
|
|
|
|
|
|
} |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
# now read the retired journal |
|
1284
|
2
|
|
|
|
|
10
|
local *JOURNAL; |
|
1285
|
2
|
50
|
|
|
|
80
|
if (!open (JOURNAL, "<$retirepath")) { |
|
1286
|
0
|
|
|
|
|
0
|
warn "bayes: cannot open read $retirepath\n"; |
|
1287
|
0
|
|
|
|
|
0
|
return 0; |
|
1288
|
|
|
|
|
|
|
} |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
# Read the journal |
|
1292
|
2
|
|
|
|
|
48
|
for ($!=0; defined($_=<JOURNAL>); $!=0) { |
|
1293
|
558
|
|
|
|
|
1035
|
$total_count++; |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
558
|
50
|
|
|
|
3969
|
if (/^t (\d+) (.+)$/) { # Token timestamp update, cache resultant entries |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1296
|
0
|
|
|
|
|
0
|
my $tok = pack("H*",$2); |
|
1297
|
0
|
0
|
0
|
|
|
0
|
$tokens{$tok} = $1+0 if (!exists $tokens{$tok} || $1+0 > $tokens{$tok}); |
|
1298
|
|
|
|
|
|
|
} elsif (/^c (-?\d+) (-?\d+) (\d+) (.+)$/) { # Add/full token update |
|
1299
|
554
|
|
|
|
|
2189
|
my $tok = pack("H*",$4); |
|
1300
|
554
|
|
|
|
|
3367
|
$self->tok_sync_counters ($1+0, $2+0, $3+0, $tok); |
|
1301
|
554
|
|
|
|
|
4438
|
$count++; |
|
1302
|
|
|
|
|
|
|
} elsif (/^n (-?\d+) (-?\d+)$/) { # update ham/spam count |
|
1303
|
2
|
|
|
|
|
25
|
$self->tok_sync_nspam_nham ($1+0, $2+0); |
|
1304
|
2
|
|
|
|
|
23
|
$count++; |
|
1305
|
|
|
|
|
|
|
} elsif (/^m ([hsf]) (.+)$/) { # update msgid seen database |
|
1306
|
2
|
50
|
|
|
|
26
|
if ($1 eq "f") { |
|
1307
|
0
|
|
|
|
|
0
|
$self->_seen_delete_direct($2); |
|
1308
|
|
|
|
|
|
|
} |
|
1309
|
|
|
|
|
|
|
else { |
|
1310
|
2
|
|
|
|
|
25
|
$self->_seen_put_direct($2,$1); |
|
1311
|
|
|
|
|
|
|
} |
|
1312
|
2
|
|
|
|
|
35
|
$count++; |
|
1313
|
|
|
|
|
|
|
} else { |
|
1314
|
0
|
|
|
|
|
0
|
warn "bayes: gibberish entry found in journal: $_"; |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
|
|
|
|
|
|
} |
|
1317
|
2
|
0
|
33
|
|
|
49
|
defined $_ || $!==0 or |
|
|
|
50
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
$!==EBADF ? dbg("bayes: error reading journal file: $!") |
|
1319
|
|
|
|
|
|
|
: die "error reading journal file: $!"; |
|
1320
|
2
|
50
|
|
|
|
45
|
close(JOURNAL) or die "Can't close journal file: $!"; |
|
1321
|
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
# Now that we've determined what tokens we need to update and their |
|
1323
|
|
|
|
|
|
|
# final values, update the DB. Should be much smaller than the full |
|
1324
|
|
|
|
|
|
|
# journal entries. |
|
1325
|
2
|
|
|
|
|
21
|
while (my ($k,$v) = each %tokens) { |
|
1326
|
0
|
|
|
|
|
0
|
$self->tok_touch_token ($v, $k); |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
0
|
0
|
|
|
|
0
|
if ((++$count % 1000) == 0) { |
|
1329
|
0
|
0
|
|
|
|
0
|
if ($showdots) { print STDERR "."; } |
|
|
0
|
|
|
|
|
0
|
|
|
1330
|
0
|
|
|
|
|
0
|
$self->set_running_expire_tok(); |
|
1331
|
|
|
|
|
|
|
} |
|
1332
|
|
|
|
|
|
|
} |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
2
|
50
|
|
|
|
16
|
if ($showdots) { print STDERR "\n"; } |
|
|
0
|
|
|
|
|
0
|
|
|
1335
|
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
# we're all done, so unlink the old journal file |
|
1337
|
2
|
50
|
|
|
|
181
|
unlink ($retirepath) || warn "bayes: can't unlink $retirepath: $!\n"; |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
2
|
|
|
|
|
56
|
$self->{db_toks}->{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $started; |
|
1340
|
|
|
|
|
|
|
|
|
1341
|
2
|
|
|
|
|
13
|
my $done = time(); |
|
1342
|
2
|
|
|
|
|
23
|
my $msg = ("bayes: synced databases from journal in " . |
|
1343
|
|
|
|
|
|
|
($done - $started) . |
|
1344
|
|
|
|
|
|
|
" seconds: $count unique entries ($total_count total entries)"); |
|
1345
|
|
|
|
|
|
|
|
|
1346
|
2
|
50
|
|
|
|
27
|
if ($opts->{verbose}) { |
|
1347
|
0
|
|
|
|
|
0
|
print $msg,"\n"; |
|
1348
|
|
|
|
|
|
|
} else { |
|
1349
|
2
|
|
|
|
|
22
|
dbg($msg); |
|
1350
|
|
|
|
|
|
|
} |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
# else, that's the lot, we're synced. return |
|
1354
|
2
|
|
|
|
|
25
|
return 1; |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
sub tok_touch_token { |
|
1358
|
0
|
|
|
0
|
0
|
0
|
my ($self, $atime, $tok) = @_; |
|
1359
|
0
|
|
|
|
|
0
|
my ($ts, $th, $oldatime) = $self->tok_get ($tok); |
|
1360
|
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
# If the new atime is < the old atime, ignore the update |
|
1362
|
|
|
|
|
|
|
# We figure that we'll never want to lower a token atime, so abort if |
|
1363
|
|
|
|
|
|
|
# we try. (journal out of sync, etc.) |
|
1364
|
0
|
0
|
|
|
|
0
|
return if ($oldatime >= $atime); |
|
1365
|
|
|
|
|
|
|
|
|
1366
|
0
|
|
|
|
|
0
|
$self->tok_put ($tok, $ts, $th, $atime); |
|
1367
|
|
|
|
|
|
|
} |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
sub tok_sync_counters { |
|
1370
|
2770
|
|
|
2770
|
0
|
7841
|
my ($self, $ds, $dh, $atime, $tok) = @_; |
|
1371
|
2770
|
|
|
|
|
6759
|
my ($ts, $th, $oldatime) = $self->tok_get ($tok); |
|
1372
|
2770
|
50
|
|
|
|
6869
|
$ts += $ds; if ($ts < 0) { $ts = 0; } |
|
|
2770
|
|
|
|
|
5785
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1373
|
2770
|
50
|
|
|
|
4067
|
$th += $dh; if ($th < 0) { $th = 0; } |
|
|
2770
|
|
|
|
|
5151
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1374
|
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
# Don't roll the atime of tokens backwards ... |
|
1376
|
2770
|
100
|
|
|
|
6669
|
$atime = $oldatime if ($oldatime > $atime); |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
2770
|
|
|
|
|
6621
|
$self->tok_put ($tok, $ts, $th, $atime); |
|
1379
|
|
|
|
|
|
|
} |
|
1380
|
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
sub tok_put { |
|
1382
|
2770
|
|
|
2770
|
0
|
7390
|
my ($self, $tok, $ts, $th, $atime) = @_; |
|
1383
|
2770
|
|
100
|
|
|
8951
|
$ts ||= 0; |
|
1384
|
2770
|
|
100
|
|
|
8761
|
$th ||= 0; |
|
1385
|
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
# Ignore magic tokens, the don't go in this way ... |
|
1387
|
2770
|
50
|
|
|
|
7141
|
return if ($tok =~ MAGIC_RE); |
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
# use defined() rather than exists(); the latter is not supported |
|
1390
|
|
|
|
|
|
|
# by NDBM_File, believe it or not. Using defined() did not |
|
1391
|
|
|
|
|
|
|
# indicate any noticeable speed hit in my testing. (Mar 31 2003 jm) |
|
1392
|
2770
|
|
|
|
|
12717
|
my $exists_already = defined $self->{db_toks}->{$tok}; |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
2770
|
100
|
100
|
|
|
10274
|
if ($ts == 0 && $th == 0) { |
|
1395
|
1108
|
50
|
|
|
|
2255
|
return if (!$exists_already); # If the token doesn't exist, just return |
|
1396
|
1108
|
|
|
|
|
23898
|
$self->{db_toks}->{$NTOKENS_MAGIC_TOKEN}--; |
|
1397
|
1108
|
|
|
|
|
23679
|
delete $self->{db_toks}->{$tok}; |
|
1398
|
|
|
|
|
|
|
} else { |
|
1399
|
1662
|
50
|
|
|
|
3342
|
if (!$exists_already) { # If the token doesn't exist, raise the token count |
|
1400
|
1662
|
|
|
|
|
34609
|
$self->{db_toks}->{$NTOKENS_MAGIC_TOKEN}++; |
|
1401
|
|
|
|
|
|
|
} |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
1662
|
|
|
|
|
7131
|
$self->{db_toks}->{$tok} = $self->tok_pack ($ts, $th, $atime); |
|
1404
|
|
|
|
|
|
|
|
|
1405
|
1662
|
|
|
|
|
17874
|
my $newmagic = $self->{db_toks}->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN}; |
|
1406
|
1662
|
100
|
66
|
|
|
10496
|
if (!defined ($newmagic) || $atime > $newmagic) { |
|
1407
|
4
|
|
|
|
|
64
|
$self->{db_toks}->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $atime; |
|
1408
|
|
|
|
|
|
|
} |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
# Make sure to check for either !defined or "" ... Apparently |
|
1411
|
|
|
|
|
|
|
# sometimes the DB module doesn't return the value correctly. :( |
|
1412
|
1662
|
|
|
|
|
16982
|
my $oldmagic = $self->{db_toks}->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN}; |
|
1413
|
1662
|
100
|
66
|
|
|
17362
|
if (!defined ($oldmagic) || $oldmagic eq "" || $atime < $oldmagic) { |
|
|
|
|
66
|
|
|
|
|
|
1414
|
4
|
|
|
|
|
82
|
$self->{db_toks}->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $atime; |
|
1415
|
|
|
|
|
|
|
} |
|
1416
|
|
|
|
|
|
|
} |
|
1417
|
|
|
|
|
|
|
} |
|
1418
|
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
sub tok_sync_nspam_nham { |
|
1420
|
10
|
|
|
10
|
0
|
36
|
my ($self, $ds, $dh) = @_; |
|
1421
|
10
|
|
|
|
|
41
|
my ($ns, $nh) = ($self->get_storage_variables())[1,2]; |
|
1422
|
10
|
100
|
|
|
|
49
|
if ($ds) { $ns += $ds; } if ($ns < 0) { $ns = 0; } |
|
|
6
|
50
|
|
|
|
17
|
|
|
|
10
|
|
|
|
|
34
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1423
|
10
|
100
|
|
|
|
37
|
if ($dh) { $nh += $dh; } if ($nh < 0) { $nh = 0; } |
|
|
4
|
50
|
|
|
|
11
|
|
|
|
10
|
|
|
|
|
40
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1424
|
10
|
|
|
|
|
307
|
$self->{db_toks}->{$NSPAM_MAGIC_TOKEN} = $ns; |
|
1425
|
10
|
|
|
|
|
233
|
$self->{db_toks}->{$NHAM_MAGIC_TOKEN} = $nh; |
|
1426
|
|
|
|
|
|
|
} |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
########################################################################### |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
sub _get_journal_filename { |
|
1431
|
18
|
|
|
18
|
|
56
|
my ($self) = @_; |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
18
|
|
|
|
|
52
|
my $main = $self->{bayes}->{main}; |
|
1434
|
18
|
|
|
|
|
134
|
return $main->sed_path($main->{conf}->{bayes_path}."_journal"); |
|
1435
|
|
|
|
|
|
|
} |
|
1436
|
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
########################################################################### |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
# this is called directly from sa-learn(1). |
|
1440
|
|
|
|
|
|
|
sub perform_upgrade { |
|
1441
|
0
|
|
|
0
|
1
|
0
|
my ($self, $opts) = @_; |
|
1442
|
0
|
|
|
|
|
0
|
my $ret = 0; |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
0
|
|
|
|
|
0
|
my $eval_stat; |
|
1445
|
|
|
|
|
|
|
eval { |
|
1446
|
0
|
|
|
|
|
0
|
local $SIG{'__DIE__'}; # do not run user die() traps in here |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
20
|
|
|
20
|
|
245
|
use File::Basename; |
|
|
20
|
|
|
|
|
583
|
|
|
|
20
|
|
|
|
|
51084
|
|
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
# bayes directory |
|
1451
|
0
|
|
|
|
|
0
|
my $main = $self->{bayes}->{main}; |
|
1452
|
0
|
|
|
|
|
0
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
# prevent dirname() from tainting the result, it assumes $1 is not tainted |
|
1455
|
0
|
|
|
|
|
0
|
local($1,$2,$3); # Bug 6310; perl #67962 (fixed in perl 5.12/5.13) |
|
1456
|
0
|
|
|
|
|
0
|
my $dir = dirname($path); |
|
1457
|
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
# make temporary copy since old dbm and new dbm may have same name |
|
1459
|
0
|
0
|
|
|
|
0
|
opendir(DIR, $dir) or die "bayes: can't opendir $dir: $!"; |
|
1460
|
0
|
|
|
|
|
0
|
my @files = grep { /^bayes_(?:seen|toks)(?:\.\w+)?$/ } readdir(DIR); |
|
|
0
|
|
|
|
|
0
|
|
|
1461
|
0
|
0
|
|
|
|
0
|
closedir(DIR) or die "bayes: can't close directory $dir: $!"; |
|
1462
|
0
|
0
|
0
|
|
|
0
|
if (@files < 2 || !grep(/bayes_seen/,@files) || !grep(/bayes_toks/,@files)) |
|
|
|
|
0
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
{ |
|
1464
|
0
|
|
|
|
|
0
|
die "bayes: unable to find bayes_toks and bayes_seen, stopping\n"; |
|
1465
|
|
|
|
|
|
|
} |
|
1466
|
|
|
|
|
|
|
# untaint @files (already safe after grep) |
|
1467
|
0
|
|
|
|
|
0
|
untaint_var(\@files); |
|
1468
|
|
|
|
|
|
|
|
|
1469
|
0
|
|
|
|
|
0
|
for (@files) { |
|
1470
|
0
|
|
|
|
|
0
|
my $src = "$dir/$_"; |
|
1471
|
0
|
|
|
|
|
0
|
my $dst = "$dir/old_$_"; |
|
1472
|
0
|
0
|
|
|
|
0
|
eval q{ |
|
1473
|
|
|
|
|
|
|
use File::Copy; |
|
1474
|
|
|
|
|
|
|
copy($src, $dst); |
|
1475
|
|
|
|
|
|
|
} || die "bayes: can't copy $src to $dst: $!\n"; |
|
1476
|
|
|
|
|
|
|
} |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
# delete previous to make way for import |
|
1479
|
0
|
|
|
|
|
0
|
for (@files) { unlink("$dir/$_"); } |
|
|
0
|
|
|
|
|
0
|
|
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# import |
|
1482
|
0
|
0
|
|
|
|
0
|
if ($self->tie_db_writable()) { |
|
1483
|
|
|
|
|
|
|
$ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_seen", |
|
1484
|
0
|
|
|
|
|
0
|
$self->{db_seen}); |
|
1485
|
|
|
|
|
|
|
$ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_toks", |
|
1486
|
0
|
|
|
|
|
0
|
$self->{db_toks}); |
|
1487
|
|
|
|
|
|
|
} |
|
1488
|
|
|
|
|
|
|
|
|
1489
|
0
|
0
|
|
|
|
0
|
if ($ret == 2) { |
|
1490
|
0
|
|
|
|
|
0
|
print "import successful, original files saved with \"old\" prefix\n"; |
|
1491
|
|
|
|
|
|
|
} |
|
1492
|
|
|
|
|
|
|
else { |
|
1493
|
0
|
|
|
|
|
0
|
print "import failed, original files saved with \"old\" prefix\n"; |
|
1494
|
|
|
|
|
|
|
} |
|
1495
|
0
|
|
|
|
|
0
|
1; |
|
1496
|
0
|
0
|
|
|
|
0
|
} or do { |
|
1497
|
0
|
0
|
|
|
|
0
|
$eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; |
|
|
0
|
|
|
|
|
0
|
|
|
1498
|
|
|
|
|
|
|
}; |
|
1499
|
|
|
|
|
|
|
|
|
1500
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1501
|
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
# if we died, untie the dbm files |
|
1503
|
0
|
0
|
|
|
|
0
|
if (defined $eval_stat) { |
|
1504
|
0
|
|
|
|
|
0
|
warn "bayes: perform_upgrade: $eval_stat\n"; |
|
1505
|
0
|
|
|
|
|
0
|
return 0; |
|
1506
|
|
|
|
|
|
|
} |
|
1507
|
0
|
|
|
|
|
0
|
$ret; |
|
1508
|
|
|
|
|
|
|
} |
|
1509
|
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
sub upgrade_old_dbm_files_trapped { |
|
1511
|
0
|
|
|
0
|
0
|
0
|
my ($self, $filename, $output) = @_; |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
0
|
|
|
|
|
0
|
my $count; |
|
1514
|
|
|
|
|
|
|
my %in; |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
0
|
|
|
|
|
0
|
print "upgrading to DB_File, please be patient: $filename\n"; |
|
1517
|
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
# try each type of file until we find one with > 0 entries |
|
1519
|
0
|
|
|
|
|
0
|
for my $dbm ('DB_File', 'GDBM_File', 'NDBM_File', 'SDBM_File') { |
|
1520
|
0
|
|
|
|
|
0
|
$count = 0; |
|
1521
|
|
|
|
|
|
|
# wrap in eval so it doesn't run in general use. This accesses db |
|
1522
|
|
|
|
|
|
|
# modules directly. |
|
1523
|
|
|
|
|
|
|
# Note: (bug 2390), the 'use' needs to be on the same line as the eval |
|
1524
|
|
|
|
|
|
|
# for RPM dependency checks to work properly. It's lame, but... |
|
1525
|
0
|
|
|
|
|
0
|
my $eval_stat; |
|
1526
|
|
|
|
|
|
|
eval 'use ' . $dbm . '; |
|
1527
|
|
|
|
|
|
|
tie %in, "' . $dbm . '", $filename, O_RDONLY, 0600; |
|
1528
|
|
|
|
|
|
|
%{ $output } = %in; |
|
1529
|
|
|
|
|
|
|
$count = scalar keys %{ $output }; |
|
1530
|
|
|
|
|
|
|
untie %in; |
|
1531
|
|
|
|
|
|
|
1; |
|
1532
|
0
|
0
|
|
|
|
0
|
' or do { |
|
1533
|
0
|
0
|
|
|
|
0
|
$eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat; |
|
|
0
|
|
|
|
|
0
|
|
|
1534
|
|
|
|
|
|
|
}; |
|
1535
|
0
|
0
|
|
|
|
0
|
if (defined $eval_stat) { |
|
|
|
0
|
|
|
|
|
|
|
1536
|
0
|
|
|
|
|
0
|
print "$dbm: $dbm module not installed(?), nothing copied: $eval_stat\n"; |
|
1537
|
0
|
|
|
|
|
0
|
dbg("bayes: error was: $eval_stat"); |
|
1538
|
|
|
|
|
|
|
} |
|
1539
|
|
|
|
|
|
|
elsif ($count == 0) { |
|
1540
|
0
|
|
|
|
|
0
|
print "$dbm: no database of that kind found, nothing copied\n"; |
|
1541
|
|
|
|
|
|
|
} |
|
1542
|
|
|
|
|
|
|
else { |
|
1543
|
0
|
|
|
|
|
0
|
print "$dbm: copied $count entries\n"; |
|
1544
|
0
|
|
|
|
|
0
|
return 1; |
|
1545
|
|
|
|
|
|
|
} |
|
1546
|
|
|
|
|
|
|
} |
|
1547
|
|
|
|
|
|
|
|
|
1548
|
0
|
|
|
|
|
0
|
return 0; |
|
1549
|
|
|
|
|
|
|
} |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
sub clear_database { |
|
1552
|
2
|
|
|
2
|
1
|
12
|
my ($self) = @_; |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
2
|
50
|
|
|
|
21
|
return 0 unless ($self->tie_db_writable()); |
|
1555
|
|
|
|
|
|
|
|
|
1556
|
2
|
|
|
|
|
22
|
dbg("bayes: untie-ing in preparation for removal."); |
|
1557
|
|
|
|
|
|
|
|
|
1558
|
2
|
|
|
|
|
17
|
foreach my $dbname (@DBNAMES) { |
|
1559
|
4
|
|
|
|
|
15
|
my $db_var = 'db_'.$dbname; |
|
1560
|
|
|
|
|
|
|
|
|
1561
|
4
|
50
|
|
|
|
15
|
if (exists $self->{$db_var}) { |
|
1562
|
|
|
|
|
|
|
# dbg("bayes: untie-ing $db_var"); |
|
1563
|
4
|
|
|
|
|
9
|
untie %{$self->{$db_var}}; |
|
|
4
|
|
|
|
|
60
|
|
|
1564
|
4
|
|
|
|
|
19
|
delete $self->{$db_var}; |
|
1565
|
|
|
|
|
|
|
} |
|
1566
|
|
|
|
|
|
|
} |
|
1567
|
|
|
|
|
|
|
|
|
1568
|
2
|
|
|
|
|
17
|
my $path = $self->{bayes}->{main}->sed_path($self->{bayes}->{main}->{conf}->{bayes_path}); |
|
1569
|
|
|
|
|
|
|
|
|
1570
|
2
|
|
|
|
|
7
|
foreach my $dbname (@DBNAMES, 'journal') { |
|
1571
|
6
|
|
|
|
|
22
|
foreach my $ext ($self->DB_EXTENSIONS) { |
|
1572
|
12
|
|
|
|
|
32
|
my $name = $path.'_'.$dbname.$ext; |
|
1573
|
12
|
|
|
|
|
616
|
my $ret = unlink $name; |
|
1574
|
12
|
100
|
|
|
|
71
|
dbg("bayes: clear_database: %s %s", |
|
1575
|
|
|
|
|
|
|
$ret ? 'removed' : 'tried to remove', $name); |
|
1576
|
|
|
|
|
|
|
} |
|
1577
|
|
|
|
|
|
|
} |
|
1578
|
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
# the journal file needs to be done separately since it has no extension |
|
1580
|
2
|
|
|
|
|
8
|
foreach my $dbname ('journal') { |
|
1581
|
2
|
|
|
|
|
8
|
my $name = $path.'_'.$dbname; |
|
1582
|
2
|
|
|
|
|
78
|
my $ret = unlink $name; |
|
1583
|
2
|
50
|
|
|
|
26
|
dbg("bayes: clear_database: %s %s", |
|
1584
|
|
|
|
|
|
|
$ret ? 'removed' : 'tried to remove', $name); |
|
1585
|
|
|
|
|
|
|
} |
|
1586
|
|
|
|
|
|
|
|
|
1587
|
2
|
|
|
|
|
17
|
$self->untie_db(); |
|
1588
|
|
|
|
|
|
|
|
|
1589
|
2
|
|
|
|
|
15
|
return 1; |
|
1590
|
|
|
|
|
|
|
} |
|
1591
|
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
sub backup_database { |
|
1593
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
1594
|
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# we tie writable because we want the upgrade code to kick in if needed |
|
1596
|
0
|
0
|
|
|
|
0
|
return 0 unless ($self->tie_db_writable()); |
|
1597
|
|
|
|
|
|
|
|
|
1598
|
0
|
|
|
|
|
0
|
my @vars = $self->get_storage_variables(); |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
0
|
|
|
|
|
0
|
print "v\t$vars[6]\tdb_version # this must be the first line!!!\n"; |
|
1601
|
0
|
|
|
|
|
0
|
print "v\t$vars[1]\tnum_spam\n"; |
|
1602
|
0
|
|
|
|
|
0
|
print "v\t$vars[2]\tnum_nonspam\n"; |
|
1603
|
|
|
|
|
|
|
|
|
1604
|
0
|
|
|
|
|
0
|
while (my ($tok, $packed) = each %{$self->{db_toks}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
1605
|
0
|
0
|
|
|
|
0
|
next if ($tok =~ MAGIC_RE); # skip magic tokens |
|
1606
|
|
|
|
|
|
|
|
|
1607
|
0
|
|
|
|
|
0
|
my ($ts, $th, $atime) = $self->tok_unpack($packed); |
|
1608
|
0
|
|
|
|
|
0
|
my $encoded_token = unpack("H*",$tok); |
|
1609
|
0
|
|
|
|
|
0
|
print "t\t$ts\t$th\t$atime\t$encoded_token\n"; |
|
1610
|
|
|
|
|
|
|
} |
|
1611
|
|
|
|
|
|
|
|
|
1612
|
0
|
|
|
|
|
0
|
while (my ($msgid, $flag) = each %{$self->{db_seen}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
1613
|
0
|
|
|
|
|
0
|
print "s\t$flag\t$msgid\n"; |
|
1614
|
|
|
|
|
|
|
} |
|
1615
|
|
|
|
|
|
|
|
|
1616
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1617
|
|
|
|
|
|
|
|
|
1618
|
0
|
|
|
|
|
0
|
return 1; |
|
1619
|
|
|
|
|
|
|
} |
|
1620
|
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
sub restore_database { |
|
1622
|
0
|
|
|
0
|
1
|
0
|
my ($self, $filename, $showdots) = @_; |
|
1623
|
|
|
|
|
|
|
|
|
1624
|
0
|
|
|
|
|
0
|
local *DUMPFILE; |
|
1625
|
0
|
0
|
|
|
|
0
|
if (!open(DUMPFILE, '<', $filename)) { |
|
1626
|
0
|
|
|
|
|
0
|
dbg("bayes: unable to open backup file $filename: $!"); |
|
1627
|
0
|
|
|
|
|
0
|
return 0; |
|
1628
|
|
|
|
|
|
|
} |
|
1629
|
|
|
|
|
|
|
|
|
1630
|
0
|
0
|
|
|
|
0
|
if (!$self->tie_db_writable()) { |
|
1631
|
0
|
|
|
|
|
0
|
dbg("bayes: failed to tie db writable"); |
|
1632
|
0
|
|
|
|
|
0
|
return 0; |
|
1633
|
|
|
|
|
|
|
} |
|
1634
|
|
|
|
|
|
|
|
|
1635
|
0
|
|
|
|
|
0
|
my $main = $self->{bayes}->{main}; |
|
1636
|
0
|
|
|
|
|
0
|
my $path = $main->sed_path($main->{conf}->{bayes_path}); |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
# use a temporary PID-based suffix just in case another one was |
|
1639
|
|
|
|
|
|
|
# created previously by an interrupted expire |
|
1640
|
0
|
|
|
|
|
0
|
my $tmpsuffix = "convert$$"; |
|
1641
|
0
|
|
|
|
|
0
|
my $tmptoksdbname = $path.'_toks.'.$tmpsuffix; |
|
1642
|
0
|
|
|
|
|
0
|
my $tmpseendbname = $path.'_seen.'.$tmpsuffix; |
|
1643
|
0
|
|
|
|
|
0
|
my $toksdbname = $path.'_toks'; |
|
1644
|
0
|
|
|
|
|
0
|
my $seendbname = $path.'_seen'; |
|
1645
|
|
|
|
|
|
|
|
|
1646
|
0
|
|
|
|
|
0
|
my %new_toks; |
|
1647
|
|
|
|
|
|
|
my %new_seen; |
|
1648
|
0
|
|
|
|
|
0
|
my $umask = umask 0; |
|
1649
|
0
|
0
|
|
|
|
0
|
unless (tie %new_toks, $self->DBM_MODULE, $tmptoksdbname, O_RDWR|O_CREAT|O_EXCL, |
|
1650
|
|
|
|
|
|
|
(oct ($main->{conf}->{bayes_file_mode}) & 0666)) { |
|
1651
|
0
|
|
|
|
|
0
|
dbg("bayes: failed to tie temp toks db: $!"); |
|
1652
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1653
|
0
|
|
|
|
|
0
|
umask $umask; |
|
1654
|
0
|
|
|
|
|
0
|
return 0; |
|
1655
|
|
|
|
|
|
|
} |
|
1656
|
0
|
0
|
|
|
|
0
|
unless (tie %new_seen, $self->DBM_MODULE, $tmpseendbname, O_RDWR|O_CREAT|O_EXCL, |
|
1657
|
|
|
|
|
|
|
(oct ($main->{conf}->{bayes_file_mode}) & 0666)) { |
|
1658
|
0
|
|
|
|
|
0
|
dbg("bayes: failed to tie temp seen db: $!"); |
|
1659
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
1660
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmptoksdbname); |
|
1661
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1662
|
0
|
|
|
|
|
0
|
umask $umask; |
|
1663
|
0
|
|
|
|
|
0
|
return 0; |
|
1664
|
|
|
|
|
|
|
} |
|
1665
|
0
|
|
|
|
|
0
|
umask $umask; |
|
1666
|
|
|
|
|
|
|
|
|
1667
|
0
|
|
|
|
|
0
|
my $line_count = 0; |
|
1668
|
0
|
|
|
|
|
0
|
my $db_version; |
|
1669
|
0
|
|
|
|
|
0
|
my $token_count = 0; |
|
1670
|
0
|
|
|
|
|
0
|
my $num_spam; |
|
1671
|
|
|
|
|
|
|
my $num_ham; |
|
1672
|
0
|
|
|
|
|
0
|
my $error_p = 0; |
|
1673
|
0
|
|
|
|
|
0
|
my $newest_token_age = 0; |
|
1674
|
|
|
|
|
|
|
# Kinda weird I know, but we need a nice big value and we know there will be |
|
1675
|
|
|
|
|
|
|
# no tokens > time() since we reset atime if > time(), so use that with a |
|
1676
|
|
|
|
|
|
|
# little buffer just in case. |
|
1677
|
0
|
|
|
|
|
0
|
my $oldest_token_age = time() + 100000; |
|
1678
|
|
|
|
|
|
|
|
|
1679
|
0
|
|
|
|
|
0
|
my $line = <DUMPFILE>; |
|
1680
|
0
|
0
|
|
|
|
0
|
defined $line or die "Error reading dump file: $!"; |
|
1681
|
0
|
|
|
|
|
0
|
$line_count++; |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
# We require the database version line to be the first in the file so we can |
|
1684
|
|
|
|
|
|
|
# figure out how to properly deal with the file. If it is not the first |
|
1685
|
|
|
|
|
|
|
# line then fail |
|
1686
|
0
|
0
|
|
|
|
0
|
if ($line =~ m/^v\s+(\d+)\s+db_version/) { |
|
1687
|
0
|
|
|
|
|
0
|
$db_version = $1; |
|
1688
|
|
|
|
|
|
|
} |
|
1689
|
|
|
|
|
|
|
else { |
|
1690
|
0
|
|
|
|
|
0
|
dbg("bayes: database version must be the first line in the backup file, correct and re-run"); |
|
1691
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
1692
|
0
|
|
|
|
|
0
|
untie %new_seen; |
|
1693
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmptoksdbname); |
|
1694
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmpseendbname); |
|
1695
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1696
|
0
|
|
|
|
|
0
|
return 0; |
|
1697
|
|
|
|
|
|
|
} |
|
1698
|
|
|
|
|
|
|
|
|
1699
|
0
|
0
|
0
|
|
|
0
|
unless ($db_version == 2 || $db_version == 3) { |
|
1700
|
0
|
|
|
|
|
0
|
warn("bayes: database version $db_version is unsupported, must be version 2 or 3"); |
|
1701
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
1702
|
0
|
|
|
|
|
0
|
untie %new_seen; |
|
1703
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmptoksdbname); |
|
1704
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmpseendbname); |
|
1705
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1706
|
0
|
|
|
|
|
0
|
return 0; |
|
1707
|
|
|
|
|
|
|
} |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
0
|
|
|
|
|
0
|
for ($!=0; defined($line=<DUMPFILE>); $!=0) { |
|
1710
|
0
|
|
|
|
|
0
|
chomp($line); |
|
1711
|
0
|
|
|
|
|
0
|
$line_count++; |
|
1712
|
|
|
|
|
|
|
|
|
1713
|
0
|
0
|
|
|
|
0
|
if ($line_count % 1000 == 0) { |
|
1714
|
0
|
0
|
|
|
|
0
|
print STDERR "." if ($showdots); |
|
1715
|
|
|
|
|
|
|
} |
|
1716
|
|
|
|
|
|
|
|
|
1717
|
0
|
0
|
|
|
|
0
|
if ($line =~ /^v\s+/) { # variable line |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1718
|
0
|
|
|
|
|
0
|
my @parsed_line = split(/\s+/, $line, 3); |
|
1719
|
0
|
|
|
|
|
0
|
my $value = $parsed_line[1] + 0; |
|
1720
|
0
|
0
|
|
|
|
0
|
if ($parsed_line[2] eq 'num_spam') { |
|
|
|
0
|
|
|
|
|
|
|
1721
|
0
|
|
|
|
|
0
|
$num_spam = $value; |
|
1722
|
|
|
|
|
|
|
} |
|
1723
|
|
|
|
|
|
|
elsif ($parsed_line[2] eq 'num_nonspam') { |
|
1724
|
0
|
|
|
|
|
0
|
$num_ham = $value; |
|
1725
|
|
|
|
|
|
|
} |
|
1726
|
|
|
|
|
|
|
else { |
|
1727
|
0
|
|
|
|
|
0
|
dbg("bayes: restore_database: skipping unknown line: $line"); |
|
1728
|
|
|
|
|
|
|
} |
|
1729
|
|
|
|
|
|
|
} |
|
1730
|
|
|
|
|
|
|
elsif ($line =~ /^t\s+/) { # token line |
|
1731
|
0
|
|
|
|
|
0
|
my @parsed_line = split(/\s+/, $line, 5); |
|
1732
|
0
|
|
|
|
|
0
|
my $spam_count = $parsed_line[1] + 0; |
|
1733
|
0
|
|
|
|
|
0
|
my $ham_count = $parsed_line[2] + 0; |
|
1734
|
0
|
|
|
|
|
0
|
my $atime = $parsed_line[3] + 0; |
|
1735
|
0
|
|
|
|
|
0
|
my $token = $parsed_line[4]; |
|
1736
|
|
|
|
|
|
|
|
|
1737
|
0
|
|
|
|
|
0
|
my $token_warn_p = 0; |
|
1738
|
0
|
|
|
|
|
0
|
my @warnings; |
|
1739
|
|
|
|
|
|
|
|
|
1740
|
0
|
0
|
|
|
|
0
|
if ($spam_count < 0) { |
|
1741
|
0
|
|
|
|
|
0
|
$spam_count = 0; |
|
1742
|
0
|
|
|
|
|
0
|
push(@warnings, 'spam count < 0, resetting'); |
|
1743
|
0
|
|
|
|
|
0
|
$token_warn_p = 1; |
|
1744
|
|
|
|
|
|
|
} |
|
1745
|
0
|
0
|
|
|
|
0
|
if ($ham_count < 0) { |
|
1746
|
0
|
|
|
|
|
0
|
$ham_count = 0; |
|
1747
|
0
|
|
|
|
|
0
|
push(@warnings, 'ham count < 0, resetting'); |
|
1748
|
0
|
|
|
|
|
0
|
$token_warn_p = 1; |
|
1749
|
|
|
|
|
|
|
} |
|
1750
|
|
|
|
|
|
|
|
|
1751
|
0
|
0
|
0
|
|
|
0
|
if ($spam_count == 0 && $ham_count == 0) { |
|
1752
|
0
|
|
|
|
|
0
|
dbg("bayes: token has zero spam and ham count, skipping"); |
|
1753
|
0
|
|
|
|
|
0
|
next; |
|
1754
|
|
|
|
|
|
|
} |
|
1755
|
|
|
|
|
|
|
|
|
1756
|
0
|
0
|
|
|
|
0
|
if ($atime > time()) { |
|
1757
|
0
|
|
|
|
|
0
|
$atime = time(); |
|
1758
|
0
|
|
|
|
|
0
|
push(@warnings, 'atime > current time, resetting'); |
|
1759
|
0
|
|
|
|
|
0
|
$token_warn_p = 1; |
|
1760
|
|
|
|
|
|
|
} |
|
1761
|
|
|
|
|
|
|
|
|
1762
|
0
|
0
|
|
|
|
0
|
if ($token_warn_p) { |
|
1763
|
0
|
|
|
|
|
0
|
dbg("bayes: token (%s) has the following warnings:\n%s", |
|
1764
|
|
|
|
|
|
|
$token, join("\n",@warnings)); |
|
1765
|
|
|
|
|
|
|
} |
|
1766
|
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
# database versions < 3 did not encode their token values |
|
1768
|
0
|
0
|
|
|
|
0
|
if ($db_version < 3) { |
|
1769
|
0
|
|
|
|
|
0
|
$token = substr(sha1($token), -5); |
|
1770
|
|
|
|
|
|
|
} |
|
1771
|
|
|
|
|
|
|
else { |
|
1772
|
|
|
|
|
|
|
# turn unpacked binary token back into binary value |
|
1773
|
0
|
|
|
|
|
0
|
$token = pack("H*",$token); |
|
1774
|
|
|
|
|
|
|
} |
|
1775
|
|
|
|
|
|
|
|
|
1776
|
0
|
|
|
|
|
0
|
$new_toks{$token} = $self->tok_pack($spam_count, $ham_count, $atime); |
|
1777
|
0
|
0
|
|
|
|
0
|
if ($atime < $oldest_token_age) { |
|
1778
|
0
|
|
|
|
|
0
|
$oldest_token_age = $atime; |
|
1779
|
|
|
|
|
|
|
} |
|
1780
|
0
|
0
|
|
|
|
0
|
if ($atime > $newest_token_age) { |
|
1781
|
0
|
|
|
|
|
0
|
$newest_token_age = $atime; |
|
1782
|
|
|
|
|
|
|
} |
|
1783
|
0
|
|
|
|
|
0
|
$token_count++; |
|
1784
|
|
|
|
|
|
|
} |
|
1785
|
|
|
|
|
|
|
elsif ($line =~ /^s\s+/) { # seen line |
|
1786
|
0
|
|
|
|
|
0
|
my @parsed_line = split(/\s+/, $line, 3); |
|
1787
|
0
|
|
|
|
|
0
|
my $flag = $parsed_line[1]; |
|
1788
|
0
|
|
|
|
|
0
|
my $msgid = $parsed_line[2]; |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
0
|
0
|
0
|
|
|
0
|
unless ($flag eq 'h' || $flag eq 's') { |
|
1791
|
0
|
|
|
|
|
0
|
dbg("bayes: unknown seen flag ($flag) for line: $line, skipping"); |
|
1792
|
0
|
|
|
|
|
0
|
next; |
|
1793
|
|
|
|
|
|
|
} |
|
1794
|
|
|
|
|
|
|
|
|
1795
|
0
|
0
|
|
|
|
0
|
unless ($msgid) { |
|
1796
|
0
|
|
|
|
|
0
|
dbg("bayes: blank msgid for line: $line, skipping"); |
|
1797
|
0
|
|
|
|
|
0
|
next; |
|
1798
|
|
|
|
|
|
|
} |
|
1799
|
|
|
|
|
|
|
|
|
1800
|
0
|
|
|
|
|
0
|
$new_seen{$msgid} = $flag; |
|
1801
|
|
|
|
|
|
|
} |
|
1802
|
|
|
|
|
|
|
else { |
|
1803
|
0
|
|
|
|
|
0
|
dbg("bayes: skipping unknown line: $line"); |
|
1804
|
0
|
|
|
|
|
0
|
next; |
|
1805
|
|
|
|
|
|
|
} |
|
1806
|
|
|
|
|
|
|
} |
|
1807
|
0
|
0
|
0
|
|
|
0
|
defined $line || $!==0 or die "Error reading dump file: $!"; |
|
1808
|
0
|
0
|
|
|
|
0
|
close(DUMPFILE) or die "Can't close dump file: $!"; |
|
1809
|
|
|
|
|
|
|
|
|
1810
|
0
|
0
|
|
|
|
0
|
print STDERR "\n" if ($showdots); |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
0
|
0
|
|
|
|
0
|
unless (defined($num_spam)) { |
|
1813
|
0
|
|
|
|
|
0
|
dbg("bayes: unable to find num spam, please check file"); |
|
1814
|
0
|
|
|
|
|
0
|
$error_p = 1; |
|
1815
|
|
|
|
|
|
|
} |
|
1816
|
|
|
|
|
|
|
|
|
1817
|
0
|
0
|
|
|
|
0
|
unless (defined($num_ham)) { |
|
1818
|
0
|
|
|
|
|
0
|
dbg("bayes: unable to find num ham, please check file"); |
|
1819
|
0
|
|
|
|
|
0
|
$error_p = 1; |
|
1820
|
|
|
|
|
|
|
} |
|
1821
|
|
|
|
|
|
|
|
|
1822
|
0
|
0
|
|
|
|
0
|
if ($error_p) { |
|
1823
|
0
|
|
|
|
|
0
|
dbg("bayes: error(s) while attempting to load $filename, correct and re-run"); |
|
1824
|
|
|
|
|
|
|
|
|
1825
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
1826
|
0
|
|
|
|
|
0
|
untie %new_seen; |
|
1827
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmptoksdbname); |
|
1828
|
0
|
|
|
|
|
0
|
$self->_unlink_file($tmpseendbname); |
|
1829
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1830
|
0
|
|
|
|
|
0
|
return 0; |
|
1831
|
|
|
|
|
|
|
} |
|
1832
|
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
# set the calculated magic tokens |
|
1834
|
0
|
|
|
|
|
0
|
$new_toks{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION(); |
|
1835
|
0
|
|
|
|
|
0
|
$new_toks{$NTOKENS_MAGIC_TOKEN} = $token_count; |
|
1836
|
0
|
|
|
|
|
0
|
$new_toks{$NSPAM_MAGIC_TOKEN} = $num_spam; |
|
1837
|
0
|
|
|
|
|
0
|
$new_toks{$NHAM_MAGIC_TOKEN} = $num_ham; |
|
1838
|
0
|
|
|
|
|
0
|
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newest_token_age; |
|
1839
|
0
|
|
|
|
|
0
|
$new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest_token_age; |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
# go ahead and zero out these, chances are good that they are bogus anyway. |
|
1842
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = 0; |
|
1843
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = 0; |
|
1844
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0; |
|
1845
|
0
|
|
|
|
|
0
|
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0; |
|
1846
|
|
|
|
|
|
|
|
|
1847
|
0
|
|
|
|
|
0
|
local $SIG{'INT'} = 'IGNORE'; |
|
1848
|
0
|
|
|
|
|
0
|
local $SIG{'TERM'} = 'IGNORE'; |
|
1849
|
0
|
0
|
|
|
|
0
|
local $SIG{'HUP'} = 'IGNORE' if !am_running_on_windows(); |
|
1850
|
|
|
|
|
|
|
|
|
1851
|
0
|
|
|
|
|
0
|
untie %new_toks; |
|
1852
|
0
|
|
|
|
|
0
|
untie %new_seen; |
|
1853
|
0
|
|
|
|
|
0
|
$self->untie_db(); |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
# Here is where something can go horribly wrong and screw up the bayes |
|
1856
|
|
|
|
|
|
|
# database files. If we are able to copy one and not the other then it |
|
1857
|
|
|
|
|
|
|
# will leave the database in an inconsistent state. Since this is an |
|
1858
|
|
|
|
|
|
|
# edge case, and they're trying to replace the DB anyway we should be ok. |
|
1859
|
0
|
0
|
|
|
|
0
|
unless ($self->_rename_file($tmptoksdbname, $toksdbname)) { |
|
1860
|
0
|
|
|
|
|
0
|
dbg("bayes: error while renaming $tmptoksdbname to $toksdbname: $!"); |
|
1861
|
0
|
|
|
|
|
0
|
return 0; |
|
1862
|
|
|
|
|
|
|
} |
|
1863
|
0
|
0
|
|
|
|
0
|
unless ($self->_rename_file($tmpseendbname, $seendbname)) { |
|
1864
|
0
|
|
|
|
|
0
|
dbg("bayes: error while renaming $tmpseendbname to $seendbname: $!"); |
|
1865
|
0
|
|
|
|
|
0
|
dbg("bayes: database now in inconsistent state"); |
|
1866
|
0
|
|
|
|
|
0
|
return 0; |
|
1867
|
|
|
|
|
|
|
} |
|
1868
|
|
|
|
|
|
|
|
|
1869
|
0
|
|
|
|
|
0
|
dbg("bayes: parsed $line_count lines"); |
|
1870
|
0
|
|
|
|
|
0
|
dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages"); |
|
1871
|
|
|
|
|
|
|
|
|
1872
|
0
|
|
|
|
|
0
|
return 1; |
|
1873
|
|
|
|
|
|
|
} |
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
########################################################################### |
|
1876
|
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
# token marshalling format for db_toks. |
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
# Since we may have many entries with few hits, especially thousands of hapaxes |
|
1880
|
|
|
|
|
|
|
# (1-occurrence entries), use a flexible entry format, instead of simply "2 |
|
1881
|
|
|
|
|
|
|
# packed ints", to keep the memory and disk space usage down. In my |
|
1882
|
|
|
|
|
|
|
# 18k-message test corpus, only 8.9% have >= 8 hits in either counter, so we |
|
1883
|
|
|
|
|
|
|
# can use a 1-byte representation for the other 91% of low-hitting entries |
|
1884
|
|
|
|
|
|
|
# and save masses of space. |
|
1885
|
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
# This looks like: XXSSSHHH (XX = format bits, SSS = 3 spam-count bits, HHH = 3 |
|
1887
|
|
|
|
|
|
|
# ham-count bits). If XX in the first byte is 11, it's packed as this 1-byte |
|
1888
|
|
|
|
|
|
|
# representation; otherwise, if XX in the first byte is 00, it's packed as |
|
1889
|
|
|
|
|
|
|
# "CLL", ie. 1 byte and 2 32-bit "longs" in perl pack format. |
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
# Savings: roughly halves size of toks db, at the cost of a ~10% slowdown. |
|
1892
|
|
|
|
|
|
|
|
|
1893
|
20
|
|
|
20
|
|
242
|
use constant FORMAT_FLAG => 0xc0; # 11000000 |
|
|
20
|
|
|
|
|
63
|
|
|
|
20
|
|
|
|
|
1533
|
|
|
1894
|
20
|
|
|
20
|
|
153
|
use constant ONE_BYTE_FORMAT => 0xc0; # 11000000 |
|
|
20
|
|
|
|
|
43
|
|
|
|
20
|
|
|
|
|
1733
|
|
|
1895
|
20
|
|
|
20
|
|
148
|
use constant TWO_LONGS_FORMAT => 0x00; # 00000000 |
|
|
20
|
|
|
|
|
572
|
|
|
|
20
|
|
|
|
|
1369
|
|
|
1896
|
|
|
|
|
|
|
|
|
1897
|
20
|
|
|
20
|
|
157
|
use constant ONE_BYTE_SSS_BITS => 0x38; # 00111000 |
|
|
20
|
|
|
|
|
712
|
|
|
|
20
|
|
|
|
|
1365
|
|
|
1898
|
20
|
|
|
20
|
|
683
|
use constant ONE_BYTE_HHH_BITS => 0x07; # 00000111 |
|
|
20
|
|
|
|
|
52
|
|
|
|
20
|
|
|
|
|
9859
|
|
|
1899
|
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
sub tok_unpack { |
|
1901
|
5536
|
|
|
5536
|
0
|
58414
|
my ($self, $value) = @_; |
|
1902
|
5536
|
|
100
|
|
|
20149
|
$value ||= 0; |
|
1903
|
|
|
|
|
|
|
|
|
1904
|
5536
|
|
|
|
|
8356
|
my ($packed, $atime); |
|
1905
|
5536
|
50
|
|
|
|
12809
|
if ($self->{db_version} >= 1) { |
|
|
|
0
|
|
|
|
|
|
|
1906
|
5536
|
|
|
|
|
14567
|
($packed, $atime) = unpack("CV", $value); |
|
1907
|
|
|
|
|
|
|
} |
|
1908
|
|
|
|
|
|
|
elsif ($self->{db_version} == 0) { |
|
1909
|
0
|
|
|
|
|
0
|
($packed, $atime) = unpack("CS", $value); |
|
1910
|
|
|
|
|
|
|
} |
|
1911
|
|
|
|
|
|
|
|
|
1912
|
5536
|
100
|
|
|
|
13734
|
if (($packed & FORMAT_FLAG) == ONE_BYTE_FORMAT) { |
|
|
|
50
|
|
|
|
|
|
|
1913
|
3134
|
|
50
|
|
|
13319
|
return (($packed & ONE_BYTE_SSS_BITS) >> 3, |
|
1914
|
|
|
|
|
|
|
$packed & ONE_BYTE_HHH_BITS, |
|
1915
|
|
|
|
|
|
|
$atime || 0); |
|
1916
|
|
|
|
|
|
|
} |
|
1917
|
|
|
|
|
|
|
elsif (($packed & FORMAT_FLAG) == TWO_LONGS_FORMAT) { |
|
1918
|
2402
|
|
|
|
|
3670
|
my ($packed, $ts, $th, $atime); |
|
1919
|
2402
|
50
|
|
|
|
4508
|
if ($self->{db_version} >= 1) { |
|
|
|
0
|
|
|
|
|
|
|
1920
|
2402
|
|
|
|
|
5113
|
($packed, $ts, $th, $atime) = unpack("CVVV", $value); |
|
1921
|
|
|
|
|
|
|
} |
|
1922
|
|
|
|
|
|
|
elsif ($self->{db_version} == 0) { |
|
1923
|
0
|
|
|
|
|
0
|
($packed, $ts, $th, $atime) = unpack("CLLS", $value); |
|
1924
|
|
|
|
|
|
|
} |
|
1925
|
2402
|
|
100
|
|
|
16388
|
return ($ts || 0, $th || 0, $atime || 0); |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
} |
|
1927
|
|
|
|
|
|
|
# other formats would go here... |
|
1928
|
|
|
|
|
|
|
else { |
|
1929
|
0
|
|
|
|
|
0
|
warn "bayes: unknown packing format for bayes db, please re-learn: $packed"; |
|
1930
|
0
|
|
|
|
|
0
|
return (0, 0, 0); |
|
1931
|
|
|
|
|
|
|
} |
|
1932
|
|
|
|
|
|
|
} |
|
1933
|
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
sub tok_pack { |
|
1935
|
1662
|
|
|
1662
|
0
|
5444
|
my ($self, $ts, $th, $atime) = @_; |
|
1936
|
1662
|
|
100
|
|
|
5465
|
$ts ||= 0; $th ||= 0; $atime ||= 0; |
|
|
1662
|
|
100
|
|
|
5489
|
|
|
|
1662
|
|
50
|
|
|
3924
|
|
|
1937
|
1662
|
50
|
33
|
|
|
6105
|
if ($ts < 8 && $th < 8) { |
|
1938
|
1662
|
|
|
|
|
36926
|
return pack ("CV", ONE_BYTE_FORMAT | ($ts << 3) | $th, $atime); |
|
1939
|
|
|
|
|
|
|
} else { |
|
1940
|
0
|
|
|
|
|
0
|
return pack ("CVVV", TWO_LONGS_FORMAT, $ts, $th, $atime); |
|
1941
|
|
|
|
|
|
|
} |
|
1942
|
|
|
|
|
|
|
} |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
########################################################################### |
|
1945
|
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
sub db_readable { |
|
1947
|
16
|
|
|
16
|
1
|
46
|
my ($self) = @_; |
|
1948
|
16
|
|
|
|
|
70
|
return $self->{already_tied}; |
|
1949
|
|
|
|
|
|
|
} |
|
1950
|
|
|
|
|
|
|
|
|
1951
|
|
|
|
|
|
|
sub db_writable { |
|
1952
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
1953
|
0
|
|
0
|
|
|
|
return $self->{already_tied} && $self->{is_locked}; |
|
1954
|
|
|
|
|
|
|
} |
|
1955
|
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
########################################################################### |
|
1957
|
|
|
|
|
|
|
|
|
1958
|
|
|
|
|
|
|
sub _unlink_file { |
|
1959
|
0
|
|
|
0
|
|
|
my ($self, $filename) = @_; |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
0
|
|
|
|
|
|
unlink $filename; |
|
1962
|
|
|
|
|
|
|
} |
|
1963
|
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
sub _rename_file { |
|
1965
|
0
|
|
|
0
|
|
|
my ($self, $sourcefilename, $targetfilename) = @_; |
|
1966
|
|
|
|
|
|
|
|
|
1967
|
0
|
0
|
|
|
|
|
return 0 unless (rename($sourcefilename, $targetfilename)); |
|
1968
|
|
|
|
|
|
|
|
|
1969
|
0
|
|
|
|
|
|
return 1; |
|
1970
|
|
|
|
|
|
|
} |
|
1971
|
|
|
|
|
|
|
|
|
1972
|
0
|
|
|
0
|
0
|
|
sub sa_die { Mail::SpamAssassin::sa_die(@_); } |
|
1973
|
|
|
|
|
|
|
|
|
1974
|
|
|
|
|
|
|
1; |