File Coverage

blib/lib/Mail/SpamAssassin/Plugin/Razor2.pm
Criterion Covered Total %
statement 44 193 22.8
branch 5 120 4.1
condition 1 32 3.1
subroutine 9 14 64.2
pod 3 7 42.8
total 62 366 16.9


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Plugin::Razor2 - perform Razor check of messages
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::Razor2
25              
26             =head1 DESCRIPTION
27              
28             Vipul's Razor is a distributed, collaborative spam detection and
29             filtering network based on user submissions of spam. Detection is done
30             with signatures that efficiently spot mutating spam content, and user
31             input is validated through reputation assignments.
32              
33             See http://razor.sourceforge.net/ for more information about Razor.
34              
35             =head1 USER SETTINGS
36              
37             =over 4
38              
39             =cut
40              
41             package Mail::SpamAssassin::Plugin::Razor2;
42              
43 21     21   184 use Mail::SpamAssassin::Plugin;
  21         56  
  21         749  
44 21     21   133 use Mail::SpamAssassin::Logger;
  21         43  
  21         1314  
45 21     21   137 use Mail::SpamAssassin::Timeout;
  21         44  
  21         1050  
46 21     21   140 use strict;
  21         65  
  21         533  
47 21     21   108 use warnings;
  21         45  
  21         652  
48             # use bytes;
49 21     21   153 use re 'taint';
  21         57  
  21         47255  
50              
51             our @ISA = qw(Mail::SpamAssassin::Plugin);
52              
53             sub new {
54 62     62 1 235 my $class = shift;
55 62         187 my $mailsaobject = shift;
56              
57 62   33     441 $class = ref($class) || $class;
58 62         370 my $self = $class->SUPER::new($mailsaobject);
59 62         179 bless ($self, $class);
60              
61             # figure out if razor is even available or not ...
62 62         276 $self->{razor2_available} = 0;
63 62 100       275 if ($mailsaobject->{local_tests_only}) {
64 61         240 dbg("razor2: local tests only, skipping Razor");
65             }
66             else {
67 1 50       3 if (eval { require Razor2::Client::Agent; }) {
  1         233  
68 0         0 $self->{razor2_available} = 1;
69 0         0 dbg("razor2: razor2 is available, version " . $Razor2::Client::Version::VERSION . "\n");
70             }
71             else {
72 1         6 dbg("razor2: razor2 is not available");
73             }
74             }
75              
76 62         333 $self->register_eval_rule("check_razor2");
77 62         258 $self->register_eval_rule("check_razor2_range");
78              
79 62         338 $self->set_config($mailsaobject->{conf});
80              
81 62         688 return $self;
82             }
83              
84             sub set_config {
85 62     62 0 189 my ($self, $conf) = @_;
86 62         151 my @cmds;
87              
88             =item use_razor2 (0|1) (default: 1)
89              
90             Whether to use Razor2, if it is available.
91              
92             =cut
93              
94 62         327 push(@cmds, {
95             setting => 'use_razor2',
96             default => 1,
97             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
98             });
99              
100             =back
101              
102             =head1 ADMINISTRATOR SETTINGS
103              
104             =over 4
105              
106             =item razor_timeout n (default: 5)
107              
108             How many seconds to wait for Razor to complete before continuing
109             without its results.
110              
111             =cut
112              
113 62         319 push(@cmds, {
114             setting => 'razor_timeout',
115             is_admin => 1,
116             default => 5,
117             type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION,
118             });
119              
120             =item razor_config filename
121              
122             Define the filename used to store Razor's configuration settings.
123             If this is not set, the choice of file is left to Razor.
124              
125             =cut
126              
127 62         241 push(@cmds, {
128             setting => 'razor_config',
129             is_admin => 1,
130             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
131             });
132              
133 62         302 $conf->{parser}->register_commands(\@cmds);
134             }
135              
136             sub razor2_access {
137 0     0 0 0 my ($self, $fulltext, $type, $deadline) = @_;
138 0         0 my $timeout = $self->{main}->{conf}->{razor_timeout};
139 0         0 my $return = 0;
140 0         0 my @results;
141              
142 0 0       0 my $debug = $type eq 'check' ? 'razor2' : 'reporter';
143              
144             # razor also debugs to stdout. argh. fix it to stderr...
145 0 0       0 if (would_log('dbg', $debug)) {
146 0         0 open(OLDOUT, ">&STDOUT");
147 0         0 open(STDOUT, ">&STDERR");
148             }
149              
150 0         0 Mail::SpamAssassin::PerMsgStatus::enter_helper_run_mode($self);
151              
152 0         0 my $rnd = rand(0x7fffffff); # save entropy before Razor clobbers it
153              
154 0         0 my $timer = Mail::SpamAssassin::Timeout->new(
155             { secs => $timeout, deadline => $deadline });
156             my $err = $timer->run_and_catch(sub {
157              
158 0     0   0 local ($^W) = 0; # argh, warnings in Razor
159              
160             # everything's in the module!
161 0         0 my $rc = Razor2::Client::Agent->new("razor-$type");
162              
163 0 0       0 if ($rc) {
164             $rc->{opt} = {
165             debug => (would_log('dbg', $debug) > 1),
166             foreground => 1,
167             config => $self->{main}->{conf}->{razor_config}
168 0         0 };
169             # no facility prefix on this die
170 0 0       0 $rc->do_conf() or die "$debug: " . $rc->errstr;
171              
172             # Razor2 requires authentication for reporting
173 0         0 my $ident;
174 0 0       0 if ($type ne 'check') {
175             # no facility prefix on this die
176 0 0       0 $ident = $rc->get_ident
177             or die("$type requires authentication");
178             }
179              
180 0         0 my @msg = ($fulltext);
181             # no facility prefix on this die
182 0 0       0 my $objects = $rc->prepare_objects(\@msg)
183             or die "$debug: error in prepare_objects";
184 0 0       0 unless ($rc->get_server_info()) {
185 0   0     0 my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during get_server_info";
186 0         0 die $error;
187             }
188              
189             # let's reset the alarm since get_server_info() calls
190             # nextserver() which calls discover() which very likely will
191             # reset the alarm for us ... how polite. :(
192 0         0 $timer->reset();
193              
194             # no facility prefix on this die
195 0 0       0 my $sigs = $rc->compute_sigs($objects)
196             or die "$debug: error in compute_sigs";
197              
198             # if mail isn't whitelisted, check it out
199             # see 'man razor-whitelist'
200 0 0 0     0 if ($type ne 'check' || ! $rc->local_check($objects->[0])) {
201             # provide a better error message when servers are unavailable,
202             # than "Bad file descriptor Died".
203 0 0       0 $rc->connect() or die "$debug: could not connect to any servers\n";
204              
205             # Talk to the Razor server and do work
206 0 0       0 if ($type eq 'check') {
207 0 0       0 unless ($rc->check($objects)) {
208 0   0     0 my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during check";
209 0         0 die $error;
210             }
211             }
212             else {
213 0 0       0 unless ($rc->authenticate($ident)) {
214 0   0     0 my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during authenticate";
215 0         0 die $error;
216             }
217 0 0       0 unless ($rc->report($objects)) {
218 0   0     0 my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during report";
219 0         0 die $error;
220             }
221             }
222              
223 0 0       0 unless ($rc->disconnect()) {
224 0   0     0 my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during disconnect";
225 0         0 die $error;
226             }
227             }
228              
229             # Razor 2.14 says that if we get here, we did ok.
230 0         0 $return = 1;
231              
232             # figure out if we have a log file we need to close...
233 0 0 0     0 if (ref($rc->{logref}) && exists $rc->{logref}->{fd}) {
234             # the fd can be stdout or stderr, so we need to find out if it is
235             # so we don't close them by accident. Note: we can't just
236             # undef the fd here (like the IO::Handle manpage says we can)
237             # because it won't actually close, unfortunately. :(
238 0         0 my $untie = 1;
239 0         0 foreach my $log (*STDOUT{IO}, *STDERR{IO}) {
240 0 0       0 if ($log == $rc->{logref}->{fd}) {
241 0         0 $untie = 0;
242 0         0 last;
243             }
244             }
245 0 0       0 if ($untie) {
246 0 0       0 close($rc->{logref}->{fd}) or die "error closing log: $!";
247             }
248             }
249              
250 0 0       0 if ($type eq 'check') {
251             # so $objects->[0] is the first (only) message, and ->{spam} is a general yes/no
252 0         0 push(@results, { result => $objects->[0]->{spam} });
253              
254             # great for debugging, but leave this off!
255             #use Data::Dumper;
256             #print Dumper($objects),"\n";
257              
258             # ->{p} is for each part of the message
259             # so go through each part, taking the highest cf we find
260             # of any part that isn't contested (ct). This helps avoid false
261             # positives. equals logic_method 4.
262             #
263             # razor-agents < 2.14 have a different object format, so we now support both.
264             # $objects->[0]->{resp} vs $objects->[0]->{p}->[part #]->{resp}
265 0         0 my $part = 0;
266 0   0     0 my $arrayref = $objects->[0]->{p} || $objects;
267 0 0       0 if (defined $arrayref) {
268 0         0 foreach my $cf (@{$arrayref}) {
  0         0  
269 0 0       0 if (exists $cf->{resp}) {
270 0         0 for (my $response=0; $response<@{$cf->{resp}}; $response++) {
  0         0  
271 0         0 my $tmp = $cf->{resp}->[$response];
272 0         0 my $tmpcf = $tmp->{cf}; # Part confidence
273 0         0 my $tmpct = $tmp->{ct}; # Part contested?
274 0         0 my $engine = $cf->{sent}->[$response]->{e};
275              
276             # These should always be set, but just in case ...
277 0 0       0 $tmpcf = 0 unless defined $tmpcf;
278 0 0       0 $tmpct = 0 unless defined $tmpct;
279 0 0       0 $engine = 0 unless defined $engine;
280              
281 0         0 push(@results,
282             { part => $part, engine => $engine, contested => $tmpct, confidence => $tmpcf });
283             }
284             }
285             else {
286 0         0 push(@results, { part => $part, noresponse => 1 });
287             }
288 0         0 $part++;
289             }
290             }
291             else {
292             # If we have some new $objects format that isn't close to
293             # the current razor-agents 2.x version, we won't FP but we
294             # should alert in debug.
295 0         0 dbg("$debug: it looks like the internal Razor object has changed format!");
296             }
297             }
298             }
299             else {
300 0         0 warn "$debug: undefined Razor2::Client::Agent\n";
301             }
302            
303 0         0 });
304              
305             # OK, that's enough Razor stuff. now, reset all that global
306             # state it futzes with :(
307             # work around serious brain damage in Razor2 (constant seed)
308 0         0 $rnd ^= int(rand(0xffffffff)); # mix old acc with whatever came out of razor
309 0         0 srand; # let Perl give it a try ...
310 0         0 $rnd ^= int(rand(0xffffffff)); # ... and mix-in that too
311 0         0 srand($rnd & 0x7fffffff); # reseed, keep it unsigned 32-bit just in case
312              
313 0         0 Mail::SpamAssassin::PerMsgStatus::leave_helper_run_mode($self);
314              
315 0 0       0 if ($timer->timed_out()) {
316 0         0 dbg("$debug: razor2 $type timed out after $timeout seconds");
317             }
318              
319 0 0       0 if ($err) {
320 0         0 chomp $err;
321 0 0       0 if ($err =~ /(?:could not connect|network is unreachable)/) {
    0          
322             # make this a dbg(); SpamAssassin will still continue,
323             # but without Razor checking. otherwise there may be
324             # DSNs and errors in syslog etc., yuck
325 0         0 dbg("$debug: razor2 $type could not connect to any servers");
326             } elsif ($err =~ /timeout/i) {
327 0         0 dbg("$debug: razor2 $type timed out connecting to servers");
328             } else {
329 0         0 warn("$debug: razor2 $type failed: $! $err");
330             }
331             }
332              
333             # razor also debugs to stdout. argh. fix it to stderr...
334 0 0       0 if (would_log('dbg', $debug)) {
335 0         0 open(STDOUT, ">&OLDOUT");
336 0         0 close OLDOUT;
337             }
338              
339 0 0       0 return wantarray ? ($return, @results) : $return;
340             }
341              
342             sub plugin_report {
343 0     0 1 0 my ($self, $options) = @_;
344              
345 0 0       0 return unless $self->{razor2_available};
346 0 0       0 return if $self->{main}->{local_tests_only};
347 0 0       0 return unless $self->{main}->{conf}->{use_razor2};
348 0 0       0 return if $options->{report}->{options}->{dont_report_to_razor};
349              
350 0 0       0 if ($self->razor2_access($options->{text}, 'report', undef)) {
351 0         0 $options->{report}->{report_available} = 1;
352 0         0 info('reporter: spam reported to Razor');
353 0         0 $options->{report}->{report_return} = 1;
354             }
355             else {
356 0         0 info('reporter: could not report spam to Razor');
357             }
358             }
359              
360             sub plugin_revoke {
361 0     0 1 0 my ($self, $options) = @_;
362              
363 0 0       0 return unless $self->{razor2_available};
364 0 0       0 return if $self->{main}->{local_tests_only};
365 0 0       0 return unless $self->{main}->{conf}->{use_razor2};
366 0 0       0 return if $options->{revoke}->{options}->{dont_report_to_razor};
367              
368 0 0       0 if ($self->razor2_access($options->{text}, 'revoke', undef)) {
369 0         0 $options->{revoke}->{revoke_available} = 1;
370 0         0 info('reporter: spam revoked from Razor');
371 0         0 $options->{revoke}->{revoke_return} = 1;
372             }
373             else {
374 0         0 info('reporter: could not revoke spam from Razor');
375             }
376             }
377              
378             sub check_razor2 {
379 4     4 0 14 my ($self, $permsgstatus, $full) = @_;
380              
381 4 50       29 return $permsgstatus->{razor2_result} if (defined $permsgstatus->{razor2_result});
382 4         16 $permsgstatus->{razor2_result} = 0;
383 4         24 $permsgstatus->{razor2_cf_score} = { '4' => 0, '8' => 0 };
384              
385 4 50       80 return unless $self->{razor2_available};
386 0 0         return unless $self->{main}->{conf}->{use_razor2};
387              
388 0           my $timer = $self->{main}->time_method("check_razor2");
389              
390 0           my $return;
391             my @results;
392              
393             # TODO: check for cache header, set results appropriately
394              
395             # do it this way to make it easier to get out the results later from the
396             # netcache plugin
397             ($return, @results) =
398 0           $self->razor2_access($full, 'check', $permsgstatus->{master_deadline});
399 0           $self->{main}->call_plugins ('process_razor_result',
400             { results => \@results, permsgstatus => $permsgstatus }
401             );
402              
403 0           foreach my $result (@results) {
404 0 0         if (exists $result->{result}) {
    0          
405 0 0         $permsgstatus->{razor2_result} = $result->{result} if $result->{result};
406             }
407             elsif ($result->{noresponse}) {
408 0           dbg('razor2: part=' . $result->{part} . ' noresponse');
409             }
410             else {
411             dbg('razor2: part=' . $result->{part} .
412             ' engine=' . $result->{engine} .
413             ' contested=' . $result->{contested} .
414 0           ' confidence=' . $result->{confidence});
415              
416 0 0         next if $result->{contested};
417              
418 0   0       my $cf = $permsgstatus->{razor2_cf_score}->{$result->{engine}} || 0;
419 0 0         if ($result->{confidence} > $cf) {
420 0           $permsgstatus->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
421             }
422             }
423             }
424              
425 0           dbg("razor2: results: spam? " . $permsgstatus->{razor2_result});
426 0           while(my ($engine, $cf) = each %{$permsgstatus->{razor2_cf_score}}) {
  0            
427 0           dbg("razor2: results: engine $engine, highest cf score: $cf");
428             }
429              
430 0           return $permsgstatus->{razor2_result};
431             }
432              
433             # Check the cf (confidence) value of a given message and return true
434             # if it lies within the given range (inclusive)
435             sub check_razor2_range {
436 0     0 0   my ($self, $permsgstatus, $body, $engine, $min, $max) = @_;
437              
438             # If Razor2 isn't available, or the general test is disabled, don't
439             # continue.
440 0 0         return unless $self->{razor2_available};
441 0 0         return unless $self->{main}->{conf}->{use_razor2};
442 0 0         return unless $self->{main}->{conf}->{scores}->{'RAZOR2_CHECK'};
443              
444             # If Razor2 hasn't been checked yet, go ahead and run it.
445 0 0         unless (defined $permsgstatus->{razor2_result}) {
446 0           $self->check_razor2($permsgstatus, $body);
447             }
448              
449 0           my $cf = 0;
450 0 0         if ($engine) {
451 0           $cf = $permsgstatus->{razor2_cf_score}->{$engine};
452 0 0         return unless defined $cf;
453             }
454             else {
455             # If no specific engine was given to the rule, find the highest cf
456             # determined and use that
457 0           while(my ($engine, $ecf) = each %{$permsgstatus->{razor2_cf_score}}) {
  0            
458 0 0         if ($ecf > $cf) {
459 0           $cf = $ecf;
460             }
461             }
462             }
463              
464 0 0 0       if ($cf >= $min && $cf <= $max) {
465 0           $permsgstatus->test_log(sprintf("cf: %3d", $cf));
466 0           return 1;
467             }
468              
469 0           return;
470             }
471              
472             1;
473              
474             =back
475              
476             =cut