File Coverage

blib/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
Criterion Covered Total %
statement 42 190 22.1
branch 2 80 2.5
condition 1 18 5.5
subroutine 9 19 47.3
pod 2 8 25.0
total 56 315 17.7


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Plugin::Pyzor - perform Pyzor check of messages
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::Pyzor
25              
26             =head1 DESCRIPTION
27              
28             Pyzor is a collaborative, networked system to detect and block spam
29             using identifying digests of messages.
30              
31             See http://pyzor.org/ for more information about Pyzor.
32              
33             =cut
34              
35             package Mail::SpamAssassin::Plugin::Pyzor;
36              
37 22     22   158 use Mail::SpamAssassin::Plugin;
  22         47  
  22         768  
38 22     22   122 use Mail::SpamAssassin::Logger;
  22         53  
  22         1242  
39 22     22   141 use Mail::SpamAssassin::Timeout;
  22         62  
  22         689  
40 22         1639 use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path
41 22     22   135 proc_status_ok exit_status_str);
  22         43  
42 22     22   149 use strict;
  22         48  
  22         576  
43 22     22   135 use warnings;
  22         45  
  22         688  
44             # use bytes;
45 22     22   129 use re 'taint';
  22         55  
  22         48648  
46              
47             our @ISA = qw(Mail::SpamAssassin::Plugin);
48              
49             sub new {
50 63     63 1 215 my $class = shift;
51 63         156 my $mailsaobject = shift;
52              
53 63   33     454 $class = ref($class) || $class;
54 63         370 my $self = $class->SUPER::new($mailsaobject);
55 63         175 bless ($self, $class);
56              
57             # are network tests enabled?
58 63 100       242 if ($mailsaobject->{local_tests_only}) {
59 62         254 $self->{pyzor_available} = 0;
60 62         243 dbg("pyzor: local tests only, disabling Pyzor");
61             }
62             else {
63 1         6 $self->{pyzor_available} = 1;
64 1         4 dbg("pyzor: network tests on, attempting Pyzor");
65             }
66              
67 63         327 $self->register_eval_rule("check_pyzor");
68              
69 63         308 $self->set_config($mailsaobject->{conf});
70              
71 63         658 return $self;
72             }
73              
74             sub set_config {
75 63     63 0 202 my ($self, $conf) = @_;
76 63         127 my @cmds;
77              
78             =head1 USER OPTIONS
79              
80             =over 4
81              
82             =item use_pyzor (0|1) (default: 1)
83              
84             Whether to use Pyzor, if it is available.
85              
86             =cut
87              
88 63         299 push (@cmds, {
89             setting => 'use_pyzor',
90             default => 1,
91             type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
92             });
93              
94             =item pyzor_max NUMBER (default: 5)
95              
96             This option sets how often a message's body checksum must have been
97             reported to the Pyzor server before SpamAssassin will consider the Pyzor
98             check as matched.
99              
100             As most clients should not be auto-reporting these checksums, you should
101             set this to a relatively low value, e.g. C<5>.
102              
103             =cut
104              
105 63         314 push (@cmds, {
106             setting => 'pyzor_max',
107             default => 5,
108             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
109             });
110              
111             =back
112              
113             =head1 ADMINISTRATOR OPTIONS
114              
115             =over 4
116              
117             =item pyzor_timeout n (default: 3.5)
118              
119             How many seconds you wait for Pyzor to complete, before scanning continues
120             without the Pyzor results. A numeric value is optionally suffixed by a
121             time unit (s, m, h, d, w, indicating seconds (default), minutes, hours,
122             days, weeks).
123              
124             You can configure Pyzor to have its own per-server timeout. Set this
125             plugin's timeout with that in mind. This plugin's timeout is a maximum
126             ceiling. If Pyzor takes longer than this to complete its communication
127             with all servers, no results are used by SpamAssassin.
128              
129             Pyzor servers do not yet synchronize their servers, so it can be
130             beneficial to check and report to more than one. See the pyzor-users
131             mailing list for alternate servers that are not published via
132             'pyzor discover'.
133              
134             If you are using multiple Pyzor servers, a good rule of thumb would be to
135             set the SpamAssassin plugin's timeout to be the same or just a bit more
136             than the per-server Pyzor timeout (e.g., 3.5 and 2 for two Pyzor servers).
137             If more than one of your Pyzor servers is always timing out, consider
138             removing one of them.
139              
140             =cut
141              
142 63         359 push (@cmds, {
143             setting => 'pyzor_timeout',
144             is_admin => 1,
145             default => 3.5,
146             type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION
147             });
148              
149             =item pyzor_options options
150              
151             Specify additional options to the pyzor(1) command. Please note that only
152             characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons.
153              
154             =cut
155              
156             push (@cmds, {
157             setting => 'pyzor_options',
158             is_admin => 1,
159             default => '',
160             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
161             code => sub {
162 0     0   0 my ($self, $key, $value, $line) = @_;
163 0 0       0 if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) {
164 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
165             }
166 0         0 $self->{pyzor_options} = $1;
167             }
168 63         692 });
169              
170             =item pyzor_path STRING
171              
172             This option tells SpamAssassin specifically where to find the C<pyzor>
173             client instead of relying on SpamAssassin to find it in the current
174             PATH. Note that if I<taint mode> is enabled in the Perl interpreter,
175             you should use this, as the current PATH will have been cleared.
176              
177             =cut
178              
179             push (@cmds, {
180             setting => 'pyzor_path',
181             is_admin => 1,
182             default => undef,
183             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
184             code => sub {
185 0     0   0 my ($self, $key, $value, $line) = @_;
186 0 0 0     0 if (!defined $value || !length $value) {
187 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
188             }
189 0         0 $value = untaint_file_path($value);
190 0 0       0 if (!-x $value) {
191 0         0 info("config: pyzor_path \"$value\" isn't an executable");
192 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
193             }
194              
195 0         0 $self->{pyzor_path} = $value;
196             }
197 63         619 });
198              
199 63         310 $conf->{parser}->register_commands(\@cmds);
200             }
201              
202             sub is_pyzor_available {
203 0     0 0   my ($self) = @_;
204              
205 0   0       my $pyzor = $self->{main}->{conf}->{pyzor_path} || '';
206 0 0         unless ($pyzor) {
207 0           $pyzor = Mail::SpamAssassin::Util::find_executable_in_env_path('pyzor');
208             }
209 0 0 0       unless ($pyzor && -x $pyzor) {
210 0           dbg("pyzor: pyzor is not available: no pyzor executable found");
211 0           return 0;
212             }
213              
214             # remember any found pyzor
215 0           $self->{main}->{conf}->{pyzor_path} = $pyzor;
216              
217 0           dbg("pyzor: pyzor is available: " . $self->{main}->{conf}->{pyzor_path});
218 0           return 1;
219             }
220              
221             sub get_pyzor_interface {
222 0     0 0   my ($self) = @_;
223              
224 0 0         if (!$self->{main}->{conf}->{use_pyzor}) {
    0          
225 0           dbg("pyzor: use_pyzor option not enabled, disabling Pyzor");
226 0           $self->{pyzor_interface} = "disabled";
227 0           $self->{pyzor_available} = 0;
228             }
229             elsif ($self->is_pyzor_available()) {
230 0           $self->{pyzor_interface} = "pyzor";
231 0           $self->{pyzor_available} = 1;
232             }
233             else {
234 0           dbg("pyzor: no pyzor found, disabling Pyzor");
235 0           $self->{pyzor_available} = 0;
236             }
237             }
238              
239             sub check_pyzor {
240 0     0 0   my ($self, $permsgstatus, $full) = @_;
241              
242             # initialize valid tags
243 0           $permsgstatus->{tag_data}->{PYZOR} = "";
244              
245 0           my $timer = $self->{main}->time_method("check_pyzor");
246              
247 0           $self->get_pyzor_interface();
248 0 0         return 0 unless $self->{pyzor_available};
249              
250 0           return $self->pyzor_lookup($permsgstatus, $full);
251             }
252              
253             sub pyzor_lookup {
254 0     0 0   my ($self, $permsgstatus, $fulltext) = @_;
255 0           my @response;
256             my $pyzor_count;
257 0           my $pyzor_whitelisted;
258 0           my $timeout = $self->{main}->{conf}->{pyzor_timeout};
259              
260 0           $pyzor_count = 0;
261 0           $pyzor_whitelisted = 0;
262 0           my $pid;
263              
264             # use a temp file here -- open2() is unreliable, buffering-wise, under spamd
265 0           my $tmpf = $permsgstatus->create_fulltext_tmpfile($fulltext);
266              
267             # note: not really tainted, this came from system configuration file
268 0           my $path = untaint_file_path($self->{main}->{conf}->{pyzor_path});
269 0   0       my $opts = untaint_var($self->{main}->{conf}->{pyzor_options}) || '';
270              
271 0           $permsgstatus->enter_helper_run_mode();
272              
273             my $timer = Mail::SpamAssassin::Timeout->new(
274 0           { secs => $timeout, deadline => $permsgstatus->{master_deadline} });
275             my $err = $timer->run_and_catch(sub {
276              
277 0     0     local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
  0            
278            
279 0           dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "check", "< $tmpf"));
280              
281 0           $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
282             $tmpf, 1, $path, split(' ', $opts), "check");
283 0 0         $pid or die "$!\n";
284              
285             # read+split avoids a Perl I/O bug (Bug 5985)
286 0           my($inbuf,$nread,$resp); $resp = '';
  0            
287 0           while ( $nread=read(PYZOR,$inbuf,8192) ) { $resp .= $inbuf }
  0            
288 0 0         defined $nread or die "error reading from pipe: $!";
289 0           @response = split(/^/m, $resp, -1); undef $resp;
  0            
290              
291 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
292 0 0         if (proc_status_ok($?,$errno)) {
    0          
293 0           dbg("pyzor: [%s] finished successfully", $pid);
294             } elsif (proc_status_ok($?,$errno, 0,1)) { # sometimes it exits with 1
295 0           dbg("pyzor: [%s] finished: %s", $pid, exit_status_str($?,$errno));
296             } else {
297 0           info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
298             }
299              
300 0 0         if (!@response) {
301             # this exact string is needed below
302 0           warn("no response\n"); # yes, this is possible
303             }
304 0           chomp for @response;
305 0           dbg("pyzor: got response: " . join("\\n", @response));
306              
307 0 0         if ($response[0] =~ /^Traceback/) {
308 0           warn("internal error, python traceback seen in response\n");
309             }
310              
311 0           });
312              
313 0 0         if (defined(fileno(*PYZOR))) { # still open
314 0 0         if ($pid) {
315 0 0         if (kill('TERM',$pid)) { dbg("pyzor: killed stale helper [$pid]") }
  0            
316 0           else { dbg("pyzor: killing helper application [$pid] failed: $!") }
317             }
318 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
319 0 0         proc_status_ok($?,$errno)
320             or info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
321             }
322 0           $permsgstatus->leave_helper_run_mode();
323              
324 0 0         if ($timer->timed_out()) {
325 0           dbg("pyzor: check timed out after $timeout seconds");
326 0           return 0;
327             }
328              
329 0 0         if ($err) {
330 0           chomp $err;
331 0 0         if ($err eq "__brokenpipe__ignore__") {
    0          
332 0           dbg("pyzor: check failed: broken pipe");
333             } elsif ($err eq "no response") {
334 0           dbg("pyzor: check failed: no response");
335             } else {
336 0           warn("pyzor: check failed: $err\n");
337             }
338 0           return 0;
339             }
340              
341 0           foreach my $one_response (@response) {
342             # this regexp is intended to be a little bit forgiving
343 0 0         if ($one_response =~ /^\S+\t.*?\t(\d+)\t(\d+)\s*$/) {
344             # until pyzor servers can sync their DBs,
345             # sum counts obtained from all servers
346 0           $pyzor_whitelisted += $2+0;
347 0           $pyzor_count += $1+0;
348             }
349             else {
350             # warn on failures to parse
351 0           dbg("pyzor: failure to parse response \"$one_response\"");
352             }
353             }
354              
355 0 0         $permsgstatus->set_tag('PYZOR', $pyzor_whitelisted ? "Whitelisted."
356             : "Reported $pyzor_count times.");
357              
358 0 0         if ($pyzor_count >= $self->{main}->{conf}->{pyzor_max}) {
359 0           dbg("pyzor: listed: COUNT=$pyzor_count/$self->{main}->{conf}->{pyzor_max} WHITELIST=$pyzor_whitelisted");
360 0           return 1;
361             }
362              
363 0           return 0;
364             }
365              
366             sub plugin_report {
367 0     0 1   my ($self, $options) = @_;
368              
369 0 0         return unless $self->{pyzor_available};
370 0 0         return unless $self->{main}->{conf}->{use_pyzor};
371              
372 0 0 0       if (!$options->{report}->{options}->{dont_report_to_pyzor} && $self->is_pyzor_available())
373             {
374             # use temporary file: open2() is unreliable due to buffering under spamd
375 0           my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text});
376 0 0         if ($self->pyzor_report($options, $tmpf)) {
377 0           $options->{report}->{report_available} = 1;
378 0           info("reporter: spam reported to Pyzor");
379 0           $options->{report}->{report_return} = 1;
380             }
381             else {
382 0           info("reporter: could not report spam to Pyzor");
383             }
384 0           $options->{report}->delete_fulltext_tmpfile();
385             }
386             }
387              
388             sub pyzor_report {
389 0     0 0   my ($self, $options, $tmpf) = @_;
390              
391             # note: not really tainted, this came from system configuration file
392 0           my $path = untaint_file_path($options->{report}->{conf}->{pyzor_path});
393 0   0       my $opts = untaint_var($options->{report}->{conf}->{pyzor_options}) || '';
394              
395 0           my $timeout = $self->{main}->{conf}->{pyzor_timeout};
396              
397 0           $options->{report}->enter_helper_run_mode();
398              
399 0           my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
400             my $err = $timer->run_and_catch(sub {
401              
402 0     0     local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
  0            
403              
404 0           dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "report", "< $tmpf"));
405              
406 0           my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
407             $tmpf, 1, $path, split(' ', $opts), "report");
408 0 0         $pid or die "$!\n";
409              
410 0           my($inbuf,$nread,$nread_all); $nread_all = 0;
  0            
411             # response is ignored, just check its existence
412 0           while ( $nread=read(PYZOR,$inbuf,8192) ) { $nread_all += $nread }
  0            
413 0 0         defined $nread or die "error reading from pipe: $!";
414              
415 0 0         dbg("pyzor: empty response") if $nread_all < 1;
416              
417 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
418             # closing a pipe also waits for the process executing on the pipe to
419             # complete, no need to explicitly call waitpid
420             # my $child_stat = waitpid($pid,0) > 0 ? $? : undef;
421 0 0         if (proc_status_ok($?,$errno, 0)) {
422 0           dbg("pyzor: [%s] reporter finished successfully", $pid);
423             } else {
424 0           info("pyzor: [%s] reporter error: %s", $pid, exit_status_str($?,$errno));
425             }
426              
427 0           });
428              
429 0           $options->{report}->leave_helper_run_mode();
430              
431 0 0         if ($timer->timed_out()) {
432 0           dbg("reporter: pyzor report timed out after $timeout seconds");
433 0           return 0;
434             }
435              
436 0 0         if ($err) {
437 0           chomp $err;
438 0 0         if ($err eq '__brokenpipe__ignore__') {
439 0           dbg("reporter: pyzor report failed: broken pipe");
440             } else {
441 0           warn("reporter: pyzor report failed: $err\n");
442             }
443 0           return 0;
444             }
445              
446 0           return 1;
447             }
448              
449             1;
450              
451             =back
452              
453             =cut