File Coverage

blib/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
Criterion Covered Total %
statement 42 191 21.9
branch 2 80 2.5
condition 1 18 5.5
subroutine 9 19 47.3
pod 2 8 25.0
total 56 316 17.7


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Plugin::Pyzor - perform Pyzor check of messages
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::Pyzor
25              
26             =head1 DESCRIPTION
27              
28             Pyzor is a collaborative, networked system to detect and block spam
29             using identifying digests of messages.
30              
31             See http://pyzor.org/ for more information about Pyzor.
32              
33             =cut
34              
35              
36             use Mail::SpamAssassin::Plugin;
37 22     22   160 use Mail::SpamAssassin::Logger;
  22         46  
  22         667  
38 22     22   109 use Mail::SpamAssassin::Timeout;
  22         45  
  22         1407  
39 22     22   131 use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path
  22         39  
  22         689  
40 22         1723 proc_status_ok exit_status_str);
41 22     22   117 use strict;
  22         37  
42 22     22   136 use warnings;
  22         47  
  22         503  
43 22     22   103 # use bytes;
  22         52  
  22         726  
44             use re 'taint';
45 22     22   124  
  22         52  
  22         45577  
46             our @ISA = qw(Mail::SpamAssassin::Plugin);
47              
48             my $class = shift;
49             my $mailsaobject = shift;
50 63     63 1 235  
51 63         160 $class = ref($class) || $class;
52             my $self = $class->SUPER::new($mailsaobject);
53 63   33     412 bless ($self, $class);
54 63         389  
55 63         153 # are network tests enabled?
56             if ($mailsaobject->{local_tests_only}) {
57             $self->{pyzor_available} = 0;
58 63 100       235 dbg("pyzor: local tests only, disabling Pyzor");
59 62         234 }
60 62         233 else {
61             $self->{pyzor_available} = 1;
62             dbg("pyzor: network tests on, attempting Pyzor");
63 1         4 }
64 1         3  
65             $self->register_eval_rule("check_pyzor");
66              
67 63         273 $self->set_config($mailsaobject->{conf});
68              
69 63         294 return $self;
70             }
71 63         635  
72             my ($self, $conf) = @_;
73             my @cmds;
74              
75 63     63 0 147 =head1 USER OPTIONS
76 63         118  
77             =over 4
78              
79             =item use_pyzor (0|1) (default: 1)
80              
81             Whether to use Pyzor, if it is available.
82              
83             =cut
84              
85             push (@cmds, {
86             setting => 'use_pyzor',
87             default => 1,
88 63         306 type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
89             });
90              
91             =item pyzor_max NUMBER (default: 5)
92              
93             This option sets how often a message's body checksum must have been
94             reported to the Pyzor server before SpamAssassin will consider the Pyzor
95             check as matched.
96              
97             As most clients should not be auto-reporting these checksums, you should
98             set this to a relatively low value, e.g. C<5>.
99              
100             =cut
101              
102             push (@cmds, {
103             setting => 'pyzor_max',
104             default => 5,
105 63         239 type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
106             });
107              
108             =back
109              
110             =head1 ADMINISTRATOR OPTIONS
111              
112             =over 4
113              
114             =item pyzor_timeout n (default: 3.5)
115              
116             How many seconds you wait for Pyzor to complete, before scanning continues
117             without the Pyzor results. A numeric value is optionally suffixed by a
118             time unit (s, m, h, d, w, indicating seconds (default), minutes, hours,
119             days, weeks).
120              
121             You can configure Pyzor to have its own per-server timeout. Set this
122             plugin's timeout with that in mind. This plugin's timeout is a maximum
123             ceiling. If Pyzor takes longer than this to complete its communication
124             with all servers, no results are used by SpamAssassin.
125              
126             Pyzor servers do not yet synchronize their servers, so it can be
127             beneficial to check and report to more than one. See the pyzor-users
128             mailing list for alternate servers that are not published via
129             'pyzor discover'.
130              
131             If you are using multiple Pyzor servers, a good rule of thumb would be to
132             set the SpamAssassin plugin's timeout to be the same or just a bit more
133             than the per-server Pyzor timeout (e.g., 3.5 and 2 for two Pyzor servers).
134             If more than one of your Pyzor servers is always timing out, consider
135             removing one of them.
136              
137             =cut
138              
139             push (@cmds, {
140             setting => 'pyzor_timeout',
141             is_admin => 1,
142 63         260 default => 3.5,
143             type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION
144             });
145              
146             =item pyzor_options options
147              
148             Specify additional options to the pyzor(1) command. Please note that only
149             characters in the range [0-9A-Za-z =,._/-] are allowed for security reasons.
150              
151             =cut
152              
153             push (@cmds, {
154             setting => 'pyzor_options',
155             is_admin => 1,
156             default => '',
157             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
158             code => sub {
159             my ($self, $key, $value, $line) = @_;
160             if ($value !~ m{^([0-9A-Za-z =,._/-]+)$}) {
161             return $Mail::SpamAssassin::Conf::INVALID_VALUE;
162 0     0   0 }
163 0 0       0 $self->{pyzor_options} = $1;
164 0         0 }
165             });
166 0         0  
167             =item pyzor_path STRING
168 63         611  
169             This option tells SpamAssassin specifically where to find the C<pyzor>
170             client instead of relying on SpamAssassin to find it in the current
171             PATH. Note that if I<taint mode> is enabled in the Perl interpreter,
172             you should use this, as the current PATH will have been cleared.
173              
174             =cut
175              
176             push (@cmds, {
177             setting => 'pyzor_path',
178             is_admin => 1,
179             default => undef,
180             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
181             code => sub {
182             my ($self, $key, $value, $line) = @_;
183             if (!defined $value || !length $value) {
184             return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
185 0     0   0 }
186 0 0 0     0 $value = untaint_file_path($value);
187 0         0 if (!-x $value) {
188             info("config: pyzor_path \"$value\" isn't an executable");
189 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
190 0 0       0 }
191 0         0  
192 0         0 $self->{pyzor_path} = $value;
193             }
194             });
195 0         0  
196             $conf->{parser}->register_commands(\@cmds);
197 63         490 }
198              
199 63         291 my ($self) = @_;
200              
201             my $pyzor = $self->{main}->{conf}->{pyzor_path} || '';
202             unless ($pyzor) {
203 0     0 0   $pyzor = Mail::SpamAssassin::Util::find_executable_in_env_path('pyzor');
204             }
205 0   0       unless ($pyzor && -x $pyzor) {
206 0 0         dbg("pyzor: pyzor is not available: no pyzor executable found");
207 0           return 0;
208             }
209 0 0 0        
210 0           # remember any found pyzor
211 0           $self->{main}->{conf}->{pyzor_path} = $pyzor;
212              
213             dbg("pyzor: pyzor is available: " . $self->{main}->{conf}->{pyzor_path});
214             return 1;
215 0           }
216              
217 0           my ($self) = @_;
218 0            
219             if (!$self->{main}->{conf}->{use_pyzor}) {
220             dbg("pyzor: use_pyzor option not enabled, disabling Pyzor");
221             $self->{pyzor_interface} = "disabled";
222 0     0 0   $self->{pyzor_available} = 0;
223             }
224 0 0         elsif ($self->is_pyzor_available()) {
    0          
225 0           $self->{pyzor_interface} = "pyzor";
226 0           $self->{pyzor_available} = 1;
227 0           }
228             else {
229             dbg("pyzor: no pyzor found, disabling Pyzor");
230 0           $self->{pyzor_available} = 0;
231 0           }
232             }
233              
234 0           my ($self, $permsgstatus, $full) = @_;
235 0            
236             # initialize valid tags
237             $permsgstatus->{tag_data}->{PYZOR} = "";
238              
239             my $timer = $self->{main}->time_method("check_pyzor");
240 0     0 0    
241             $self->get_pyzor_interface();
242             return 0 unless $self->{pyzor_available};
243 0            
244             return $self->pyzor_lookup($permsgstatus, $full);
245 0           }
246              
247 0           my ($self, $permsgstatus, $fulltext) = @_;
248 0 0         my @response;
249             my $pyzor_count;
250 0           my $pyzor_whitelisted;
251             my $timeout = $self->{main}->{conf}->{pyzor_timeout};
252              
253             $pyzor_count = 0;
254 0     0 0   $pyzor_whitelisted = 0;
255 0           my $pid;
256              
257 0           # use a temp file here -- open2() is unreliable, buffering-wise, under spamd
258 0           my $tmpf = $permsgstatus->create_fulltext_tmpfile($fulltext);
259              
260 0           # note: not really tainted, this came from system configuration file
261 0           my $path = untaint_file_path($self->{main}->{conf}->{pyzor_path});
262 0           my $opts = untaint_var($self->{main}->{conf}->{pyzor_options}) || '';
263              
264             $permsgstatus->enter_helper_run_mode();
265 0            
266             my $timer = Mail::SpamAssassin::Timeout->new(
267             { secs => $timeout, deadline => $permsgstatus->{master_deadline} });
268 0           my $err = $timer->run_and_catch(sub {
269 0   0        
270             local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
271 0          
272             dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "check", "< $tmpf"));
273              
274 0           $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
275             $tmpf, 1, $path, split(' ', $opts), "check");
276             $pid or die "$!\n";
277 0     0      
  0            
278             # read+split avoids a Perl I/O bug (Bug 5985)
279 0           my($inbuf,$nread,$resp); $resp = '';
280             while ( $nread=read(PYZOR,$inbuf,8192) ) { $resp .= $inbuf }
281 0           defined $nread or die "error reading from pipe: $!";
282             @response = split(/^/m, $resp, -1); undef $resp;
283 0 0          
284             my $errno = 0; close PYZOR or $errno = $!;
285             if (proc_status_ok($?,$errno)) {
286 0           dbg("pyzor: [%s] finished successfully", $pid);
  0            
287 0           } elsif (proc_status_ok($?,$errno, 0,1)) { # sometimes it exits with 1
  0            
288 0 0         dbg("pyzor: [%s] finished: %s", $pid, exit_status_str($?,$errno));
289 0           } else {
  0            
290             info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
291 0 0         }
  0            
292 0 0          
    0          
293 0           if (!@response) {
294             # this exact string is needed below
295 0           warn("no response\n"); # yes, this is possible
296             return;
297 0           }
298             chomp for @response;
299              
300 0 0         if ($response[0] =~ /^Traceback/) {
301             warn("internal error, python traceback seen in response: ".
302 0           join("\\n", @response));
303 0           } else {
304             dbg("pyzor: got response: ".join("\\n", @response));
305 0           }
306              
307 0 0         });
308 0            
309             if (defined(fileno(*PYZOR))) { # still open
310             if ($pid) {
311 0           if (kill('TERM',$pid)) { dbg("pyzor: killed stale helper [$pid]") }
312             else { dbg("pyzor: killing helper application [$pid] failed: $!") }
313             }
314 0           my $errno = 0; close PYZOR or $errno = $!;
315             proc_status_ok($?,$errno)
316 0 0         or info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
317 0 0         }
318 0 0         $permsgstatus->leave_helper_run_mode();
  0            
319 0            
320             if ($timer->timed_out()) {
321 0 0         dbg("pyzor: check timed out after $timeout seconds");
  0            
322 0 0         return 0;
323             }
324              
325 0           if ($err) {
326             chomp $err;
327 0 0         if ($err eq "__brokenpipe__ignore__") {
328 0           dbg("pyzor: check failed: broken pipe");
329 0           } elsif ($err eq "no response") {
330             dbg("pyzor: check failed: no response");
331             } else {
332 0 0         warn("pyzor: check failed: $err\n");
333 0           }
334 0 0         return 0;
    0          
335 0           }
336              
337 0           foreach my $one_response (@response) {
338             # this regexp is intended to be a little bit forgiving
339 0           if ($one_response =~ /^\S+\t.*?\t(\d+)\t(\d+)\s*$/) {
340             # until pyzor servers can sync their DBs,
341 0           # sum counts obtained from all servers
342             $pyzor_whitelisted += $2+0;
343             $pyzor_count += $1+0;
344 0           }
345             else {
346 0 0         # warn on failures to parse
347             dbg("pyzor: failure to parse response \"$one_response\"");
348             }
349 0           }
350 0            
351             $permsgstatus->set_tag('PYZOR', $pyzor_whitelisted ? "Whitelisted."
352             : "Reported $pyzor_count times.");
353              
354 0           if ($pyzor_count >= $self->{main}->{conf}->{pyzor_max}) {
355             dbg("pyzor: listed: COUNT=$pyzor_count/$self->{main}->{conf}->{pyzor_max} WHITELIST=$pyzor_whitelisted");
356             return 1;
357             }
358 0 0          
359             return 0;
360             }
361 0 0          
362 0           my ($self, $options) = @_;
363 0            
364             return unless $self->{pyzor_available};
365             return unless $self->{main}->{conf}->{use_pyzor};
366 0            
367             if (!$options->{report}->{options}->{dont_report_to_pyzor} && $self->is_pyzor_available())
368             {
369             # use temporary file: open2() is unreliable due to buffering under spamd
370 0     0 1   my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text});
371             if ($self->pyzor_report($options, $tmpf)) {
372 0 0         $options->{report}->{report_available} = 1;
373 0 0         info("reporter: spam reported to Pyzor");
374             $options->{report}->{report_return} = 1;
375 0 0 0       }
376             else {
377             info("reporter: could not report spam to Pyzor");
378 0           }
379 0 0         $options->{report}->delete_fulltext_tmpfile();
380 0           }
381 0           }
382 0            
383             my ($self, $options, $tmpf) = @_;
384              
385 0           # note: not really tainted, this came from system configuration file
386             my $path = untaint_file_path($options->{report}->{conf}->{pyzor_path});
387 0           my $opts = untaint_var($options->{report}->{conf}->{pyzor_options}) || '';
388              
389             my $timeout = $self->{main}->{conf}->{pyzor_timeout};
390              
391             $options->{report}->enter_helper_run_mode();
392 0     0 0    
393             my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
394             my $err = $timer->run_and_catch(sub {
395 0            
396 0   0       local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
397              
398 0           dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "report", "< $tmpf"));
399              
400 0           my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
401             $tmpf, 1, $path, split(' ', $opts), "report");
402 0           $pid or die "$!\n";
403              
404             my($inbuf,$nread,$nread_all); $nread_all = 0;
405 0     0     # response is ignored, just check its existence
  0            
406             while ( $nread=read(PYZOR,$inbuf,8192) ) { $nread_all += $nread }
407 0           defined $nread or die "error reading from pipe: $!";
408              
409 0           dbg("pyzor: empty response") if $nread_all < 1;
410              
411 0 0         my $errno = 0; close PYZOR or $errno = $!;
412             # closing a pipe also waits for the process executing on the pipe to
413 0           # complete, no need to explicitly call waitpid
  0            
414             # my $child_stat = waitpid($pid,0) > 0 ? $? : undef;
415 0           if (proc_status_ok($?,$errno, 0)) {
  0            
416 0 0         dbg("pyzor: [%s] reporter finished successfully", $pid);
417             } else {
418 0 0         info("pyzor: [%s] reporter error: %s", $pid, exit_status_str($?,$errno));
419             }
420 0 0          
  0            
421             });
422              
423             $options->{report}->leave_helper_run_mode();
424 0 0          
425 0           if ($timer->timed_out()) {
426             dbg("reporter: pyzor report timed out after $timeout seconds");
427 0           return 0;
428             }
429              
430 0           if ($err) {
431             chomp $err;
432 0           if ($err eq '__brokenpipe__ignore__') {
433             dbg("reporter: pyzor report failed: broken pipe");
434 0 0         } else {
435 0           warn("reporter: pyzor report failed: $err\n");
436 0           }
437             return 0;
438             }
439 0 0          
440 0           return 1;
441 0 0         }
442 0            
443             1;
444 0            
445             =back
446 0            
447             =cut