File Coverage

blib/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
Criterion Covered Total %
statement 42 191 21.9
branch 2 80 2.5
condition 1 18 5.5
subroutine 9 19 47.3
pod 2 8 25.0
total 56 316 17.7


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17              
18             =head1 NAME
19              
20             Mail::SpamAssassin::Plugin::Pyzor - perform Pyzor check of messages
21              
22             =head1 SYNOPSIS
23              
24             loadplugin Mail::SpamAssassin::Plugin::Pyzor
25              
26             =head1 DESCRIPTION
27              
28             Pyzor is a collaborative, networked system to detect and block spam
29             using identifying digests of messages.
30              
31             See http://pyzor.org/ for more information about Pyzor.
32              
33             =cut
34              
35             package Mail::SpamAssassin::Plugin::Pyzor;
36              
37 21     21   162 use Mail::SpamAssassin::Plugin;
  21         58  
  21         649  
38 21     21   117 use Mail::SpamAssassin::Logger;
  21         48  
  21         1285  
39 21     21   159 use Mail::SpamAssassin::Timeout;
  21         44  
  21         693  
40 21         1782 use Mail::SpamAssassin::Util qw(untaint_var untaint_file_path
41 21     21   131 proc_status_ok exit_status_str);
  21         44  
42 21     21   141 use strict;
  21         49  
  21         524  
43 21     21   120 use warnings;
  21         45  
  21         757  
44             # use bytes;
45 21     21   141 use re 'taint';
  21         111  
  21         49296  
46              
47             our @ISA = qw(Mail::SpamAssassin::Plugin);
48              
49             sub new {
               # Constructor for the Pyzor plugin.
               # Arguments: the class name (or an existing object, whose class is
               # reused) and the main SpamAssassin object.
               # Builds the object through SUPER::new, records in {pyzor_available}
               # whether network tests are allowed (from {local_tests_only}),
               # registers the "check_pyzor" eval rule, registers this plugin's
               # configuration commands via set_config, and returns the object.
50 62     62 1 219 my $class = shift;
51 62         183 my $mailsaobject = shift;
52              
               # allow new() to be called on an instance as well as on a class name
53 62   33     498 $class = ref($class) || $class;
54 62         398 my $self = $class->SUPER::new($mailsaobject);
55 62         168 bless ($self, $class);
56              
57             # are network tests enabled?
58 62 100       273 if ($mailsaobject->{local_tests_only}) {
59 61         234 $self->{pyzor_available} = 0;
60 61         237 dbg("pyzor: local tests only, disabling Pyzor");
61             }
62             else {
63 1         7 $self->{pyzor_available} = 1;
64 1         4 dbg("pyzor: network tests on, attempting Pyzor");
65             }
66              
67 62         382 $self->register_eval_rule("check_pyzor");
68              
               # the config commands are registered on the parser found in this conf
69 62         354 $self->set_config($mailsaobject->{conf});
70              
71 62         695 return $self;
72             }
73              
74             sub set_config {
               # Declares this plugin's configuration settings and registers them
               # with the configuration parser.
               # Arguments: $self, and $conf, whose {parser} receives the commands.
               # Settings declared below: use_pyzor, pyzor_max, pyzor_timeout,
               # pyzor_options, pyzor_path. The last three are marked is_admin.
               # The return value is whatever register_commands returns.
75 62     62 0 234 my ($self, $conf) = @_;
76 62         132 my @cmds;
77              
78             =head1 USER OPTIONS
79              
80             =over 4
81              
82             =item use_pyzor (0|1) (default: 1)
83              
84             Whether to use Pyzor, if it is available.
85              
86             =cut
87              
88 62         324 push (@cmds, {
89             setting => 'use_pyzor',
90             default => 1,
91             type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL
92             });
93              
94             =item pyzor_max NUMBER (default: 5)
95              
96             This option sets how often a message's body checksum must have been
97             reported to the Pyzor server before SpamAssassin will consider the Pyzor
98             check as matched.
99              
100             As most clients should not be auto-reporting these checksums, you should
101             set this to a relatively low value, e.g. C<5>.
102              
103             =cut
104              
105 62         271 push (@cmds, {
106             setting => 'pyzor_max',
107             default => 5,
108             type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
109             });
110              
111             =back
112              
113             =head1 ADMINISTRATOR OPTIONS
114              
115             =over 4
116              
117             =item pyzor_timeout n (default: 3.5)
118              
119             How many seconds you wait for Pyzor to complete, before scanning continues
120             without the Pyzor results. A numeric value is optionally suffixed by a
121             time unit (s, m, h, d, w, indicating seconds (default), minutes, hours,
122             days, weeks).
123              
124             You can configure Pyzor to have its own per-server timeout. Set this
125             plugin's timeout with that in mind. This plugin's timeout is a maximum
126             ceiling. If Pyzor takes longer than this to complete its communication
127             with all servers, no results are used by SpamAssassin.
128              
129             Pyzor servers do not yet synchronize their servers, so it can be
130             beneficial to check and report to more than one. See the pyzor-users
131             mailing list for alternate servers that are not published via
132             'pyzor discover'.
133              
134             If you are using multiple Pyzor servers, a good rule of thumb would be to
135             set the SpamAssassin plugin's timeout to be the same or just a bit more
136             than the per-server Pyzor timeout (e.g., 3.5 and 2 for two Pyzor servers).
137             If more than one of your Pyzor servers is always timing out, consider
138             removing one of them.
139              
140             =cut
141              
142 62         334 push (@cmds, {
143             setting => 'pyzor_timeout',
144             is_admin => 1,
145             default => 3.5,
146             type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION
147             });
148              
149             =item pyzor_options options
150              
151             Specify additional options to the pyzor(1) command. Please note that only
152             characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons.
153              
154             =cut
155              
156             push (@cmds, {
157             setting => 'pyzor_options',
158             is_admin => 1,
159             default => '',
160             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
161             code => sub {
               # Handler for pyzor_options: the whole value must consist of the
               # allowed characters, otherwise INVALID_VALUE is returned. An empty
               # value also fails, because the pattern requires at least one
               # character. On success the matched text is stored.
               # NOTE(review): "use re 'taint'" is in force in this file, so $1 is
               # presumably still tainted here; pyzor_lookup and pyzor_report pass
               # the stored value through untaint_var before using it.
162 0     0   0 my ($self, $key, $value, $line) = @_;
163 0 0       0 if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) {
164 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
165             }
166 0         0 $self->{pyzor_options} = $1;
167             }
168 62         690 });
169              
170             =item pyzor_path STRING
171              
172             This option tells SpamAssassin specifically where to find the C<pyzor>
173             client instead of relying on SpamAssassin to find it in the current
174             PATH. Note that if I<taint mode> is enabled in the Perl interpreter,
175             you should use this, as the current PATH will have been cleared.
176              
177             =cut
178              
179             push (@cmds, {
180             setting => 'pyzor_path',
181             is_admin => 1,
182             default => undef,
183             type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
184             code => sub {
               # Handler for pyzor_path: an undefined or empty value yields
               # MISSING_REQUIRED_VALUE. Otherwise the value is passed through
               # untaint_file_path and must pass the -x file test at configuration
               # time; if it does not, the problem is logged with info() and
               # INVALID_VALUE is returned. On success the path is stored.
185 0     0   0 my ($self, $key, $value, $line) = @_;
186 0 0 0     0 if (!defined $value || !length $value) {
187 0         0 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
188             }
189 0         0 $value = untaint_file_path($value);
190 0 0       0 if (!-x $value) {
191 0         0 info("config: pyzor_path \"$value\" isn't an executable");
192 0         0 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
193             }
194              
195 0         0 $self->{pyzor_path} = $value;
196             }
197 62         571 });
198              
               # hand all five command definitions to the configuration parser
199 62         339 $conf->{parser}->register_commands(\@cmds);
200             }
201              
202             sub is_pyzor_available {
               # Reports whether a pyzor executable can be located.
               # Uses the configured pyzor_path when it is set to a true value,
               # otherwise asks find_executable_in_env_path for 'pyzor'.
               # Returns 1 when the resulting path passes the -x test, 0 otherwise.
               # Side effect: on success the path is written back to
               # {main}->{conf}->{pyzor_path}, so later calls reuse it.
203 0     0 0   my ($self) = @_;
204              
205 0   0       my $pyzor = $self->{main}->{conf}->{pyzor_path} || '';
206 0 0         unless ($pyzor) {
207 0           $pyzor = Mail::SpamAssassin::Util::find_executable_in_env_path('pyzor');
208             }
209 0 0 0       unless ($pyzor && -x $pyzor) {
210 0           dbg("pyzor: pyzor is not available: no pyzor executable found");
211 0           return 0;
212             }
213              
214             # remember any found pyzor
215 0           $self->{main}->{conf}->{pyzor_path} = $pyzor;
216              
217 0           dbg("pyzor: pyzor is available: " . $self->{main}->{conf}->{pyzor_path});
218 0           return 1;
219             }
220              
221             sub get_pyzor_interface {
               # Refreshes {pyzor_available} (and, in two of the three branches,
               # {pyzor_interface}) from the current configuration:
               #   use_pyzor false           -> interface "disabled", available 0
               #   pyzor executable found    -> interface "pyzor",    available 1
               #   otherwise                 -> available 0
               # NOTE(review): the final branch does not touch {pyzor_interface},
               # so it keeps whatever value it had before - confirm that is intended.
               # NOTE(review): this can set {pyzor_available} back to 1 even when
               # new() set it to 0 because of local_tests_only; nothing here
               # consults that flag - confirm callers guard against it.
222 0     0 0   my ($self) = @_;
223              
224 0 0         if (!$self->{main}->{conf}->{use_pyzor}) {
    0          
225 0           dbg("pyzor: use_pyzor option not enabled, disabling Pyzor");
226 0           $self->{pyzor_interface} = "disabled";
227 0           $self->{pyzor_available} = 0;
228             }
229             elsif ($self->is_pyzor_available()) {
230 0           $self->{pyzor_interface} = "pyzor";
231 0           $self->{pyzor_available} = 1;
232             }
233             else {
234 0           dbg("pyzor: no pyzor found, disabling Pyzor");
235 0           $self->{pyzor_available} = 0;
236             }
237             }
238              
239             sub check_pyzor {
               # Eval-rule entry point (registered under this name in new()).
               # Arguments: $self, the per-message status object, and the full
               # message text that is handed on to pyzor_lookup.
               # Returns 0 when Pyzor is unavailable, otherwise whatever
               # pyzor_lookup returns (1 for a hit, 0 otherwise).
240 0     0 0   my ($self, $permsgstatus, $full) = @_;
241              
242             # initialize valid tags
243 0           $permsgstatus->{tag_data}->{PYZOR} = "";
244              
               # $timer is not referenced again in this sub; presumably the object
               # returned by time_method records the elapsed time when it goes out
               # of scope - TODO confirm
245 0           my $timer = $self->{main}->time_method("check_pyzor");
246              
               # re-evaluate availability on every call before deciding to run
247 0           $self->get_pyzor_interface();
248 0 0         return 0 unless $self->{pyzor_available};
249              
250 0           return $self->pyzor_lookup($permsgstatus, $full);
251             }
252              
253             sub pyzor_lookup {
               # Runs "<pyzor_path> <pyzor_options> check" on the message and decides
               # whether it counts as listed.
               # Arguments: $self, the per-message status object, the message text.
               # Returns 1 when the summed report count is >= pyzor_max; returns 0
               # otherwise, including on timeout or any error raised inside the
               # timed section.
               # Side effects: sets the PYZOR tag on $permsgstatus, brackets the
               # helper run with enter/leave_helper_run_mode, and may send TERM to
               # a helper whose pipe is still open after the timed section.
254 0     0 0   my ($self, $permsgstatus, $fulltext) = @_;
255 0           my @response;
256             my $pyzor_count;
257 0           my $pyzor_whitelisted;
258 0           my $timeout = $self->{main}->{conf}->{pyzor_timeout};
259              
260 0           $pyzor_count = 0;
261 0           $pyzor_whitelisted = 0;
262 0           my $pid;
263              
264             # use a temp file here -- open2() is unreliable, buffering-wise, under spamd
265 0           my $tmpf = $permsgstatus->create_fulltext_tmpfile($fulltext);
266              
267             # note: not really tainted, this came from system configuration file
268 0           my $path = untaint_file_path($self->{main}->{conf}->{pyzor_path});
269 0   0       my $opts = untaint_var($self->{main}->{conf}->{pyzor_options}) || '';
270              
271 0           $permsgstatus->enter_helper_run_mode();
272              
               # the timer is limited both by pyzor_timeout and by the message's
               # {master_deadline}
273             my $timer = Mail::SpamAssassin::Timeout->new(
274 0           { secs => $timeout, deadline => $permsgstatus->{master_deadline} });
275             my $err = $timer->run_and_catch(sub {
276              
               # turn SIGPIPE into an exception with a marker string that is
               # recognised after the timed section
277 0     0     local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
  0            
278            
279 0           dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "check", "< $tmpf"));
280              
               # options are split on whitespace and passed as separate arguments;
               # the debug line above suggests $tmpf is the helper's stdin -
               # TODO confirm against helper_app_pipe_open
281 0           $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
282             $tmpf, 1, $path, split(' ', $opts), "check");
283 0 0         $pid or die "$!\n";
284              
285             # read+split avoids a Perl I/O bug (Bug 5985)
286 0           my($inbuf,$nread,$resp); $resp = '';
  0            
287 0           while ( $nread=read(PYZOR,$inbuf,8192) ) { $resp .= $inbuf }
  0            
               # read() returns undef on error and 0 at end of file, so an undefined
               # $nread after the loop means the read failed
288 0 0         defined $nread or die "error reading from pipe: $!";
               # split into lines, keeping each line's terminator (chomped later)
289 0           @response = split(/^/m, $resp, -1); undef $resp;
  0            
290              
291 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
               # the exit status only affects logging here, never the result
292 0 0         if (proc_status_ok($?,$errno)) {
    0          
293 0           dbg("pyzor: [%s] finished successfully", $pid);
294             } elsif (proc_status_ok($?,$errno, 0,1)) { # sometimes it exits with 1
295 0           dbg("pyzor: [%s] finished: %s", $pid, exit_status_str($?,$errno));
296             } else {
297 0           info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
298             }
299              
300 0 0         if (!@response) {
301             # this exact string is needed below
               # NOTE(review): this is warn(), not die(); whether the
               # $err eq "no response" test further down can ever match depends on
               # how run_and_catch reports warnings - confirm. Either way @response
               # is empty, so the parse loop below would do nothing.
302 0           warn("no response\n"); # yes, this is possible
303 0           return;
304             }
305 0           chomp for @response;
306 0           dbg("pyzor: got response: " . join("\\n", @response));
307              
               # a Python traceback only triggers a warning; the lines are still
               # handed to the parser below
308 0 0         if ($response[0] =~ /^Traceback/) {
309 0           warn("internal error, python traceback seen in response\n");
310             }
311              
312 0           });
313              
               # cleanup for the case where the timed section ended (timeout or
               # exception) before the pipe was closed
314 0 0         if (defined(fileno(*PYZOR))) { # still open
315 0 0         if ($pid) {
316 0 0         if (kill('TERM',$pid)) { dbg("pyzor: killed stale helper [$pid]") }
  0            
317 0           else { dbg("pyzor: killing helper application [$pid] failed: $!") }
318             }
319 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
320 0 0         proc_status_ok($?,$errno)
321             or info("pyzor: [%s] error: %s", $pid, exit_status_str($?,$errno));
322             }
323 0           $permsgstatus->leave_helper_run_mode();
324              
325 0 0         if ($timer->timed_out()) {
326 0           dbg("pyzor: check timed out after $timeout seconds");
327 0           return 0;
328             }
329              
               # any error from the timed section means "no hit"; the two marker
               # strings are logged at debug level, anything else is warned about
330 0 0         if ($err) {
331 0           chomp $err;
332 0 0         if ($err eq "__brokenpipe__ignore__") {
    0          
333 0           dbg("pyzor: check failed: broken pipe");
334             } elsif ($err eq "no response") {
335 0           dbg("pyzor: check failed: no response");
336             } else {
337 0           warn("pyzor: check failed: $err\n");
338             }
339 0           return 0;
340             }
341              
               # each accepted line has tab-separated fields and ends in two integer
               # fields: $1 is added to the report count, $2 to the whitelist count
342 0           foreach my $one_response (@response) {
343             # this regexp is intended to be a little bit forgiving
344 0 0         if ($one_response =~ /^\S+\t.*?\t(\d+)\t(\d+)\s*$/) {
345             # until pyzor servers can sync their DBs,
346             # sum counts obtained from all servers
347 0           $pyzor_whitelisted += $2+0;
348 0           $pyzor_count += $1+0;
349             }
350             else {
351             # warn on failures to parse
352 0           dbg("pyzor: failure to parse response \"$one_response\"");
353             }
354             }
355              
356 0 0         $permsgstatus->set_tag('PYZOR', $pyzor_whitelisted ? "Whitelisted."
357             : "Reported $pyzor_count times.");
358              
               # NOTE(review): the whitelist count changes the tag text and the debug
               # line only; the return value depends solely on the report count -
               # confirm that a whitelisted message is still meant to hit.
359 0 0         if ($pyzor_count >= $self->{main}->{conf}->{pyzor_max}) {
360 0           dbg("pyzor: listed: COUNT=$pyzor_count/$self->{main}->{conf}->{pyzor_max} WHITELIST=$pyzor_whitelisted");
361 0           return 1;
362             }
363              
364 0           return 0;
365             }
366              
367             sub plugin_report {
               # Reports a message to Pyzor when reporting is allowed.
               # Arguments: $self and $options; this sub reads $options->{report}
               # (the reporter object) and $options->{text} (the message text).
               # Does nothing when {pyzor_available} or use_pyzor is false, when the
               # reporter's dont_report_to_pyzor option is set, or when no pyzor
               # executable is found.
               # On a successful pyzor_report call it sets {report_available} and
               # {report_return} to 1 on the reporter; either outcome is logged with
               # info(). There is no explicit return value on the reporting path.
368 0     0 1   my ($self, $options) = @_;
369              
370 0 0         return unless $self->{pyzor_available};
371 0 0         return unless $self->{main}->{conf}->{use_pyzor};
372              
373 0 0 0       if (!$options->{report}->{options}->{dont_report_to_pyzor} && $self->is_pyzor_available())
374             {
375             # use temporary file: open2() is unreliable due to buffering under spamd
376 0           my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text});
377 0 0         if ($self->pyzor_report($options, $tmpf)) {
378 0           $options->{report}->{report_available} = 1;
379 0           info("reporter: spam reported to Pyzor");
380 0           $options->{report}->{report_return} = 1;
381             }
382             else {
383 0           info("reporter: could not report spam to Pyzor");
384             }
               # the temp file is removed whether or not the report succeeded
385 0           $options->{report}->delete_fulltext_tmpfile();
386             }
387             }
388              
389             sub pyzor_report {
               # Runs "<pyzor_path> <pyzor_options> report" for the message stored
               # in $tmpf.
               # Arguments: $self, $options (this sub uses $options->{report}, the
               # reporter object), and the temp file name.
               # Returns 0 on timeout or when the timed section raised an error,
               # 1 otherwise.
               # NOTE(review): path and options are read from
               # $options->{report}->{conf}, while the timeout is read from
               # $self->{main}->{conf} - presumably the same configuration; confirm.
390 0     0 0   my ($self, $options, $tmpf) = @_;
391              
392             # note: not really tainted, this came from system configuration file
393 0           my $path = untaint_file_path($options->{report}->{conf}->{pyzor_path});
394 0   0       my $opts = untaint_var($options->{report}->{conf}->{pyzor_options}) || '';
395              
396 0           my $timeout = $self->{main}->{conf}->{pyzor_timeout};
397              
398 0           $options->{report}->enter_helper_run_mode();
399              
               # only a seconds limit is set here; pyzor_lookup also passes a deadline
400 0           my $timer = Mail::SpamAssassin::Timeout->new({ secs => $timeout });
401             my $err = $timer->run_and_catch(sub {
402              
               # turn SIGPIPE into an exception with a marker string that is
               # recognised after the timed section
403 0     0     local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
  0            
404              
405 0           dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "report", "< $tmpf"));
406              
               # $pid is local to this closure, so it is not visible to the code
               # that runs after the timed section
407 0           my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
408             $tmpf, 1, $path, split(' ', $opts), "report");
409 0 0         $pid or die "$!\n";
410              
411 0           my($inbuf,$nread,$nread_all); $nread_all = 0;
  0            
412             # response is ignored, just check its existence
413 0           while ( $nread=read(PYZOR,$inbuf,8192) ) { $nread_all += $nread }
  0            
414 0 0         defined $nread or die "error reading from pipe: $!";
415              
416 0 0         dbg("pyzor: empty response") if $nread_all < 1;
417              
418 0 0         my $errno = 0; close PYZOR or $errno = $!;
  0            
419             # closing a pipe also waits for the process executing on the pipe to
420             # complete, no need to explicitly call waitpid
421             # my $child_stat = waitpid($pid,0) > 0 ? $? : undef;
               # NOTE(review): a failing exit status is only logged; the sub still
               # returns 1 afterwards, so the caller treats the report as
               # successful - confirm this is intended.
422 0 0         if (proc_status_ok($?,$errno, 0)) {
423 0           dbg("pyzor: [%s] reporter finished successfully", $pid);
424             } else {
425 0           info("pyzor: [%s] reporter error: %s", $pid, exit_status_str($?,$errno));
426             }
427              
428 0           });
429              
               # NOTE(review): unlike pyzor_lookup, nothing here closes PYZOR or
               # signals the helper if the timed section ended early - confirm
               # whether a timed-out helper can be left running.
430 0           $options->{report}->leave_helper_run_mode();
431              
432 0 0         if ($timer->timed_out()) {
433 0           dbg("reporter: pyzor report timed out after $timeout seconds");
434 0           return 0;
435             }
436              
437 0 0         if ($err) {
438 0           chomp $err;
439 0 0         if ($err eq '__brokenpipe__ignore__') {
440 0           dbg("reporter: pyzor report failed: broken pipe");
441             } else {
442 0           warn("reporter: pyzor report failed: $err\n");
443             }
444 0           return 0;
445             }
446              
447 0           return 1;
448             }
449              
450             1;
451              
452             =back
453              
454             =cut