File Coverage

blib/lib/Mail/SpamAssassin/Plugin/AttachmentPresent.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Copyright 2016 Web2All B.V.
3             #
4             # This Plugin is free software; you can redistribute
5             # it and/or modify it under the same terms as Perl 5.18.1.
6             #
7             # you may not use this file except in compliance with the License.
8             #
9             # Unless required by applicable law or agreed to in writing, software
10             # distributed under the License is distributed on an "AS IS" BASIS,
11             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12             #
13              
14             # Version control:
15             # $Revision: 164 $
16             # $Author: merijn $
17             # $Date: 2016-09-02 10:29:41 +0200 (Fri, 02 Sep 2016) $
18              
19             =head1 NAME
20              
21             Mail::SpamAssassin::Plugin::AttachmentPresent - SpamAssassin plugin to score mail based on attachments
22             including attachments inside archives.
23              
24             =head1 SYNOPSIS
25              
26             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
27             body RULENAME eval:attachmentpresent_archive_count()
28             body RULENAME eval:attachmentpresent_file_count()
29              
30             =head1 DESCRIPTION
31              
32             Get information about attached files, including inside archives.
33             Only supports Zip right now.
34              
35             =head1 CONFIGURATION
36              
37             None
38              
39             =head1 INSTALL
40              
41             =over
42              
43             =item Install the required Perl modules:
44              
45             Archive::Zip
46             IO::String
47             Mail::SpamAssassin::Plugin::AttachmentPresent
48              
49             Should already be installed by spamassassin
50              
51             =item Configure spamassassin
52              
53             Typically in local.cf, include lines:
54             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
55              
56             body HAS_JS_FILES eval:attachmentpresent_file_count('js')
57             describe HAS_JS_FILES The e-mail has attached javascript files (or inside archives)
58             score HAS_JS_FILES 0.001
59              
60             =back
61              
62             =cut
63              
64             package Mail::SpamAssassin::Plugin::AttachmentPresent;
65              
66 1     1   13622 use strict;
  1         1  
  1         23  
67 1     1   3 use warnings;
  1         1  
  1         22  
68              
69 1     1   197 use Mail::SpamAssassin::Plugin;
  0            
  0            
70             use Mail::SpamAssassin::Logger;
71              
72             use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
73             use IO::String;
74              
75             use base qw(Mail::SpamAssassin::Plugin);
76             our $VERSION = '1.03';
77              
78             our $LOG_FACILITY = 'AttachmentPresent';
79              
80             # Fields:
81             # mailsa - Mail::SpamAssassin instance
82             sub new {
83             my ($class, $mailsa) = @_;
84             $class = ref($class) || $class;
85             my $self = $class->SUPER::new($mailsa);
86             bless($self, $class);
87            
88             Mail::SpamAssassin::Logger::add_facilities($LOG_FACILITY);
89            
90             $self->register_eval_rule('attachmentpresent_archive_count');
91             $self->register_eval_rule('attachmentpresent_file_count');
92            
93             return $self;
94             }
95              
96              
97             sub l {
98             Mail::SpamAssassin::Logger::dbg("$LOG_FACILITY: " . join('', @_));
99             }
100              
101             sub _zip_error_handler(){
102             l("_zip_error_handler: ".$_[0]);
103             }
104              
105             sub _build_attachment_tree {
106             my ($self,$pms) = @_;
107            
108             # init storage
109             $pms->{'attachment_data'}={
110             'files' => [],
111             'archives' => []
112             };
113             l('_build_attachment_tree');
114             # $pms->{msg} Mail::SpamAssassin::Message
115             foreach my $part ($pms->{msg}->find_parts(qr/.*/, 1)) {
116             # $part Mail::SpamAssassin::Message::Node
117             # now we get all parts which are leaves (so text parts and attachments, not multiparts)
118             l('_build_attachment_tree->part');
119             # we ignore all parts which are part of the text body
120            
121             # For zipfiles, find out whats in them
122             # Content-Type: application/zip;
123             # Content-Transfer-Encoding: base64
124             my $ctt = lc($part->get_header('content-type') || '');
125             # Mail::SpamAssassin::Message::Node has _decode_header() method, but it doesnt decode
126             # Content-* headers and thus the filename in the Content-Type header is not decoded :(
127             # This seems to be hardcoded in the _decode_header() method so we cannot bypass it...
128             # so we included a modified version of _decode_header() in our module
129             # as decode_part_header()
130             $ctt = $self->decode_part_header($part, $ctt);
131             my $cte = lc($part->get_header('content-transfer-encoding') || '');
132            
133             l('_build_attachment_tree->part: content-type: '.$ctt);
134            
135             # consider the attachment a file if it has a name
136             my $attachment_filename='';
137             if($ctt =~ m/name\s*=\s*"?([^";]*)"?/is){
138             $attachment_filename=$1;
139             # lets be sure and remove any whitespace from the end
140             $attachment_filename =~ s/\s+$//;
141             l('_build_attachment_tree->part: part has name '.$attachment_filename);
142             push(@{$pms->{'attachment_data'}->{'files'}},$attachment_filename);
143             }
144            
145             # now process attachments
146            
147             # Zip
148             if ($ctt =~ /zip/i && $cte =~ /^base64$/){
149             # seems to be a zip attachment
150             l('_build_attachment_tree->part found Zip archive: '.$ctt);
151             # how much we grab? for now only 500kb, bigger files will just not
152             # be properly parsed as zip files
153             my $num_of_bytes=512000;
154            
155             my $zip_binary_head=$part->decode($num_of_bytes);
156             # use Archive::Zip
157             my $SH = IO::String->new($zip_binary_head);
158              
159             Archive::Zip::setErrorHandler( \&_zip_error_handler );
160             my $zip = Archive::Zip->new();
161             if($zip->readFromFileHandle( $SH ) != AZ_OK){
162             l("_build_attachment_tree: cannot read zipfile $attachment_filename");
163             # as we cannot read it its not a zip (or too big/corrupted)
164             # so skip processing.
165             next;
166             }
167            
168             # ok seems to be a zip
169             push(@{$pms->{'attachment_data'}->{'archives'}},$attachment_filename);
170            
171             # list all files in the zip file and add them as a file
172             my @members = $zip->members();
173             foreach my $member (@members){
174             push(@{$pms->{'attachment_data'}->{'files'}},$member->fileName());
175             }
176             }
177              
178             }
179              
180             }
181              
182             # Decode base64 and quoted-printable in headers according to RFC2047.
183             #
184             # It replaces Mail::SpamAssassin::Message::Node->_decode_header()
185             # but still calls internal methods of the Node so thats why we need
186             # the $part param.
187             # The $header_field_body param should be the header field data/body.
188             #
189             # So this method is a copy from Node but it will skip no header names.
190             #
191             # See also https://bz.apache.org/SpamAssassin/show_bug.cgi?id=6945
192             sub decode_part_header {
193             my($self, $part, $header_field_body) = @_;
194              
195             return '' unless defined $header_field_body && $header_field_body ne '';
196            
197             # deal with folding and cream the newlines and such
198             $header_field_body =~ s/\n[ \t]+/\n /g;
199             $header_field_body =~ s/\015?\012//gs;
200            
201             local($1,$2,$3);
202              
203             # Multiple encoded sections must ignore the interim whitespace.
204             # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE
205             # separated by whitespace.
206             1 while $header_field_body =~
207             s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
208             ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
209             {$1$2}xsg;
210              
211             # transcode properly encoded RFC 2047 substrings into UTF-8 octets,
212             # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532)
213             # or plain US-ASCII
214             $header_field_body =~
215             s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
216             { $part->__decode_header($1, uc($2), $3) }xsge;
217            
218            
219             return $header_field_body;
220             }
221              
222              
223             =head1 FUNCTIONS
224              
225             =over
226              
227             =item $int = attachmentpresent_archive_count([$ext[, $more_than]])
228              
229             Returns the amount of recognised archive files inside
230             this message. Currently only Zip files are recognised.
231             If the file could not be parsed because it was too big
232             or corrupted, its not counted.
233              
234             Optionally you can filter on extension, where $ext should
235             be set to the extension to filter on. Eg. $ext='zip'
236              
237             =cut
238              
239             sub attachmentpresent_archive_count {
240             my $self = shift;
241             my $pms = shift;
242             my $rendered = shift;# body tests: fully rendered message as array reference
243             my $extension = shift;
244             my $larger_than = shift || 0;
245              
246             l('attachmentpresent_archive_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
247            
248             # make sure we have attachment data read in.
249             if (!exists $pms->{'attachment_data'}) {
250             $self->_build_attachment_tree($pms);
251             }
252              
253             my $count=0;
254             if($extension){
255             foreach my $archive (@{$pms->{'attachment_data'}->{'archives'}}){
256             if($archive =~ m/\.$extension$/i){
257             $count++;
258             }
259             }
260             }else{
261             $count=scalar (@{$pms->{'attachment_data'}->{'archives'}});
262             }
263            
264             l('attachmentpresent_archive_count actual count: '.$count);
265             l('attachmentpresent_archive_count: '.(($count > $larger_than) ? 1 : 0));
266             return (($count > $larger_than) ? 1 : 0);
267             }
268              
269             =item $int = attachmentpresent_file_count([$ext[, $more_than]])
270              
271             Returns the amount of files inside this message. It also
272             counts files inside recognised archive files. Currently only
273             Zip files are recognised.
274              
275             Optionally you can filter on extension, where $ext should
276             be set to the extension to filter on. Eg. $ext='js'
277              
278             Optionally you can specify the minimum amount of files
279             (of the given type) which will be required to trigger the rule.
280             By default $more_than is 0, so at least one file of the
281             given type is needed, but you can set it to 4 if you want the
282             rule to trigger when at least 5 files are present.
283              
284             =cut
285              
286             sub attachmentpresent_file_count {
287             my $self = shift;
288             my $pms = shift;
289             my $rendered = shift;# body tests: fully rendered message as array reference
290             my $extension = shift;
291             my $larger_than = shift || 0;
292              
293             l('attachmentpresent_file_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
294            
295             # make sure we have attachment data read in.
296             if (!exists $pms->{'attachment_data'}) {
297             $self->_build_attachment_tree($pms);
298             }
299              
300             my $count=0;
301             if($extension){
302             foreach my $file (@{$pms->{'attachment_data'}->{'files'}}){
303             if($file =~ m/\.$extension$/i){
304             $count++;
305             l('attachmentpresent_file_count found matching file: '.$file);
306             }
307             }
308             }else{
309             $count=scalar (@{$pms->{'attachment_data'}->{'files'}});
310             }
311            
312             l('attachmentpresent_file_count actual count: '.$count);
313             l('attachmentpresent_file_count: '.(($count > $larger_than) ? 1 : 0));
314             return (($count > $larger_than) ? 1 : 0);
315             }
316              
317             =back
318             =cut
319              
320             1;