File Coverage

blib/lib/Mail/SpamAssassin/Plugin/AttachmentPresent.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Copyright 2016 Web2All B.V.
3             #
4             # This Plugin is free software; you can redistribute
5             # it and/or modify it under the same terms as Perl 5.18.1.
6             #
7             # you may not use this file except in compliance with the License.
8             #
9             # Unless required by applicable law or agreed to in writing, software
10             # distributed under the License is distributed on an "AS IS" BASIS,
11             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12             #
13              
14             # Version control:
15             # $Revision: 218 $
16             # $Author: merijn $
17             # $Date: 2017-03-01 11:11:37 +0100 (Wed, 01 Mar 2017) $
18              
19             =head1 NAME
20              
21             Mail::SpamAssassin::Plugin::AttachmentPresent - SpamAssassin plugin to score mail based on attachments
22             including attachments inside archives.
23              
24             =head1 SYNOPSIS
25              
26             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
27             body RULENAME eval:attachmentpresent_archive_count()
28             body RULENAME eval:attachmentpresent_file_count()
29              
30             =head1 DESCRIPTION
31              
32             Get information about attached files, including inside archives.
33             Only supports Zip right now.
34              
35             =head1 CONFIGURATION
36              
37             None
38              
39             =head1 INSTALL
40              
41             =over
42              
43             =item Install the required Perl modules:
44              
45             Encode::MIME::Header;
46             Archive::Zip
47             IO::String
48             Mail::SpamAssassin::Plugin::AttachmentPresent
49              
50             Should already be installed by spamassassin
51              
52             =item Configure spamassassin
53              
54             Typically in local.cf, include lines:
55             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
56              
57             body HAS_JS_FILES eval:attachmentpresent_file_count('js')
58             describe HAS_JS_FILES The e-mail has attached javascript files (or inside archives)
59             score HAS_JS_FILES 0.001
60              
61             =back
62              
63             =cut
64              
65             package Mail::SpamAssassin::Plugin::AttachmentPresent;
66              
67 1     1   12717 use strict;
  1         1  
  1         22  
68 1     1   3 use warnings;
  1         0  
  1         18  
69              
70 1     1   186 use Mail::SpamAssassin::Plugin;
  0            
  0            
71             use Mail::SpamAssassin::Logger;
72              
73             use Encode;
74             use Encode::MIME::Header;
75             use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
76             use IO::String;
77              
78             use base qw(Mail::SpamAssassin::Plugin);
79             our $VERSION = '1.05';
80              
81             our $LOG_FACILITY = 'AttachmentPresent';
82              
83             # Fields:
84             # mailsa - Mail::SpamAssassin instance
85             sub new {
86             my ($class, $mailsa) = @_;
87             $class = ref($class) || $class;
88             my $self = $class->SUPER::new($mailsa);
89             bless($self, $class);
90            
91             Mail::SpamAssassin::Logger::add_facilities($LOG_FACILITY);
92            
93             $self->register_eval_rule('attachmentpresent_archive_count');
94             $self->register_eval_rule('attachmentpresent_file_count');
95            
96             return $self;
97             }
98              
99              
100             sub l {
101             Mail::SpamAssassin::Logger::dbg("$LOG_FACILITY: " . join('', @_));
102             }
103              
104             sub _zip_error_handler(){
105             l("_zip_error_handler: ".$_[0]);
106             }
107              
108             sub _build_attachment_tree {
109             my ($self,$pms) = @_;
110            
111             # init storage
112             $pms->{'attachment_data'}={
113             'files' => [],
114             'archives' => []
115             };
116             l('_build_attachment_tree');
117             # $pms->{msg} Mail::SpamAssassin::Message
118             foreach my $part ($pms->{msg}->find_parts(qr/.*/, 1)) {
119             # $part Mail::SpamAssassin::Message::Node
120             # now we get all parts which are leaves (so text parts and attachments, not multiparts)
121             l('_build_attachment_tree->part');
122             # we ignore all parts which are part of the text body
123            
124             # For zipfiles, find out whats in them
125             # Content-Type: application/zip;
126             # Content-Transfer-Encoding: base64
127             my $ctt = $part->get_header('content-type') || '';
128             # Mail::SpamAssassin::Message::Node has _decode_header() method, but it doesnt decode
129             # Content-* headers and thus the filename in the Content-Type header is not decoded :(
130             $ctt=Encode::decode('MIME-Header', $ctt);
131             # $ctt might contain wide characters now
132              
133             my $cte = lc($part->get_header('content-transfer-encoding') || '');
134            
135             l('_build_attachment_tree->part: content-type: '.$ctt);
136            
137             # consider the attachment a file if it has a name
138             my $attachment_filename='';
139             if($ctt =~ m/name\s*=\s*"?([^";]*)"?/is){
140             $attachment_filename=$1;
141             # lets be sure and remove any whitespace from the end
142             $attachment_filename =~ s/\s+$//;
143             l('_build_attachment_tree->part: part has name '.$attachment_filename);
144             push(@{$pms->{'attachment_data'}->{'files'}},$attachment_filename);
145             }else{
146             # fallback check in Content-Disposition header
147             # some (spam) has Content-Disposition with filename but no filename in Content-Type,
148             # but mail clients will still display the attachment.
149             my $cdp = $part->get_header('content-disposition') || '';
150             # Content-* headers and thus the filename in the Content-Type header is not decoded :(
151             $cdp=Encode::decode('MIME-Header', $cdp);
152             # $cdp might contain wide characters now
153             if($cdp =~ m/name\s*=\s*"?([^";]*)"?/is){
154             $attachment_filename=$1;
155             # lets be sure and remove any whitespace from the end
156             $attachment_filename =~ s/\s+$//;
157             l('_build_attachment_tree->part: part has name (in Content-Disposition) '.$attachment_filename);
158             push(@{$pms->{'attachment_data'}->{'files'}},$attachment_filename);
159             }
160             }
161            
162             # now process attachments
163            
164             # Zip
165             if ($ctt =~ /zip/i && $cte =~ /^base64$/){
166             # seems to be a zip attachment
167             l('_build_attachment_tree->part found Zip archive: '.$ctt);
168             # how much we grab? for now only 500kb, bigger files will just not
169             # be properly parsed as zip files
170             my $num_of_bytes=512000;
171            
172             my $zip_binary_head=$part->decode($num_of_bytes);
173             # use Archive::Zip
174             my $SH = IO::String->new($zip_binary_head);
175              
176             Archive::Zip::setErrorHandler( \&_zip_error_handler );
177             my $zip = Archive::Zip->new();
178             if($zip->readFromFileHandle( $SH ) != AZ_OK){
179             l("_build_attachment_tree: cannot read zipfile $attachment_filename");
180             # as we cannot read it its not a zip (or too big/corrupted)
181             # so skip processing.
182             next;
183             }
184            
185             # ok seems to be a zip
186             push(@{$pms->{'attachment_data'}->{'archives'}},$attachment_filename);
187            
188             # list all files in the zip file and add them as a file
189             my @members = $zip->members();
190             foreach my $member (@members){
191             push(@{$pms->{'attachment_data'}->{'files'}},$member->fileName());
192             }
193             }
194              
195             }
196              
197             }
198              
199             =head1 FUNCTIONS
200              
201             =over
202              
203             =item $int = attachmentpresent_archive_count([$ext[, $more_than]])
204              
205             Returns the amount of recognised archive files inside
206             this message. Currently only Zip files are recognised.
207             If the file could not be parsed because it was too big
208             or corrupted, its not counted.
209              
210             Optionally you can filter on extension, where $ext should
211             be set to the extension to filter on. Eg. $ext='zip'
212              
213             =cut
214              
215             sub attachmentpresent_archive_count {
216             my $self = shift;
217             my $pms = shift;
218             my $rendered = shift;# body tests: fully rendered message as array reference
219             my $extension = shift;
220             my $larger_than = shift || 0;
221              
222             l('attachmentpresent_archive_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
223            
224             # make sure we have attachment data read in.
225             if (!exists $pms->{'attachment_data'}) {
226             $self->_build_attachment_tree($pms);
227             }
228              
229             my $count=0;
230             if($extension){
231             foreach my $archive (@{$pms->{'attachment_data'}->{'archives'}}){
232             if($archive =~ m/\.$extension$/i){
233             $count++;
234             }
235             }
236             }else{
237             $count=scalar (@{$pms->{'attachment_data'}->{'archives'}});
238             }
239            
240             l('attachmentpresent_archive_count actual count: '.$count);
241             l('attachmentpresent_archive_count: '.(($count > $larger_than) ? 1 : 0));
242             return (($count > $larger_than) ? 1 : 0);
243             }
244              
245             =item $int = attachmentpresent_file_count([$ext[, $more_than]])
246              
247             Returns the amount of files inside this message. It also
248             counts files inside recognised archive files. Currently only
249             Zip files are recognised.
250              
251             Optionally you can filter on extension, where $ext should
252             be set to the extension to filter on. Eg. $ext='js'
253              
254             Optionally you can specify the minimum amount of files
255             (of the given type) which will be required to trigger the rule.
256             By default $more_than is 0, so at least one file of the
257             given type is needed, but you can set it to 4 if you want the
258             rule to trigger when at least 5 files are present.
259              
260             =cut
261              
262             sub attachmentpresent_file_count {
263             my $self = shift;
264             my $pms = shift;
265             my $rendered = shift;# body tests: fully rendered message as array reference
266             my $extension = shift;
267             my $larger_than = shift || 0;
268              
269             l('attachmentpresent_file_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
270            
271             # make sure we have attachment data read in.
272             if (!exists $pms->{'attachment_data'}) {
273             $self->_build_attachment_tree($pms);
274             }
275              
276             my $count=0;
277             if($extension){
278             foreach my $file (@{$pms->{'attachment_data'}->{'files'}}){
279             if($file =~ m/\.$extension$/i){
280             $count++;
281             l('attachmentpresent_file_count found matching file: '.$file);
282             }
283             }
284             }else{
285             $count=scalar (@{$pms->{'attachment_data'}->{'files'}});
286             }
287            
288             l('attachmentpresent_file_count actual count: '.$count);
289             l('attachmentpresent_file_count: '.(($count > $larger_than) ? 1 : 0));
290             return (($count > $larger_than) ? 1 : 0);
291             }
292              
293             =back
294             =cut
295              
296             1;