File Coverage

blib/lib/Mail/SpamAssassin/Plugin/AttachmentPresent.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Copyright 2016 Web2All B.V.
3             #
4             # This Plugin is free software; you can redistribute
5             # it and/or modify it under the same terms as Perl 5.18.1.
6             #
7             # you may not use this file except in compliance with the License.
8             #
9             # Unless required by applicable law or agreed to in writing, software
10             # distributed under the License is distributed on an "AS IS" BASIS,
11             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12             #
13              
14             # Version control:
15             # $Revision: 169 $
16             # $Author: merijn $
17             # $Date: 2016-09-09 14:36:13 +0200 (Fri, 09 Sep 2016) $
18              
19             =head1 NAME
20              
21             Mail::SpamAssassin::Plugin::AttachmentPresent - SpamAssassin plugin to score mail based on attachments
22             including attachments inside archives.
23              
24             =head1 SYNOPSIS
25              
26             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
27             body RULENAME eval:attachmentpresent_archive_count()
28             body RULENAME eval:attachmentpresent_file_count()
29              
30             =head1 DESCRIPTION
31              
32             Get information about attached files, including inside archives.
33             Only supports Zip right now.
34              
35             =head1 CONFIGURATION
36              
37             None
38              
39             =head1 INSTALL
40              
41             =over
42              
43             =item Install the required Perl modules:
44              
45             Encode::MIME::Header;
46             Archive::Zip
47             IO::String
48             Mail::SpamAssassin::Plugin::AttachmentPresent
49              
50             Should already be installed by spamassassin
51              
52             =item Configure spamassassin
53              
54             Typically in local.cf, include lines:
55             loadplugin Mail::SpamAssassin::Plugin::AttachmentPresent
56              
57             body HAS_JS_FILES eval:attachmentpresent_file_count('js')
58             describe HAS_JS_FILES The e-mail has attached javascript files (or inside archives)
59             score HAS_JS_FILES 0.001
60              
61             =back
62              
63             =cut
64              
65             package Mail::SpamAssassin::Plugin::AttachmentPresent;
66              
67 1     1   17252 use strict;
  1         2  
  1         29  
68 1     1   4 use warnings;
  1         2  
  1         27  
69              
70 1     1   244 use Mail::SpamAssassin::Plugin;
  0            
  0            
71             use Mail::SpamAssassin::Logger;
72              
73             use Encode;
74             use Encode::MIME::Header;
75             use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
76             use IO::String;
77              
78             use base qw(Mail::SpamAssassin::Plugin);
79             our $VERSION = '1.04';
80              
81             our $LOG_FACILITY = 'AttachmentPresent';
82              
83             # Fields:
84             # mailsa - Mail::SpamAssassin instance
85             sub new {
86             my ($class, $mailsa) = @_;
87             $class = ref($class) || $class;
88             my $self = $class->SUPER::new($mailsa);
89             bless($self, $class);
90            
91             Mail::SpamAssassin::Logger::add_facilities($LOG_FACILITY);
92            
93             $self->register_eval_rule('attachmentpresent_archive_count');
94             $self->register_eval_rule('attachmentpresent_file_count');
95            
96             return $self;
97             }
98              
99              
100             sub l {
101             Mail::SpamAssassin::Logger::dbg("$LOG_FACILITY: " . join('', @_));
102             }
103              
104             sub _zip_error_handler(){
105             l("_zip_error_handler: ".$_[0]);
106             }
107              
108             sub _build_attachment_tree {
109             my ($self,$pms) = @_;
110            
111             # init storage
112             $pms->{'attachment_data'}={
113             'files' => [],
114             'archives' => []
115             };
116             l('_build_attachment_tree');
117             # $pms->{msg} Mail::SpamAssassin::Message
118             foreach my $part ($pms->{msg}->find_parts(qr/.*/, 1)) {
119             # $part Mail::SpamAssassin::Message::Node
120             # now we get all parts which are leaves (so text parts and attachments, not multiparts)
121             l('_build_attachment_tree->part');
122             # we ignore all parts which are part of the text body
123            
124             # For zipfiles, find out whats in them
125             # Content-Type: application/zip;
126             # Content-Transfer-Encoding: base64
127             my $ctt = $part->get_header('content-type') || '';
128             # Mail::SpamAssassin::Message::Node has _decode_header() method, but it doesnt decode
129             # Content-* headers and thus the filename in the Content-Type header is not decoded :(
130             $ctt=Encode::decode('MIME-Header', $ctt);
131             # $ctt might contain wide characters now
132              
133             my $cte = lc($part->get_header('content-transfer-encoding') || '');
134            
135             l('_build_attachment_tree->part: content-type: '.$ctt);
136            
137             # consider the attachment a file if it has a name
138             my $attachment_filename='';
139             if($ctt =~ m/name\s*=\s*"?([^";]*)"?/is){
140             $attachment_filename=$1;
141             # lets be sure and remove any whitespace from the end
142             $attachment_filename =~ s/\s+$//;
143             l('_build_attachment_tree->part: part has name '.$attachment_filename);
144             push(@{$pms->{'attachment_data'}->{'files'}},$attachment_filename);
145             }
146            
147             # now process attachments
148            
149             # Zip
150             if ($ctt =~ /zip/i && $cte =~ /^base64$/){
151             # seems to be a zip attachment
152             l('_build_attachment_tree->part found Zip archive: '.$ctt);
153             # how much we grab? for now only 500kb, bigger files will just not
154             # be properly parsed as zip files
155             my $num_of_bytes=512000;
156            
157             my $zip_binary_head=$part->decode($num_of_bytes);
158             # use Archive::Zip
159             my $SH = IO::String->new($zip_binary_head);
160              
161             Archive::Zip::setErrorHandler( \&_zip_error_handler );
162             my $zip = Archive::Zip->new();
163             if($zip->readFromFileHandle( $SH ) != AZ_OK){
164             l("_build_attachment_tree: cannot read zipfile $attachment_filename");
165             # as we cannot read it its not a zip (or too big/corrupted)
166             # so skip processing.
167             next;
168             }
169            
170             # ok seems to be a zip
171             push(@{$pms->{'attachment_data'}->{'archives'}},$attachment_filename);
172            
173             # list all files in the zip file and add them as a file
174             my @members = $zip->members();
175             foreach my $member (@members){
176             push(@{$pms->{'attachment_data'}->{'files'}},$member->fileName());
177             }
178             }
179              
180             }
181              
182             }
183              
184             =head1 FUNCTIONS
185              
186             =over
187              
188             =item $int = attachmentpresent_archive_count([$ext[, $more_than]])
189              
190             Returns the amount of recognised archive files inside
191             this message. Currently only Zip files are recognised.
192             If the file could not be parsed because it was too big
193             or corrupted, its not counted.
194              
195             Optionally you can filter on extension, where $ext should
196             be set to the extension to filter on. Eg. $ext='zip'
197              
198             =cut
199              
200             sub attachmentpresent_archive_count {
201             my $self = shift;
202             my $pms = shift;
203             my $rendered = shift;# body tests: fully rendered message as array reference
204             my $extension = shift;
205             my $larger_than = shift || 0;
206              
207             l('attachmentpresent_archive_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
208            
209             # make sure we have attachment data read in.
210             if (!exists $pms->{'attachment_data'}) {
211             $self->_build_attachment_tree($pms);
212             }
213              
214             my $count=0;
215             if($extension){
216             foreach my $archive (@{$pms->{'attachment_data'}->{'archives'}}){
217             if($archive =~ m/\.$extension$/i){
218             $count++;
219             }
220             }
221             }else{
222             $count=scalar (@{$pms->{'attachment_data'}->{'archives'}});
223             }
224            
225             l('attachmentpresent_archive_count actual count: '.$count);
226             l('attachmentpresent_archive_count: '.(($count > $larger_than) ? 1 : 0));
227             return (($count > $larger_than) ? 1 : 0);
228             }
229              
230             =item $int = attachmentpresent_file_count([$ext[, $more_than]])
231              
232             Returns the amount of files inside this message. It also
233             counts files inside recognised archive files. Currently only
234             Zip files are recognised.
235              
236             Optionally you can filter on extension, where $ext should
237             be set to the extension to filter on. Eg. $ext='js'
238              
239             Optionally you can specify the minimum amount of files
240             (of the given type) which will be required to trigger the rule.
241             By default $more_than is 0, so at least one file of the
242             given type is needed, but you can set it to 4 if you want the
243             rule to trigger when at least 5 files are present.
244              
245             =cut
246              
247             sub attachmentpresent_file_count {
248             my $self = shift;
249             my $pms = shift;
250             my $rendered = shift;# body tests: fully rendered message as array reference
251             my $extension = shift;
252             my $larger_than = shift || 0;
253              
254             l('attachmentpresent_file_count ('.($extension ? $extension : 'all').')'.($larger_than ? ' more than '.$larger_than : ''));
255            
256             # make sure we have attachment data read in.
257             if (!exists $pms->{'attachment_data'}) {
258             $self->_build_attachment_tree($pms);
259             }
260              
261             my $count=0;
262             if($extension){
263             foreach my $file (@{$pms->{'attachment_data'}->{'files'}}){
264             if($file =~ m/\.$extension$/i){
265             $count++;
266             l('attachmentpresent_file_count found matching file: '.$file);
267             }
268             }
269             }else{
270             $count=scalar (@{$pms->{'attachment_data'}->{'files'}});
271             }
272            
273             l('attachmentpresent_file_count actual count: '.$count);
274             l('attachmentpresent_file_count: '.(($count > $larger_than) ? 1 : 0));
275             return (($count > $larger_than) ? 1 : 0);
276             }
277              
278             =back
279             =cut
280              
281             1;