File Coverage

blib/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm
Criterion Covered Total %
statement 28 122 22.9
branch 0 70 0.0
condition 1 50 2.0
subroutine 6 15 40.0
pod 1 9 11.1
total 36 266 13.5


line stmt bran cond sub pod time code
1             # <@LICENSE>
2             # Licensed to the Apache Software Foundation (ASF) under one or more
3             # contributor license agreements. See the NOTICE file distributed with
4             # this work for additional information regarding copyright ownership.
5             # The ASF licenses this file to you under the Apache License, Version 2.0
6             # (the "License"); you may not use this file except in compliance with
7             # the License. You may obtain a copy of the License at:
8             #
9             # http://www.apache.org/licenses/LICENSE-2.0
10             #
11             # Unless required by applicable law or agreed to in writing, software
12             # distributed under the License is distributed on an "AS IS" BASIS,
13             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14             # See the License for the specific language governing permissions and
15             # limitations under the License.
16             # </@LICENSE>
17             #
18             # -------------------------------------------------------
19             # ImageInfo Plugin for SpamAssassin
20             # Version: 0.7
21             # Created: 2006-08-02
22             # Modified: 2007-01-17
23             #
24             # Changes:
25             # 0.7 - added image_name_regex to allow pattern matching on the image name
26             # - added support for image/pjpeg content types (progressive jpeg)
27             # - updated imageinfo.cf with a few sample rules for using image_name_regex()
28             # 0.6 - fixed dems_ bug in image_size_range_
29             # 0.5 - added image_named and image_to_text_ratio
30             # 0.4 - added image_size_exact and image_size_range
31             # 0.3 - added jpeg support
32             # 0.2 - optimized by theo
33             # 0.1 - added gif/png support
34             #
35             #
36             # Usage:
37             # image_count()
38             #
39             # body RULENAME eval:image_count(<type>,<min>,[max])
40             # type: 'all','gif','png', or 'jpeg'
41             # min: required, message contains at least this
42             # many images
43             # max: optional, if specified, message must not
44             # contain more than this number of images
45             #
46             # image_count() examples
47             #
48             # body ONE_IMAGE eval:image_count('all',1,1)
49             # body ONE_OR_MORE_IMAGES eval:image_count('all',1)
50             # body ONE_PNG eval:image_count('png',1,1)
51             # body TWO_GIFS eval:image_count('gif',2,2)
52             # body MANY_JPEGS eval:image_count('gif',5)
53             #
54             # pixel_coverage()
55             #
56             # body RULENAME eval:pixel_coverage(<type>,<min>,[max])
57             # type: 'all','gif','png', or 'jpeg'
58             # min: required, message contains at least this
59             # much pixel area
60             # max: optional, if specified, message must not
61             # contain more than this much pixel area
62             #
63             # pixel_coverage() examples
64             #
65             # body LARGE_IMAGE_AREA eval:pixel_coverage('all',150000) # catches any images that are 150k pixel/sq or higher
66             # body SMALL_GIF_AREA eval:pixel_coverage('gif',1,40000) # catches only gifs that 1 to 40k pixel/sql
67             #
68             # image_name_regex()
69             #
70             # body RULENAME eval:image_name_regex(<regex>)
71             # regex: full quoted regexp, see examples below
72             #
73             # image_name_regex() examples
74             #
75             # body CG_DOUBLEDOT_GIF eval:image_name_regex('/^\w{2,9}\.\.gif$/i') # catches double dot gifs abcd..gif
76             #
77             #
78             #
79             # -------------------------------------------------------
80              
81             package Mail::SpamAssassin::Plugin::ImageInfo;
82              
83 21     21   7132 use Mail::SpamAssassin::Plugin;
  21         55  
  21         673  
84 21     21   127 use Mail::SpamAssassin::Logger;
  21         46  
  21         1204  
85 21     21   145 use strict;
  21         53  
  21         540  
86 21     21   5703 use warnings;
  21         56  
  21         877  
87             # use bytes;
88 21     21   128 use re 'taint';
  21         44  
  21         56769  
89              
90             our @ISA = qw(Mail::SpamAssassin::Plugin);
91              
92             # constructor: register the eval rule
93             sub new {
94 62     62 1 270 my $class = shift;
95 62         154 my $mailsaobject = shift;
96              
97             # some boilerplate...
98 62   33     466 $class = ref($class) || $class;
99 62         378 my $self = $class->SUPER::new($mailsaobject);
100 62         206 bless ($self, $class);
101              
102 62         343 $self->register_eval_rule ("image_count");
103 62         275 $self->register_eval_rule ("pixel_coverage");
104 62         250 $self->register_eval_rule ("image_size_exact");
105 62         247 $self->register_eval_rule ("image_size_range");
106 62         284 $self->register_eval_rule ("image_named");
107 62         245 $self->register_eval_rule ("image_name_regex");
108 62         252 $self->register_eval_rule ("image_to_text_ratio");
109              
110 62         563 return $self;
111             }
112              
113             # -----------------------------------------
114              
115             my %get_details = (
116             'gif' => sub {
117             my ($pms, $part) = @_;
118             my $header = $part->decode(13);
119              
120             # make sure this is actually a valid gif..
121             return unless $header =~ s/^GIF(8[79]a)//;
122             my $version = $1;
123              
124             my ($width, $height, $packed, $bgcolor, $aspect) = unpack("vvCCC", $header);
125             my $color_table_size = 1 << (($packed & 0x07) + 1);
126              
127             # for future enhancements
128             #my $global_color_table = $packed & 0x80;
129             #my $has_global_color_table = $global_color_table ? 1 : 0;
130             #my $sorted_colors = ($packed & 0x08)?1:0;
131             #my $resolution = ((($packed & 0x70) >> 4) + 1);
132              
133             if ($height && $width) {
134             my $area = $width * $height;
135             $pms->{imageinfo}->{pc_gif} += $area;
136             $pms->{imageinfo}->{dems_gif}->{"${height}x${width}"} = 1;
137             $pms->{imageinfo}->{names_all}->{$part->{'name'}} = 1 if $part->{'name'};
138             dbg("imageinfo: gif image ".($part->{'name'} ? $part->{'name'} : '')." is $height x $width pixels ($area pixels sq.), with $color_table_size color table");
139             }
140             },
141              
142             'png' => sub {
143             my ($pms, $part) = @_;
144             my $data = $part->decode();
145              
146             return unless (substr($data, 0, 8) eq "\x89PNG\x0d\x0a\x1a\x0a");
147              
148             my $datalen = length $data;
149             my $pos = 8;
150             my $chunksize = 8;
151             my ($width, $height) = ( 0, 0 );
152             my ($depth, $ctype, $compression, $filter, $interlace);
153              
154             while ($pos < $datalen) {
155             my ($len, $type) = unpack("Na4", substr($data, $pos, $chunksize));
156             $pos += $chunksize;
157              
158             last if $type eq "IEND"; # end of png image.
159              
160             next unless ( $type eq "IHDR" && $len == 13 );
161              
162             my $bytes = substr($data, $pos, $len + 4);
163             my $crc = unpack("N", substr($bytes, -4, 4, ""));
164              
165             if ($type eq "IHDR" && $len == 13) {
166             ($width, $height, $depth, $ctype, $compression, $filter, $interlace) = unpack("NNCCCCC", $bytes);
167             last;
168             }
169             }
170              
171             if ($height && $width) {
172             my $area = $width * $height;
173             $pms->{imageinfo}->{pc_png} += $area;
174             $pms->{imageinfo}->{dems_png}->{"${height}x${width}"} = 1;
175             $pms->{imageinfo}->{names_all}->{$part->{'name'}} = 1 if $part->{'name'};
176             dbg("imageinfo: png image ".($part->{'name'} ? $part->{'name'} : '')." is $height x $width pixels ($area pixels sq.)");
177             }
178             },
179              
180             'jpeg' => sub {
181             my ($pms, $part) = @_;
182              
183             my $data = $part->decode();
184              
185             my $index = substr($data, 0, 2);
186             return unless $index eq "\xFF\xD8";
187              
188             my $pos = 2;
189             my $chunksize = 4;
190             my ($prec, $height, $width, $comps) = (undef,0,0,undef);
191             while (1) {
192             my ($xx, $mark, $len) = unpack("CCn", substr($data, $pos, $chunksize));
193             last if (!defined $xx || $xx != 0xFF);
194             last if (!defined $mark || $mark == 0xDA || $mark == 0xD9);
195             last if (!defined $len || $len < 2);
196             $pos += $chunksize;
197             my $block = substr($data, $pos, $len - 2);
198             my $blocklen = length($block);
199             if ( ($mark >= 0xC0 && $mark <= 0xC3) || ($mark >= 0xC5 && $mark <= 0xC7) ||
200             ($mark >= 0xC9 && $mark <= 0xCB) || ($mark >= 0xCD && $mark <= 0xCF) ) {
201             ($prec, $height, $width, $comps) = unpack("CnnC", substr($block, 0, 6, ""));
202             last;
203             }
204             $pos += $blocklen;
205             }
206              
207             if ($height && $width) {
208             my $area = $height * $width;
209             $pms->{imageinfo}->{pc_jpeg} += $area;
210             $pms->{imageinfo}->{dems_jpeg}->{"${height}x${width}"} = 1;
211             $pms->{imageinfo}->{names_all}->{$part->{'name'}} = 1 if $part->{'name'};
212             dbg("imageinfo: jpeg image ".($part->{'name'} ? $part->{'name'} : '')." is $height x $width pixels ($area pixels sq.)");
213             }
214              
215             },
216              
217             );
218              
219             sub _get_images {
220 0     0     my ($self,$pms) = @_;
221 0           my $result = 0;
222              
223 0           foreach my $type ( 'all', keys %get_details ) {
224 0           $pms->{'imageinfo'}->{"pc_$type"} = 0;
225 0           $pms->{'imageinfo'}->{"count_$type"} = 0;
226             }
227              
228 0           foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpe?g)$@, 1)) {
229             # make sure its base64 encoded
230 0   0       my $cte = lc($p->get_header('content-transfer-encoding') || '');
231 0 0         next if ($cte !~ /^base64$/);
232              
233 0           my ($type) = $p->{'type'} =~ m@/(\w+)$@;
234 0 0         $type = 'jpeg' if $type eq 'jpg';
235 0 0 0       if ($type && exists $get_details{$type}) {
236 0           $get_details{$type}->($pms,$p);
237 0           $pms->{'imageinfo'}->{"count_$type"} ++;
238             }
239             }
240              
241 0           foreach my $name ( keys %{$pms->{'imageinfo'}->{"names_all"}} ) {
  0            
242 0           dbg("imageinfo: image name $name found");
243             }
244              
245 0           foreach my $type ( keys %get_details ) {
246 0           $pms->{'imageinfo'}->{'pc_all'} += $pms->{'imageinfo'}->{"pc_$type"};
247 0           $pms->{'imageinfo'}->{'count_all'} += $pms->{'imageinfo'}->{"count_$type"};
248 0           foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}} ) {
  0            
249 0           dbg("imageinfo: adding $dem to dems_all");
250 0           $pms->{'imageinfo'}->{'dems_all'}->{$dem} = 1;
251             }
252             }
253             }
254              
255             # -----------------------------------------
256              
257             sub image_named {
258 0     0 0   my ($self,$pms,$body,$name) = @_;
259 0 0         return unless (defined $name);
260              
261             # make sure we have image data read in.
262 0 0         if (!exists $pms->{'imageinfo'}) {
263 0           $self->_get_images($pms);
264             }
265              
266 0 0         return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
267 0 0         return 1 if (exists $pms->{'imageinfo'}->{"names_all"}->{$name});
268 0           return 0;
269             }
270              
271             # -----------------------------------------
272              
273             sub image_name_regex {
274 0     0 0   my ($self,$pms,$body,$re) = @_;
275 0 0         return unless (defined $re);
276              
277             # make sure we have image data read in.
278 0 0         if (!exists $pms->{'imageinfo'}) {
279 0           $self->_get_images($pms);
280             }
281              
282 0 0         return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
283              
284 0           my $hit = 0;
285 0           foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
  0            
286 0           dbg("imageinfo: checking image named $name against regex $re");
287 0 0         if (eval { $name =~ /$re/ }) { $hit = 1 }
  0            
  0            
288 0 0         dbg("imageinfo: error in regex /$re/ - $@") if $@;
289 0 0         if ($hit) {
290 0           dbg("imageinfo: image_name_regex hit on $name");
291 0           return 1;
292             }
293             }
294 0           return 0;
295              
296             }
297              
298             # -----------------------------------------
299              
300             sub image_count {
301 0     0 0   my ($self,$pms,$body,$type,$min,$max) = @_;
302              
303 0 0         return unless defined $min;
304              
305             # make sure we have image data read in.
306 0 0         if (!exists $pms->{'imageinfo'}) {
307 0           $self->_get_images($pms);
308             }
309              
310             # dbg("imageinfo: count: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"count_$type"});
311 0           return result_check($min, $max, $pms->{'imageinfo'}->{"count_$type"});
312             }
313              
314             # -----------------------------------------
315              
316             sub pixel_coverage {
317 0     0 0   my ($self,$pms,$body,$type,$min,$max) = @_;
318              
319 0 0 0       return unless (defined $type && defined $min);
320              
321             # make sure we have image data read in.
322 0 0         if (!exists $pms->{'imageinfo'}) {
323 0           $self->_get_images($pms);
324             }
325              
326             # dbg("imageinfo: pc_$type: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"pc_$type"});
327 0           return result_check($min, $max, $pms->{'imageinfo'}->{"pc_$type"});
328             }
329              
330             # -----------------------------------------
331              
332             sub image_to_text_ratio {
333 0     0 0   my ($self,$pms,$body,$type,$min,$max) = @_;
334 0 0 0       return unless (defined $type && defined $min && defined $max);
      0        
335              
336             # make sure we have image data read in.
337 0 0         if (!exists $pms->{'imageinfo'}) {
338 0           $self->_get_images($pms);
339             }
340              
341             # depending on how you call this eval (body vs rawbody),
342             # the $textlen will differ.
343 0           my $textlen = length(join('',@$body));
344              
345 0 0 0       return 0 unless ( $textlen > 0 && exists $pms->{'imageinfo'}->{"pc_$type"} && $pms->{'imageinfo'}->{"pc_$type"} > 0);
      0        
346              
347 0           my $ratio = $textlen / $pms->{'imageinfo'}->{"pc_$type"};
348 0           dbg("imageinfo: image ratio=$ratio, min=$min max=$max");
349 0           return result_check($min, $max, $ratio, 1);
350             }
351              
352             # -----------------------------------------
353              
354             sub image_size_exact {
355 0     0 0   my ($self,$pms,$body,$type,$height,$width) = @_;
356 0 0 0       return unless (defined $type && defined $height && defined $width);
      0        
357              
358             # make sure we have image data read in.
359 0 0         if (!exists $pms->{'imageinfo'}) {
360 0           $self->_get_images($pms);
361             }
362              
363 0 0         return 0 unless (exists $pms->{'imageinfo'}->{"dems_$type"});
364 0 0         return 1 if (exists $pms->{'imageinfo'}->{"dems_$type"}->{"${height}x${width}"});
365 0           return 0;
366             }
367              
368             # -----------------------------------------
369              
370             sub image_size_range {
371 0     0 0   my ($self,$pms,$body,$type,$minh,$minw,$maxh,$maxw) = @_;
372 0 0 0       return unless (defined $type && defined $minh && defined $minw);
      0        
373              
374             # make sure we have image data read in.
375 0 0         if (!exists $pms->{'imageinfo'}) {
376 0           $self->_get_images($pms);
377             }
378              
379 0           my $name = 'dems_'.$type;
380 0 0         return unless (exists $pms->{'imageinfo'}->{$name});
381              
382 0           foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}}) {
  0            
383 0           my ($h,$w) = split(/x/,$dem);
384 0 0         next if ($h < $minh); # height less than min height
385 0 0         next if ($w < $minw); # width less than min width
386 0 0 0       next if (defined $maxh && $h > $maxh); # height more than max height
387 0 0 0       next if (defined $maxw && $w > $maxw); # width more than max width
388              
389             # if we make it here, we have a match
390 0           return 1;
391             }
392              
393 0           return 0;
394             }
395              
396             # -----------------------------------------
397              
398             sub result_check {
399 0     0 0   my ($min, $max, $value, $nomaxequal) = @_;
400 0 0         return 0 unless defined $value;
401 0 0         return 0 if ($value < $min);
402 0 0 0       return 0 if (defined $max && $value > $max);
403 0 0 0       return 0 if (defined $nomaxequal && $nomaxequal && $value == $max);
      0        
404 0           return 1;
405             }
406              
407             # -----------------------------------------
408              
409             1;