File Coverage

blib/lib/HTTP/Proxy/BodyFilter/Adnix.pm
Criterion Covered Total %
statement 15 66 22.7
branch 0 24 0.0
condition 0 21 0.0
subroutine 5 8 62.5
pod 2 2 100.0
total 22 121 18.1


line stmt bran cond sub pod time code
1             # $Id: Adnix.pm,v 1.1.1.1 2004/06/19 09:37:19 cosimo Exp $
2              
3             =head1 NAME
4              
5             HTTP::Proxy::BodyFilter::Adnix - Automatically block advertising images with custom regexes
6              
7             =head1 SYNOPSIS
8              
9             use HTTP::Proxy::BodyFilter::Adnix
10              
11             # Use default blacklist rules and default placeholder image
12             $proxy->push_filter(
13             mime => 'image/*',
14             response => HTTP::Proxy::BodyFilter::Adnix->new();
15             );
16              
17             # OR ...
18              
19             # You must supply your custom rules for filtering
20             my $filter = HTTP::Proxy::BodyFilter::Adnix->new(
21             deny => [ 'spammingserver.com', 'WeSpamYou.org', ... ],
22             image => 'http://www.mydomain.com/mylogo.png'
23             );
24             $proxy->push_filter(
25             mime => 'image/*',
26             response => $filter
27             );
28              
29             =head1 ABSTRACT
30              
31             This class acts as a plugin filter module for HTTP::Proxy distribution.
32             Its purpose is to block advertising (but you can define it) images
33             to avoid wasting bandwidth for these images.
34              
35             =head1 DESCRIPTION
36              
37             C filter module is based on
38             C class that is part of C distribution.
39              
40             This filter tries to detect advertising images into your HTTP requests,
41             and it replaces them with an image of your choice.
42             Detection is done through a set of regular expression you can customize.
43              
44             If you're wondering where the name C comes from, you should
45             really read the wonderful book B by Carl Sagan.
46              
47             For more detailed information on C, see
48             its documentation on CPAN.
49              
50             =head2 EXPORT
51              
52             None by default.
53              
54             =head1 METHODS
55              
56             =cut
57              
58             package HTTP::Proxy::BodyFilter::Adnix;
59              
60 1     1   7485 use strict;
  1         3  
  1         162  
61 1     1   6 use Carp;
  1         3  
  1         68  
62 1     1   6 use base qw(HTTP::Proxy::BodyFilter);
  1         13  
  1         959  
63 1     1   311 use vars qw($VERSION $IMAGE);
  1         3  
  1         708  
64              
65             $VERSION = '0.01';
66             our @UGLY_IMAGE = qw(
67             5089 474e 0a0d 0a1a 0000 0d00 4849 5244 0000 1000 0000 1000 0608 0000 1f00 fff3
68             0061 0000 6206 474b 0044 00ff 00ff a0ff a7bd 0093 0000 7009 5948 0073 0b00 0012
69             0b00 0112 ddd2 fc7e 0000 0700 4974 454d d307 140c 240b ee31 e796 0095 0000 4935
70             4144 7854 639c 4060 0680 6628 4520 898c 4281 88c0 9a64 9403 803e 4f24 8c50 1762
71             8320 8606 0881 d183 1c40 440d d074 30c0 8110 0008 da6b 9616 bfd8 6986 0000 0000
72             4549 444e 42ae 8260
73             );
74              
75             =head2 init()
76              
77             Internal. Gets called on filter initialization.
78             Accepts the options that customize filter behaviour.
79              
80             =over 4
81              
82             =item image
83              
84             String. Filesystem path to PNG image to be used as a placeholder for all detected
85             advertising images.
86              
87             =item deny
88              
89             Array reference. Must contain all regular expressions that block images.
90             This means that if the current image matches any of these regular expressions,
91             it will be blocked.
92              
93             =back
94              
95             =cut
96              
97             sub init
98             {
99 0     0 1   my $self = shift;
100 0           my %opt;
101              
102 0 0         if( @_ % 1 == 0 ) {
103 0           %opt = @_;
104             }
105             else {
106 0           croak "You must supply key => value options";
107             }
108              
109             # Set path of placeholder image
110 0 0         if( exists $opt{image} ) {
111 0           $self->{_image} = $opt{image};
112             }
113              
114 0   0       $self->{_image} ||= '/usr/local/share/replaced.png';
115              
116             # Set regex blacklist
117 0 0 0       if( exists $opt{deny} && ref $opt{deny} eq 'ARRAY' ) {
118 0           $self->{_denylist} = $opt{deny};
119             }
120 0   0       $self->{_denylist} ||= [ map { qr($_) }
  0            
121             'ad[vs\.]',
122             'adv?server',
123             '468x60',
124             'doubleclick\.net',
125             'promot[ie]',
126             ];
127              
128             # Set regex whitelist (empty at start)
129 0 0 0       if( exists $opt{allow} && ref $opt{allow} eq 'ARRAY' ) {
130 0           $self->{_allowlist} = $opt{allow};
131             }
132 0   0       $self->{_allowlist} ||= [];
133              
134             # Do I need this? TODO must ask Philippe
135 0           $self->{rw} = delete $opt{rw};
136              
137             }
138              
139              
140             =head2 filter()
141              
142             This is where the hard work gets done.
143             Every image is matched against a set of regexes and if it matches B of
144             these, it is B downloaded and B by the
145             placeholder image.
146              
147             The intention here is to save bandwidth and to remove those annoying
148             banners.
149              
150             =cut
151              
152             sub filter
153             {
154 0     0 1   my($self, $headers, $message) = @_;
155 0           my $uri = $message->uri();
156              
157             # "DECLINE" non-image urls
158             #return 0 unless $uri =~ /\.(gif|jpe?g|png)/i;
159              
160             # Load placeholder image if not yet done
161 0 0         if( ! $IMAGE ) {
162 0           $self->_loadImage();
163             }
164            
165 0           foreach( @{ $self->{_denylist} } ) {
  0            
166 0           my $re = $_;
167 0 0         if( $uri =~ $re ) {
168 0           $self->proxy()->log( '', '', 'blocked ad image('.$uri.')' );
169 0 0         my $response = HTTP::Response->new(
170             200,
171             'OK',
172             HTTP::Headers->new(
173             Content_Type => ( $self->{_image} =~ /\.(gif|png|jpg)/i ? qq{image/$1} : 'image/png' ),
174             Content_Length => -s $IMAGE,
175             ),
176             $IMAGE
177             );
178              
179 0           $self->proxy()->response($response);
180 0           last;
181             }
182             }
183              
184 0           return 1;
185             }
186              
187              
188              
189             =head2 _loadImage()
190              
191             Internal function. Tries to load the image to be used as a placeholder
192             for all advertising images. If no remote/local image can be loaded,
193             an hardcoded binary PNG image is used.
194              
195             =cut
196              
197             sub _loadImage
198             {
199 0     0     my $self = $_[0];
200 0           my $loaded = 0;
201              
202             #$self->{_image} = lc $self->{_image};
203              
204             # If image is specified as URL, try to load it
205 0 0 0       if( ($self->{_image} =~ m|^[Hh][Tt][Tt][Pp]://|) || ($self->{_image} =~ m|^[Ff][Tt][Pp]://|) ) {
206 0           eval {
207 1     1   783 use LWP::Simple ();
  1         112664  
  1         430  
208 0           $IMAGE = LWP::Simple::get($self->{_image});
209 0 0 0       $loaded = 1 if defined $IMAGE && length($IMAGE) > 0;
210             };
211 0 0         if( ! $loaded ) {
212 0           croak "Can't load image $$self{_image}";
213             }
214             }
215              
216             else {
217              
218             # Try to load image from disk
219 0 0         if( open IMG, $self->{_image} ) {
220 0           binmode(IMG);
221 0           local $/ = undef;
222 0           $IMAGE = ;
223 0   0       $loaded = (length($IMAGE) > 0) && close(IMG);
224             }
225              
226             }
227            
228 0 0         if( $loaded ) {
229             #printf STDERR 'loaded replace image from %s (%d bytes)'."\n", $self->{_image}, length($IMAGE);
230             # Here proxy object is not yet prepared
231 0           $self->proxy()->log('', '', sprintf('loaded replace image from %s (%d bytes)', $self->{_image}, length($IMAGE)));
232             }
233             else {
234             # If all else failed, load static binary PNG data
235 0           $IMAGE = "";
236              
237 0           for( @UGLY_IMAGE ) {
238 0           my($b1,$b2) = unpack('A2 A2',$_);
239 0           $b1 = chr hex $b1;
240 0           $b2 = chr hex $b2;
241 0           $IMAGE .= $b2 . $b1;
242             }
243             }
244              
245 0           return $loaded;
246             }
247              
248             1;
249              
250             #
251             # END OF MODULE
252             #
253              
254              
255             =head1 SEE ALSO
256              
257             For more information, you should read C distribution documentation.
258             If you find this class useful or want to report complaints or bugs, please
259             do it through the good CPAN bug report system on http://rt.cpan.org.
260              
261             This class has been derived from original work by Philippe "Book" Bruhat,
262             author of L distribution. Go check out his good work!
263              
264             =head1 AUTHOR
265              
266             Cosimo Streppone Ecosimo@cpan.orgE
267              
268             =head1 COPYRIGHT AND LICENSE
269              
270             Copyright 2004 by Cosimo Streppone
271              
272             This library is free software; you can redistribute it and/or modify
273             it under the same terms as Perl itself.
274              
275             =cut
276              
277             1;
278