| blib/lib/HTTP/Proxy/BodyFilter/Adnix.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 15 | 66 | 22.7 |
| branch | 0 | 24 | 0.0 |
| condition | 0 | 21 | 0.0 |
| subroutine | 5 | 8 | 62.5 |
| pod | 2 | 2 | 100.0 |
| total | 22 | 121 | 18.1 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | # $Id: Adnix.pm,v 1.1.1.1 2004/06/19 09:37:19 cosimo Exp $ | ||||||
| 2 | |||||||
| 3 | =head1 NAME | ||||||
| 4 | |||||||
| 5 | HTTP::Proxy::BodyFilter::Adnix - Automatically block advertising images with custom regexes | ||||||
| 6 | |||||||
| 7 | =head1 SYNOPSIS | ||||||
| 8 | |||||||
| 9 | use HTTP::Proxy::BodyFilter::Adnix | ||||||
| 10 | |||||||
| 11 | # Use default blacklist rules and default placeholder image | ||||||
| 12 | $proxy->push_filter( | ||||||
| 13 | mime => 'image/*', | ||||||
| 14 | response => HTTP::Proxy::BodyFilter::Adnix->new(); | ||||||
| 15 | ); | ||||||
| 16 | |||||||
| 17 | # OR ... | ||||||
| 18 | |||||||
| 19 | # You must supply your custom rules for filtering | ||||||
| 20 | my $filter = HTTP::Proxy::BodyFilter::Adnix->new( | ||||||
| 21 | deny => [ 'spammingserver.com', 'WeSpamYou.org', ... ], | ||||||
| 22 | image => 'http://www.mydomain.com/mylogo.png' | ||||||
| 23 | ); | ||||||
| 24 | $proxy->push_filter( | ||||||
| 25 | mime => 'image/*', | ||||||
| 26 | response => $filter | ||||||
| 27 | ); | ||||||
| 28 | |||||||
| 29 | =head1 ABSTRACT | ||||||
| 30 | |||||||
| 31 | This class acts as a plugin filter module for HTTP::Proxy distribution. | ||||||
| 32 | Its purpose is to block advertising (but you can define it) images | ||||||
| 33 | to avoid wasting bandwidth for these images. | ||||||
| 34 | |||||||
| 35 | =head1 DESCRIPTION | ||||||
| 36 | |||||||
| 37 | C |
||||||
| 38 | C |
||||||
| 39 | |||||||
| 40 | This filter tries to detect advertising images into your HTTP requests, | ||||||
| 41 | and it replaces them with an image of your choice. | ||||||
| 42 | Detection is done through a set of regular expression you can customize. | ||||||
| 43 | |||||||
| 44 | If you're wondering where the name C |
||||||
| 45 | really read the wonderful book B |
||||||
| 46 | |||||||
| 47 | For more detailed information on C |
||||||
| 48 | its documentation on CPAN. | ||||||
| 49 | |||||||
| 50 | =head2 EXPORT | ||||||
| 51 | |||||||
| 52 | None by default. | ||||||
| 53 | |||||||
| 54 | =head1 METHODS | ||||||
| 55 | |||||||
| 56 | =cut | ||||||
| 57 | |||||||
| 58 | package HTTP::Proxy::BodyFilter::Adnix; | ||||||
| 59 | |||||||
| 60 | 1 | 1 | 7485 | use strict; | |||
| 1 | 3 | ||||||
| 1 | 162 | ||||||
| 61 | 1 | 1 | 6 | use Carp; | |||
| 1 | 3 | ||||||
| 1 | 68 | ||||||
| 62 | 1 | 1 | 6 | use base qw(HTTP::Proxy::BodyFilter); | |||
| 1 | 13 | ||||||
| 1 | 959 | ||||||
| 63 | 1 | 1 | 311 | use vars qw($VERSION $IMAGE); | |||
| 1 | 3 | ||||||
| 1 | 708 | ||||||
| 64 | |||||||
| 65 | $VERSION = '0.01'; | ||||||
| 66 | our @UGLY_IMAGE = qw( | ||||||
| 67 | 5089 474e 0a0d 0a1a 0000 0d00 4849 5244 0000 1000 0000 1000 0608 0000 1f00 fff3 | ||||||
| 68 | 0061 0000 6206 474b 0044 00ff 00ff a0ff a7bd 0093 0000 7009 5948 0073 0b00 0012 | ||||||
| 69 | 0b00 0112 ddd2 fc7e 0000 0700 4974 454d d307 140c 240b ee31 e796 0095 0000 4935 | ||||||
| 70 | 4144 7854 639c 4060 0680 6628 4520 898c 4281 88c0 9a64 9403 803e 4f24 8c50 1762 | ||||||
| 71 | 8320 8606 0881 d183 1c40 440d d074 30c0 8110 0008 da6b 9616 bfd8 6986 0000 0000 | ||||||
| 72 | 4549 444e 42ae 8260 | ||||||
| 73 | ); | ||||||
| 74 | |||||||
| 75 | =head2 init() | ||||||
| 76 | |||||||
| 77 | Internal. Gets called on filter initialization. | ||||||
| 78 | Accepts the options that customize filter behaviour. | ||||||
| 79 | |||||||
| 80 | =over 4 | ||||||
| 81 | |||||||
| 82 | =item image | ||||||
| 83 | |||||||
| 84 | String. Filesystem path to PNG image to be used as a placeholder for all detected | ||||||
| 85 | advertising images. | ||||||
| 86 | |||||||
| 87 | =item deny | ||||||
| 88 | |||||||
| 89 | Array reference. Must contain all regular expressions that block images. | ||||||
| 90 | This means that if the current image matches any of these regular expressions, | ||||||
| 91 | it will be blocked. | ||||||
| 92 | |||||||
| 93 | =back | ||||||
| 94 | |||||||
| 95 | =cut | ||||||
| 96 | |||||||
| 97 | sub init | ||||||
| 98 | { | ||||||
| 99 | 0 | 0 | 1 | my $self = shift; | |||
| 100 | 0 | my %opt; | |||||
| 101 | |||||||
| 102 | 0 | 0 | if( @_ % 1 == 0 ) { | ||||
| 103 | 0 | %opt = @_; | |||||
| 104 | } | ||||||
| 105 | else { | ||||||
| 106 | 0 | croak "You must supply key => value options"; | |||||
| 107 | } | ||||||
| 108 | |||||||
| 109 | # Set path of placeholder image | ||||||
| 110 | 0 | 0 | if( exists $opt{image} ) { | ||||
| 111 | 0 | $self->{_image} = $opt{image}; | |||||
| 112 | } | ||||||
| 113 | |||||||
| 114 | 0 | 0 | $self->{_image} ||= '/usr/local/share/replaced.png'; | ||||
| 115 | |||||||
| 116 | # Set regex blacklist | ||||||
| 117 | 0 | 0 | 0 | if( exists $opt{deny} && ref $opt{deny} eq 'ARRAY' ) { | |||
| 118 | 0 | $self->{_denylist} = $opt{deny}; | |||||
| 119 | } | ||||||
| 120 | 0 | 0 | $self->{_denylist} ||= [ map { qr($_) } | ||||
| 0 | |||||||
| 121 | 'ad[vs\.]', | ||||||
| 122 | 'adv?server', | ||||||
| 123 | '468x60', | ||||||
| 124 | 'doubleclick\.net', | ||||||
| 125 | 'promot[ie]', | ||||||
| 126 | ]; | ||||||
| 127 | |||||||
| 128 | # Set regex whitelist (empty at start) | ||||||
| 129 | 0 | 0 | 0 | if( exists $opt{allow} && ref $opt{allow} eq 'ARRAY' ) { | |||
| 130 | 0 | $self->{_allowlist} = $opt{allow}; | |||||
| 131 | } | ||||||
| 132 | 0 | 0 | $self->{_allowlist} ||= []; | ||||
| 133 | |||||||
| 134 | # Do I need this? TODO must ask Philippe | ||||||
| 135 | 0 | $self->{rw} = delete $opt{rw}; | |||||
| 136 | |||||||
| 137 | } | ||||||
| 138 | |||||||
| 139 | |||||||
| 140 | =head2 filter() | ||||||
| 141 | |||||||
| 142 | This is where the hard work gets done. | ||||||
| 143 | Every image is matched against a set of regexes and if it matches B |
||||||
| 144 | these, it is B |
||||||
| 145 | placeholder image. | ||||||
| 146 | |||||||
| 147 | The intention here is to save bandwidth and to remove those annoying | ||||||
| 148 | banners. | ||||||
| 149 | |||||||
| 150 | =cut | ||||||
| 151 | |||||||
| 152 | sub filter | ||||||
| 153 | { | ||||||
| 154 | 0 | 0 | 1 | my($self, $headers, $message) = @_; | |||
| 155 | 0 | my $uri = $message->uri(); | |||||
| 156 | |||||||
| 157 | # "DECLINE" non-image urls | ||||||
| 158 | #return 0 unless $uri =~ /\.(gif|jpe?g|png)/i; | ||||||
| 159 | |||||||
| 160 | # Load placeholder image if not yet done | ||||||
| 161 | 0 | 0 | if( ! $IMAGE ) { | ||||
| 162 | 0 | $self->_loadImage(); | |||||
| 163 | } | ||||||
| 164 | |||||||
| 165 | 0 | foreach( @{ $self->{_denylist} } ) { | |||||
| 0 | |||||||
| 166 | 0 | my $re = $_; | |||||
| 167 | 0 | 0 | if( $uri =~ $re ) { | ||||
| 168 | 0 | $self->proxy()->log( '', '', 'blocked ad image('.$uri.')' ); | |||||
| 169 | 0 | 0 | my $response = HTTP::Response->new( | ||||
| 170 | 200, | ||||||
| 171 | 'OK', | ||||||
| 172 | HTTP::Headers->new( | ||||||
| 173 | Content_Type => ( $self->{_image} =~ /\.(gif|png|jpg)/i ? qq{image/$1} : 'image/png' ), | ||||||
| 174 | Content_Length => -s $IMAGE, | ||||||
| 175 | ), | ||||||
| 176 | $IMAGE | ||||||
| 177 | ); | ||||||
| 178 | |||||||
| 179 | 0 | $self->proxy()->response($response); | |||||
| 180 | 0 | last; | |||||
| 181 | } | ||||||
| 182 | } | ||||||
| 183 | |||||||
| 184 | 0 | return 1; | |||||
| 185 | } | ||||||
| 186 | |||||||
| 187 | |||||||
| 188 | |||||||
| 189 | =head2 _loadImage() | ||||||
| 190 | |||||||
| 191 | Internal function. Tries to load the image to be used as a placeholder | ||||||
| 192 | for all advertising images. If no remote/local image can be loaded, | ||||||
| 193 | an hardcoded binary PNG image is used. | ||||||
| 194 | |||||||
| 195 | =cut | ||||||
| 196 | |||||||
| 197 | sub _loadImage | ||||||
| 198 | { | ||||||
| 199 | 0 | 0 | my $self = $_[0]; | ||||
| 200 | 0 | my $loaded = 0; | |||||
| 201 | |||||||
| 202 | #$self->{_image} = lc $self->{_image}; | ||||||
| 203 | |||||||
| 204 | # If image is specified as URL, try to load it | ||||||
| 205 | 0 | 0 | 0 | if( ($self->{_image} =~ m|^[Hh][Tt][Tt][Pp]://|) || ($self->{_image} =~ m|^[Ff][Tt][Pp]://|) ) { | |||
| 206 | 0 | eval { | |||||
| 207 | 1 | 1 | 783 | use LWP::Simple (); | |||
| 1 | 112664 | ||||||
| 1 | 430 | ||||||
| 208 | 0 | $IMAGE = LWP::Simple::get($self->{_image}); | |||||
| 209 | 0 | 0 | 0 | $loaded = 1 if defined $IMAGE && length($IMAGE) > 0; | |||
| 210 | }; | ||||||
| 211 | 0 | 0 | if( ! $loaded ) { | ||||
| 212 | 0 | croak "Can't load image $$self{_image}"; | |||||
| 213 | } | ||||||
| 214 | } | ||||||
| 215 | |||||||
| 216 | else { | ||||||
| 217 | |||||||
| 218 | # Try to load image from disk | ||||||
| 219 | 0 | 0 | if( open IMG, $self->{_image} ) { | ||||
| 220 | 0 | binmode(IMG); | |||||
| 221 | 0 | local $/ = undef; | |||||
| 222 | 0 | $IMAGE = |
|||||
| 223 | 0 | 0 | $loaded = (length($IMAGE) > 0) && close(IMG); | ||||
| 224 | } | ||||||
| 225 | |||||||
| 226 | } | ||||||
| 227 | |||||||
| 228 | 0 | 0 | if( $loaded ) { | ||||
| 229 | #printf STDERR 'loaded replace image from %s (%d bytes)'."\n", $self->{_image}, length($IMAGE); | ||||||
| 230 | # Here proxy object is not yet prepared | ||||||
| 231 | 0 | $self->proxy()->log('', '', sprintf('loaded replace image from %s (%d bytes)', $self->{_image}, length($IMAGE))); | |||||
| 232 | } | ||||||
| 233 | else { | ||||||
| 234 | # If all else failed, load static binary PNG data | ||||||
| 235 | 0 | $IMAGE = ""; | |||||
| 236 | |||||||
| 237 | 0 | for( @UGLY_IMAGE ) { | |||||
| 238 | 0 | my($b1,$b2) = unpack('A2 A2',$_); | |||||
| 239 | 0 | $b1 = chr hex $b1; | |||||
| 240 | 0 | $b2 = chr hex $b2; | |||||
| 241 | 0 | $IMAGE .= $b2 . $b1; | |||||
| 242 | } | ||||||
| 243 | } | ||||||
| 244 | |||||||
| 245 | 0 | return $loaded; | |||||
| 246 | } | ||||||
| 247 | |||||||
| 248 | 1; | ||||||
| 249 | |||||||
| 250 | # | ||||||
| 251 | # END OF MODULE | ||||||
| 252 | # | ||||||
| 253 | |||||||
| 254 | |||||||
| 255 | =head1 SEE ALSO | ||||||
| 256 | |||||||
| 257 | For more information, you should read C |
||||||
| 258 | If you find this class useful or want to report complaints or bugs, please | ||||||
| 259 | do it through the good CPAN bug report system on http://rt.cpan.org. | ||||||
| 260 | |||||||
| 261 | This class has been derived from original work by Philippe "Book" Bruhat, | ||||||
| 262 | author of L |
||||||
| 263 | |||||||
| 264 | =head1 AUTHOR | ||||||
| 265 | |||||||
| 266 | Cosimo Streppone E |
||||||
| 267 | |||||||
| 268 | =head1 COPYRIGHT AND LICENSE | ||||||
| 269 | |||||||
| 270 | Copyright 2004 by Cosimo Streppone | ||||||
| 271 | |||||||
| 272 | This library is free software; you can redistribute it and/or modify | ||||||
| 273 | it under the same terms as Perl itself. | ||||||
| 274 | |||||||
| 275 | =cut | ||||||
| 276 | |||||||
| 277 | 1; | ||||||
| 278 |