File Coverage

blib/lib/HTTP/ProxyTest.pm
Criterion Covered Total %
statement 67 134 50.0
branch 15 72 20.8
condition 4 30 13.3
subroutine 12 13 92.3
pod 0 5 0.0
total 98 254 38.5


line stmt bran cond sub pod time code
1             package HTTP::ProxyTest;
2              
3 1     1   6152 use 5.006;
  1         3  
  1         100  
4             our $VERSION = '0.11';
5             # $Id: ProxyTest.pm,v 1.3 2011/08/01 21:03:09 gunnarh Exp $
6              
7             =head1 NAME
8              
9             HTTP::ProxyTest - Reject an HTTP request if passed via an open proxy
10              
11             =head1 SYNOPSIS
12              
13             use HTTP::ProxyTest;
14              
15             proxytest(
16             -nmap => '/usr/local/bin/nmap',
17             -whitelist => '/usr/local/etc/ProxyTest_whitelist',
18             -log => '/var/log/open_proxy.log',
19             );
20              
21             =head1 DESCRIPTION
22              
23             Robots that send comment spam are often hidden behind anonymous open
24             proxy servers. You can use C to look for open proxies
25             on-the-fly and prevent such spam robots from submitting their crap.
26             The module is particularly useful if you don't want to bother your
27             web site visitors with CAPTCHAs etc.
28              
29             C tests certain ports of C that are
30             often used for anonymous open proxies, and denies access if an open
31             proxy is found, i.e. it responds with status "403 Forbidden" and
32             exits. The module was designed to make use of the Nmap security
33             scanner (L) in order to speed up things and/or
34             increase the number of ports to be considered for testing.
35             Consequently, if Nmap is currently not available to you, you are
36             advised to download and install that program.
37              
38             The strong point of C, compared to other similar CPAN
39             modules (see L), is its speed. Since Nmap limits the number
40             of ports to test, C can do on-the-fly testing fast
41             enough to cover quite a few proxy port candidates, without causing any
42             significant response delay. The same seems not to be true for other
43             modules.
44              
45             =head2 Arguments
46              
47             Below are the arguments that can be passed the B
48             function, which by the way is the only function of C
49             that you are supposed to call from outside the module. B
50             takes hash style key=Evalue arguments (see L).
51             All the arguments are optional.
52              
53             =over 4
54              
55             =item B<-nmap>
56              
57             Path to the nmap executable; no value by default.
58              
59             If -nmap is set, C will test those -primary ports
60             that Nmap reports to be either open or filtered, while it will only
61             test those -secondary ports that Nmap reports to be open.
62              
63             If -nmap is not set, C will test all the -primary
64             ports and skip the -secondary ports.
65              
66             =item B<-primary>
67              
68             Reference to an array of ports where the risk of carrying an open
69             proxy is not insignificant. Default value:
70              
71             [ 80, 3128, 8080 ]
72              
73             =item B<-secondary>
74              
75             Reference to an array of ports which are less likely, compared to
76             the -primary ports, to carry an open proxy. Default value:
77              
78             [ 808, 6588, 8000, 8088 ]
79              
80             =item B<-test_url>
81              
82             Web address used for proxy testing; defaults to
83             C<'http://gunnar.cc/proxy_test.txt'>, which is the address to a tiny
84             text file on my own server. Even if that address works fine when I'm
85             writing this, there is no guarantee that it will keep working for all
86             time, so you are recommended to set -test_url to a resource that you
87             control. Choose a URL to a tiny page on a reliable server which
88             includes the status line C<200 OK> in the responses.
89              
90             =item B<-content_substr>
91              
92             A string that shall be included in the content string of the response;
93             defaults to C<'y4dWP:a7w'>. To prevent false positives,
94             C will not report that a host carries an open proxy,
95             unless it has confirmed an occurrence of -content_substr in the
96             response content string.
97              
98             Obviously, if you set -test_url, you will most likely need to set
99             -content_substr as well.
100              
101             =item B<-timeout>
102              
103             When doing proxy testing, C expects to establish a
104             server connection within -timeout seconds after a request, or else
105             the request is aborted. Defaults to 4.
106              
107             =item B<-whitelist>
108              
109             Path to a DBM database with IP addresses of hosts that passed the
110             proxy tests during the last week; no value by default. If you set
111             -whitelist, C will maintain the database and skip
112             testing for hosts in the 'whitelist'.
113              
114             =item B<-log>
115              
116             Path to a text file where information about requests from hosts with
117             open proxies is logged; no value by default.
118              
119             =item B<-log_maxbytes>
120              
121             Maximum size in bytes of the -log file; defaults to C<1_000_000>. If
122             -log is set, and when the max size is touched, C
123             halves the file size by removing the oldest entries.
124              
125             =back
126              
127             =head1 EXAMPLES
128              
129             =head2 Perl web apps
130              
131             After having adapted the L code, you can simply insert it
132             e.g. before any form generating or form data processing code portion
133             of a Perl program. To shorten the code to be inserted in various
134             programs, you can place a wrapper in one of the C<@INC> directories.
135              
136             # proxytest.pl
137             use HTTP::ProxyTest;
138             proxytest(
139             -nmap => '/usr/local/bin/nmap',
140             -whitelist => '/usr/local/etc/ProxyTest_whitelist',
141             -log => '/var/log/open_proxy.log',
142             );
143             1;
144              
145             Now you can invoke C by just saying:
146              
147             require 'proxytest.pl';
148              
149             =head2 PHP web apps
150              
151             This example of how to invoke C from PHP begins with
152             this script, located in one of the PHP C directories:
153              
154            
155             // proxytest.php
156             function proxytest() {
157             $args = implode(' ', array(
158             getenv('REMOTE_ADDR'),
159             getenv('HTTP_HOST'),
160             getenv('REQUEST_URI'),
161             ));
162             exec('/path/to/proxytest.pl ' . $args, $error);
163             if ( count($error) ) {
164             header('HTTP/1.0 403 Forbidden');
165             echo ( implode( "\n", array_slice($error, 3) ) );
166             exit;
167             }
168             }
169             proxytest();
170             ?>
171              
172             Then we add some code to the wrapper and make it an executable Perl
173             script.
174              
175             #!/usr/bin/perl
176             # proxytest.pl
177             use HTTP::ProxyTest;
178             if ( $ENV{_} and $ENV{_} eq '/path/to/proxytest.php' ) {
179             @ENV{ qw/REMOTE_ADDR HTTP_HOST REQUEST_URI/ } = @ARGV;
180             }
181             proxytest(
182             -nmap => '/usr/local/bin/nmap',
183             -whitelist => '/usr/local/etc/ProxyTest_whitelist',
184             -log => '/var/log/open_proxy.log',
185             );
186             1;
187            
188             Finally the single line call from a PHP program:
189              
190             include 'proxytest.php';
191              
192             =head1 DEPENDENCIES
193              
194             This module is dependent on the L set of modules.
195              
196             Also, even if it's possible to use C without access
197             to the Nmap security scanner, we'd better consider Nmap to be a
198             S<'soft dependency'>, a.k.a. strong recommendation.
199              
200             =head1 CAVEAT
201              
202             In case of C being invoked via a server wide wrapper,
203             and the web server may be run as more than one user (e.g. because of
204             Apache suEXEC), you should pay attention to the permissions of the
205             DBM and log files. You may want to make sure that those files are
206             'world writable'.
207              
208             =head1 AUTHOR, COPYRIGHT AND LICENSE
209              
210             Copyright (c) 2010-2011 Gunnar Hjalmarsson
211             http://www.gunnar.cc/cgi-bin/contact.pl
212              
213             This module is free software; you can redistribute it and/or modify it
214             under the same terms as Perl itself.
215              
216             =head1 SEE ALSO
217              
218             L,
219             L
220              
221             =cut
222              
223 1     1   6 use strict;
  1         2  
  1         40  
224 1     1   4 use warnings;
  1         5  
  1         31  
225 1     1   4 use Carp;
  1         1  
  1         63  
226 1     1   1057 use LWP::UserAgent;
  1         73180  
  1         37  
227 1     1   1197 use SDBM_File;
  1         4052  
  1         61  
228 1     1   9 use Fcntl qw(:DEFAULT :flock);
  1         2  
  1         587  
229              
230             BEGIN {
231 1     1   6 require Exporter;
232 1         17 our @ISA = 'Exporter';
233 1         2110 our @EXPORT = 'proxytest';
234             }
235              
236             local $Carp::CarpLevel = 2;
237             local our $useragent;
238             our $time = time;
239              
240             sub proxytest {
241 1 50   1 0 154 my ($ip) = $ENV{REMOTE_ADDR} =~ /^(\d+(?:\.\d+){3})$/ or return;
242 1 50       4 my $args = &arguments or return;
243 1         6 my $path = $ENV{PATH};
244 1         7 $ENV{PATH} = '';
245 1         8 my $white = update_whitelist( $args->{whitelist} );
246 1 50       5 TEST: {
247 1         4 last TEST if $white->{$ip};
248              
249 1         6 my $ports = portselect($ip, $args);
250 1         5 foreach my $port ( @$ports ) {
251 3         53 $useragent->proxy('http', "http://$ip:$port");
252 3         415 my $res = $useragent->get( $args->{test_url} );
253 3 50 33     6325 if ( $res->is_success and
254             index( $res->content, $args->{content_substr} ) >= 0 ) {
255 0         0 caught($ip, $port, $args);
256 0         0 untie %$white;
257 0         0 exit;
258             }
259             }
260              
261 1         17 $white->{$ip} = $time;
262             }
263 1         32 untie %$white;
264 1         13 $ENV{PATH} = $path; # don't interfere with rest of program
265             }
266              
267             sub arguments {
268 1     1 0 13 my %defaults = (
269             primary => [ 80, 3128, 8080 ],
270             secondary => [ 808, 6588, 8000, 8088 ],
271             test_url => 'http://gunnar.cc/proxy_test.txt',
272             content_substr => 'y4dWP:a7w',
273             timeout => 4,
274             log_maxbytes => 1_000_000,
275             );
276 1         13 my %valid_keys = (
277             -nmap => 'nmap',
278             -primary => 'primary',
279             -secondary => 'secondary',
280             -test_url => 'test_url',
281             -content_substr => 'content_substr',
282             -timeout => 'timeout',
283             -whitelist => 'whitelist',
284             -log => 'log',
285             -log_maxbytes => 'log_maxbytes',
286             );
287              
288 1 50       5 @_ % 2 == 0 or croak 'key=>value pairs are expected';
289              
290 1         3 my %args;
291 1         5 while ( my $arg = shift ) {
292 0         0 my $key = lc $arg;
293 0 0       0 $valid_keys{$key} or croak "Unknown argument key '$key'";
294 0         0 $args{ $valid_keys{$key} } = shift;
295             }
296              
297 1 50       5 if ( $args{nmap} ) {
298 0 0       0 -f $args{nmap} or croak "File '$args{nmap}' does not exist";
299 0 0       0 -x $args{nmap} or croak "'$args{nmap}' is not an executable file";
300             }
301              
302 1         2 for ('whitelist', 'log') {
303 2 50       8 PATHCHECKS: {
304 2         2 last PATHCHECKS unless $args{$_};
305 0 0       0 my $file = $_ eq 'whitelist' ? $args{$_}.'.pag' : $args{$_};
306 0 0       0 if ( -f $file ) {
307 0 0 0     0 last PATHCHECKS if -r $file and -w _;
308 0         0 croak "Argument -$_: The user this script runs as ",
309             "does not have write access to '$file'";
310             }
311 0         0 require File::Basename;
312 0         0 my $dir = ( File::Basename::fileparse($file) )[1];
313 0 0       0 if ( -d $dir ) {
314 0 0 0     0 last PATHCHECKS if -r $dir and -w _ and -x _;
      0        
315 0         0 croak "Argument -$_: The user this script runs as ",
316             "does not have write access to '$dir'";
317             }
318 0         0 croak "Argument -$_: Can't find any directory '$dir'";
319             }
320             }
321              
322 1         3 for ('primary', 'secondary') {
323 2 50       6 if ( exists $args{$_} ) {
324 0 0       0 ref($args{$_}) eq 'ARRAY' or croak "Argument -$_ shall be an arrayref";
325 0   0     0 my $err = grep /\D/ || $_ < 0 || $_ > 65535, @{ $args{$_} };
  0         0  
326 0 0       0 $err == 0 or croak "Argument -$_: $err elements are not valid port numbers";
327             } else {
328 2         7 $args{$_} = $defaults{$_};
329             }
330             }
331 1 50 33     6 $args{primary}->[0] or $args{secondary}->[0] or
332             croak 'There should be at least one port to test';
333 1 50 33     11 unless ( $args{nmap} or $args{primary}->[0] ) {
334 0         0 croak 'Argument -primary may not refer to an empty list when no Nmap scanning is done';
335             }
336              
337 1         3 for ('timeout', 'log_maxbytes') {
338 2 50       5 if ( $args{$_} ) {
339 0 0       0 $args{$_} =~ /^\d+$/ or croak "Argument -$_ shall be a positive integer";
340             } else {
341 2         6 $args{$_} = $defaults{$_};
342             }
343             }
344              
345 1         6 for ('test_url', 'content_substr') {
346 2   33     13 $args{$_} ||= $defaults{$_};
347             }
348 1         15 $useragent = LWP::UserAgent->new(
349             timeout => $args{timeout},
350             agent => "HTTP::ProxyTest/$VERSION",
351             requests_redirectable => [],
352             );
353 1         14233 my $res = $useragent->get( $args{test_url} );
354 1 50       226046 unless ( $res->is_success ) {
355             # no fatal error, since a temporary glitch
356             # might be the cause of the failure
357 0         0 carp 'Argument -test_url: Response status ', $res->status_line;
358 0         0 return undef;
359             }
360 1 50       24 unless ( index( $res->content, $args{content_substr} ) >= 0 ) {
361 0         0 croak 'Argument -content_substr: The string ',
362             "'$args{content_substr}' not found in the source of $args{test_url}";
363             }
364              
365 1         50 \%args
366             }
367              
368             sub update_whitelist {
369 1     1 0 4 my $whitelist = shift;
370 1 50       6 return {} unless $whitelist;
371              
372 0 0       0 tie my %white, 'SDBM_File', $whitelist, O_CREAT|O_RDWR, 0666 or die $!;
373 0         0 my @oldies = grep $white{$_} < $time - 604800, keys %white;
374 0         0 delete @white{ @oldies };
375 0         0 \%white
376             }
377              
378             sub portselect {
379 1     1 0 3 my ($ip, $args) = @_;
380 1 50       6 return $args->{primary} unless $args->{nmap};
381              
382 0           my (%count, @open, @filtered);
383 0 0         my $ports = join ',', map { $count{$_}++ ? () : $_ }
  0            
384 0           @{ $args->{primary} }, @{ $args->{secondary} };
  0            
385 0           my $nmap_result = qx( $args->{nmap} -PN -p $ports $ip );
386 0 0 0       croak 'Nmap scan failed' if !$nmap_result or $?;
387 0           while ( $nmap_result =~ m,^(\d+)/tcp\s+(open|filtered)\b,gm ) {
388 0           my ($port, $state) = ($1, $2);
389 0 0         if ( $state eq 'open' ) {
  0 0          
390 0           push @open, $port;
391             } elsif ( grep $_ eq $port, @{ $args->{primary} } ) {
392 0           push @filtered, $port;
393             }
394             }
395 0           [ @open, @filtered ]
396             }
397              
398             sub caught {
399 0     0 0   my ($ip, $port, $args) = @_;
400 0   0       my $host = gethostbyaddr( pack('C4', split /\./, $ip), 2 ) || "IP $ip";
401 0           print "Status: 403 Forbidden\n",
402             "Content-type: text/html; charset=UTF-8\n\n";
403 0           print "403 Forbidden\n",
404             "

403 Forbidden

\n

The host you are using ($host) ",

405             "appears to carry an open proxy on port $port.

\n",
406             "\n";
407 0 0         return unless $args->{log};
408              
409 0 0         open my $log, '+>>', $args->{log} or die $!;
410 0           flock $log, LOCK_EX;
411 0 0         print $log "Date: ", scalar localtime $time, "\n",
412             "URL: ", ( lc substr($ENV{REQUEST_URI}, 0, 4) eq 'http' ?
413             '' : "http://$ENV{HTTP_HOST}" ), "$ENV{REQUEST_URI}\n",
414             "IP: $ip\n";
415 0 0         print $log "Host name: $host\n" unless substr($host, 3) eq $ip;
416 0           print $log "Port: $port\n\n";
417              
418 0           my $oldfh = select $log; $|++; select $oldfh;
  0            
  0            
419 0 0         return unless -s $log > $args->{log_maxbytes};
420              
421 0           seek $log, $args->{log_maxbytes} / 2, 0;
422 0           my $latest = do { local $/; <$log> };
  0            
  0            
423 0           $latest =~ s/.+?\n\n//s;
424 0           seek $log, 0, 0;
425 0           truncate $log, 0;
426 0           print $log $latest;
427             }
428              
429             1;
430