File Coverage

blib/lib/LWPx/ParanoidAgent.pm
Criterion Covered Total %
statement 169 233 72.5
branch 68 142 47.8
condition 45 91 49.4
subroutine 17 20 85.0
pod 5 7 71.4
total 304 493 61.6


line stmt bran cond sub pod time code
1             package LWPx::ParanoidAgent;
2             require LWP::UserAgent;
3              
4 2     2   40567 use vars qw(@ISA $VERSION);
  2         3  
  2         150  
5             @ISA = qw(LWP::UserAgent);
6             $VERSION = '1.12';
7              
8             require HTTP::Request;
9             require HTTP::Response;
10              
11 2     2   775 use HTTP::Status ();
  2         5676  
  2         41  
12 2     2   9 use strict;
  2         4  
  2         32  
13 2     2   1244 use Net::DNS;
  2         139274  
  2         229  
14 2     2   1269 use LWP::Debug ();
  2         817  
  2         4831  
15              
16             sub new {
17 1     1 1 15 my $class = shift;
18 1         2 my %opts = @_;
19              
20 1   50     6 my $blocked_hosts = delete $opts{blocked_hosts} || [];
21 1   50     5 my $whitelisted_hosts = delete $opts{whitelisted_hosts} || [];
22 1         2 my $resolver = delete $opts{resolver};
23 1         2 my $paranoid_proxy = delete $opts{paranoid_proxy};
24 1   50     5 $opts{timeout} ||= 15;
25            
26 1         9 my $self = LWP::UserAgent->new( %opts );
27              
28 1         2008 $self->{'blocked_hosts'} = $blocked_hosts;
29 1         3 $self->{'whitelisted_hosts'} = $whitelisted_hosts;
30 1         2 $self->{'resolver'} = $resolver;
31 1         1 $self->{'paranoid_proxy'} = $paranoid_proxy;
32              
33 1         3 $self = bless $self, $class;
34 1         3 return $self;
35             }
36              
37             # returns seconds remaining given a request
38             sub _time_remain {
39 0     0   0 my $self = shift;
40 0         0 my $req = shift;
41              
42 0         0 my $now = time();
43 0   0     0 my $start_time = $req->{_time_begin} || $now;
44 0         0 return $start_time + $self->{timeout} - $now;
45             }
46              
47             sub _resolve {
48 1     1   172 my ($self, $host, $request, $timeout, $depth) = @_;
49 1         4 my $res = $self->resolver;
50 1   50     13 $depth ||= 0;
51              
52 1 50       3 die "CNAME recursion depth limit exceeded.\n" if $depth > 10;
53 1 50       3 die "DNS lookup resulted in bad host." if $self->_bad_host($host);
54              
55             # return the IP address if it looks like one and wasn't marked bad
56 1 50       4 return ($host) if $host =~ /^\d+\.\d+\.\d+\.\d+$/;
57              
58 1 50       11 my $dns_ref = $res->bgsend($host)
59             or die "No sock from bgsend";
60 1         25 my $sock;
61              
62             # Net::DNS 1.03 started returning IO::Select objects instead of sockets
63 1 50       12 if (UNIVERSAL::isa($dns_ref, "IO::Select")) {
64 0         0 my $handles = [ $dns_ref->handles ];
65 0         0 $sock = $handles->[0]->[0];
66             }
67             else {
68 1         3 $sock = $dns_ref;
69             }
70              
71             # wait for the socket to become readable, unless this is from our test
72             # mock resolver.
73 1 50 33     11 unless ($dns_ref && $dns_ref eq "MOCK") {
74 0         0 my $rin = '';
75 0         0 vec($rin, fileno($sock), 1) = 1;
76 0         0 my $nf = select($rin, undef, undef, $self->_time_remain($request));
77 0 0       0 die "DNS lookup timeout" unless $nf;
78             }
79              
80 1 50       6 my $packet = $res->bgread($dns_ref)
81             or die "DNS bgread failure";
82 1         8 $dns_ref = $sock = undef;
83              
84 1         1 my @addr;
85             my $cname;
86 1         14 foreach my $rr ($packet->answer) {
87 1 50       37 if ($rr->type eq "A") {
    0          
88 1 50       28 die "Suspicious DNS results from A record\n" if $self->_bad_host($rr->address);
89             # untaints the address:
90 0         0 push @addr, join(".", ($rr->address =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/));
91             } elsif ($rr->type eq "CNAME") {
92             # will be checked for validity in the recursion path
93 0         0 $cname = $rr->cname;
94             }
95             }
96              
97 0 0       0 return @addr if @addr;
98 0 0       0 return () unless $cname;
99 0         0 return $self->_resolve($cname, $request, $timeout, $depth + 1);
100             }
101              
102             sub _host_list_match {
103 23     23   16 my $self = shift;
104 23         24 my $list_name = shift;
105 23         15 my $host = shift;
106              
107 23 50       13 foreach my $rule (@{ $self->{$list_name} || [] }) {
  23         59  
108 35 50       53 if (ref $rule eq "CODE") {
    100          
109 0 0       0 return 1 if $rule->($host);
110             } elsif (ref $rule) {
111             # assume regexp
112 12 50       50 return 1 if $host =~ /$rule/;
113             } else {
114 23 100       68 return 1 if $host eq $rule;
115             }
116             }
117             }
118              
119             sub _bad_host {
120 11     11   23 my $self = shift;
121 11         12 my $host = lc(shift);
122              
123 11 50       22 return 0 if $self->_host_list_match("whitelisted_hosts", $host);
124 11 50       14 return 1 if $self->_host_list_match("blocked_hosts", $host);
125 11 50 33     65 return 1 if
126             $host =~ /^localhost$/i || # localhost is bad. even though it'd be stopped in
127             # a later call to _bad_host with the IP address
128             $host =~ /\s/i; # any whitespace is questionable
129              
130             # Let's assume it's an IP address now, and get it into 32 bits.
131             # Uf at any time something doesn't look like a number, then it's
132             # probably a hostname and we've already either whitelisted or
133             # blacklisted those, so we'll just say it's okay and it'll come
134             # back here later when the resolver finds an IP address.
135 11         51 my @parts = split(/\./, $host);
136 11 50       17 return 0 if @parts > 4;
137              
138             # un-octal/un-hex the parts, or return if there's a non-numeric part
139 11         10 my $overflow_flag = 0;
140 11         13 foreach (@parts) {
141 26 100 100     90 return 0 unless /^\d+$/ || /^0x[a-f\d]+$/;
142 24     0   80 local $SIG{__WARN__} = sub { $overflow_flag = 1; };
  0         0  
143 24 100       85 $_ = oct($_) if /^0/;
144             }
145              
146             # a purely numeric address shouldn't overflow.
147 9 50       11 return 1 if $overflow_flag;
148              
149 9         9 my $addr; # network order packed IP address
150              
151 9 100       22 if (@parts == 1) {
    100          
    100          
    50          
152             # a - 32 bits
153 1 50       4 return 1 if
154             $parts[0] > 0xffffffff;
155 1         2 $addr = pack("N", $parts[0]);
156             } elsif (@parts == 2) {
157             # a.b - 8.24 bits
158 4 50 33     21 return 1 if
159             $parts[0] > 0xff ||
160             $parts[1] > 0xffffff;
161 4         9 $addr = pack("N", $parts[0] << 24 | $parts[1]);
162             } elsif (@parts == 3) {
163             # a.b.c - 8.8.16 bits
164 1 50 33     9 return 1 if
      33        
165             $parts[0] > 0xff ||
166             $parts[1] > 0xff ||
167             $parts[2] > 0xffff;
168 1         4 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2]);
169             } elsif (@parts == 4) {
170             # a.b.c.d - 8.8.8.8 bits
171 3 50 33     34 return 1 if
      33        
      33        
172             $parts[0] > 0xff ||
173             $parts[1] > 0xff ||
174             $parts[2] > 0xff ||
175             $parts[3] > 0xff;
176 3         9 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2] << 8 | $parts[3]);
177             } else {
178 0         0 return 1;
179             }
180              
181 9         16 my $haddr = unpack("N", $addr); # host order IP address
182 9 100 66     129 return 1 if
      100        
      66        
      66        
      66        
      100        
      100        
      100        
      66        
183             ($haddr & 0xFF000000) == 0x00000000 || # 0.0.0.0/8
184             ($haddr & 0xFF000000) == 0x0A000000 || # 10.0.0.0/8
185             ($haddr & 0xFF000000) == 0x7F000000 || # 127.0.0.0/8
186             ($haddr & 0xFFF00000) == 0xAC100000 || # 172.16.0.0/12
187             ($haddr & 0xFFFF0000) == 0xA9FE0000 || # 169.254.0.0/16
188             ($haddr & 0xFFFF0000) == 0xC0A80000 || # 192.168.0.0/16
189             ($haddr & 0xFFFFFF00) == 0xC0000200 || # 192.0.2.0/24 "TEST-NET" docs/example code
190             ($haddr & 0xFFFFFF00) == 0xC0586300 || # 192.88.99.0/24 6to4 relay anycast addresses
191             $haddr == 0xFFFFFFFF || # 255.255.255.255
192             ($haddr & 0xF0000000) == 0xE0000000; # multicast addresses
193              
194             # as final IP address check, pass in the canonical a.b.c.d decimal form
195             # to the blacklisted host check to see if matches as bad there.
196 1         2 my $can_ip = join(".", map { ord } split //, $addr);
  4         6  
197 1 50       3 return 1 if $self->_host_list_match("blocked_hosts", $can_ip);
198              
199             # looks like an okay IP address
200 0         0 return 0;
201             }
202              
203             sub request {
204 9     9 1 12456 my ($self, $req, $arg, $size, $previous) = @_;
205              
206             # walk back to the first request, and set our _time_begin to its _time_begin, or if
207             # we're the first, then use current time. used by LWPx::Protocol::http_paranoid
208 9         12 my $first_res = $previous; # previous is the previous response that invoked this request
209 9   33     20 $first_res = $first_res->previous while $first_res && $first_res->previous;
210 9 50       23 $req->{_time_begin} = $first_res ? $first_res->request->{_time_begin} : time();
211              
212 9         16 my $host = $req->uri->host;
213 9 100       262 if ($self->_bad_host($host)) {
214 8         20 my $err_res = HTTP::Response->new(403, "Unauthorized access to blocked host");
215 8         182 $err_res->request($req);
216 8         42 $err_res->header("Client-Date" => HTTP::Date::time2str(time));
217 8         385 $err_res->header("Client-Warning" => "Internal response");
218 8         239 $err_res->header("Content-Type" => "text/plain");
219 8         209 $err_res->content("403 Unauthorized access to blocked host\n");
220 8         121 return $err_res;
221             }
222              
223 1 50       7 if (my $pp = $self->{paranoid_proxy}) {
224             $req->uri("$pp?url=" . eurl($req->uri) .
225             "&timeout=" . ($self->{timeout} + 0) .
226 0         0 "&max_size=" . ($self->{max_size} + 0));
227             }
228              
229 1         16 return $self->SUPER::request($req, $arg, $size, $previous);
230             }
231              
232             # taken from LWP::UserAgent and modified slightly. (proxy support removed,
233             # and map http and https schemes to separate protocol handlers)
234             sub send_request
235             {
236 1     1 0 324 my ($self, $request, $arg, $size) = @_;
237 1         6 $self->_request_sanity_check($request);
238              
239 1         4 my ($method, $url) = ($request->method, $request->uri);
240              
241 1         18 local($SIG{__DIE__}); # protect against user defined die handlers
242              
243             # Check that we have a METHOD and a URL first
244 1 50       3 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Method missing")
245             unless $method;
246 1 50       4 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL missing")
247             unless $url;
248 1 50       8 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL must be absolute")
249             unless $url->scheme;
250 1 50       15 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST,
251             "ParanoidAgent doesn't support going through proxies. ".
252             "In that case, do your paranoia at your proxy instead.")
253             if $self->_need_proxy($url);
254              
255 1         2 my $scheme = $url->scheme;
256 1 50 33     15 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Only http and https are supported by ParanoidAgent")
257             unless $scheme eq "http" || $scheme eq "https";
258              
259 1         4 LWP::Debug::trace("$method $url");
260              
261 1         10 my $protocol;
262              
263             {
264             # Honor object-specific restrictions by forcing protocol objects
265             # into class LWP::Protocol::nogo.
266 1         1 my $x;
  1         1  
267 1 50       17 if($x = $self->protocols_allowed) {
    50          
268 0 0       0 if(grep lc($_) eq $scheme, @$x) {
269 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s allowed protocols (@$x)");
270             }
271             else {
272 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s allowed protocols (@$x)");
273 0         0 require LWP::Protocol::nogo;
274 0         0 $protocol = LWP::Protocol::nogo->new;
275             }
276             }
277             elsif ($x = $self->protocols_forbidden) {
278 0 0       0 if(grep lc($_) eq $scheme, @$x) {
279 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s forbidden protocols (@$x)");
280 0         0 require LWP::Protocol::nogo;
281 0         0 $protocol = LWP::Protocol::nogo->new;
282             }
283             else {
284 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s forbidden protocols (@$x)");
285             }
286             }
287             # else fall thru and create the protocol object normally
288             }
289              
290 1 50       41 unless ($protocol) {
291 1         15 LWP::Protocol::implementor("${scheme}_paranoid", "LWPx::Protocol::${scheme}_paranoid");
292 1         86 eval "require LWPx::Protocol::${scheme}_paranoid;";
293 1 50       6 if ($@) {
294 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
295 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
296 0         0 return $response;
297             }
298              
299 1 50       2 $protocol = eval { LWP::Protocol::create($scheme eq "http" ? "http_paranoid" : "https_paranoid", $self) };
  1         15  
300 1 50       36 if ($@) {
301 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
302 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
303 0 0       0 if ($scheme eq "https") {
304 0         0 $response->message($response->message . " (Crypt::SSLeay not installed)");
305 0         0 $response->content_type("text/plain");
306 0         0 $response->content(<
307             LWP will support https URLs if the Crypt::SSLeay module is installed.
308             More information at .
309             EOT
310             }
311 0         0 return $response;
312             }
313             }
314              
315             # Extract fields that will be used below
316             my ($timeout, $cookie_jar, $use_eval, $parse_head, $max_size) =
317 1         2 @{$self}{qw(timeout cookie_jar use_eval parse_head max_size)};
  1         4  
318              
319 1         4 my $response;
320 1         2 my $proxy = undef;
321 1 50       3 if ($use_eval) {
322             # we eval, and turn dies into responses below
323 1         2 eval {
324 1         6 $response = $protocol->request($request, $proxy,
325             $arg, $size, $timeout);
326             };
327 1   33     6 my $error = $@ || $response->header( 'x-died' );
328 1 50       3 if ($error) {
329 1         3 $error =~ s/ at .* line \d+.*//s; # remove file/line number
330 1         5 $response = _new_response($request,
331             &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
332             $error);
333             }
334             }
335             else {
336 0         0 $response = $protocol->request($request, $proxy,
337             $arg, $size, $timeout);
338             # XXX: Should we die unless $response->is_success ???
339             }
340              
341 1         4 $response->request($request); # record request for reference
342 1 50       8 $cookie_jar->extract_cookies($response) if $cookie_jar;
343 1         3 $response->header("Client-Date" => HTTP::Date::time2str(time));
344 1 50       50 $self->run_handlers("response_done", $response) if $self->can('run_handlers');
345 1         20 return $response;
346             }
347              
348             # blocked hostnames, compiled patterns, or subrefs
349             sub blocked_hosts
350             {
351 1     1 1 25 my $self = shift;
352 1 50       31 if (@_) {
353 1         9 my @hosts = @_;
354 1         5 $self->{'blocked_hosts'} = \@hosts;
355 1         8 return;
356             }
357 0 0       0 return @{ $self->{'blocked_hosts'} || [] };
  0         0  
358             }
359              
360             # whitelisted hostnames, compiled patterns, or subrefs
361             sub whitelisted_hosts
362             {
363 1     1 1 501616 my $self = shift;
364 1 50       26 if (@_) {
365 1         13 my @hosts = @_;
366 1         10 $self->{'whitelisted_hosts'} = \@hosts;
367 1         13 return;
368             }
369 0 0       0 return @{ $self->{'whitelisted_hosts'} || [] };
  0         0  
370             }
371              
372             # get/set Net::DNS resolver object
373             sub resolver
374             {
375 3     3 1 3582 my $self = shift;
376 3 100       11 if (@_) {
377 2         6 $self->{'resolver'} = shift;
378 2         588 require UNIVERSAL ;
379             die "Not a Net::DNS::Resolver object" unless
380 2 50       17 UNIVERSAL::isa($self->{'resolver'}, "Net::DNS::Resolver");
381             }
382 3   33     10 return $self->{'resolver'} ||= Net::DNS::Resolver->new;
383             }
384              
385             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
386             # staying there in future versions: needed by our modified version of send_request
387             sub _need_proxy
388             {
389 1     1   2 my($self, $url) = @_;
390 1 50       3 $url = $HTTP::URI_CLASS->new($url) unless ref $url;
391              
392 1   50     2 my $scheme = $url->scheme || return;
393 1 50       10 if (my $proxy = $self->{'proxy'}{$scheme}) {
394 0 0 0     0 if ($self->{'no_proxy'} && @{ $self->{'no_proxy'} }) {
  0         0  
395 0 0       0 if (my $host = eval { $url->host }) {
  0         0  
396 0         0 for my $domain (@{ $self->{'no_proxy'} }) {
  0         0  
397 0 0       0 if ($host =~ /\Q$domain\E$/) {
398 0         0 LWP::Debug::trace("no_proxy configured");
399 0         0 return;
400             }
401             }
402             }
403             }
404 0         0 LWP::Debug::debug("Proxied to $proxy");
405 0         0 return $HTTP::URI_CLASS->new($proxy);
406             }
407 1         8 LWP::Debug::debug('Not proxied');
408 1         5 undef;
409             }
410              
411             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
412             # staying there in future versions: needed by our modified version of send_request
413             sub _request_sanity_check {
414 1     1   1 my($self, $request) = @_;
415             # some sanity checking
416 1 50       3 if (defined $request) {
417 1 50       4 if (ref $request) {
418 1 50 33     36 Carp::croak("You need a request object, not a " . ref($request) . " object")
      33        
      33        
419             if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
420             !$request->can('method') or !$request->can('uri');
421             }
422             else {
423 0         0 Carp::croak("You need a request object, not '$request'");
424             }
425             }
426             else {
427 0         0 Carp::croak("No request object passed in");
428             }
429             }
430              
431             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
432             # staying there in future versions: needed by our modified version of send_request
433             sub _new_response {
434 1     1   3 my($request, $code, $message) = @_;
435 1         12 my $response = HTTP::Response->new($code, $message);
436 1         75 $response->request($request);
437 1         14 $response->header("Client-Date" => HTTP::Date::time2str(time));
438 1         120 $response->header("Client-Warning" => "Internal response");
439 1         39 $response->header("Content-Type" => "text/plain");
440 1         57 $response->content("$code $message\n");
441 1         20 return $response;
442             }
443              
444             sub eurl {
445 0     0 0   my $a = $_[0];
446 0           $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
  0            
447 0           $a =~ tr/ /+/;
448 0           return $a;
449             }
450              
451             1;
452              
453             __END__