File Coverage

blib/lib/LWPx/ParanoidAgent.pm
Criterion Covered Total %
statement 181 228 79.3
branch 84 140 60.0
condition 53 91 58.2
subroutine 18 20 90.0
pod 5 7 71.4
total 341 486 70.1


line stmt bran cond sub pod time code
1             package LWPx::ParanoidAgent;
2             require LWP::UserAgent;
3              
4 6     6   42245 use vars qw(@ISA $VERSION);
  6         6  
  6         609  
5             @ISA = qw(LWP::UserAgent);
6             $VERSION = '1.08_02';
7              
8             require HTTP::Request;
9             require HTTP::Response;
10              
11 6     6   2635 use HTTP::Status ();
  6         18982  
  6         187  
12 6     6   35 use strict;
  6         31  
  6         150  
13 6     6   3871 use Net::DNS;
  6         541918  
  6         1103  
14 6     6   4399 use LWP::Debug ();
  6         3569  
  6         20509  
15              
16             sub new {
17 5     5 1 85 my $class = shift;
18 5         15 my %opts = @_;
19              
20 5   50     35 my $blocked_hosts = delete $opts{blocked_hosts} || [];
21 5   50     25 my $whitelisted_hosts = delete $opts{whitelisted_hosts} || [];
22 5         10 my $resolver = delete $opts{resolver};
23 5         5 my $paranoid_proxy = delete $opts{paranoid_proxy};
24 5   50     30 $opts{timeout} ||= 15;
25              
26 5         50 my $self = LWP::UserAgent->new( %opts );
27              
28 5         11480 $self->{'blocked_hosts'} = $blocked_hosts;
29 5         20 $self->{'whitelisted_hosts'} = $whitelisted_hosts;
30 5         5 $self->{'resolver'} = $resolver;
31 5         10 $self->{'paranoid_proxy'} = $paranoid_proxy;
32              
33 5         10 $self = bless $self, $class;
34 5         15 return $self;
35             }
36              
37             # returns seconds remaining given a request
38             sub _time_remain {
39 3     3   7 my $self = shift;
40 3         6 my $req = shift;
41              
42 3         4 my $now = time();
43 3   33     13 my $start_time = $req->{_time_begin} || $now;
44 3         311755 return $start_time + $self->{timeout} - $now;
45             }
46              
47             sub _resolve {
48 10     10   20 my ($self, $host, $request, $timeout, $depth) = @_;
49 10         31 my $res = $self->resolver;
50 10   100     50 $depth ||= 0;
51              
52 10 50       25 die "CNAME recursion depth limit exceeded.\n" if $depth > 10;
53 10 100       28 die "DNS lookup resulted in bad host." if $self->_bad_host($host);
54              
55             # return the IP address if it looks like one and wasn't marked bad
56 9 100       58 return ($host) if $host =~ /^\d+\.\d+\.\d+\.\d+$/;
57              
58 5 50       74 my $sock = $res->bgsend($host)
59             or die "No sock from bgsend";
60              
61             # wait for the socket to become readable, unless this is from our test
62             # mock resolver.
63 5 100 66     8668 unless ($sock && $sock eq "MOCK") {
64 3         8 my $rin = '';
65 3         25 vec($rin, fileno($sock), 1) = 1;
66 3         18 my $nf = select($rin, undef, undef, $self->_time_remain($request));
67 3 50       26 die "DNS lookup timeout" unless $nf;
68             }
69              
70 5 50       6839 my $packet = $res->bgread($sock)
71             or die "DNS bgread failure";
72 5         3537 $sock = undef;
73              
74 5         146 my @addr;
75             my $cname;
76 5         43 foreach my $rr ($packet->answer) {
77 5 100       116 if ($rr->type eq "A") {
    50          
78 4 100       91 die "Suspicious DNS results from A record\n" if $self->_bad_host($rr->address);
79             # untaints the address:
80 3         15 push @addr, join(".", ($rr->address =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/));
81             } elsif ($rr->type eq "CNAME") {
82             # will be checked for validity in the recursion path
83 1         35 $cname = $rr->cname;
84             }
85             }
86              
87 4 100       224 return @addr if @addr;
88 1 50       5 return () unless $cname;
89 1         10 return $self->_resolve($cname, $request, $timeout, $depth + 1);
90             }
91              
92             sub _host_list_match {
93 72     72   69 my $self = shift;
94 72         81 my $list_name = shift;
95 72         103 my $host = shift;
96              
97 72 50       64 foreach my $rule (@{ $self->{$list_name} || [] }) {
  72         256  
98 103 50       269 if (ref $rule eq "CODE") {
    100          
99 0 0       0 return 1 if $rule->($host);
100             } elsif (ref $rule) {
101             # assume regexp
102 34 100       245 return 1 if $host =~ /$rule/;
103             } else {
104 69 100       347 return 1 if $host eq $rule;
105             }
106             }
107             }
108              
109             sub _bad_host {
110 38     38   126 my $self = shift;
111 38         75 my $host = lc(shift);
112              
113 38 100       91 return 0 if $self->_host_list_match("whitelisted_hosts", $host);
114 30 100       77 return 1 if $self->_host_list_match("blocked_hosts", $host);
115 27 100 66     226 return 1 if
116             $host =~ /^localhost$/i || # localhost is bad. even though it'd be stopped in
117             # a later call to _bad_host with the IP address
118             $host =~ /\s/i; # any whitespace is questionable
119              
120             # Let's assume it's an IP address now, and get it into 32 bits.
121             # Uf at any time something doesn't look like a number, then it's
122             # probably a hostname and we've already either whitelisted or
123             # blacklisted those, so we'll just say it's okay and it'll come
124             # back here later when the resolver finds an IP address.
125 26         129 my @parts = split(/\./, $host);
126 26 50       72 return 0 if @parts > 4;
127              
128             # un-octal/un-hex the parts, or return if there's a non-numeric part
129 26         56 my $overflow_flag = 0;
130 26         43 foreach (@parts) {
131 59 100 100     316 return 0 unless /^\d+$/ || /^0x[a-f\d]+$/;
132 49     0   253 local $SIG{__WARN__} = sub { $overflow_flag = 1; };
  0         0  
133 49 100       287 $_ = oct($_) if /^0/;
134             }
135              
136             # a purely numeric address shouldn't overflow.
137 16 50       38 return 1 if $overflow_flag;
138              
139 16         16 my $addr; # network order packed IP address
140              
141 16 100       70 if (@parts == 1) {
    100          
    100          
    50          
142             # a - 32 bits
143 2 50       7 return 1 if
144             $parts[0] > 0xffffffff;
145 2         8 $addr = pack("N", $parts[0]);
146             } elsif (@parts == 2) {
147             # a.b - 8.24 bits
148 4 50 33     29 return 1 if
149             $parts[0] > 0xff ||
150             $parts[1] > 0xffffff;
151 4         14 $addr = pack("N", $parts[0] << 24 | $parts[1]);
152             } elsif (@parts == 3) {
153             # a.b.c - 8.8.16 bits
154 1 50 33     13 return 1 if
      33        
155             $parts[0] > 0xff ||
156             $parts[1] > 0xff ||
157             $parts[2] > 0xffff;
158 1         6 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2]);
159             } elsif (@parts == 4) {
160             # a.b.c.d - 8.8.8.8 bits
161 9 50 33     133 return 1 if
      33        
      33        
162             $parts[0] > 0xff ||
163             $parts[1] > 0xff ||
164             $parts[2] > 0xff ||
165             $parts[3] > 0xff;
166 9         55 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2] << 8 | $parts[3]);
167             } else {
168 0         0 return 1;
169             }
170              
171 16         55 my $haddr = unpack("N", $addr); # host order IP address
172 16 100 100     304 return 1 if
      100        
      66        
      66        
      66        
      100        
      100        
      100        
      66        
173             ($haddr & 0xFF000000) == 0x00000000 || # 0.0.0.0/8
174             ($haddr & 0xFF000000) == 0x0A000000 || # 10.0.0.0/8
175             ($haddr & 0xFF000000) == 0x7F000000 || # 127.0.0.0/8
176             ($haddr & 0xFFF00000) == 0xAC100000 || # 172.16.0.0/12
177             ($haddr & 0xFFFF0000) == 0xA9FE0000 || # 169.254.0.0/16
178             ($haddr & 0xFFFF0000) == 0xC0A80000 || # 192.168.0.0/16
179             ($haddr & 0xFFFFFF00) == 0xC0000200 || # 192.0.2.0/24 "TEST-NET" docs/example code
180             ($haddr & 0xFFFFFF00) == 0xC0586300 || # 192.88.99.0/24 6to4 relay anycast addresses
181             $haddr == 0xFFFFFFFF || # 255.255.255.255
182             ($haddr & 0xF0000000) == 0xE0000000; # multicast addresses
183              
184             # as final IP address check, pass in the canonical a.b.c.d decimal form
185             # to the blacklisted host check to see if matches as bad there.
186 4         20 my $can_ip = join(".", map { ord } split //, $addr);
  16         35  
187 4 100       17 return 1 if $self->_host_list_match("blocked_hosts", $can_ip);
188              
189             # looks like an okay IP address
190 3         15 return 0;
191             }
192              
193             sub request {
194 24     24 1 28553 my ($self, $req, $arg, $size, $previous) = @_;
195              
196             # walk back to the first request, and set our _time_begin to its _time_begin, or if
197             # we're the first, then use current time. used by LWPx::Protocol::http_paranoid
198 24         40 my $first_res = $previous; # previous is the previous response that invoked this request
199 24   100     138 $first_res = $first_res->previous while $first_res && $first_res->previous;
200 24 100       172 $req->{_time_begin} = $first_res ? $first_res->request->{_time_begin} : time();
201              
202 24         91 my $host = $req->uri->host;
203 24 100       887 if ($self->_bad_host($host)) {
204 15         104 my $err_res = HTTP::Response->new(403, "Unauthorized access to blocked host");
205 15         530 $err_res->request($req);
206 15         125 $err_res->header("Client-Date" => HTTP::Date::time2str(time));
207 15         1127 $err_res->header("Client-Warning" => "Internal response");
208 15         761 $err_res->header("Content-Type" => "text/plain");
209 15         566 $err_res->content("403 Unauthorized access to blocked host\n");
210 15         261 return $err_res;
211             }
212              
213 9 50       45 if (my $pp = $self->{paranoid_proxy}) {
214             $req->uri("$pp?url=" . eurl($req->uri) .
215             "&timeout=" . ($self->{timeout} + 0) .
216 0         0 "&max_size=" . ($self->{max_size} + 0));
217             }
218              
219 9         89 return $self->SUPER::request($req, $arg, $size, $previous);
220             }
221              
222             # taken from LWP::UserAgent and modified slightly. (proxy support removed,
223             # and map http and https schemes to separate protocol handlers)
224             sub send_request
225             {
226 9     9 0 2612 my ($self, $request, $arg, $size) = @_;
227 9         31 $self->_request_sanity_check($request);
228              
229 9         29 my ($method, $url) = ($request->method, $request->uri);
230              
231 9         187 local($SIG{__DIE__}); # protect against user defined die handlers
232              
233             # Check that we have a METHOD and a URL first
234 9 50       28 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Method missing")
235             unless $method;
236 9 50       33 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL missing")
237             unless $url;
238 9 50       77 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL must be absolute")
239             unless $url->scheme;
240 9 50       174 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST,
241             "ParanoidAgent doesn't support going through proxies. ".
242             "In that case, do your paranoia at your proxy instead.")
243             if $self->_need_proxy($url);
244              
245 9         25 my $scheme = $url->scheme;
246 9 50 33     119 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Only http and https are supported by ParanoidAgent")
247             unless $scheme eq "http" || $scheme eq "https";
248              
249 9         40 LWP::Debug::trace("$method $url");
250              
251 9         77 my $protocol;
252              
253             {
254             # Honor object-specific restrictions by forcing protocol objects
255             # into class LWP::Protocol::nogo.
256 9         16 my $x;
  9         9  
257 9 50       63 if($x = $self->protocols_allowed) {
    50          
258 0 0       0 if(grep lc($_) eq $scheme, @$x) {
259 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s allowed protocols (@$x)");
260             }
261             else {
262 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s allowed protocols (@$x)");
263 0         0 require LWP::Protocol::nogo;
264 0         0 $protocol = LWP::Protocol::nogo->new;
265             }
266             }
267             elsif ($x = $self->protocols_forbidden) {
268 0 0       0 if(grep lc($_) eq $scheme, @$x) {
269 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s forbidden protocols (@$x)");
270 0         0 require LWP::Protocol::nogo;
271 0         0 $protocol = LWP::Protocol::nogo->new;
272             }
273             else {
274 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s forbidden protocols (@$x)");
275             }
276             }
277             # else fall thru and create the protocol object normally
278             }
279              
280 9 50       228 unless ($protocol) {
281 9         67 LWP::Protocol::implementor("${scheme}_paranoid", "LWPx::Protocol::${scheme}_paranoid");
282 9         964 eval "require LWPx::Protocol::${scheme}_paranoid;";
283 9 50       42 if ($@) {
284 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
285 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
286 0         0 return $response;
287             }
288              
289 9 50       18 $protocol = eval { LWP::Protocol::create($scheme eq "http" ? "http_paranoid" : "https_paranoid", $self) };
  9         61  
290 9 50       250 if ($@) {
291 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
292 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
293 0 0       0 if ($scheme eq "https") {
294 0         0 $response->message($response->message . " (Crypt::SSLeay not installed)");
295 0         0 $response->content_type("text/plain");
296 0         0 $response->content(<
297             LWP will support https URLs if the Crypt::SSLeay module is installed.
298             More information at .
299             EOT
300             }
301 0         0 return $response;
302             }
303             }
304              
305             # Extract fields that will be used below
306             my ($timeout, $cookie_jar, $use_eval, $parse_head, $max_size) =
307 9         18 @{$self}{qw(timeout cookie_jar use_eval parse_head max_size)};
  9         34  
308              
309 9         11 my $response;
310 9         14 my $proxy = undef;
311 9 50       22 if ($use_eval) {
312             # we eval, and turn dies into responses below
313 9         13 eval {
314 9         43 $response = $protocol->request($request, $proxy,
315             $arg, $size, $timeout);
316             };
317 9   66     91 my $error = $@ || $response->header( 'x-died' );
318 9 100       567 if ($error) {
319 2         21 $error =~ s/ at .* line \d+.*//s; # remove file/line number
320 2         14 $response = _new_response($request,
321             &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
322             $error);
323             }
324             }
325             else {
326 0         0 $response = $protocol->request($request, $proxy,
327             $arg, $size, $timeout);
328             # XXX: Should we die unless $response->is_success ???
329             }
330              
331 9         46 $response->request($request); # record request for reference
332 9 50       94 $cookie_jar->extract_cookies($response) if $cookie_jar;
333 9         47 $response->header("Client-Date" => HTTP::Date::time2str(time));
334 9 50       951 $self->run_handlers("response_done", $response) if $self->can('run_handlers');
335 9         458 return $response;
336             }
337              
338             # blocked hostnames, compiled patterns, or subrefs
339             sub blocked_hosts
340             {
341 1     1 1 36 my $self = shift;
342 1 50       28 if (@_) {
343 1         10 my @hosts = @_;
344 1         14 $self->{'blocked_hosts'} = \@hosts;
345 1         12 return;
346             }
347 0 0       0 return @{ $self->{'blocked_hosts'} || [] };
  0         0  
348             }
349              
350             # whitelisted hostnames, compiled patterns, or subrefs
351             sub whitelisted_hosts
352             {
353 1     1 1 501888 my $self = shift;
354 1 50       27 if (@_) {
355 1         27 my @hosts = @_;
356 1         12 $self->{'whitelisted_hosts'} = \@hosts;
357 1         21 return;
358             }
359 0 0       0 return @{ $self->{'whitelisted_hosts'} || [] };
  0         0  
360             }
361              
362             # get/set Net::DNS resolver object
363             sub resolver
364             {
365 19     19 1 17631 my $self = shift;
366 19 100       66 if (@_) {
367 8         46 $self->{'resolver'} = shift;
368 8         3648 require UNIVERSAL ;
369             die "Not a Net::DNS::Resolver object" unless
370 8 50       116 UNIVERSAL::isa($self->{'resolver'}, "Net::DNS::Resolver");
371             }
372 19   33     97 return $self->{'resolver'} ||= Net::DNS::Resolver->new;
373             }
374              
375             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
376             # staying there in future versions: needed by our modified version of send_request
377             sub _need_proxy
378             {
379 9     9   46 my($self, $url) = @_;
380 9 50       33 $url = $HTTP::URI_CLASS->new($url) unless ref $url;
381              
382 9   50     28 my $scheme = $url->scheme || return;
383 9 50       136 if (my $proxy = $self->{'proxy'}{$scheme}) {
384 0 0 0     0 if ($self->{'no_proxy'} && @{ $self->{'no_proxy'} }) {
  0         0  
385 0 0       0 if (my $host = eval { $url->host }) {
  0         0  
386 0         0 for my $domain (@{ $self->{'no_proxy'} }) {
  0         0  
387 0 0       0 if ($host =~ /\Q$domain\E$/) {
388 0         0 LWP::Debug::trace("no_proxy configured");
389 0         0 return;
390             }
391             }
392             }
393             }
394 0         0 LWP::Debug::debug("Proxied to $proxy");
395 0         0 return $HTTP::URI_CLASS->new($proxy);
396             }
397 9         56 LWP::Debug::debug('Not proxied');
398 9         49 undef;
399             }
400              
401             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
402             # staying there in future versions: needed by our modified version of send_request
403             sub _request_sanity_check {
404 9     9   15 my($self, $request) = @_;
405             # some sanity checking
406 9 50       30 if (defined $request) {
407 9 50       24 if (ref $request) {
408 9 50 33     174 Carp::croak("You need a request object, not a " . ref($request) . " object")
      33        
      33        
409             if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
410             !$request->can('method') or !$request->can('uri');
411             }
412             else {
413 0         0 Carp::croak("You need a request object, not '$request'");
414             }
415             }
416             else {
417 0         0 Carp::croak("No request object passed in");
418             }
419             }
420              
421             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
422             # staying there in future versions: needed by our modified version of send_request
423             sub _new_response {
424 2     2   5 my($request, $code, $message) = @_;
425 2         23 my $response = HTTP::Response->new($code, $message);
426 2         149 $response->request($request);
427 2         34 $response->header("Client-Date" => HTTP::Date::time2str(time));
428 2         270 $response->header("Client-Warning" => "Internal response");
429 2         130 $response->header("Content-Type" => "text/plain");
430 2         96 $response->content("$code $message\n");
431 2         54 return $response;
432             }
433              
434             sub eurl {
435 0     0 0   my $a = $_[0];
436 0           $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
  0            
437 0           $a =~ tr/ /+/;
438 0           return $a;
439             }
440              
441             1;
442              
443             __END__