File Coverage

blib/lib/LWPx/ParanoidAgent.pm
Criterion Covered Total %
statement 166 228 72.8
branch 67 140 47.8
condition 45 91 49.4
subroutine 17 20 85.0
pod 5 7 71.4
total 300 486 61.7


line stmt bran cond sub pod time code
1             package LWPx::ParanoidAgent;
2             require LWP::UserAgent;
3              
4 2     2   73109 use vars qw(@ISA $VERSION);
  2         5  
  2         214  
5             @ISA = qw(LWP::UserAgent);
6             $VERSION = '1.10';
7              
8             require HTTP::Request;
9             require HTTP::Response;
10              
11 2     2   3569 use HTTP::Status ();
  2         9800  
  2         54  
12 2     2   18 use strict;
  2         10  
  2         64  
13 2     2   9869 use Net::DNS;
  2         400545  
  2         233  
14 2     2   7509 use LWP::Debug ();
  2         1029  
  2         7858  
15             require Net::SSL;
16              
17             # fixes https://github.com/csirtgadgets/LWPx-ParanoidAgent/issues/4
18             $Net::HTTPS::SSL_SOCKET_CLASS = 'Net::SSL';
19              
20             sub new {
21 1     1 1 27 my $class = shift;
22 1         3 my %opts = @_;
23              
24 1   50     10 my $blocked_hosts = delete $opts{blocked_hosts} || [];
25 1   50     8 my $whitelisted_hosts = delete $opts{whitelisted_hosts} || [];
26 1         3 my $resolver = delete $opts{resolver};
27 1         4 my $paranoid_proxy = delete $opts{paranoid_proxy};
28 1   50     9 $opts{timeout} ||= 15;
29            
30 1         14 my $self = LWP::UserAgent->new( %opts );
31              
32 1         5346 $self->{'blocked_hosts'} = $blocked_hosts;
33 1         39 $self->{'whitelisted_hosts'} = $whitelisted_hosts;
34 1         3 $self->{'resolver'} = $resolver;
35 1         2 $self->{'paranoid_proxy'} = $paranoid_proxy;
36              
37 1         5 $self = bless $self, $class;
38 1         7 return $self;
39             }
40              
41             # returns seconds remaining given a request
42             sub _time_remain {
43 0     0   0 my $self = shift;
44 0         0 my $req = shift;
45              
46 0         0 my $now = time();
47 0   0     0 my $start_time = $req->{_time_begin} || $now;
48 0         0 return $start_time + $self->{timeout} - $now;
49             }
50              
51             sub _resolve {
52 1     1   12 my ($self, $host, $request, $timeout, $depth) = @_;
53 1         12 my $res = $self->resolver;
54 1   50     16 $depth ||= 0;
55              
56 1 50       4 die "CNAME recursion depth limit exceeded.\n" if $depth > 10;
57 1 50       6 die "DNS lookup resulted in bad host." if $self->_bad_host($host);
58              
59             # return the IP address if it looks like one and wasn't marked bad
60 1 50       7 return ($host) if $host =~ /^\d+\.\d+\.\d+\.\d+$/;
61              
62 1 50       17 my $sock = $res->bgsend($host)
63             or die "No sock from bgsend";
64              
65             # wait for the socket to become readable, unless this is from our test
66             # mock resolver.
67 1 50 33     60 unless ($sock && $sock eq "MOCK") {
68 0         0 my $rin = '';
69 0         0 vec($rin, fileno($sock), 1) = 1;
70 0         0 my $nf = select($rin, undef, undef, $self->_time_remain($request));
71 0 0       0 die "DNS lookup timeout" unless $nf;
72             }
73              
74 1 50       6 my $packet = $res->bgread($sock)
75             or die "DNS bgread failure";
76 1         14 $sock = undef;
77              
78 1         2 my @addr;
79             my $cname;
80 1         28 foreach my $rr ($packet->answer) {
81 1 50       75 if ($rr->type eq "A") {
    0          
82 1 50       57 die "Suspicious DNS results from A record\n" if $self->_bad_host($rr->address);
83             # untaints the address:
84 0         0 push @addr, join(".", ($rr->address =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/));
85             } elsif ($rr->type eq "CNAME") {
86             # will be checked for validity in the recursion path
87 0         0 $cname = $rr->cname;
88             }
89             }
90              
91 0 0       0 return @addr if @addr;
92 0 0       0 return () unless $cname;
93 0         0 return $self->_resolve($cname, $request, $timeout, $depth + 1);
94             }
95              
96             sub _host_list_match {
97 23     23   29 my $self = shift;
98 23         45 my $list_name = shift;
99 23         37 my $host = shift;
100              
101 23 50       28 foreach my $rule (@{ $self->{$list_name} || [] }) {
  23         99  
102 35 50       131 if (ref $rule eq "CODE") {
    100          
103 0 0       0 return 1 if $rule->($host);
104             } elsif (ref $rule) {
105             # assume regexp
106 12 50       5830 return 1 if $host =~ /$rule/;
107             } else {
108 23 100       124 return 1 if $host eq $rule;
109             }
110             }
111             }
112              
113             sub _bad_host {
114 11     11   34 my $self = shift;
115 11         20 my $host = lc(shift);
116              
117 11 50       70 return 0 if $self->_host_list_match("whitelisted_hosts", $host);
118 11 50       35 return 1 if $self->_host_list_match("blocked_hosts", $host);
119 11 50 33     107 return 1 if
120             $host =~ /^localhost$/i || # localhost is bad. even though it'd be stopped in
121             # a later call to _bad_host with the IP address
122             $host =~ /\s/i; # any whitespace is questionable
123              
124             # Let's assume it's an IP address now, and get it into 32 bits.
125             # Uf at any time something doesn't look like a number, then it's
126             # probably a hostname and we've already either whitelisted or
127             # blacklisted those, so we'll just say it's okay and it'll come
128             # back here later when the resolver finds an IP address.
129 11         54 my @parts = split(/\./, $host);
130 11 50       30 return 0 if @parts > 4;
131              
132             # un-octal/un-hex the parts, or return if there's a non-numeric part
133 11         17 my $overflow_flag = 0;
134 11         23 foreach (@parts) {
135 26 100 100     160 return 0 unless /^\d+$/ || /^0x[a-f\d]+$/;
136 24     0   141 local $SIG{__WARN__} = sub { $overflow_flag = 1; };
  0         0  
137 24 100       172 $_ = oct($_) if /^0/;
138             }
139              
140             # a purely numeric address shouldn't overflow.
141 9 50       24 return 1 if $overflow_flag;
142              
143 9         10 my $addr; # network order packed IP address
144              
145 9 100       42 if (@parts == 1) {
    100          
    100          
    50          
146             # a - 32 bits
147 1 50       5 return 1 if
148             $parts[0] > 0xffffffff;
149 1         5 $addr = pack("N", $parts[0]);
150             } elsif (@parts == 2) {
151             # a.b - 8.24 bits
152 4 50 33     35 return 1 if
153             $parts[0] > 0xff ||
154             $parts[1] > 0xffffff;
155 4         16 $addr = pack("N", $parts[0] << 24 | $parts[1]);
156             } elsif (@parts == 3) {
157             # a.b.c - 8.8.16 bits
158 1 50 33     15 return 1 if
      33        
159             $parts[0] > 0xff ||
160             $parts[1] > 0xff ||
161             $parts[2] > 0xffff;
162 1         14 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2]);
163             } elsif (@parts == 4) {
164             # a.b.c.d - 8.8.8.8 bits
165 3 50 33     50 return 1 if
      33        
      33        
166             $parts[0] > 0xff ||
167             $parts[1] > 0xff ||
168             $parts[2] > 0xff ||
169             $parts[3] > 0xff;
170 3         16 $addr = pack("N", $parts[0] << 24 | $parts[1] << 16 | $parts[2] << 8 | $parts[3]);
171             } else {
172 0         0 return 1;
173             }
174              
175 9         26 my $haddr = unpack("N", $addr); # host order IP address
176 9 100 66     187 return 1 if
      100        
      66        
      66        
      66        
      100        
      100        
      100        
      66        
177             ($haddr & 0xFF000000) == 0x00000000 || # 0.0.0.0/8
178             ($haddr & 0xFF000000) == 0x0A000000 || # 10.0.0.0/8
179             ($haddr & 0xFF000000) == 0x7F000000 || # 127.0.0.0/8
180             ($haddr & 0xFFF00000) == 0xAC100000 || # 172.16.0.0/12
181             ($haddr & 0xFFFF0000) == 0xA9FE0000 || # 169.254.0.0/16
182             ($haddr & 0xFFFF0000) == 0xC0A80000 || # 192.168.0.0/16
183             ($haddr & 0xFFFFFF00) == 0xC0000200 || # 192.0.2.0/24 "TEST-NET" docs/example code
184             ($haddr & 0xFFFFFF00) == 0xC0586300 || # 192.88.99.0/24 6to4 relay anycast addresses
185             $haddr == 0xFFFFFFFF || # 255.255.255.255
186             ($haddr & 0xF0000000) == 0xE0000000; # multicast addresses
187              
188             # as final IP address check, pass in the canonical a.b.c.d decimal form
189             # to the blacklisted host check to see if matches as bad there.
190 1         20 my $can_ip = join(".", map { ord } split //, $addr);
  4         13  
191 1 50       5 return 1 if $self->_host_list_match("blocked_hosts", $can_ip);
192              
193             # looks like an okay IP address
194 0         0 return 0;
195             }
196              
197             sub request {
198 9     9 1 1186022 my ($self, $req, $arg, $size, $previous) = @_;
199              
200             # walk back to the first request, and set our _time_begin to its _time_begin, or if
201             # we're the first, then use current time. used by LWPx::Protocol::http_paranoid
202 9         21 my $first_res = $previous; # previous is the previous response that invoked this request
203 9   33     32 $first_res = $first_res->previous while $first_res && $first_res->previous;
204 9 50       40 $req->{_time_begin} = $first_res ? $first_res->request->{_time_begin} : time();
205              
206 9         29 my $host = $req->uri->host;
207 9 100       531 if ($self->_bad_host($host)) {
208 8         31 my $err_res = HTTP::Response->new(403, "Unauthorized access to blocked host");
209 8         351 $err_res->request($req);
210 8         86 $err_res->header("Client-Date" => HTTP::Date::time2str(time));
211 8         597 $err_res->header("Client-Warning" => "Internal response");
212 8         454 $err_res->header("Content-Type" => "text/plain");
213 8         340 $err_res->content("403 Unauthorized access to blocked host\n");
214 8         154 return $err_res;
215             }
216              
217 1 50       6 if (my $pp = $self->{paranoid_proxy}) {
218 0         0 $req->uri("$pp?url=" . eurl($req->uri) .
219             "&timeout=" . ($self->{timeout} + 0) .
220             "&max_size=" . ($self->{max_size} + 0));
221             }
222              
223 1         35 return $self->SUPER::request($req, $arg, $size, $previous);
224             }
225              
226             # taken from LWP::UserAgent and modified slightly. (proxy support removed,
227             # and map http and https schemes to separate protocol handlers)
228             sub send_request
229             {
230 1     1 0 640 my ($self, $request, $arg, $size) = @_;
231 1         13 $self->_request_sanity_check($request);
232              
233 1         5 my ($method, $url) = ($request->method, $request->uri);
234              
235 1         29 local($SIG{__DIE__}); # protect against user defined die handlers
236              
237             # Check that we have a METHOD and a URL first
238 1 50       3 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Method missing")
239             unless $method;
240 1 50       4 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL missing")
241             unless $url;
242 1 50       12 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "URL must be absolute")
243             unless $url->scheme;
244 1 50       20 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST,
245             "ParanoidAgent doesn't support going through proxies. ".
246             "In that case, do your paranoia at your proxy instead.")
247             if $self->_need_proxy($url);
248              
249 1         4 my $scheme = $url->scheme;
250 1 50 33     25 return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, "Only http and https are supported by ParanoidAgent")
251             unless $scheme eq "http" || $scheme eq "https";
252              
253 1         5 LWP::Debug::trace("$method $url");
254              
255 1         9 my $protocol;
256              
257             {
258             # Honor object-specific restrictions by forcing protocol objects
259             # into class LWP::Protocol::nogo.
260 1         2 my $x;
  1         2  
261 1 50       31 if($x = $self->protocols_allowed) {
    50          
262 0 0       0 if(grep lc($_) eq $scheme, @$x) {
263 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s allowed protocols (@$x)");
264             }
265             else {
266 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s allowed protocols (@$x)");
267 0         0 require LWP::Protocol::nogo;
268 0         0 $protocol = LWP::Protocol::nogo->new;
269             }
270             }
271             elsif ($x = $self->protocols_forbidden) {
272 0 0       0 if(grep lc($_) eq $scheme, @$x) {
273 0         0 LWP::Debug::trace("$scheme URLs are among $self\'s forbidden protocols (@$x)");
274 0         0 require LWP::Protocol::nogo;
275 0         0 $protocol = LWP::Protocol::nogo->new;
276             }
277             else {
278 0         0 LWP::Debug::trace("$scheme URLs aren't among $self\'s forbidden protocols (@$x)");
279             }
280             }
281             # else fall thru and create the protocol object normally
282             }
283              
284 1 50       54 unless ($protocol) {
285 1         29 LWP::Protocol::implementor("${scheme}_paranoid", "LWPx::Protocol::${scheme}_paranoid");
286 1         114 eval "require LWPx::Protocol::${scheme}_paranoid;";
287 1 50       11 if ($@) {
288 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
289 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
290 0         0 return $response;
291             }
292              
293 1 50       3 $protocol = eval { LWP::Protocol::create($scheme eq "http" ? "http_paranoid" : "https_paranoid", $self) };
  1         16  
294 1 50       62 if ($@) {
295 0         0 $@ =~ s/ at .* line \d+.*//s; # remove file/line number
296 0         0 my $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
297 0 0       0 if ($scheme eq "https") {
298 0         0 $response->message($response->message . " (Crypt::SSLeay not installed)");
299 0         0 $response->content_type("text/plain");
300 0         0 $response->content(<
301             LWP will support https URLs if the Crypt::SSLeay module is installed.
302             More information at .
303             EOT
304             }
305 0         0 return $response;
306             }
307             }
308              
309             # Extract fields that will be used below
310 1         6 my ($timeout, $cookie_jar, $use_eval, $parse_head, $max_size) =
311 1         4 @{$self}{qw(timeout cookie_jar use_eval parse_head max_size)};
312              
313 1         2 my $response;
314 1         3 my $proxy = undef;
315 1 50       5 if ($use_eval) {
316             # we eval, and turn dies into responses below
317 1         2 eval {
318 1         7 $response = $protocol->request($request, $proxy,
319             $arg, $size, $timeout);
320             };
321 1   33     6 my $error = $@ || $response->header( 'x-died' );
322 1 50       4 if ($error) {
323 1         3 $error =~ s/ at .* line \d+.*//s; # remove file/line number
324 1         8 $response = _new_response($request,
325             &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
326             $error);
327             }
328             }
329             else {
330 0         0 $response = $protocol->request($request, $proxy,
331             $arg, $size, $timeout);
332             # XXX: Should we die unless $response->is_success ???
333             }
334              
335 1         5 $response->request($request); # record request for reference
336 1 50       13 $cookie_jar->extract_cookies($response) if $cookie_jar;
337 1         42 $response->header("Client-Date" => HTTP::Date::time2str(time));
338 1 50       84 $self->run_handlers("response_done", $response) if $self->can('run_handlers');
339 1         35 return $response;
340             }
341              
342             # blocked hostnames, compiled patterns, or subrefs
343             sub blocked_hosts
344             {
345 1     1 1 53 my $self = shift;
346 1 50       991 if (@_) {
347 1         20 my @hosts = @_;
348 1         3 $self->{'blocked_hosts'} = \@hosts;
349 1         12 return;
350             }
351 0 0       0 return @{ $self->{'blocked_hosts'} || [] };
  0         0  
352             }
353              
354             # whitelisted hostnames, compiled patterns, or subrefs
355             sub whitelisted_hosts
356             {
357 1     1 1 504316 my $self = shift;
358 1 50       517 if (@_) {
359 1         16 my @hosts = @_;
360 1         14 $self->{'whitelisted_hosts'} = \@hosts;
361 1         34 return;
362             }
363 0 0       0 return @{ $self->{'whitelisted_hosts'} || [] };
  0         0  
364             }
365              
366             # get/set Net::DNS resolver object
367             sub resolver
368             {
369 3     3 1 14102 my $self = shift;
370 3 100       24 if (@_) {
371 2         20 $self->{'resolver'} = shift;
372 2         2381 require UNIVERSAL ;
373 2 50       98 die "Not a Net::DNS::Resolver object" unless
374             UNIVERSAL::isa($self->{'resolver'}, "Net::DNS::Resolver");
375             }
376 3   33     26 return $self->{'resolver'} ||= Net::DNS::Resolver->new;
377             }
378              
379             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
380             # staying there in future versions: needed by our modified version of send_request
381             sub _need_proxy
382             {
383 1     1   3 my($self, $url) = @_;
384 1 50       5 $url = $HTTP::URI_CLASS->new($url) unless ref $url;
385              
386 1   50     4 my $scheme = $url->scheme || return;
387 1 50       19 if (my $proxy = $self->{'proxy'}{$scheme}) {
388 0 0 0     0 if ($self->{'no_proxy'} && @{ $self->{'no_proxy'} }) {
  0         0  
389 0 0       0 if (my $host = eval { $url->host }) {
  0         0  
390 0         0 for my $domain (@{ $self->{'no_proxy'} }) {
  0         0  
391 0 0       0 if ($host =~ /\Q$domain\E$/) {
392 0         0 LWP::Debug::trace("no_proxy configured");
393 0         0 return;
394             }
395             }
396             }
397             }
398 0         0 LWP::Debug::debug("Proxied to $proxy");
399 0         0 return $HTTP::URI_CLASS->new($proxy);
400             }
401 1         19 LWP::Debug::debug('Not proxied');
402 1         7 undef;
403             }
404              
405             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
406             # staying there in future versions: needed by our modified version of send_request
407             sub _request_sanity_check {
408 1     1   2 my($self, $request) = @_;
409             # some sanity checking
410 1 50       5 if (defined $request) {
411 1 50       4 if (ref $request) {
412 1 50 33     55 Carp::croak("You need a request object, not a " . ref($request) . " object")
      33        
      33        
413             if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
414             !$request->can('method') or !$request->can('uri');
415             }
416             else {
417 0         0 Carp::croak("You need a request object, not '$request'");
418             }
419             }
420             else {
421 0         0 Carp::croak("No request object passed in");
422             }
423             }
424              
425             # Taken directly from LWP::UserAgent because it was private there, and we can't depend on it
426             # staying there in future versions: needed by our modified version of send_request
427             sub _new_response {
428 1     1   3 my($request, $code, $message) = @_;
429 1         20 my $response = HTTP::Response->new($code, $message);
430 1         117 $response->request($request);
431 1         31 $response->header("Client-Date" => HTTP::Date::time2str(time));
432 1         174 $response->header("Client-Warning" => "Internal response");
433 1         62 $response->header("Content-Type" => "text/plain");
434 1         71 $response->content("$code $message\n");
435 1         36 return $response;
436             }
437              
438             sub eurl {
439 0     0 0   my $a = $_[0];
440 0           $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
  0            
441 0           $a =~ tr/ /+/;
442 0           return $a;
443             }
444              
445             1;
446              
447             __END__