File Coverage

blib/lib/LWPx/Protocol/http_paranoid.pm
Criterion Covered Total %
statement 33 226 14.6
branch 4 128 3.1
condition 2 44 4.5
subroutine 6 18 33.3
pod 1 3 33.3
total 46 419 10.9


line stmt bran cond sub pod time code
1             # $Id: http_paranoid.pm 2 2005-06-01 23:12:25Z bradfitz $
2             #
3              
4             package LWPx::Protocol::http_paranoid;
5              
6 1     1   7 use strict;
  1         2  
  1         132  
7              
8             require LWP::Debug;
9             require HTTP::Response;
10             require HTTP::Status;
11             require Net::HTTP;
12              
13 1     1   8 use vars qw(@ISA $TOO_LATE $TIME_REMAIN);
  1         2  
  1         125  
14              
15             require LWP::Protocol;
16             @ISA = qw(LWP::Protocol);
17              
18 1     1   7 use vars qw(@ISA @EXTRA_SOCK_OPTS);
  1         3  
  1         3673  
19              
20             my $CRLF = "\015\012";
21              
22             # lame hack using globals in this package to communicate to sysread in the
23             # package at bottom, but whatchya gonna do? Don't want to go modify
24             # Net::HTTP::* to pass explicit timeouts to all the sysreads.
25             sub _set_time_remain {
26 0     0   0 my $now = time;
27 0 0       0 return unless defined $TOO_LATE;
28 0         0 $TIME_REMAIN = $TOO_LATE - $now;
29 0 0       0 $TIME_REMAIN = 0 if $TIME_REMAIN < 0;
30             }
31              
32              
33             sub _extra_sock_opts # to be overridden by subclass
34             {
35 0     0   0 return @EXTRA_SOCK_OPTS;
36             }
37              
38             sub _new_socket
39             {
40 1     1   3 my($self, $host, $port, $timeout, $request) = @_;
41              
42 1         13 my $conn_cache = $self->{ua}{conn_cache};
43 1 50       5 if ($conn_cache) {
44 0 0       0 if (my $sock = $conn_cache->withdraw("http", "$host:$port")) {
45 0 0 0     0 return $sock if $sock && !$sock->can_read(0);
46             # if the socket is readable, then either the peer has closed the
47             # connection or there are some garbage bytes on it. In either
48             # case we abandon it.
49 0         0 $sock->close;
50             }
51             }
52              
53 1         8 my @addrs = $self->{ua}->_resolve($host, $request, $timeout);
54 0 0       0 unless (@addrs) {
55 0         0 die "Can't connect to $host:$port (No suitable addresses found)";
56             }
57              
58 0         0 my $sock;
59 0         0 local($^W) = 0; # IO::Socket::INET can be noisy
60              
61 0   0     0 while (! $sock && @addrs) {
62 0         0 my $addr = shift @addrs;
63              
64 0 0       0 my $conn_timeout = $request->{_timebegin} ?
65             (time() - $request->{_timebegin}) :
66             $timeout;
67 0         0 $sock = $self->socket_class->new(PeerAddr => $addr,
68             PeerPort => $port,
69             Proto => 'tcp',
70             Timeout => $conn_timeout,
71             KeepAlive => !!$conn_cache,
72             SendTE => 1,
73             $self->_extra_sock_opts($addr,$port),
74             );
75             }
76              
77 0 0       0 unless ($sock) {
78             # IO::Socket::INET leaves additional error messages in $@
79 0         0 $@ =~ s/^.*?: //;
80 0         0 die "Can't connect to $host:$port ($@)";
81             }
82              
83             # perl 5.005's IO::Socket does not have the blocking method.
84 0         0 eval { $sock->blocking(0); };
  0         0  
85              
86 0         0 $sock;
87             }
88              
89              
90              
91             sub socket_class
92             {
93 0     0 0 0 my $self = shift;
94 0   0     0 (ref($self) || $self) . "::Socket";
95             }
96              
97             sub _get_sock_info
98             {
99 0     0   0 my($self, $res, $sock) = @_;
100 0 0       0 if (defined(my $peerhost = $sock->peerhost)) {
101 0         0 $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
102             }
103             }
104              
105             sub _fixup_header
106             {
107 0     0   0 my($self, $h, $url, $proxy) = @_;
108              
109             # Extract 'Host' header
110 0         0 my $hhost = $url->authority;
111 0 0       0 if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
112             # add authorization header if we need them. HTTP URLs do
113             # not really support specification of user and password, but
114             # we allow it.
115 0 0 0     0 if (defined($1) && not $h->header('Authorization')) {
116 0         0 require URI::Escape;
117 0         0 $h->authorization_basic(map URI::Escape::uri_unescape($_),
118             split(":", $1, 2));
119             }
120             }
121 0         0 $h->init_header('Host' => $hhost);
122              
123             }
124              
125             sub hlist_remove {
126 0     0 0 0 my($hlist, $k) = @_;
127 0         0 $k = lc $k;
128 0         0 for (my $i = @$hlist - 2; $i >= 0; $i -= 2) {
129 0 0       0 next unless lc($hlist->[$i]) eq $k;
130 0         0 splice(@$hlist, $i, 2);
131             }
132             }
133              
134             sub request
135             {
136 1     1 1 3 my($self, $request, $proxy, $arg, $size, $timeout) = @_;
137 1         8 LWP::Debug::trace('()');
138              
139             # paranoid: now $timeout means total time, not just between bytes coming in.
140             # avoids attacker servers from tarpitting a service that fetches URLs.
141 1         5 $TOO_LATE = undef;
142 1         2 $TIME_REMAIN = undef;
143 1 50       6 if ($timeout) {
144 1   33     6 my $start_time = $request->{_time_begin} || time();
145 1         3 $TOO_LATE = $start_time + $timeout;
146             }
147              
148 1   50     9 $size ||= 4096;
149              
150             # check method
151 1         9 my $method = $request->method;
152 1 50       29 unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
153 0         0 return new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
154             'Library does not allow method ' .
155             "$method for 'http:' URLs";
156             }
157              
158 1         5 my $url = $request->url;
159 1         11 my($host, $port, $fullpath);
160              
161 1         8 $host = $url->host;
162 1         59 $port = $url->port;
163 1         60 $fullpath = $url->path_query;
164 1 50       20 $fullpath = "/$fullpath" unless $fullpath =~ m,^/,;
165              
166             # connect to remote sites
167 1         7 my $socket = $self->_new_socket($host, $port, $timeout, $request);
168              
169 0           my @h;
170 0           my $request_headers = $request->headers->clone;
171 0           $self->_fixup_header($request_headers, $url, $proxy);
172              
173             $request_headers->scan(sub {
174 0     0     my($k, $v) = @_;
175 0           $k =~ s/^://;
176 0           $v =~ s/\n/ /g;
177 0           push(@h, $k, $v);
178 0           });
179              
180 0           my $content_ref = $request->content_ref;
181 0 0         $content_ref = $$content_ref if ref($$content_ref);
182 0           my $chunked;
183             my $has_content;
184              
185 0 0         if (ref($content_ref) eq 'CODE') {
186 0           my $clen = $request_headers->header('Content-Length');
187 0 0         $has_content++ if $clen;
188 0 0         unless (defined $clen) {
189 0           push(@h, "Transfer-Encoding" => "chunked");
190 0           $has_content++;
191 0           $chunked++;
192             }
193             }
194             else {
195             # Set (or override) Content-Length header
196 0           my $clen = $request_headers->header('Content-Length');
197 0 0 0       if (defined($$content_ref) && length($$content_ref)) {
    0          
198 0           $has_content++;
199 0 0 0       if (!defined($clen) || $clen ne length($$content_ref)) {
200 0 0         if (defined $clen) {
201 0           warn "Content-Length header value was wrong, fixed";
202 0           hlist_remove(\@h, 'Content-Length');
203             }
204 0           push(@h, 'Content-Length' => length($$content_ref));
205             }
206             }
207             elsif ($clen) {
208 0           warn "Content-Length set when there is not content, fixed";
209 0           hlist_remove(\@h, 'Content-Length');
210             }
211             }
212              
213 0           my $req_buf = $socket->format_request($method, $fullpath, @h);
214             #print "------\n$req_buf\n------\n";
215              
216             # XXX need to watch out for write timeouts
217             # FIXME_BRAD: make it non-blocking and select during the write
218             {
219 0           my $n = $socket->syswrite($req_buf, length($req_buf));
  0            
220 0 0         die $! unless defined($n);
221 0 0         die "short write" unless $n == length($req_buf);
222             #LWP::Debug::conns($req_buf);
223             }
224              
225 0           my($code, $mess, @junk);
226 0           my $drop_connection;
227              
228 0 0         if ($has_content) {
229 0           my $write_wait = 0;
230 0 0 0       $write_wait = 2
231             if ($request_headers->header("Expect") || "") =~ /100-continue/;
232              
233 0           my $eof;
234             my $wbuf;
235 0           my $woffset = 0;
236 0 0         if (ref($content_ref) eq 'CODE') {
237 0           my $buf = &$content_ref();
238 0 0         $buf = "" unless defined($buf);
239 0 0         $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
240             if $chunked;
241 0           $wbuf = \$buf;
242             }
243             else {
244 0           $wbuf = $content_ref;
245 0           $eof = 1;
246             }
247              
248 0           my $fbits = '';
249 0           vec($fbits, fileno($socket), 1) = 1;
250              
251 0           while ($woffset < length($$wbuf)) {
252              
253 0           my $time_before;
254              
255 0           my $now = time();
256 0 0         if ($now > $TOO_LATE) {
257 0           die "Request took too long.";
258             }
259              
260 0           my $sel_timeout = $TOO_LATE - $now;
261 0 0         if ($write_wait) {
262 0           $time_before = time;
263 0 0         $sel_timeout = $write_wait if $write_wait < $sel_timeout;
264             }
265              
266 0           my $rbits = $fbits;
267 0 0         my $wbits = $write_wait ? undef : $fbits;
268 0           my $nfound = select($rbits, $wbits, undef, $sel_timeout);
269 0 0         unless (defined $nfound) {
270 0           die "select failed: $!";
271             }
272              
273 0 0         if ($write_wait) {
274 0           $write_wait -= time - $time_before;
275 0 0         $write_wait = 0 if $write_wait < 0;
276             }
277              
278 0 0 0       if (defined($rbits) && $rbits =~ /[^\0]/) {
279             # readable
280 0           my $buf = $socket->_rbuf;
281              
282 0           _set_time_remain();
283              
284 0           my $n = $socket->sysread($buf, 1024, length($buf));
285 0 0         unless ($n) {
286 0           die "EOF";
287             }
288 0           $socket->_rbuf($buf);
289 0 0         if ($buf =~ /\015?\012\015?\012/) {
290             # a whole response present
291 0           ($code, $mess, @h) = $socket->read_response_headers(laxed => 1,
292             junk_out => \@junk,
293             );
294 0 0         if ($code eq "100") {
295 0           $write_wait = 0;
296 0           undef($code);
297             }
298             else {
299 0           $drop_connection++;
300 0           last;
301             # XXX should perhaps try to abort write in a nice way too
302             }
303             }
304             }
305 0 0 0       if (defined($wbits) && $wbits =~ /[^\0]/) {
306 0           my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset);
307 0 0         unless ($n) {
308 0 0         die "syswrite: $!" unless defined $n;
309 0           die "syswrite: no bytes written";
310             }
311 0           $woffset += $n;
312              
313 0 0 0       if (!$eof && $woffset >= length($$wbuf)) {
314             # need to refill buffer from $content_ref code
315 0           my $buf = &$content_ref();
316 0 0         $buf = "" unless defined($buf);
317 0 0         $eof++ unless length($buf);
318 0 0         $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
319             if $chunked;
320 0           $wbuf = \$buf;
321 0           $woffset = 0;
322             }
323             }
324             }
325             }
326              
327 0           _set_time_remain();
328 0 0         ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
329             unless $code;
330 0 0         ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
331             if $code eq "100";
332              
333 0           my $response = HTTP::Response->new($code, $mess);
334 0           my $peer_http_version = $socket->peer_http_version;
335 0           $response->protocol("HTTP/$peer_http_version");
336 0           while (@h) {
337 0           my($k, $v) = splice(@h, 0, 2);
338 0           $response->push_header($k, $v);
339             }
340 0 0         $response->push_header("Client-Junk" => \@junk) if @junk;
341              
342 0           $response->request($request);
343 0           $self->_get_sock_info($response, $socket);
344              
345 0 0         if ($method eq "CONNECT") {
346 0           $response->{client_socket} = $socket; # so it can be picked up
347 0           return $response;
348             }
349              
350 0 0         if (my @te = $response->remove_header('Transfer-Encoding')) {
351 0           $response->push_header('Client-Transfer-Encoding', \@te);
352             }
353 0           $response->push_header('Client-Response-Num', $socket->increment_response_count);
354              
355 0           my $complete;
356             $response = $self->collect($arg, $response, sub {
357 0     0     my $buf = ""; #prevent use of uninitialized value in SSLeay.xs
358 0           my $n;
359 0           READ:
360             {
361 0           _set_time_remain();
362 0           $n = $socket->read_entity_body($buf, $size);
363 0 0         die "Can't read entity body: $!" unless defined $n;
364 0 0         redo READ if $n == -1;
365             }
366 0 0         $complete++ if !$n;
367 0           return \$buf;
368 0           } );
369 0 0         $drop_connection++ unless $complete;
370              
371 0           _set_time_remain();
372 0           @h = $socket->get_trailers;
373 0           while (@h) {
374 0           my($k, $v) = splice(@h, 0, 2);
375 0           $response->push_header($k, $v);
376             }
377              
378             # keep-alive support
379 0 0         unless ($drop_connection) {
380 0 0         if (my $conn_cache = $self->{ua}{conn_cache}) {
381 0   0       my %connection = map { (lc($_) => 1) }
  0            
382             split(/\s*,\s*/, ($response->header("Connection") || ""));
383 0 0 0       if (($peer_http_version eq "1.1" && !$connection{close}) ||
      0        
384             $connection{"keep-alive"})
385             {
386 0           LWP::Debug::debug("Keep the http connection to $host:$port");
387 0           $conn_cache->deposit("http", "$host:$port", $socket);
388             }
389             }
390             }
391              
392 0           $response;
393             }
394              
395              
396             #-----------------------------------------------------------
397             package LWPx::Protocol::http_paranoid::SocketMethods;
398              
399             sub sysread {
400 0     0     my $self = shift;
401 0           my $timeout = $LWPx::Protocol::http_paranoid::TIME_REMAIN;
402              
403 0 0         if (defined $timeout) {
404 0 0         die "read timeout" unless $self->can_read($timeout);
405             }
406             else {
407             # since we have made the socket non-blocking we
408             # use select to wait for some data to arrive
409 0 0         $self->can_read(undef) || die "Assert";
410             }
411 0   0       sysread($self, $_[0], $_[1], $_[2] || 0);
412             }
413              
414             sub can_read {
415 0     0     my($self, $timeout) = @_;
416 0           my $fbits = '';
417 0           vec($fbits, fileno($self), 1) = 1;
418 0           my $nfound = select($fbits, undef, undef, $timeout);
419 0 0         die "select failed: $!" unless defined $nfound;
420 0           return $nfound > 0;
421             }
422              
423             sub ping {
424 0     0     my $self = shift;
425 0           !$self->can_read(0);
426             }
427              
428             sub increment_response_count {
429 0     0     my $self = shift;
430 0           return ++${*$self}{'myhttp_response_count'};
  0            
431             }
432              
433             #-----------------------------------------------------------
434             package LWPx::Protocol::http_paranoid::Socket;
435 1     1   8 use vars qw(@ISA);
  1         2  
  1         79  
436             @ISA = qw(LWPx::Protocol::http_paranoid::SocketMethods Net::HTTP);
437              
438             1;