File Coverage

blib/lib/Hijk.pm
Criterion Covered Total %
statement 116 195 59.4
branch 65 154 42.2
condition 40 107 37.3
subroutine 11 13 84.6
pod 0 1 0.0
total 232 470 49.3


line stmt bran cond sub pod time code
1             package Hijk;
2 18     18   343324 use strict;
  18         26  
  18         427  
3 18     18   61 use warnings;
  18         19  
  18         379  
4 18     18   8619 use Time::HiRes;
  18         18625  
  18         72  
5 18     18   9412 use POSIX qw(:errno_h);
  18         89718  
  18         87  
6 18     18   29336 use Socket qw(PF_INET SOCK_STREAM pack_sockaddr_in inet_ntoa $CRLF SOL_SOCKET SO_ERROR);
  18         46142  
  18         3498  
7 18     18   90 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
  18         20  
  18         3969  
8              
9             our $VERSION = "0.27";
10              
11             sub Hijk::Error::CONNECT_TIMEOUT () { 1 << 0 } # 1
12             sub Hijk::Error::READ_TIMEOUT () { 1 << 1 } # 2
13             sub Hijk::Error::TIMEOUT () { Hijk::Error::READ_TIMEOUT | Hijk::Error::CONNECT_TIMEOUT } # 3
14             sub Hijk::Error::CANNOT_RESOLVE () { 1 << 2 } # 4
15             sub Hijk::Error::REQUEST_SELECT_ERROR () { 1 << 3 } # 8
16             sub Hijk::Error::REQUEST_WRITE_ERROR () { 1 << 4 } # 16
17             sub Hijk::Error::REQUEST_ERROR () { Hijk::Error::REQUEST_SELECT_ERROR | Hijk::Error::REQUEST_WRITE_ERROR } # 24
18             sub Hijk::Error::RESPONSE_READ_ERROR () { 1 << 5 } # 32
19             sub Hijk::Error::RESPONSE_BAD_READ_VALUE () { 1 << 6 } # 64
20             sub Hijk::Error::RESPONSE_ERROR () { Hijk::Error::RESPONSE_READ_ERROR | Hijk::Error::RESPONSE_BAD_READ_VALUE } # 96
21              
22             sub _read_http_message {
23 12     12   10449 my ($fd, $read_length, $read_timeout, $parse_chunked, $head_as_array, $method) = @_;
24 12 100 66     59 $read_timeout = undef if defined($read_timeout) && $read_timeout <= 0;
25              
26 12         14 my ($body,$buf,$decapitated,$nbytes,$proto);
27 12         12 my $status_code = 0;
28 12 100       34 my $header = $head_as_array ? [] : {};
29 12         14 my $no_content_len = 0;
30 12         15 my $head = "";
31 18     18   78 my $method_has_no_content = do { no warnings qw(uninitialized); $method eq "HEAD" };
  18         19  
  18         28001  
  12         12  
  12         30  
32 12         11 my $close_connection;
33 12         36 vec(my $rin = '', $fd, 1) = 1;
34 12   66     20 do {
      33        
      66        
35 26 50 33     48 return ($close_connection,undef,0,undef,undef, Hijk::Error::READ_TIMEOUT)
      33        
36             if ((_select($rin, undef, undef, $read_timeout) != 1) || (defined($read_timeout) && $read_timeout <= 0));
37              
38 26         152 my $nbytes = POSIX::read($fd, $buf, $read_length);
39 26 100 66     165 return ($close_connection, $proto, $status_code, $header, $body)
      66        
      66        
40             if $no_content_len && $decapitated && (!defined($nbytes) || $nbytes == 0);
41 24 50       38 if (!defined($nbytes)) {
42 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
43             return (
44 0 0       0 $close_connection, undef, 0, undef, undef,
45             Hijk::Error::RESPONSE_READ_ERROR,
46             "Failed to read http " . ($decapitated ? "body": "head") . " from socket",
47             $!+0,
48             "$!",
49             );
50             }
51              
52 24 100       42 if ($nbytes == 0) {
53             return (
54 5 50       46 $close_connection, undef, 0, undef, undef,
55             Hijk::Error::RESPONSE_BAD_READ_VALUE,
56             "Wasn't expecting a 0 byte response for http " . ($decapitated ? "body": "head" ) . ". This shouldn't happen",
57             );
58             }
59              
60 19 100       25 if ($decapitated) {
61 9         92 $body .= $buf;
62 9 50       32 if (!$no_content_len) {
63 0         0 $read_length -= $nbytes;
64             }
65             }
66             else {
67 10         32 $head .= $buf;
68 10         114 my $neck_pos = index($head, "${CRLF}${CRLF}");
69 10 100       34 if ($neck_pos > 0) {
70 7         11 $decapitated = 1;
71 7         19 $body = substr($head, $neck_pos+4);
72 7         25 $head = substr($head, 0, $neck_pos);
73 7         14 $proto = substr($head, 0, 8);
74 7         8 $status_code = substr($head, 9, 3);
75 7 50       24 $method_has_no_content = 1 if $status_code == 204; # 204 NO CONTENT, see http://tools.ietf.org/html/rfc2616#page-60
76 7         21 substr($head, 0, index($head, $CRLF) + 2, ""); # 2 = length($CRLF)
77              
78 7         7 my ($doing_chunked, $content_length, $trailer_mode, $trailer_value_is_true);
79 7         206 for (split /${CRLF}/o, $head) {
80 45         121 my ($key, $value) = split /: /, $_, 2;
81 45         76 my $key_lc = lc($key);
82              
83             # Figure this out now so we don't need to scan the
84             # list later under $head_as_array, and just for
85             # simplicity and to avoid duplicating code later
86             # when !$head_as_array.
87 45 100 66     309 if ($key_lc eq 'transfer-encoding' and $value eq 'chunked') {
    100 66        
    100 66        
    100 66        
88 3         4 $doing_chunked = 1;
89             } elsif ( ($key_lc eq 'content-length') || (lc($key) eq 'content-length') ) {
90 2         3 $content_length = $value;
91             } elsif ($key_lc eq 'connection' and $value eq 'close') {
92 2         3 $close_connection = 1;
93             } elsif ($key_lc eq 'trailer' and $value) {
94 2         2 $trailer_value_is_true = 1;
95             }
96              
97 45 100       49 if ($head_as_array) {
98 13         26 push @$header => $key, $value;
99             } else {
100 32         59 $header->{$key} = $value;
101             }
102             }
103              
104             # We're processing the headers as a stream, and we
105             # only want to turn on $trailer_mode if
106             # Transfer-Encoding=chunked && Trailer=TRUE. However I
107             # don't think there's any guarantee that
108             # Transfer-Encoding comes before Trailer, so we're
109             # effectively doing a second-pass here.
110 7 100 100     40 if ($doing_chunked and $trailer_value_is_true) {
111 2         3 $trailer_mode = 1;
112             }
113              
114 7 100       19 if ($doing_chunked) {
115 3 50       6 die "PANIC: The experimental Hijk support for chunked transfer encoding needs to be explicitly enabled with parse_chunked => 1"
116             unless $parse_chunked;
117              
118             # if there is chunked encoding we have to ignore content length even if we have it
119             return (
120             $close_connection, $proto, $status_code, $header,
121             _read_chunked_body(
122             $body, $fd, $read_length, $read_timeout,
123             $head_as_array
124             ? $trailer_mode
125 3 100       21 : ($header->{Trailer} ? 1 : 0),
    100          
126             ),
127             );
128             }
129              
130 4 100       10 if (defined $content_length) {
131 2 50       5 if ($content_length == 0) {
132 0         0 $read_length = 0;
133             } else {
134 2         17 $read_length = $content_length - length($body);
135             }
136             } else {
137 2         2 $read_length = 10204;
138 2         13 $no_content_len = 1;
139             }
140             }
141             }
142             } while( !$decapitated || (!$method_has_no_content && ($read_length > 0 || $no_content_len)) );
143 2         8 return ($close_connection, $proto, $status_code, $header, $body);
144             }
145              
146             sub _read_chunked_body {
147 3     3   6 my ($buf,$fd,$read_length,$read_timeout,$true_trailer_header) = @_;
148 3         3 my $chunk_size = 0;
149 3         5 my $body = "";
150 3         11 my $trailer_mode = 0;
151 3         3 my $wait_for_last_clrf = 0;
152 3         9 vec(my $rin = '', $fd, 1) = 1;
153 3         6 while(1) {
154             # just read a 10k block and process it until it is consumed
155 23 50 33     102 if (length($buf) < 3 || length($buf) < $chunk_size || $wait_for_last_clrf > 0) {
      33        
156 0 0 0     0 return (undef, Hijk::Error::READ_TIMEOUT)
      0        
157             if ((_select($rin, undef, undef, $read_timeout) != 1) || (defined($read_timeout) && $read_timeout <= 0));
158 0         0 my $current_buf = "";
159 0         0 my $nbytes = POSIX::read($fd, $current_buf, $read_length);
160 0 0       0 if (!defined($nbytes)) {
161 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
162             return (
163             undef,
164 0         0 Hijk::Error::RESPONSE_READ_ERROR,
165             "Failed to chunked http body from socket",
166             $!+0,
167             "$!",
168             );
169             }
170              
171 0 0       0 if ($nbytes == 0) {
172             return (
173             undef,
174 0         0 Hijk::Error::RESPONSE_BAD_READ_VALUE,
175             "Wasn't expecting a 0 byte response for chunked http body. This shouldn't happen, buf:<$buf>, current_buf:<$current_buf>",
176             );
177             }
178              
179 0         0 $buf .= $current_buf;
180             }
181              
182 23 50       32 if ($wait_for_last_clrf > 0) {
183 0         0 $wait_for_last_clrf -= length($buf);
184 0 0       0 return $body if ($wait_for_last_clrf <= 0);
185             }
186              
187 23 100       26 if ($trailer_mode) {
188             # http://tools.ietf.org/html/rfc2616#section-14.40
189             # http://tools.ietf.org/html/rfc2616#section-3.6.1
190             # A server using chunked transfer-coding in a response MUST NOT use the
191             # trailer for any header fields unless at least one of the following is
192             # true:
193              
194             # a)the request included a TE header field that indicates "trailers" is
195             # acceptable in the transfer-coding of the response, as described in
196             # section 14.39; or,
197              
198             # b)the server is the origin server for the response, the trailer
199             # fields consist entirely of optional metadata, and the recipient
200             # could use the message (in a manner acceptable to the origin server)
201             # without receiving this metadata. In other words, the origin server
202             # is willing to accept the possibility that the trailer fields might
203             # be silently discarded along the path to the client.
204              
205             # in case of trailer mode, we just read everything until the next CRLFCRLF
206 2         5 my $neck_pos = index($buf, "${CRLF}${CRLF}");
207 2 50       10 if ($neck_pos > 0) {
208 2         19 return $body;
209             }
210             } else {
211 21 100 66     47 if ($chunk_size > 0 && length($buf) >= $chunk_size) {
212 9         16 $body .= substr($buf, 0, $chunk_size - 2); # our chunk size includes the following CRLF
213 9         11 $buf = substr($buf, $chunk_size);
214 9         8 $chunk_size = 0;
215             } else {
216 12         13 my $neck_pos = index($buf, ${CRLF});
217 12 50       12 if ($neck_pos > 0) {
    0          
218 12         18 $chunk_size = hex(substr($buf, 0, $neck_pos));
219 12 100       17 if ($chunk_size == 0) {
220 3 100       7 if ($true_trailer_header) {
221 2         2 $trailer_mode = 1;
222             } else {
223 1         1 $buf = substr($buf, $neck_pos + 2);
224             # in case we are missing the ending CLRF, we have to wait for it
225             # otherwise it is left int he socket
226 1 50       2 if (length($buf) < 2) {
227 0         0 $wait_for_last_clrf = 2 - length($buf);
228             } else {
229 1         9 return $body;
230             }
231             }
232             } else {
233 9         7 $chunk_size += 2; # include the following CRLF
234 9         18 $buf = substr($buf, $neck_pos + 2);
235             }
236             } elsif($neck_pos == 0) {
237             return (
238             undef,
239 0         0 Hijk::Error::RESPONSE_BAD_READ_VALUE,
240             "Wasn't expecting CLRF without chunk size. This shouldn't happen, buf:<$buf>",
241             );
242             }
243             }
244             }
245             }
246             }
247              
248             sub _construct_socket {
249 0     0   0 my ($host, $port, $connect_timeout) = @_;
250              
251             # If we can't find the IP address there'll be no point in even
252             # setting up a socket.
253 0         0 my $addr;
254             {
255 0         0 my $inet_aton = gethostbyname($host);
  0         0  
256 0 0       0 return (undef, {error => Hijk::Error::CANNOT_RESOLVE}) unless defined $inet_aton;
257 0         0 $addr = pack_sockaddr_in($port, $inet_aton);
258             }
259              
260 0         0 my $tcp_proto = getprotobyname("tcp");
261 0         0 my $soc;
262 0 0       0 socket($soc, PF_INET, SOCK_STREAM, $tcp_proto) || die "Failed to construct TCP socket: $!";
263 0 0       0 my $flags = fcntl($soc, F_GETFL, 0) or die "Failed to set fcntl F_GETFL flag: $!";
264 0 0       0 fcntl($soc, F_SETFL, $flags | O_NONBLOCK) or die "Failed to set fcntl O_NONBLOCK flag: $!";
265              
266 0 0 0     0 if (!connect($soc, $addr) && $! != EINPROGRESS) {
267 0         0 die "Failed to connect $!";
268             }
269              
270 0 0 0     0 $connect_timeout = undef if defined($connect_timeout) && $connect_timeout <= 0;
271 0         0 vec(my $rout = '', fileno($soc), 1) = 1;
272 0 0       0 if (_select(undef, $rout, undef, $connect_timeout) != 1) {
273 0 0       0 if (defined($connect_timeout)) {
274 0         0 return (undef, {error => Hijk::Error::CONNECT_TIMEOUT});
275             } else {
276             return (
277             undef,
278             {
279 0         0 error => Hijk::Error::REQUEST_SELECT_ERROR,
280             error_message => "select() error on constructing the socket",
281             errno_number => $!+0,
282             errno_string => "$!",
283             },
284             );
285             }
286             }
287              
288 0 0       0 if ($! = unpack("L", getsockopt($soc, SOL_SOCKET, SO_ERROR))) {
289 0         0 die $!;
290             }
291              
292 0         0 return $soc;
293             }
294              
295             sub _build_http_message {
296 36     36   264 my $args = $_[0];
297 36 100 100     147 my $path_and_qs = ($args->{path} || "/") . ( defined($args->{query_string}) ? ("?".$args->{query_string}) : "" );
298             return join(
299             $CRLF,
300             ($args->{method} || "GET")." $path_and_qs " . ($args->{protocol} || "HTTP/1.1"),
301             ($args->{no_default_host_header}
302             ? ()
303             : ("Host: $args->{host}")),
304             defined($args->{body}) ? ("Content-Length: " . length($args->{body})) : (),
305             ($args->{head} and @{$args->{head}}) ? (
306             map {
307 14         91 $args->{head}[2*$_] . ": " . $args->{head}[2*$_+1]
308 10         302 } 0..$#{$args->{head}}/2
309             ) : (),
310             ""
311 36 100 100     330 ) . $CRLF . (defined($args->{body}) ? $args->{body} : "");
    100 50        
    100 100        
    100          
312             }
313              
314             our $SOCKET_CACHE = {};
315              
316             sub request {
317 0     0 0 0 my $args = $_[0];
318              
319             # Backwards compatibility for code that provided the old timeout
320             # argument.
321 0 0       0 $args->{connect_timeout} = $args->{read_timeout} = $args->{timeout} if exists $args->{timeout};
322              
323             # Ditto for providing a default socket cache, allow for setting it
324             # to "socket_cache => undef" to disable the cache.
325 0 0       0 $args->{socket_cache} = $SOCKET_CACHE unless exists $args->{socket_cache};
326              
327             # Provide a default for the read_length option
328 0 0       0 $args->{read_length} = 10 * 2 ** 10 unless exists $args->{read_length};
329              
330             # Use $; so we can use the $socket_cache->{$$, $host, $port}
331             # idiom to access the cache.
332 0 0       0 my $cache_key; $cache_key = join($;, $$, @$args{qw(host port)}) if defined $args->{socket_cache};
  0         0  
333              
334 0         0 my $soc;
335 0 0 0     0 if (defined $cache_key and exists $args->{socket_cache}->{$cache_key}) {
336 0         0 $soc = $args->{socket_cache}->{$cache_key};
337             } else {
338 0         0 ($soc, my $error) = _construct_socket(@$args{qw(host port connect_timeout)});
339 0 0       0 return $error if $error;
340 0 0       0 $args->{socket_cache}->{$cache_key} = $soc if defined $cache_key;
341 0 0       0 $args->{on_connect}->() if exists $args->{on_connect};
342             }
343              
344 0         0 my $r = _build_http_message($args);
345 0         0 my $total = length($r);
346 0         0 my $left = $total;
347              
348 0         0 vec(my $rout = '', fileno($soc), 1) = 1;
349 0         0 while ($left > 0) {
350 0 0       0 if (_select(undef, $rout, undef, undef) != 1) {
351 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
352             return {
353 0         0 error => Hijk::Error::REQUEST_SELECT_ERROR,
354             error_message => "Got error on select() before the write() when while writing the HTTP request the socket",
355             errno_number => $!+0,
356             errno_string => "$!",
357             };
358             }
359              
360 0         0 my $rc = syswrite($soc,$r,$left, $total - $left);
361 0 0       0 if (!defined($rc)) {
362 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
363 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
364 0         0 shutdown($soc, 2);
365             return {
366 0         0 error => Hijk::Error::REQUEST_WRITE_ERROR,
367             error_message => "Got error trying to write the HTTP request with write() to the socket",
368             errno_number => $!+0,
369             errno_string => "$!",
370             };
371             }
372 0         0 $left -= $rc;
373             }
374              
375 0         0 my ($close_connection,$proto,$status,$head,$body,$error,$error_message,$errno_number,$errno_string);
376             eval {
377             ($close_connection,$proto,$status,$head,$body,$error,$error_message,$errno_number,$errno_string) =
378 0         0 _read_http_message(fileno($soc), @$args{qw(read_length read_timeout parse_chunked head_as_array method)});
379 0         0 1;
380 0 0       0 } or do {
381 0   0     0 my $err = $@ || "zombie error";
382 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
383 0         0 shutdown($soc, 2);
384 0         0 die $err;
385             };
386              
387 0 0 0     0 if ($status == 0
      0        
      0        
388             # We always close connections for 1.0 because some servers LIE
389             # and say that they're 1.0 but don't close the connection on
390             # us! An example of this. Test::HTTP::Server (used by the
391             # ShardedKV::Storage::Rest tests) is an example of such a
392             # server. In either case we can't cache a connection for a 1.0
393             # server anyway, so BEGONE!
394             or $close_connection
395             or (defined $proto and $proto eq 'HTTP/1.0')) {
396 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
397 0         0 shutdown($soc, 2);
398             }
399             return {
400 0 0       0 proto => $proto,
    0          
    0          
    0          
401             status => $status,
402             head => $head,
403             body => $body,
404             defined($error) ? ( error => $error ) : (),
405             defined($error_message) ? ( error_message => $error_message ) : (),
406             defined($errno_number) ? ( errno_number => $errno_number ) : (),
407             defined($errno_string) ? ( errno_string => $errno_string ) : (),
408             };
409             }
410              
411             sub _select {
412 27     27   739 my ($rbits, $wbits, $ebits, $timeout) = @_;
413 27         32 while (1) {
414 37         83 my $start = Time::HiRes::time();
415 37         1998528 my $nfound = select($rbits, $wbits, $ebits, $timeout);
416 37 100 66     380 if ($nfound == -1 && $! == EINTR) {
417 10 50       1983 $timeout -= Time::HiRes::time() - $start if $timeout;
418 10         31 next;
419             }
420 27         131 return $nfound;
421             }
422             }
423              
424             1;
425              
426             __END__