File Coverage

blib/lib/Hijk.pm
Criterion Covered Total %
statement 163 195 83.5
branch 93 154 60.3
condition 55 107 51.4
subroutine 13 13 100.0
pod 0 1 0.0
total 324 470 68.9


line stmt bran cond sub pod time code
1             package Hijk;
2 18     18   1012022 use strict;
  18         170  
  18         561  
3 18     18   80 use warnings;
  18         29  
  18         420  
4 18     18   7557 use Time::HiRes;
  18         19817  
  18         61  
5 18     18   8819 use POSIX qw(:errno_h);
  18         105487  
  18         95  
6 18     18   33797 use Socket qw(PF_INET SOCK_STREAM pack_sockaddr_in $CRLF SOL_SOCKET SO_ERROR);
  18         53150  
  18         3390  
7 18     18   121 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
  18         31  
  18         4102  
8              
9             our $VERSION = "0.28";
10              
11             sub Hijk::Error::CONNECT_TIMEOUT () { 1 << 0 } # 1
12             sub Hijk::Error::READ_TIMEOUT () { 1 << 1 } # 2
13             sub Hijk::Error::TIMEOUT () { Hijk::Error::READ_TIMEOUT | Hijk::Error::CONNECT_TIMEOUT } # 3
14             sub Hijk::Error::CANNOT_RESOLVE () { 1 << 2 } # 4
15             sub Hijk::Error::REQUEST_SELECT_ERROR () { 1 << 3 } # 8
16             sub Hijk::Error::REQUEST_WRITE_ERROR () { 1 << 4 } # 16
17             sub Hijk::Error::REQUEST_ERROR () { Hijk::Error::REQUEST_SELECT_ERROR | Hijk::Error::REQUEST_WRITE_ERROR } # 24
18             sub Hijk::Error::RESPONSE_READ_ERROR () { 1 << 5 } # 32
19             sub Hijk::Error::RESPONSE_BAD_READ_VALUE () { 1 << 6 } # 64
20             sub Hijk::Error::RESPONSE_ERROR () { Hijk::Error::RESPONSE_READ_ERROR | Hijk::Error::RESPONSE_BAD_READ_VALUE } # 96
21              
22             sub _read_http_message {
23 16     16   15534 my ($fd, $read_length, $read_timeout, $parse_chunked, $head_as_array, $method) = @_;
24 16 100 100     95 $read_timeout = undef if defined($read_timeout) && $read_timeout <= 0;
25              
26 16         40 my ($body,$buf,$decapitated,$nbytes,$proto);
27 16         24 my $status_code = 0;
28 16 100       46 my $header = $head_as_array ? [] : {};
29 16         26 my $no_content_len = 0;
30 16         37 my $head = "";
31 18     18   110 my $method_has_no_content = do { no warnings qw(uninitialized); $method eq "HEAD" };
  18         24  
  18         32141  
  16         19  
  16         44  
32 16         22 my $close_connection;
33 16         75 vec(my $rin = '', $fd, 1) = 1;
34 16   66     37 do {
      66        
      66        
35 30 100 66     175 return ($close_connection,undef,0,undef,undef, Hijk::Error::READ_TIMEOUT)
      66        
36             if ((_select($rin, undef, undef, $read_timeout) != 1) || (defined($read_timeout) && $read_timeout <= 0));
37              
38 29         422 my $nbytes = POSIX::read($fd, $buf, $read_length);
39 29 100 66     240 return ($close_connection, $proto, $status_code, $header, $body)
      66        
      100        
40             if $no_content_len && $decapitated && (!defined($nbytes) || $nbytes == 0);
41 27 50       58 if (!defined($nbytes)) {
42 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
43             return (
44 0 0       0 $close_connection, undef, 0, undef, undef,
45             Hijk::Error::RESPONSE_READ_ERROR,
46             "Failed to read http " . ($decapitated ? "body": "head") . " from socket",
47             $!+0,
48             "$!",
49             );
50             }
51              
52 27 100       69 if ($nbytes == 0) {
53             return (
54 5 50       49 $close_connection, undef, 0, undef, undef,
55             Hijk::Error::RESPONSE_BAD_READ_VALUE,
56             "Wasn't expecting a 0 byte response for http " . ($decapitated ? "body": "head" ) . ". This shouldn't happen",
57             );
58             }
59              
60 22 100       57 if ($decapitated) {
61 9         148 $body .= $buf;
62 9 50       40 if (!$no_content_len) {
63 0         0 $read_length -= $nbytes;
64             }
65             }
66             else {
67 13         150 $head .= $buf;
68 13         235 my $neck_pos = index($head, "${CRLF}${CRLF}");
69 13 100       50 if ($neck_pos > 0) {
70 10         16 $decapitated = 1;
71 10         59 $body = substr($head, $neck_pos+4);
72 10         89 $head = substr($head, 0, $neck_pos);
73 10         26 $proto = substr($head, 0, 8);
74 10         29 $status_code = substr($head, 9, 3);
75 10 50       46 $method_has_no_content = 1 if $status_code == 204; # 204 NO CONTENT, see http://tools.ietf.org/html/rfc2616#page-60
76 10         36 substr($head, 0, index($head, $CRLF) + 2, ""); # 2 = length($CRLF)
77              
78 10         20 my ($doing_chunked, $content_length, $trailer_mode, $trailer_value_is_true);
79 10         178 for (split /${CRLF}/o, $head) {
80 57         211 my ($key, $value) = split /: /, $_, 2;
81 57         98 my $key_lc = lc($key);
82              
83             # Figure this out now so we don't need to scan the
84             # list later under $head_as_array, and just for
85             # simplicity and to avoid duplicating code later
86             # when !$head_as_array.
87 57 100 66     380 if ($key_lc eq 'transfer-encoding' and $value eq 'chunked') {
    100 66        
    100 66        
    100 66        
88 3         4 $doing_chunked = 1;
89             } elsif ( ($key_lc eq 'content-length') || (lc($key) eq 'content-length') ) {
90 4         6 $content_length = $value;
91             } elsif ($key_lc eq 'connection' and $value eq 'close') {
92 4         6 $close_connection = 1;
93             } elsif ($key_lc eq 'trailer' and $value) {
94 2         2 $trailer_value_is_true = 1;
95             }
96              
97 57 100       107 if ($head_as_array) {
98 13         27 push @$header => $key, $value;
99             } else {
100 44         99 $header->{$key} = $value;
101             }
102             }
103              
104             # We're processing the headers as a stream, and we
105             # only want to turn on $trailer_mode if
106             # Transfer-Encoding=chunked && Trailer=TRUE. However I
107             # don't think there's any guarantee that
108             # Transfer-Encoding comes before Trailer, so we're
109             # effectively doing a second-pass here.
110 10 100 100     47 if ($doing_chunked and $trailer_value_is_true) {
111 2         4 $trailer_mode = 1;
112             }
113              
114 10 100       34 if ($doing_chunked) {
115 3 50       9 die "PANIC: The experimental Hijk support for chunked transfer encoding needs to be explicitly enabled with parse_chunked => 1"
116             unless $parse_chunked;
117              
118             # if there is chunked encoding we have to ignore content length even if we have it
119             return (
120             $close_connection, $proto, $status_code, $header,
121             _read_chunked_body(
122             $body, $fd, $read_length, $read_timeout,
123             $head_as_array
124             ? $trailer_mode
125 3 100       18 : ($header->{Trailer} ? 1 : 0),
    100          
126             ),
127             );
128             }
129              
130 7 100       20 if (defined $content_length) {
131 4 50       19 if ($content_length == 0) {
132 0         0 $read_length = 0;
133             } else {
134 4         50 $read_length = $content_length - length($body);
135             }
136             } else {
137 3         6 $read_length = 10204;
138 3         20 $no_content_len = 1;
139             }
140             }
141             }
142             } while( !$decapitated || (!$method_has_no_content && ($read_length > 0 || $no_content_len)) );
143 5         27 return ($close_connection, $proto, $status_code, $header, $body);
144             }
145              
146             sub _read_chunked_body {
147 3     3   9 my ($buf,$fd,$read_length,$read_timeout,$true_trailer_header) = @_;
148 3         6 my $chunk_size = 0;
149 3         4 my $body = "";
150 3         6 my $trailer_mode = 0;
151 3         3 my $wait_for_last_clrf = 0;
152 3         11 vec(my $rin = '', $fd, 1) = 1;
153 3         7 while(1) {
154             # just read a 10k block and process it until it is consumed
155 23 50 33     79 if (length($buf) < 3 || length($buf) < $chunk_size || $wait_for_last_clrf > 0) {
      33        
156 0 0 0     0 return (undef, Hijk::Error::READ_TIMEOUT)
      0        
157             if ((_select($rin, undef, undef, $read_timeout) != 1) || (defined($read_timeout) && $read_timeout <= 0));
158 0         0 my $current_buf = "";
159 0         0 my $nbytes = POSIX::read($fd, $current_buf, $read_length);
160 0 0       0 if (!defined($nbytes)) {
161 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
162             return (
163             undef,
164 0         0 Hijk::Error::RESPONSE_READ_ERROR,
165             "Failed to chunked http body from socket",
166             $!+0,
167             "$!",
168             );
169             }
170              
171 0 0       0 if ($nbytes == 0) {
172             return (
173             undef,
174 0         0 Hijk::Error::RESPONSE_BAD_READ_VALUE,
175             "Wasn't expecting a 0 byte response for chunked http body. This shouldn't happen, buf:<$buf>, current_buf:<$current_buf>",
176             );
177             }
178              
179 0         0 $buf .= $current_buf;
180             }
181              
182 23 50       47 if ($wait_for_last_clrf > 0) {
183 0         0 $wait_for_last_clrf -= length($buf);
184 0 0       0 return $body if ($wait_for_last_clrf <= 0);
185             }
186              
187 23 100       31 if ($trailer_mode) {
188             # http://tools.ietf.org/html/rfc2616#section-14.40
189             # http://tools.ietf.org/html/rfc2616#section-3.6.1
190             # A server using chunked transfer-coding in a response MUST NOT use the
191             # trailer for any header fields unless at least one of the following is
192             # true:
193              
194             # a)the request included a TE header field that indicates "trailers" is
195             # acceptable in the transfer-coding of the response, as described in
196             # section 14.39; or,
197              
198             # b)the server is the origin server for the response, the trailer
199             # fields consist entirely of optional metadata, and the recipient
200             # could use the message (in a manner acceptable to the origin server)
201             # without receiving this metadata. In other words, the origin server
202             # is willing to accept the possibility that the trailer fields might
203             # be silently discarded along the path to the client.
204              
205             # in case of trailer mode, we just read everything until the next CRLFCRLF
206 2         5 my $neck_pos = index($buf, "${CRLF}${CRLF}");
207 2 50       6 if ($neck_pos > 0) {
208 2         19 return $body;
209             }
210             } else {
211 21 100 66     44 if ($chunk_size > 0 && length($buf) >= $chunk_size) {
212 9         17 $body .= substr($buf, 0, $chunk_size - 2); # our chunk size includes the following CRLF
213 9         13 $buf = substr($buf, $chunk_size);
214 9         11 $chunk_size = 0;
215             } else {
216 12         17 my $neck_pos = index($buf, ${CRLF});
217 12 50       17 if ($neck_pos > 0) {
    0          
218 12         20 $chunk_size = hex(substr($buf, 0, $neck_pos));
219 12 100       18 if ($chunk_size == 0) {
220 3 100       7 if ($true_trailer_header) {
221 2         3 $trailer_mode = 1;
222             } else {
223 1         1 $buf = substr($buf, $neck_pos + 2);
224             # in case we are missing the ending CLRF, we have to wait for it
225             # otherwise it is left int he socket
226 1 50       3 if (length($buf) < 2) {
227 0         0 $wait_for_last_clrf = 2 - length($buf);
228             } else {
229 1         10 return $body;
230             }
231             }
232             } else {
233 9         9 $chunk_size += 2; # include the following CRLF
234 9         17 $buf = substr($buf, $neck_pos + 2);
235             }
236             } elsif($neck_pos == 0) {
237             return (
238             undef,
239 0         0 Hijk::Error::RESPONSE_BAD_READ_VALUE,
240             "Wasn't expecting CLRF without chunk size. This shouldn't happen, buf:<$buf>",
241             );
242             }
243             }
244             }
245             }
246             }
247              
248             sub _construct_socket {
249 5     5   20 my ($host, $port, $connect_timeout) = @_;
250              
251             # If we can't find the IP address there'll be no point in even
252             # setting up a socket.
253 5         9 my $addr;
254             {
255 5         12 my $inet_aton = gethostbyname($host);
  5         1174  
256 5 50       108 return (undef, {error => Hijk::Error::CANNOT_RESOLVE}) unless defined $inet_aton;
257 5         50 $addr = pack_sockaddr_in($port, $inet_aton);
258             }
259              
260 5         446 my $tcp_proto = getprotobyname("tcp");
261 5         14 my $soc;
262 5 50       252 socket($soc, PF_INET, SOCK_STREAM, $tcp_proto) || die "Failed to construct TCP socket: $!";
263 5 50       46 my $flags = fcntl($soc, F_GETFL, 0) or die "Failed to set fcntl F_GETFL flag: $!";
264 5 50       39 fcntl($soc, F_SETFL, $flags | O_NONBLOCK) or die "Failed to set fcntl O_NONBLOCK flag: $!";
265              
266 5 50 33     1287 if (!connect($soc, $addr) && $! != EINPROGRESS) {
267 0         0 die "Failed to connect $!";
268             }
269              
270 5 50 33     31 $connect_timeout = undef if defined($connect_timeout) && $connect_timeout <= 0;
271 5         37 vec(my $rout = '', fileno($soc), 1) = 1;
272 5 50       32 if (_select(undef, $rout, undef, $connect_timeout) != 1) {
273 0 0       0 if (defined($connect_timeout)) {
274 0         0 return (undef, {error => Hijk::Error::CONNECT_TIMEOUT});
275             } else {
276             return (
277             undef,
278             {
279 0         0 error => Hijk::Error::REQUEST_SELECT_ERROR,
280             error_message => "select() error on constructing the socket",
281             errno_number => $!+0,
282             errno_string => "$!",
283             },
284             );
285             }
286             }
287              
288 5 100       75 if ($! = unpack("L", getsockopt($soc, SOL_SOCKET, SO_ERROR))) {
289 1         32 die $!;
290             }
291              
292 4         17 return $soc;
293             }
294              
295             sub _build_http_message {
296 40     40   566 my $args = $_[0];
297 40 100 100     216 my $path_and_qs = ($args->{path} || "/") . ( defined($args->{query_string}) ? ("?".$args->{query_string}) : "" );
298             return join(
299             $CRLF,
300             ($args->{method} || "GET")." $path_and_qs " . ($args->{protocol} || "HTTP/1.1"),
301             ($args->{no_default_host_header}
302             ? ()
303             : ("Host: $args->{host}")),
304             defined($args->{body}) ? ("Content-Length: " . length($args->{body})) : (),
305             ($args->{head} and @{$args->{head}}) ? (
306             map {
307 14         106 $args->{head}[2*$_] . ": " . $args->{head}[2*$_+1]
308 10         32 } 0..$#{$args->{head}}/2
309             ) : (),
310             ""
311 40 100 100     409 ) . $CRLF . (defined($args->{body}) ? $args->{body} : "");
    100 100        
    100 100        
    100          
312             }
313              
314             our $SOCKET_CACHE = {};
315              
316             sub request {
317 5     5 0 12018592 my $args = $_[0];
318              
319             # Backwards compatibility for code that provided the old timeout
320             # argument.
321 5 50       45 $args->{connect_timeout} = $args->{read_timeout} = $args->{timeout} if exists $args->{timeout};
322              
323             # Ditto for providing a default socket cache, allow for setting it
324             # to "socket_cache => undef" to disable the cache.
325 5 50       48 $args->{socket_cache} = $SOCKET_CACHE unless exists $args->{socket_cache};
326              
327             # Provide a default for the read_length option
328 5 50       27 $args->{read_length} = 10 * 2 ** 10 unless exists $args->{read_length};
329              
330             # Use $; so we can use the $socket_cache->{$$, $host, $port}
331             # idiom to access the cache.
332 5 50       15 my $cache_key; $cache_key = join($;, $$, @$args{qw(host port)}) if defined $args->{socket_cache};
  5         46  
333              
334 5         17 my $soc;
335 5 50 33     60 if (defined $cache_key and exists $args->{socket_cache}->{$cache_key}) {
336 0         0 $soc = $args->{socket_cache}->{$cache_key};
337             } else {
338 5         71 ($soc, my $error) = _construct_socket(@$args{qw(host port connect_timeout)});
339 4 50       10 return $error if $error;
340 4 50       16 $args->{socket_cache}->{$cache_key} = $soc if defined $cache_key;
341 4 50       11 $args->{on_connect}->() if exists $args->{on_connect};
342             }
343              
344 4         11 my $r = _build_http_message($args);
345 4         11 my $total = length($r);
346 4         10 my $left = $total;
347              
348 4         30 vec(my $rout = '', fileno($soc), 1) = 1;
349 4         15 while ($left > 0) {
350 4 50       9 if (_select(undef, $rout, undef, undef) != 1) {
351 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
352             return {
353 0         0 error => Hijk::Error::REQUEST_SELECT_ERROR,
354             error_message => "Got error on select() before the write() when while writing the HTTP request the socket",
355             errno_number => $!+0,
356             errno_string => "$!",
357             };
358             }
359              
360 4         151 my $rc = syswrite($soc,$r,$left, $total - $left);
361 4 50       18 if (!defined($rc)) {
362 0 0 0     0 next if ($! == EWOULDBLOCK || $! == EAGAIN || $! == EINTR);
      0        
363 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
364 0         0 shutdown($soc, 2);
365             return {
366 0         0 error => Hijk::Error::REQUEST_WRITE_ERROR,
367             error_message => "Got error trying to write the HTTP request with write() to the socket",
368             errno_number => $!+0,
369             errno_string => "$!",
370             };
371             }
372 4         11 $left -= $rc;
373             }
374              
375 4         7 my ($close_connection,$proto,$status,$head,$body,$error,$error_message,$errno_number,$errno_string);
376             eval {
377             ($close_connection,$proto,$status,$head,$body,$error,$error_message,$errno_number,$errno_string) =
378 4         51 _read_http_message(fileno($soc), @$args{qw(read_length read_timeout parse_chunked head_as_array method)});
379 4         17 1;
380 4 50       8 } or do {
381 0   0     0 my $err = $@ || "zombie error";
382 0 0       0 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
383 0         0 shutdown($soc, 2);
384 0         0 die $err;
385             };
386              
387 4 50 100     50 if ($status == 0
      33        
      66        
388             # We always close connections for 1.0 because some servers LIE
389             # and say that they're 1.0 but don't close the connection on
390             # us! An example of this. Test::HTTP::Server (used by the
391             # ShardedKV::Storage::Rest tests) is an example of such a
392             # server. In either case we can't cache a connection for a 1.0
393             # server anyway, so BEGONE!
394             or $close_connection
395             or (defined $proto and $proto eq 'HTTP/1.0')) {
396 4 50       30 delete $args->{socket_cache}->{$cache_key} if defined $cache_key;
397 4         267 shutdown($soc, 2);
398             }
399             return {
400 4 100       176 proto => $proto,
    50          
    50          
    50          
401             status => $status,
402             head => $head,
403             body => $body,
404             defined($error) ? ( error => $error ) : (),
405             defined($error_message) ? ( error_message => $error_message ) : (),
406             defined($errno_number) ? ( errno_number => $errno_number ) : (),
407             defined($errno_string) ? ( errno_string => $errno_string ) : (),
408             };
409             }
410              
411             sub _select {
412 40     40   1383 my ($rbits, $wbits, $ebits, $timeout) = @_;
413 40         60 while (1) {
414 50         264 my $start = Time::HiRes::time();
415 50         12007701 my $nfound = select($rbits, $wbits, $ebits, $timeout);
416 50 100 66     724 if ($nfound == -1 && $! == EINTR) {
417 10 50       2602 $timeout -= Time::HiRes::time() - $start if $timeout;
418 10         47 next;
419             }
420 40         284 return $nfound;
421             }
422             }
423              
424             1;
425              
426             __END__