File Coverage

blib/lib/Net/HTTP/Tiny.pm
Criterion Covered Total %
statement 25 151 16.5
branch 6 102 5.8
condition 0 12 0.0
subroutine 9 15 60.0
pod 1 1 100.0
total 41 281 14.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Net::HTTP::Tiny - minimal HTTP client
4              
5             =head1 SYNOPSIS
6              
7             use Net::HTTP::Tiny qw(http_get);
8              
9             $dat = http_get("http://maia.usno.navy.mil/ser7/tai-utc.dat");
10              
11             =head1 DESCRIPTION
12              
13             This module provides an easy interface to retrieve files using the HTTP
14             protocol. The location of a file to retrieve is specified using a URL.
15             The module conforms to HTTP/1.1, and follows redirections (up to a limit
16             of five chained redirections). Content-MD5 is checked, if the optional
17             module L is installed. IPv6 is supported, if the optional
18             module L is installed. Only retrieval is supported,
19             not posting or anything more exotic.
20              
21             =cut
22              
23             package Net::HTTP::Tiny;
24              
25 3     3   72876 { use 5.006; }
  3         10  
  3         209  
26 3     3   14 use warnings;
  3         10  
  3         88  
27 3     3   14 use strict;
  3         11  
  3         107  
28              
29 3     3   15 use Carp qw(croak);
  3         5  
  3         268  
30              
31             our $VERSION = "0.001";
32              
33             # Set up superclass manually, rather than via "parent", to avoid non-core
34             # dependency.
35 3     3   16 use Exporter ();
  3         4  
  3         13710  
36             our @ISA = qw(Exporter);
37             our @EXPORT_OK = qw(http_get);
38              
39             =head1 FUNCTIONS
40              
41             =over
42              
43             =item http_get(URL)
44              
45             I must be a URL using the C scheme. The file that it refers to
46             is retrieved from the HTTP server, and its content is returned in the form
47             of a string of octets. If any error occurs then the function Cs.
48             Possible errors include the URL being malformed, inability to contact the
49             HTTP server, and the HTTP server reporting that the file doesn't exist.
50              
51             =cut
52              
53             {
54             local $SIG{__DIE__};
55 3     3   3224 eval("$]" >= 5.008 ? q{
  3         99  
  3         51  
56             use utf8 ();
57             *_downgrade = \&utf8::downgrade;
58             } : q{
59             sub _downgrade($) {
60             # Logic copied from Scalar::String. See there
61             # for explanation; the code depends on accidents
62             # of the Perl 5.6 implementation.
63             return if unpack("C", "\xaa".$_[0]) == 170;
64             {
65             use bytes;
66             $_[0] =~ /\A[\x00-\x7f\x80-\xbf\xc2\xc3]*\z/
67             or die "Wide character";
68             }
69             use utf8;
70             ($_[0]) = ($_[0] =~ /\A([\x00-\xff]*)\z/);
71             }
72             });
73             die $@ unless $@ eq "";
74             }
75              
76 29     29   3743 sub _croak($) { croak "HTTP error: $_[0]" }
77              
78             #
79             # HTTP URL interpretation is governed by RFC 3986 (generic URI syntax),
80             # RFC 2616 (HTTP/1.1, giving top-level syntax), and RFC 2396 (older
81             # generic URI syntax, to which RFC 2616 refers). There is no formal
82             # specification for the syntax of HTTP URLs in the context of RFC 3986's
83             # base syntax, so this code merges the various sources in what seems like
84             # a reasonable manner. Generally, RFC 3986 is used to determine which
85             # characters are permitted in each component, and RFC 2616 determines
86             # higher-level structure.
87             #
88              
89             my $safechar_rx = qr/[0-9A-Za-z\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]/;
90             my $hexpair_rx = qr/\%[0-9A-Fa-f]{2}/;
91              
92             my $d8_rx = qr/25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]/;
93             my $ipv4_address_rx = qr/$d8_rx\.$d8_rx\.$d8_rx\.$d8_rx/o;
94              
95             my $h16_rx = qr/[0-9A-Fa-f]{1,4}/;
96             my $ls32_rx = qr/$h16_rx\:$h16_rx|$ipv4_address_rx/o;
97             my $ipv6_address_rx = qr/
98             (?:) (?:$h16_rx\:){6} $ls32_rx
99             | \:\: (?:$h16_rx\:){5} $ls32_rx
100             | (?: $h16_rx )? \:\: (?:$h16_rx\:){4} $ls32_rx
101             | (?: (?:$h16_rx\:){0,1} $h16_rx )? \:\: (?:$h16_rx\:){3} $ls32_rx
102             | (?: (?:$h16_rx\:){0,2} $h16_rx )? \:\: (?:$h16_rx\:){2} $ls32_rx
103             | (?: (?:$h16_rx\:){0,3} $h16_rx )? \:\: (?:$h16_rx\:) $ls32_rx
104             | (?: (?:$h16_rx\:){0,4} $h16_rx )? \:\: $ls32_rx
105             | (?: (?:$h16_rx\:){0,5} $h16_rx )? \:\: $h16_rx
106             | (?: (?:$h16_rx\:){0,6} $h16_rx )? \:\:
107             /xo;
108              
109             my $ip_future_rx = qr/[vV][0-9A-Fa-f]+\.(?:$safechar_rx|\:)+/o;
110             my $ip_literal_rx = qr/\[(?:$ipv6_address_rx|$ip_future_rx)\]/o;
111             my $hostname_rx = qr/
112             (?:[0-9A-Za-z](?:[\-0-9A-Za-z]*[0-9A-Za-z])?\.)*
113             [A-Za-z](?:[\-0-9A-Za-z]*[0-9A-Za-z])?
114             /x;
115             my $host_rx = qr/$ip_literal_rx|$ipv4_address_rx|$hostname_rx/o;
116             my $port_rx = qr/[0-9]+/;
117              
118             my $http_prefix_rx = qr/[hH][tT][tT][pP]\:\/\//;
119             my $path_and_query_rx = qr/\/(?:$safechar_rx|[\:\@\/\?]|$hexpair_rx)*/;
120             my $http_url_rx =
121             qr/$http_prefix_rx(?>$host_rx)(?:\:$port_rx?)?$path_and_query_rx?/xo;
122              
123             sub _parse_http_url($) {
124 169     169   126261 my($url) = @_;
125 169         2701 my($host, $port, $pathquery) = ($url =~ m/\A
126             $http_prefix_rx
127             ((?>$host_rx))(?:\:($port_rx)?)?
128             ($path_and_query_rx)?
129             \z/xo);
130 169 100       718 defined $host or _croak "<$url> is not an http URL";
131             return {
132 140 100       886 host => $host,
    100          
133             port => defined($port) ? 0+$port : 80,
134             path_and_query => defined($pathquery) ? $pathquery : "/",
135             };
136             }
137              
138             my $blksize = 0x8000;
139             my $timeout = 50;
140              
141             my $socket_class;
142             sub _open_tcp($$) {
143 0     0   0 my($host, $port) = @_;
144 0 0       0 if($host =~ /\A\[v/) {
145 0         0 _croak "IP addresses from the future not supported";
146             }
147             $socket_class ||=
148             eval { local $SIG{__DIE__};
149             require IO::Socket::IP;
150             IO::Socket::IP->VERSION(0.08);
151             "IO::Socket::IP";
152 0   0     0 } || do {
      0        
153             require IO::Socket::INET;
154             IO::Socket::INET->VERSION(1.24);
155             "IO::Socket::INET";
156             };
157 0 0       0 if($host =~ /\A\[/) {
158 0 0       0 _croak "IPv6 support requires IO::Socket::IP"
159             unless $socket_class eq "IO::Socket::IP";
160             }
161 0 0       0 my $bare_host = $host =~ /\A\[(.*)\]\z/s ? $1 : $host;
162 0 0 0     0 $port >= 1 && $port <= 65535
163             or _croak "failed to connect to $host TCP port $port: ".
164             "invalid port number";
165             return $socket_class->new(
166             PeerHost => $bare_host,
167             PeerPort => $port,
168             Proto => "tcp",
169             Timeout => $timeout,
170 0   0     0 ) || do {
171             my $err = $@;
172             chomp $err;
173             $err =~ s/\AIO::Socket::[A-Z0-9]+: //;
174             $err ne "" or $err = "$socket_class didn't say why";
175             _croak "failed to connect to $host TCP port $port: $err";
176             };
177             }
178              
179             sub _check_timeout($$$) {
180 0     0   0 my($sock, $writing, $what) = @_;
181 0         0 vec(my $b = "", $sock->fileno, 1) = 1;
182 0 0       0 my $s = select($writing ? undef : $b, $writing ? $b : undef, $b,
    0          
183             $timeout);
184 0 0       0 $s >= 1 or _croak "failed to $what: @{[$s ? $! : q(timed out)]}";
  0 0       0  
185             }
186              
187             sub _recv_more_response($$$) {
188 0     0   0 my($conn, $rbufp, $eof_ok) = @_;
189 0         0 _check_timeout($conn, 0, "receive response");
190 0         0 my $n = $conn->sysread($$rbufp, $blksize, length($$rbufp));
191 0 0       0 defined $n or _croak "failed to receive response: $!";
192 0 0       0 $n != 0 and return 1;
193 0 0       0 $eof_ok or _croak "failed to receive response: unexpected EOF";
194 0         0 return 0;
195             }
196              
197             sub _recv_line($$) {
198 0     0   0 my($conn, $rbufp) = @_;
199 0         0 while(1) {
200 0 0       0 $$rbufp =~ s/\A(.*?)\r?\n//s and return $1;
201 0         0 _recv_more_response($conn, $rbufp, 0);
202             }
203             }
204              
205             my $token_rx = qr/[\!\#\$\%\&\'\*\+\-\.0-9A-Z\^\_\`a-z\|\~]+/;
206             my $quoted_string_rx = qr/\"(?>[\ -\[\]-\~\x80-\xff]+|\\[\ -\~\x80-\xff])*\"/;
207             my $lws_rx = qr/[\ \t]*/;
208              
209             sub _recv_headers($$$) {
210 0     0   0 my($conn, $rbufp, $h) = @_;
211 0         0 my $curhdr;
212 0         0 while(1) {
213 0         0 my $l = _recv_line($conn, $rbufp);
214 0 0       0 if($l =~ /\A[ \t]/) {
215 0 0       0 defined $curhdr
216             or _croak "malformed response from server";
217 0         0 $curhdr .= $l;
218 0         0 next;
219             }
220 0 0       0 if(defined $curhdr) {
221 0 0       0 $curhdr =~ /\A($token_rx)$lws_rx:(.*)\z/so
222             or _croak "malformed response from server";
223 0         0 my($hname, $value) = ($1, $2);
224 0         0 $hname = lc($hname);
225 0 0       0 $h->{$hname} = exists($h->{$hname}) ?
226             $h->{$hname}.",".$value : $value;
227             }
228 0 0       0 $l eq "" and last;
229 0         0 $curhdr = $l;
230             }
231             }
232              
233             my $loaded_digest_md5;
234             sub _http_get_one($) {
235 0     0   0 my($url) = @_;
236 0         0 my $params = _parse_http_url($url);
237 0         0 my $request = "GET @{[$params->{path_and_query}]} HTTP/1.1\r\n".
  0         0  
238             "Connection: close\r\n".
239 0         0 "Host: @{[$params->{host}]}".
240 0 0       0 "@{[$params->{port}==80?q():q(:).$params->{port}]}\r\n".
241             "Accept-Encoding: identity\r\n".
242             "\r\n";
243 0         0 my $conn = _open_tcp($params->{host}, $params->{port});
244             {
245 0         0 my $len = length($request);
  0         0  
246 0         0 local $SIG{PIPE} = "IGNORE";
247 0         0 for(my $pos = 0; $pos != $len; ) {
248 0         0 _check_timeout($conn, 1, "send request");
249 0         0 my $n = $conn->syswrite($request, $len-$pos, $pos);
250 0 0       0 defined $n or _croak "failed to send request: $!";
251 0         0 $pos += $n;
252             }
253 0         0 $request = undef;
254             }
255 0         0 my $rbuf = "";
256 0         0 my %response;
257 0         0 while(1) {
258 0         0 my $l = _recv_line($conn, \$rbuf);
259 0 0       0 $l =~ /\A
260             HTTP\/[0-9]+\.[0-9]+[\ \t]+
261             ([0-9]{3}\ [\ -\~\x80-\xff]*)
262             \z/x
263             or _croak "malformed response from server";
264 0         0 my $status = $1;
265 0         0 $status =~ s/([^\ -\~])/sprintf("%%%02X", ord($1))/eg;
  0         0  
266 0 0       0 $status =~ /\A(?:[13]|200)/ or _croak $status;
267 0         0 my %h;
268 0         0 _recv_headers($conn, \$rbuf, \%h);
269 0 0       0 if($status !~ /\A1/) {
270 0         0 $response{status} = $status;
271 0         0 $response{headers} = \%h;
272 0         0 last;
273             }
274             }
275 0 0       0 return \%response unless $response{status} =~ /\A200/;
276 0 0       0 my $ce = lc(exists($response{headers}->{"content-encoding"}) ?
277             $response{headers}->{"content-encoding"} : "identity");
278 0 0       0 $ce =~ /\A${lws_rx}identity${lws_rx}\z/o
279             or _croak "unsupported Content-Encoding";
280 0 0       0 my $te = lc(exists($response{headers}->{"transfer-encoding"}) ?
281             $response{headers}->{"transfer-encoding"} : "identity");
282 0 0       0 if($te =~ /\A${lws_rx}chunked${lws_rx}\z/o) {
    0          
    0          
283 0         0 $response{body} = "";
284 0         0 while(1) {
285 0 0       0 _recv_line($conn, \$rbuf) =~ /\A
286             ([0-9A-Fa-f]+)$lws_rx
287             (?>
288             ;$lws_rx$token_rx$lws_rx
289             (?>\=$lws_rx
290             (?:$token_rx|$quoted_string_rx)
291             $lws_rx
292             )?
293             )*
294             \z/xo or _croak "malformed chunk";
295 0         0 my $csize = $1;
296 0         0 $csize =~ s/\A0+//;
297 0 0       0 last if $csize eq "";
298 0 0       0 length($csize) <= 8 or _croak "excessive chunk length";
299 0         0 $csize = hex($csize);
300 0         0 while(length($rbuf) < $csize) {
301 0         0 _recv_more_response($conn, \$rbuf, 0);
302             }
303 0         0 $response{body} .= substr($rbuf, 0, $csize, "");
304 0 0       0 _recv_line($conn, \$rbuf) eq ""
305             or _croak "malformed chunk";
306             }
307 0         0 _recv_headers($conn, \$rbuf, $response{headers});
308             } elsif($te !~ /\A${lws_rx}identity${lws_rx}\z/o) {
309 0         0 _croak "unsupported Transfer-Encoding";
310             } elsif(exists $response{headers}->{"content-length"}) {
311 0 0       0 $response{headers}->{"content-length"}
312             =~ /\A$lws_rx([0-9]+)$lws_rx\z/o
313             or _croak "malformed Content-Length";
314 0         0 my $body_length = $1;
315 0 0       0 $body_length < 0xffffffff or _croak "excessive Content-Length";
316 0         0 $response{body} = $rbuf;
317 0         0 while(length($response{body}) < $body_length) {
318 0         0 _recv_more_response($conn, \$response{body}, 0);
319             }
320 0         0 substr $response{body}, $body_length,
321             length($response{body})-$body_length, "";
322             } else {
323 0         0 $response{body} = $rbuf;
324 0         0 1 while _recv_more_response($conn, \$response{body}, 1);
325             }
326 0         0 $conn = undef;
327 0 0       0 if(exists $response{headers}->{"content-md5"}) {
328 0 0       0 $response{headers}->{"content-md5"}
329             =~ /\A$lws_rx([A-Za-z0-9\+\/]{21}[AQgw])\=\=$lws_rx\z/o
330             or _croak "malformed Content-MD5";
331 0         0 my $digest = $1;
332 0 0       0 unless(defined $loaded_digest_md5) {
333 0 0       0 $loaded_digest_md5 = eval { local $SIG{__DIE__};
  0         0  
334 0         0 require Digest::MD5;
335 0         0 Digest::MD5->VERSION(2.17);
336 0         0 1;
337             } ? 1 : 0;
338             }
339 0 0       0 if($loaded_digest_md5) {
340 0 0       0 Digest::MD5::md5_base64($response{body}) eq $digest
341             or _croak "Content-MD5 mismatch";
342             }
343             }
344 0         0 return \%response;
345             }
346              
347             sub http_get($) {
348 1     1 1 8 my($url) = @_;
349 1         15 _downgrade($url);
350 0           my %seen;
351 0           for(my $redir_limit = 6; $redir_limit--; ) {
352 0           my $response = _http_get_one($url);
353 0 0         $response->{status} =~ /\A200/ and return $response->{body};
354 0           $seen{$url} = undef;
355 0           my $loc = $response->{headers}->{location};
356 0 0         defined $loc or _croak "redirection with no target";
357 0 0         if($loc =~ /\A$lws_rx($http_url_rx)$lws_rx\z/o) {
    0          
358 0           $url = $1;
359             } elsif($loc =~ /\A$lws_rx($path_and_query_rx)$lws_rx\z/o) {
360             # Illegal, but common and easy to handle sanely.
361 0           my $pathquery = $1;
362 0           $url =~ s/\A($http_prefix_rx[^\/]*).*\z/$1$pathquery/so;
363             } else {
364 0           _croak "redirection to malformed target";
365             }
366 0 0         exists $seen{$url} and _croak "redirection loop";
367             }
368 0           _croak "too many redirections";
369             }
370              
371             =back
372              
373             =head1 BUGS
374              
375             IPv6 support is largely untested. Reports of experiences with it would
376             be appreciated.
377              
378             =head1 SEE ALSO
379              
380             L,
381             L,
382             L
383              
384             =head1 AUTHOR
385              
386             Andrew Main (Zefram)
387              
388             =head1 COPYRIGHT
389              
390             Copyright (C) 2012 Andrew Main (Zefram)
391              
392             =head1 LICENSE
393              
394             This module is free software; you can redistribute it and/or modify it
395             under the same terms as Perl itself.
396              
397             =cut
398              
399             1;