File Coverage

Bio/Root/HTTPget.pm
Criterion Covered Total %
statement 12 146 8.2
branch 0 92 0.0
condition 0 83 0.0
subroutine 4 12 33.3
pod 5 5 100.0
total 21 338 6.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for fallback HTTP get operations.
3             #
4             # Module is proxy-aware
5             #
6             # Please direct questions and support issues to
7             #
8             # Cared for by Chris Dagdigian
9             # but all of the good stuff was written by
10             # Lincoln Stein.
11             #
12             # You may distribute this module under the same terms as perl itself
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Root::HTTPget - module for fallback HTTP get operations when
19             LWP:: is unavailable
20              
21             =head1 SYNOPSIS
22              
23             use Bio::Root::HTTPget;
24             my $web = Bio::Root::HTTPget->new();
25              
26             my $response = $web->get('http://localhost');
27             $response = $web->get('http://localhost/images');
28              
29             $response = eval { $web->get('http://fred:secret@localhost/ladies_only/')
30             } or warn $@;
31              
32             $response = eval { $web->get('http://jeff:secret@localhost/ladies_only/')
33             } or warn $@;
34              
35             $response = $web->get('http://localhost/images/navauthors.gif');
36             $response = $web->get(-url=>'http://www.google.com',
37             -proxy=>'http://www.modperl.com');
38              
39             =head1 DESCRIPTION
40              
41             This is basically an last-chance module for doing network HTTP get
42             requests in situations where more advanced external CPAN modules such
43             as LWP:: are not installed.
44              
45             The particular reason this module was developed was so that the Open
46             Bio Database Access code can fallback to fetching the default registry
47             files from http://open-bio.org/registry/ without having to depend on
48             external dependencies like Bundle::LWP for network HTTP access.
49              
50             The core of this module was written by Lincoln Stein. It can handle proxies
51             and HTTP-based proxy authentication.
52              
53             =head1 FEEDBACK
54              
55             =head2 Mailing Lists
56              
57             User feedback is an integral part of the evolution of this
58             and other Bioperl modules. Send your comments and suggestions preferably
59             to one of the Bioperl mailing lists.
60             Your participation is much appreciated.
61              
62             bioperl-l@bioperl.org - General discussion
63             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64              
65             =head2 Reporting Bugs
66              
67             Report bugs to the Bioperl bug tracking system to help us keep track
68             the bugs and their resolution. Bug reports can be submitted via the
69             web:
70              
71             https://github.com/bioperl/bioperl-live/issues
72              
73             =head1 AUTHOR - Lincoln Stein
74              
75             # Please direct questions and support issues to I
76              
77             Cared for by Chris Dagdigian
78              
79             =head1 APPENDIX
80              
81             The rest of the documentation details each of the object
82             methods. Internal methods are usually preceded with a _
83              
84             =cut
85              
86              
87             # Let the code begin...
88              
89             package Bio::Root::HTTPget;
90              
91 1     1   3 use strict;
  1         2  
  1         22  
92 1     1   3 use warnings;
  1         1  
  1         24  
93 1     1   423 use IO::Socket qw(:DEFAULT :crlf);
  1         8945  
  1         3  
94              
95 1     1   623 use base qw(Bio::Root::Root);
  1         1  
  1         1562  
96              
97             {
98             # default attributes, in case used as a class/sub call
99             my %attributes;
100              
101             =head2 get
102              
103             Title : get
104             Usage : my $resp = get(-url => $url);
105             Function:
106             Returns : string
107             Args : -url => URL to HTTPGet
108             -proxy => proxy to use
109             -user => username for proxy or authentication
110             -pass => password for proxy or authentication
111             -timeout => timeout
112              
113             =cut
114              
115             sub get {
116 0     0 1   my $self;
117 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
118 0           $self = shift;
119             }
120            
121 0           my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
122             __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
123 0   0       my $dest = $proxy || $url;
124              
125 0 0         my ($host,$port,$path,$user,$pass)
126             = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
127 0   0       $auth_user ||= $user;
128 0   0       $auth_pass ||= $pass;
129 0 0         if ($self) {
130 0 0         unless ($proxy) {
131 0           $proxy = $self->proxy;
132             }
133 0 0         unless ($auth_user) {
134 0           ($auth_user, $auth_pass) = $self->authentication;
135             }
136             }
137 0 0         $path = $url if $proxy;
138            
139             # set up the connection
140 0 0         my $socket = _http_connect($host,$port) or __PACKAGE__->throw("can't connect: $@");
141              
142             # the request
143 0           print $socket "GET $path HTTP/1.0$CRLF";
144 0           print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
145             # Support virtual hosts
146 0           print $socket "HOST: $host$CRLF";
147              
148 0 0 0       if ($auth_user && $auth_pass) { # authentication information
149 0           my $token = _encode_base64("$auth_user:$auth_pass");
150 0           print $socket "Authorization: Basic $token$CRLF";
151             }
152 0           print $socket "$CRLF";
153              
154             # read the response
155 0           my $response;
156             {
157 0           local $/ = "$CRLF$CRLF";
  0            
158 0           $response = <$socket>;
159             }
160              
161 0           my ($status_line,@other_lines) = split $CRLF,$response;
162 0 0         my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
163             or __PACKAGE__->throw("invalid response from web server: got $response");
164              
165 0           my %headers = map {/^(\S+): (.+)/} @other_lines;
  0            
166 0 0 0       if ($stat_code == 302 || $stat_code == 301) { # redirect
    0          
    0          
167             my $location = $headers{Location} or
168 0 0         __PACKAGE__->throw("invalid redirect: no Location header");
169 0           return get(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass); # recursive call
170             }
171              
172             elsif ($stat_code == 401) { # auth required
173 0           my $auth_required = $headers{'WWW-Authenticate'};
174 0 0         $auth_required =~ /^Basic realm="([^\"]+)"/
175             or __PACKAGE__->throw("server requires unknown type of".
176             " authentication: $auth_required");
177 0           __PACKAGE__->throw("request failed: $status_line, realm = $1");
178             }
179              
180             elsif ($stat_code != 200) {
181 0           __PACKAGE__->throw("request failed: $status_line");
182             }
183              
184 0           $response = '';
185 0           while (1) {
186 0           my $bytes = read($socket,$response,2048,length $response);
187 0 0         last unless $bytes > 0;
188             }
189              
190 0           $response;
191             }
192              
193             =head2 getFH
194              
195             Title : getFH
196             Usage :
197             Function:
198             Example :
199             Returns : string
200             Args :
201              
202             =cut
203              
204             sub getFH {
205 0     0 1   my $self;
206 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
207 0           $self = shift;
208             }
209 0           my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
210             __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
211 0   0       my $dest = $proxy || $url;
212              
213 0 0         my ($host,$port,$path,$user,$pass)
214             = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
215 0   0       $auth_user ||= $user;
216 0   0       $auth_pass ||= $pass;
217 0 0         $path = $url if $proxy;
218              
219             # set up the connection
220 0 0         my $socket = _http_connect($host,$port) or __PACKAGE__->throw("can't connect: $@");
221              
222             # the request
223 0           print $socket "GET $path HTTP/1.0$CRLF";
224 0           print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
225             # Support virtual hosts
226 0           print $socket "HOST: $host$CRLF";
227              
228 0 0 0       if ($auth_user && $auth_pass) { # authentication information
229 0           my $token = _encode_base64("$auth_user:$auth_pass");
230 0           print $socket "Authorization: Basic $token$CRLF";
231             }
232 0           print $socket "$CRLF";
233              
234             # read the response
235 0           my $response;
236             {
237 0           local $/ = "$CRLF$CRLF";
  0            
238 0           $response = <$socket>;
239             }
240              
241 0           my ($status_line,@other_lines) = split $CRLF,$response;
242 0 0         my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
243             or __PACKAGE__->throw("invalid response from web server: got $response");
244              
245 0           my %headers = map {/^(\S+): (.+)/} @other_lines;
  0            
246 0 0 0       if ($stat_code == 302 || $stat_code == 301) { # redirect
    0          
    0          
247             my $location = $headers{Location} or
248 0 0         __PACKAGE__->throw("invalid redirect: no Location header");
249 0           return getFH(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass); # recursive call
250             }
251              
252             elsif ($stat_code == 401) { # auth required
253 0           my $auth_required = $headers{'WWW-Authenticate'};
254 0 0         $auth_required =~ /^Basic realm="([^\"]+)"/
255             or __PACKAGE__->throw("server requires unknown type of ".
256             "authentication: $auth_required");
257 0           __PACKAGE__->throw("request failed: $status_line, realm = $1");
258             }
259              
260             elsif ($stat_code != 200) {
261 0           __PACKAGE__->throw("request failed: $status_line");
262             }
263              
264             # Now that we are reasonably sure the socket and request
265             # are OK we pass the socket back as a filehandle so it can
266             # be processed by the caller...
267              
268 0           $socket;
269              
270             }
271              
272              
273             =head2 _http_parse_url
274              
275             Title :
276             Usage :
277             Function:
278             Example :
279             Returns :
280             Args :
281              
282             =cut
283              
284             sub _http_parse_url {
285 0     0     my $self;
286 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
287 0           $self = shift;
288             }
289 0           my $url = shift;
290 0 0         my ($user,$pass,$hostent,$path) =
291             $url =~ m!^http://(?:([^:]+):([^:]+)@)?([^/]+)(/?[^\#]*)! or return;
292 0   0       $path ||= '/';
293 0           my ($host,$port) = split(':',$hostent);
294 0   0       return ($host,$port||80,$path,$user,$pass);
295             }
296              
297             =head2 _http_connect
298              
299             Title :
300             Usage :
301             Function:
302             Example :
303             Returns :
304             Args :
305              
306             =cut
307              
308             sub _http_connect {
309 0     0     my ($host,$port,$timeout) = @_;
310 0           my $sock = IO::Socket::INET->new(Proto => 'tcp',
311             Type => SOCK_STREAM,
312             PeerHost => $host,
313             PeerPort => $port,
314             Timeout => $timeout,
315             );
316 0           $sock;
317             }
318              
319              
320             =head2 _encode_base64
321              
322             Title :
323             Usage :
324             Function:
325             Example :
326             Returns :
327             Args :
328              
329             =cut
330              
331             sub _encode_base64 {
332 0     0     my $self;
333 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
334 0           $self = shift;
335             }
336 0           my $res = "";
337 0           my $eol = $_[1];
338 0 0         $eol = "\n" unless defined $eol;
339 0           pos($_[0]) = 0; # ensure start at the beginning
340              
341 0           $res = join '', map( pack('u',$_)=~ /^.(\S*)/, ($_[0]=~/(.{1,45})/gs));
342              
343 0           $res =~ tr|` -_|AA-Za-z0-9+/|; # `# help emacs
344             # fix padding at the end
345 0           my $padding = (3 - length($_[0]) % 3) % 3;
346 0 0         $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
  0            
347             # break encoded string into lines of no more than 76 characters each
348 0 0         if (length $eol) {
349 0           $res =~ s/(.{1,76})/$1$eol/g;
350             }
351 0           return $res;
352             }
353              
354              
355             =head2 proxy
356              
357             Title : proxy
358             Usage : $httpproxy = $db->proxy('http') or
359             $db->proxy(['http','ftp'], 'http://myproxy' )
360             Function: Get/Set a proxy for use of proxy. Defaults to environment variable
361             http_proxy if present.
362             Returns : a string indicating the proxy
363             Args : $protocol : string for the protocol to set/get
364             $proxyurl : url of the proxy to use for the specified protocol
365             $username : username (if proxy requires authentication)
366             $password : password (if proxy requires authentication)
367              
368             =cut
369              
370             sub proxy {
371 0     0 1   my $self;
372 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
373 0           $self = shift;
374             }
375 0           my ($protocol,$proxy,$username,$password) = @_;
376 0 0         my $atts = ref($self) ? $self : \%attributes;
377 0   0       $protocol ||= 'http';
378 0 0         if (!$proxy) {
379 0 0         if (defined $ENV{http_proxy}) {
380 0           $proxy = $ENV{http_proxy};
381 0 0         if ($proxy =~ /\@/) {
382 0           ($username, $password, $proxy) = $proxy =~ m{http://(\S+):(\S+)\@(\S+)};
383 0           $proxy = 'http://'.$proxy;
384             }
385             }
386             }
387 0 0         if (defined $proxy) {
388             # default to class method call
389 0 0 0       __PACKAGE__->authentication($username, $password)
390             if ($username && $password);
391 0           $atts->{'_proxy'}->{$protocol} = $proxy;
392             }
393 0           return $atts->{'_proxy'}->{$protocol};
394             }
395              
396             =head2 clear_proxy
397              
398             Title : clear_proxy
399             Usage : my $old_prozy = $db->clear_proxy('http')
400             Function: Unsets (clears) the proxy for the protocol indicated
401             Returns : a string indicating the old proxy value
402             Args : $protocol : string for the protocol to clear
403              
404             =cut
405              
406             sub clear_proxy {
407 0     0 1   my $self;
408 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
409 0           $self = shift;
410             }
411 0           my ($protocol) = @_;
412 0 0         my $atts = ref($self) ? $self : \%attributes;
413 0   0       $protocol ||= 'http';
414 0           delete $atts->{'_proxy'}->{$protocol};
415             }
416              
417             =head2 authentication
418              
419             Title : authentication
420             Usage : $db->authentication($user,$pass)
421             Function: Get/Set authentication credentials
422             Returns : Array of user/pass
423             Args : Array or user/pass
424              
425              
426             =cut
427              
428             sub authentication {
429 0     0 1   my $self;
430 0 0 0       if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
      0        
431 0           $self = shift;
432             }
433 0 0         my $atts = ref($self) ? $self : \%attributes;
434 0 0         if (@_) {
435 0           my ($u,$p) = @_;
436 0 0         my $atts = ref($self) ? $self : \%attributes;
437            
438 0           $atts->{'_authentication'} = [ $u,$p];
439             }
440 0 0         return @{$atts->{'_authentication'} || []};
  0            
441             }
442              
443             }
444              
445             1;