| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Bio::Das::HTTP::Fetch; | 
| 2 |  |  |  |  |  |  | # file: Fetch.pm | 
| 3 |  |  |  |  |  |  | # $Id: Fetch.pm,v 1.18 2009/08/26 21:57:11 lstein Exp $ | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | Bio::Das::HTTP::Fetch - Manage the HTTP protocol for DAS transactions | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | my $fetcher      = Bio::Das::HTTP::Fetch->new( | 
| 12 |  |  |  |  |  |  | -request   => $request, | 
| 13 |  |  |  |  |  |  | -headers   => {'Accept-encoding' => 'gzip'}, | 
| 14 |  |  |  |  |  |  | -proxy     => $proxy, | 
| 15 |  |  |  |  |  |  | -norfcwarn => $nowarn, | 
| 16 |  |  |  |  |  |  | ); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | $fetcher->send_request(); | 
| 19 |  |  |  |  |  |  | $fetcher->read(); | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | my $request          = $fetcher->request; | 
| 22 |  |  |  |  |  |  | my $socket           = $fetcher->socket; | 
| 23 |  |  |  |  |  |  | my $error            = $fetcher->error; | 
| 24 |  |  |  |  |  |  | my $url              = $fetcher->url; | 
| 25 |  |  |  |  |  |  | my $path             = $fetcher->path; | 
| 26 |  |  |  |  |  |  | my $outgoing_args    = $fetcher->outgoing_args; | 
| 27 |  |  |  |  |  |  | my $outgoing_headers = $fetcher->outgoing_headers; | 
| 28 |  |  |  |  |  |  | my $auth             = $fetcher->auth; | 
| 29 |  |  |  |  |  |  | my $incoming_header  = $fetcher->incoming_header; | 
| 30 |  |  |  |  |  |  | my $method           = $fetcher->method; | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | my $protocol         = $fetcher->mode([$new_protocol]); | 
| 33 |  |  |  |  |  |  | my $status           = $fetcher->status([$new_status]); | 
| 34 |  |  |  |  |  |  | my $debug            = $fetcher->debug([$new_debug]); | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | my ($protocol,$host,$port,$path,$user,$pass) = $fetcher->parse_url($url); | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | This is a low-level class that is used for managing multiplexed | 
| 41 |  |  |  |  |  |  | connections to DAS HTTP servers.  It is used internally by L<Bio::Das> | 
| 42 |  |  |  |  |  |  | and it is unlikely that application programs will ever interact with | 
| 43 |  |  |  |  |  |  | it directly.  The exception is when writing custom authentication | 
| 44 |  |  |  |  |  |  | subroutines to fetch username/password information for | 
| 45 |  |  |  |  |  |  | password-protected servers, in which case an L<Bio::Das::HTTP::Fetch> | 
| 46 |  |  |  |  |  |  | is passed to the authentication subroutine. | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | =head2 METHODS | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | Following is a complete list of methods implemented by | 
| 51 |  |  |  |  |  |  | Bio::Das::HTTP::Fetch. | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | =over 4 | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | =cut | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | BEGIN { | 
| 58 | 1 |  |  | 1 |  | 256 | eval "use Errno 'EINPROGRESS','EWOULDBLOCK'"; | 
|  | 1 |  |  | 1 |  | 13 |  | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 234 |  | 
| 59 | 1 | 50 |  |  |  | 39 | unless (defined &EINPROGRESS) { | 
| 60 | 0 |  |  |  |  | 0 | eval "use constant EINPROGRESS => 115; use constant EWOULDBLOCK => 11"; | 
| 61 |  |  |  |  |  |  | } | 
| 62 |  |  |  |  |  |  | } | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 1 |  |  | 1 |  | 9 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 304 |  | 
| 65 | 1 |  |  | 1 |  | 3978 | use IO::Socket qw(:DEFAULT :crlf); | 
|  | 1 |  |  |  |  | 22400 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 66 | 1 |  |  | 1 |  | 1374 | use Bio::Das::Util; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 52 |  | 
| 67 | 1 |  |  | 1 |  | 533 | use Bio::Das::Request; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 36 |  | 
| 68 | 1 |  |  | 1 |  | 972 | use MIME::Base64;  # For HTTP authenication encoding | 
|  | 1 |  |  |  |  | 827 |  | 
|  | 1 |  |  |  |  | 74 |  | 
| 69 | 1 |  |  | 1 |  | 9 | use Carp 'croak'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 70 | 1 |  |  | 1 |  | 4 | use vars '$VERSION'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 53 |  | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | $VERSION = '1.11'; | 
| 73 |  |  |  |  |  |  | my $ERROR = '';   # for errors that occur before we create the object | 
| 74 |  |  |  |  |  |  |  | 
| 75 | 1 |  |  | 1 |  | 5 | use constant READ_UNIT => 1024 * 5;  # 5K read units | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 4016 |  | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =item $fetcher = Bio::Das::HTTP::Request->new(@args) | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | Create a new fetcher object.  At the time the object is created, it | 
| 80 |  |  |  |  |  |  | will attempt to establish a non-blocking connection with the remote | 
| 81 |  |  |  |  |  |  | server.  This means that the call to new() may be returned before the | 
| 82 |  |  |  |  |  |  | connection is established. | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | Arguments are as follows: | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | Name         Description | 
| 87 |  |  |  |  |  |  | ----         ----------- | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | -request     The Bio::Das::Request to run. | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | -headers     A hashref containing additional | 
| 92 |  |  |  |  |  |  | headers to attach to the HTTP request. | 
| 93 |  |  |  |  |  |  | Typically used to enable data stream compression. | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | -proxy       An HTTP proxy to use. | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | -norfcwarn   Disable the warning that appears when the request | 
| 98 |  |  |  |  |  |  | contains username/password information attached to | 
| 99 |  |  |  |  |  |  | the URL. | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | -debug       Activate verbose debugging messages | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =cut | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | # notes: | 
| 106 |  |  |  |  |  |  | # -request: an object implements the following methods: | 
| 107 |  |  |  |  |  |  | #            ->url()            return the url for the request | 
| 108 |  |  |  |  |  |  | #            ->method()         return the method for the request ('auto' allowed) | 
| 109 |  |  |  |  |  |  | #            ->args()           return the args for the request | 
| 110 |  |  |  |  |  |  | #            ->headers($hash)   do something with the HTTP headers (canonicalized) | 
| 111 |  |  |  |  |  |  | #            ->start_body()     the body is starting, so do initialization | 
| 112 |  |  |  |  |  |  | #            ->body($string)    a piece of the body text | 
| 113 |  |  |  |  |  |  | #            ->finish_body()    the body has finished, so do cleanup | 
| 114 |  |  |  |  |  |  | #            ->error()          set an error message | 
| 115 |  |  |  |  |  |  | # | 
| 116 |  |  |  |  |  |  | #  the request should return undef to abort the fetch and cause immediate cleanup | 
| 117 |  |  |  |  |  |  | # | 
| 118 |  |  |  |  |  |  | # -request: a Bio::Das::Request object | 
| 119 |  |  |  |  |  |  | # | 
| 120 |  |  |  |  |  |  | # -headers: hashref whose keys are HTTP headers and whose values are scalars or array refs | 
| 121 |  |  |  |  |  |  | #           required headers will be added | 
| 122 |  |  |  |  |  |  | # | 
| 123 |  |  |  |  |  |  | sub new { | 
| 124 | 6 |  |  | 6 | 1 | 10 | my $pack = shift; | 
| 125 | 6 |  |  |  |  | 36 | my ($request,$headers,$proxy,$norfcwarn,$debug) = rearrange(['request', | 
| 126 |  |  |  |  |  |  | 'headers', | 
| 127 |  |  |  |  |  |  | 'proxy', | 
| 128 |  |  |  |  |  |  | 'norfcwarn', | 
| 129 |  |  |  |  |  |  | 'debug', | 
| 130 |  |  |  |  |  |  | ],@_); | 
| 131 | 6 | 50 |  |  |  | 362 | croak "Please provide a -request argument" unless $request; | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | # parse URL, return components | 
| 134 | 6 |  | 33 |  |  | 30 | my $dest = $proxy || $request->url; | 
| 135 | 6 |  |  |  |  | 26 | my ($mode,$host,$port,$path,$user,$pass) = $pack->parse_url($dest,$norfcwarn); | 
| 136 | 6 | 50 |  |  |  | 16 | croak "invalid url: $dest\n" unless $host; | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 6 | 50 | 33 |  |  | 48 | if (!$user && $request->auth) { | 
| 139 | 0 |  |  |  |  | 0 | ($user,$pass) = $request->auth; | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | # no headers to send by default | 
| 143 | 6 |  | 50 |  |  | 13 | $headers ||= {}; | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | # connect to remote host in nonblocking way | 
| 146 | 6 |  |  |  |  | 23 | my $sock = $pack->connect($mode,$host,$port); | 
| 147 | 6 | 50 |  |  |  | 21 | unless ($sock) { | 
| 148 | 0 |  |  |  |  | 0 | $request->error($pack->error); | 
| 149 | 0 |  |  |  |  | 0 | return; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 6 | 50 |  |  |  | 21 | $path = $request->url if $proxy; | 
| 153 | 6 | 50 |  |  |  | 22 | my $auth = ($user ? encode_base64("$user:$pass") : ""); | 
| 154 | 6 |  |  |  |  | 14 | chomp($auth); | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 6 |  |  |  |  | 10 | $debug=0; | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | # save the rest of our information | 
| 159 | 6 |  |  |  |  | 129 | return bless { | 
| 160 |  |  |  |  |  |  | # ("waiting", "reading header", "reading body", or "parsing body") | 
| 161 |  |  |  |  |  |  | status            => 'waiting', | 
| 162 |  |  |  |  |  |  | socket            => $sock, | 
| 163 |  |  |  |  |  |  | path              => $path, | 
| 164 |  |  |  |  |  |  | request           => $request, | 
| 165 |  |  |  |  |  |  | outgoing_headers  => $headers, | 
| 166 |  |  |  |  |  |  | host              => $host, | 
| 167 |  |  |  |  |  |  | # rather than encoding for every request | 
| 168 |  |  |  |  |  |  | auth              => $auth, | 
| 169 |  |  |  |  |  |  | mode              => $mode, #http vs https | 
| 170 |  |  |  |  |  |  | debug             => $debug, | 
| 171 |  |  |  |  |  |  | incoming_header   => undef,  # none yet | 
| 172 |  |  |  |  |  |  | },$pack; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | # this will return the socket associated with the object | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | =item $socket = $fetcher->socket | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | Return the IO::Socket associated with the HTTP request.  The socket | 
| 180 |  |  |  |  |  |  | is marked nonblocking and may not yet be in a connected state. | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | =item $path = $fetcher->path | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | Return the path part of the HTTP request. | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | =item $request = $fetcher->request | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | Return the L<Bio::Das::Request> object that the fetcher will attempt | 
| 189 |  |  |  |  |  |  | to satisfy. | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | =item $args = $fetcher->args | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | Returns a hashref containing the CGI arguments to be passed to the | 
| 194 |  |  |  |  |  |  | HTTP server.  This is simply delegated to the request's args() method. | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | =item $url = $fetcher->url | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | Returns the URL for the HTTP request. This is simply delegated to the | 
| 200 |  |  |  |  |  |  | request's url() method. | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | =item $headers = $fetcher->outgoing_headers | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | Returns a hashref containing the HTTP headers that will be sent in the | 
| 205 |  |  |  |  |  |  | request. | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | =item $host = $fetcher->host | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | Returns the host to which the fetcher will connect.  Note that this is | 
| 210 |  |  |  |  |  |  | B<not> necessarily the same host as the DAS server, as this method | 
| 211 |  |  |  |  |  |  | will return the name of the B<proxy> if an HTTP proxy has been | 
| 212 |  |  |  |  |  |  | specified.  To get the DAS server hostname, call | 
| 213 |  |  |  |  |  |  | $fetcher->request->host. | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | =item $credentials = $fetcher->auth | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | Return the authentication credentials as a base64-encoded string. | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | =item $header = $fetcher->incoming_header | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | Retrieve the incoming HTTP header.  Depending on the state of the | 
| 222 |  |  |  |  |  |  | connection, the header may be empty or incomplete. | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | =cut | 
| 225 |  |  |  |  |  |  |  | 
| 226 | 12 |  |  | 12 | 1 | 60 | sub socket           { shift->{socket}           } | 
| 227 | 6 |  |  | 6 | 1 | 18 | sub path             { shift->{path}             } | 
| 228 | 156 |  |  | 156 | 1 | 1236 | sub request          { shift->{request}          } | 
| 229 | 12 |  |  | 12 | 0 | 32 | sub outgoing_args    { shift->request->args      } | 
| 230 | 0 |  |  | 0 | 1 | 0 | sub url              { shift->request->url       } | 
| 231 | 6 |  |  | 6 | 1 | 77 | sub outgoing_headers { shift->{outgoing_headers} } | 
| 232 | 0 |  |  | 0 | 1 | 0 | sub host             { shift->{host}             }  # mostly for debugging purposes | 
| 233 | 6 |  |  | 6 | 1 | 18 | sub auth             { shift->{auth}             } | 
| 234 | 0 |  |  | 0 | 1 | 0 | sub incoming_header  { shift->{incoming_header}  }  # buffer for header data | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | =item $mode = $fetcher->mode([$new_mode]) | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | This misnamed method gets or sets the protocol, which is one of 'http' | 
| 240 |  |  |  |  |  |  | for regular cleartext transactions or 'https' for transactions using | 
| 241 |  |  |  |  |  |  | the encrypting SSL/TLS protocol.  Note that you must have | 
| 242 |  |  |  |  |  |  | IO::Socket::SSL and its associated libraries in order to use SSL/TLS. | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | =cut | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | sub mode { | 
| 247 | 6 |  |  | 6 | 1 | 19 | my $self = shift; | 
| 248 | 6 |  |  |  |  | 23 | my $d    = $self->{mode}; | 
| 249 | 6 | 50 |  |  |  | 25 | $self->{mode} = shift if @_; | 
| 250 | 6 |  |  |  |  | 18707 | $d; | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | =item $mode = $fetcher->mode([$new_mode]) | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | This misnamed method gets or sets the protocol, which is one of 'http' | 
| 256 |  |  |  |  |  |  | for regular cleartext transactions or 'https' for transactions using | 
| 257 |  |  |  |  |  |  | the encrypting SSL/TLS protocol.  Note that you must have | 
| 258 |  |  |  |  |  |  | IO::Socket::SSL and its associated libraries in order to use SSL/TLS. | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | =cut | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | sub method   { | 
| 263 | 6 |  |  | 6 | 0 | 12 | my $self = shift; | 
| 264 | 6 |  |  |  |  | 27 | my $meth = uc $self->request->method; | 
| 265 | 6 | 50 |  |  |  | 24 | return 'GET' unless $meth; | 
| 266 | 6 | 50 |  |  |  | 30 | if ($meth eq 'AUTO') { | 
| 267 | 6 | 50 |  |  |  | 27 | return $self->outgoing_args ? 'POST' : 'GET'; | 
| 268 |  |  |  |  |  |  | } | 
| 269 | 0 |  |  |  |  | 0 | return $meth; | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | =item $status = $fetcher->status([$new_status]) | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | This method is used to interrogate or change the status of the | 
| 275 |  |  |  |  |  |  | transaction. The status keeps track of what has been done so far, and | 
| 276 |  |  |  |  |  |  | is one of: | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | waiting          # request not yet sent | 
| 279 |  |  |  |  |  |  | reading header   # request sent, waiting for HTTP header | 
| 280 |  |  |  |  |  |  | reading body     # HTTP header received, waiting for HTTP body | 
| 281 |  |  |  |  |  |  | parsing body     # HTTP body partially received, parsing it | 
| 282 |  |  |  |  |  |  | 0                # transaction finished normally, EOF. | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | =cut | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | sub status   { | 
| 287 | 384 |  |  | 384 | 1 | 523 | my $self = shift; | 
| 288 | 384 |  |  |  |  | 734 | my $d    = $self->{status}; | 
| 289 | 384 | 100 |  |  |  | 977 | if (@_) { | 
| 290 | 24 |  |  |  |  | 57 | $self->{status} = shift; | 
| 291 | 24 | 50 |  |  |  | 63 | warn "STATUS $self->{status}" if $self->debug; | 
| 292 |  |  |  |  |  |  | } | 
| 293 | 384 |  |  |  |  | 1599 | $d; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | =item $debug = $fetcher->debug([$new_debug]) | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | Get or set the debug flag, which enables verbose diagnostic messages. | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | =cut | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | sub debug { | 
| 303 | 150 |  |  | 150 | 1 | 236 | my $self = shift; | 
| 304 | 150 |  |  |  |  | 281 | my $d    = $self->{debug}; | 
| 305 | 150 | 50 |  |  |  | 359 | $self->{debug} = shift if @_; | 
| 306 | 150 |  |  |  |  | 408 | $d; | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | =item ($protocol,$host,$port,$path,$user,$pass) = Bio::Das::HTTP::Fetch->parse_url($url,$norfcwarn) | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | This method is invoked as a class method (as | 
| 312 |  |  |  |  |  |  | Bio::Das::HTTP::Fetch->parse_url) to parse a URL into its | 
| 313 |  |  |  |  |  |  | components. The $norfcwarn flag inhibits a warning about the unsafe | 
| 314 |  |  |  |  |  |  | nature of embedding username/password information in the URL of | 
| 315 |  |  |  |  |  |  | unencrypted transactions. | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | =cut | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | # very basic URL-parsing sub | 
| 320 |  |  |  |  |  |  | sub parse_url { | 
| 321 | 6 |  |  | 6 | 1 | 11 | my $self = shift; | 
| 322 | 6 |  |  |  |  | 10 | my ($url,$norfcwarn)  = @_; | 
| 323 |  |  |  |  |  |  |  | 
| 324 | 6 | 50 |  |  |  | 57 | my ($ssl,$hostent,$path) = $url =~ m!^http(s?)://([^/]+)(/?[^\#]*)! or return; | 
| 325 | 6 |  | 50 |  |  | 18 | $path ||= '/'; | 
| 326 |  |  |  |  |  |  |  | 
| 327 | 6 |  |  |  |  | 9 | my ($user,$pass); | 
| 328 | 6 |  |  |  |  | 32 | ($user, $hostent) = $hostent =~ /^(.*@)?(.*)/; | 
| 329 | 6 | 50 |  |  |  | 15 | ($user, $pass) = split(':',substr($user,0,length($user)-1)) if $user; | 
| 330 | 6 | 0 | 33 |  |  | 19 | if ($pass && !$ssl && !$norfcwarn) { | 
|  |  |  | 33 |  |  |  |  | 
| 331 | 0 |  |  |  |  | 0 | warn "Using password in unencrypted URI against RFC #2396 recommendation"; | 
| 332 |  |  |  |  |  |  | } | 
| 333 |  |  |  |  |  |  |  | 
| 334 | 6 |  |  |  |  | 21 | my ($host,$port) = split(':',$hostent); | 
| 335 | 6 |  |  |  |  | 9 | my ($mode,$defport); | 
| 336 | 6 | 50 |  |  |  | 16 | if ($ssl) { | 
| 337 | 0 |  |  |  |  | 0 | $mode='https'; | 
| 338 | 0 |  |  |  |  | 0 | $defport=443; | 
| 339 |  |  |  |  |  |  | } else { | 
| 340 | 6 |  |  |  |  | 11 | $mode='http'; | 
| 341 | 6 |  |  |  |  | 10 | $defport=80; | 
| 342 |  |  |  |  |  |  | } | 
| 343 | 6 |  | 33 |  |  | 36 | return ($mode,$host,$port||$defport,$path,$user,$pass); | 
| 344 |  |  |  |  |  |  | } | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | =item $socket = Bio::Das::HTTP::Fetch->connect($protocol,$host,$port) | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  | This method is used to make a nonblocking connection to the indicated | 
| 349 |  |  |  |  |  |  | host and port.  $protocol is one of 'http' or 'https'.  The resulting | 
| 350 |  |  |  |  |  |  | IO::Socket will be returned in case of success.  Undef will be | 
| 351 |  |  |  |  |  |  | returned in case of other errors. | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | =cut | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | # this is called to connect to remote host | 
| 356 |  |  |  |  |  |  | sub connect { | 
| 357 | 6 |  |  | 6 | 1 | 11 | my $pack = shift; | 
| 358 | 6 |  |  |  |  | 10 | my ($mode,$host,$port) = @_; | 
| 359 | 6 |  |  |  |  | 9 | my $sock; | 
| 360 | 6 | 50 |  |  |  | 15 | if ($mode eq 'https') { | 
| 361 | 0 |  |  |  |  | 0 | load_ssl(); | 
| 362 | 0 |  |  |  |  | 0 | $sock = IO::Socket::SSL->new(Proto => 'tcp', | 
| 363 |  |  |  |  |  |  | Type => SOCK_STREAM, | 
| 364 |  |  |  |  |  |  | SSL_use_cert => 0, | 
| 365 |  |  |  |  |  |  | SSL_verify_mode => 0x00) | 
| 366 |  |  |  |  |  |  | } else { | 
| 367 | 6 |  |  |  |  | 110 | $sock = IO::Socket::INET->new(Proto => 'tcp', | 
| 368 |  |  |  |  |  |  | Type  => SOCK_STREAM) | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 6 | 50 |  |  |  | 1553 | return unless $sock; | 
| 372 | 6 |  |  |  |  | 28 | $sock->blocking(0); | 
| 373 | 6 | 50 |  |  |  | 6781 | my $host_ip = inet_aton($host) or return $pack->error("410 Unknown host $host"); | 
| 374 | 6 |  |  |  |  | 49 | my $addr = sockaddr_in($port,$host_ip); | 
| 375 | 6 |  |  |  |  | 141 | my $result = $sock->IO::Socket::INET::connect($addr);  # don't allow SSL to do its handshake yet! | 
| 376 | 6 | 50 |  |  |  | 921 | return $sock if $result;  # return the socket if connected immediately | 
| 377 | 0 | 0 |  |  |  | 0 | return $sock if $! == EINPROGRESS;  # or if it's in progress | 
| 378 | 0 |  |  |  |  | 0 | return;                             # return undef on other errors | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | =item $status = $fetcher->send_request() | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | This method sends the HTTP request and returns the resulting status. | 
| 384 |  |  |  |  |  |  | Because of the vagaries of nonblocking IO, the complete request can be | 
| 385 |  |  |  |  |  |  | sent in one shot, in which case the returned status will be "reading | 
| 386 |  |  |  |  |  |  | header", or only a partial request might have been written, in which | 
| 387 |  |  |  |  |  |  | case the returned status will be "waiting."  In the latter case, | 
| 388 |  |  |  |  |  |  | send_request() should be called again until the complete request has | 
| 389 |  |  |  |  |  |  | been submitted. | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | If a communications error occurs, send_request() will return undef, in | 
| 392 |  |  |  |  |  |  | which case it should not be called again. | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | =cut | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | # this is called to send the HTTP request | 
| 397 |  |  |  |  |  |  | sub send_request { | 
| 398 | 6 |  |  | 6 | 1 | 16 | my $self = shift; | 
| 399 | 6 | 50 |  |  |  | 35 | warn "$self->send_request()" if $self->debug; | 
| 400 |  |  |  |  |  |  |  | 
| 401 | 6 | 50 |  |  |  | 34 | die "not in right state, expected state 'waiting' but got '",$self->status,"'" | 
| 402 |  |  |  |  |  |  | unless $self->status eq 'waiting'; | 
| 403 |  |  |  |  |  |  |  | 
| 404 | 6 | 50 |  |  |  | 70 | unless ($self->{socket}->connected) { | 
| 405 | 0 |  |  |  |  | 0 | $! = $self->{socket}->sockopt(SO_ERROR); | 
| 406 | 0 |  |  |  |  | 0 | return $self->error("411 Couldn't connect: $!") ; | 
| 407 |  |  |  |  |  |  | } | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | # if we're in https mode, then we need to complete the | 
| 410 |  |  |  |  |  |  | # SSL handshake at this point | 
| 411 | 6 | 50 |  |  |  | 215 | if ($self->mode eq 'https') { | 
| 412 | 0 | 0 |  |  |  | 0 | $self->complete_ssl_handshake($self->{socket}) || return $self->error("412 SSL error ".$self->{socket}->error); | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  |  | 
| 415 | 6 |  | 33 |  |  | 205 | $self->{formatted_request} ||= $self->format_request(); | 
| 416 |  |  |  |  |  |  |  | 
| 417 | 6 | 50 |  |  |  | 20 | warn "SENDING $self->{formatted_request}" if $self->debug; | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | # Send the header and request.  Note that we have to respect | 
| 420 |  |  |  |  |  |  | # both IO::Socket EWOULDBLOCK errors as well as the dodgy | 
| 421 |  |  |  |  |  |  | # IO::Socket::SSL "SSL wants a write" error. | 
| 422 | 6 |  |  |  |  | 875 | my $bytes = syswrite($self->{socket},$self->{formatted_request}); | 
| 423 | 6 | 50 |  |  |  | 27 | if (!$bytes) { | 
| 424 | 0 | 0 |  |  |  | 0 | return $self->status if $! == EWOULDBLOCK;  # still trying | 
| 425 | 0 | 0 |  |  |  | 0 | return $self->status if $self->{socket}->errstr =~ /SSL wants a write/; | 
| 426 | 0 |  |  |  |  | 0 | return $self->error("412 Communications error: $!"); | 
| 427 |  |  |  |  |  |  | } | 
| 428 | 6 | 50 |  |  |  | 85 | if ($bytes >= length $self->{formatted_request}) { | 
| 429 | 6 |  |  |  |  | 23 | $self->status('reading header'); | 
| 430 |  |  |  |  |  |  | } else { | 
| 431 | 0 |  |  |  |  | 0 | substr($self->{formatted_request},0,$bytes) = '';  # truncate and try again | 
| 432 |  |  |  |  |  |  | } | 
| 433 | 6 |  |  |  |  | 18 | $self->status; | 
| 434 |  |  |  |  |  |  | } | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | =item $status = $fetcher->read() | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | This method is called when the fetcher is in one of the read states | 
| 439 |  |  |  |  |  |  | (reading header, reading body or parsing body).  If successful, it | 
| 440 |  |  |  |  |  |  | returns the new status.  If unsuccessful, it returns undef. | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | On the end of the transaction read() will return numeric 0. | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | =cut | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | # this is called when the socket is ready to be read | 
| 447 |  |  |  |  |  |  | sub read { | 
| 448 | 120 |  |  | 120 | 1 | 221 | my $self = shift; | 
| 449 | 120 |  |  |  |  | 314 | my $stat = $self->status; | 
| 450 | 120 | 100 |  |  |  | 406 | return $self->read_header if $stat eq 'reading header'; | 
| 451 | 114 | 50 | 33 |  |  | 1452 | return $self->read_body   if $stat eq 'reading body' | 
| 452 |  |  |  |  |  |  | or $stat eq 'parsing body'; | 
| 453 |  |  |  |  |  |  | } | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | # read the header through to the $CRLF$CRLF (blank line) | 
| 456 |  |  |  |  |  |  | # return a true value for 200 OK | 
| 457 |  |  |  |  |  |  | sub read_header { | 
| 458 | 6 |  |  | 6 | 0 | 13 | my $self = shift; | 
| 459 |  |  |  |  |  |  |  | 
| 460 | 6 |  | 50 |  |  | 320 | my $bytes = sysread($self->{socket},$self->{header},READ_UNIT,length ($self->{header}||'')); | 
| 461 | 6 | 50 |  |  |  | 29 | if (!defined $bytes) { | 
| 462 | 0 | 0 |  |  |  | 0 | return $self->status if $! == EWOULDBLOCK; | 
| 463 | 0 | 0 |  |  |  | 0 | return $self->status if $self->{socket}->errstr =~ /SSL wants a read/; | 
| 464 |  |  |  |  |  |  | } | 
| 465 | 6 | 50 |  |  |  | 19 | return $self->error("412 Communications error") unless $bytes > 0; | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | # have we found the CRLF yet? | 
| 468 | 6 |  |  |  |  | 70 | my $i = rindex($self->{header},"$CRLF$CRLF"); | 
| 469 | 6 | 50 |  |  |  | 24 | return $self->status unless $i >= 0;  # no, so keep waiting | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  | # found the header | 
| 472 |  |  |  |  |  |  | # If we have stuff after the header, then process it | 
| 473 | 6 |  |  |  |  | 25 | my $header     = substr($self->{header},0,$i); | 
| 474 | 6 |  |  |  |  | 48 | my $extra_data = substr($self->{header},$i+4); | 
| 475 |  |  |  |  |  |  |  | 
| 476 | 6 |  |  |  |  | 88 | my ($status_line,@other_lines) = split $CRLF,$header; | 
| 477 | 6 |  |  |  |  | 76 | my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!; | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | # If unauthorized, capture the realm for the authentication | 
| 480 | 6 | 50 |  |  |  | 31 | if($stat_code == 401){ | 
| 481 |  |  |  |  |  |  | # Can't use do_headers, Request will barf on lack of X-Das version | 
| 482 | 0 | 0 |  |  |  | 0 | if(my ($line) = grep /^WWW-Authenticate:\s+/, @other_lines){ | 
| 483 | 0 |  |  |  |  | 0 | my ($scheme,$realm) = $line =~ /^\S+:\s+(\S+)\s+realm="(.*?)"/; | 
| 484 | 0 | 0 |  |  |  | 0 | if($scheme ne 'Basic'){ | 
| 485 | 0 |  |  |  |  | 0 | $self->error("413 Authentication scheme '$scheme' is not supported"); | 
| 486 |  |  |  |  |  |  | } | 
| 487 |  |  |  |  |  |  | # The realm is actually allowed to be blank according to RFC #1945 BNF | 
| 488 | 0 |  |  |  |  | 0 | return $self->error("$stat_code '$realm' realm needs proper authentication"); | 
| 489 |  |  |  |  |  |  | } | 
| 490 |  |  |  |  |  |  | } | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | # On non-200 status codes return an error | 
| 493 | 6 | 50 |  |  |  | 17 | return $self->error("$stat_code $stat_msg") unless $stat_code == 200; | 
| 494 |  |  |  |  |  |  |  | 
| 495 |  |  |  |  |  |  | # handle header | 
| 496 | 6 | 50 |  |  |  | 35 | $self->do_headers(@other_lines) || return; | 
| 497 |  |  |  |  |  |  |  | 
| 498 | 6 |  |  |  |  | 22 | $self->status('reading body'); | 
| 499 | 6 | 50 | 50 |  |  | 30 | $self->do_body($extra_data) || return if length $extra_data; | 
| 500 |  |  |  |  |  |  |  | 
| 501 | 6 |  |  |  |  | 20 | undef $self->{header};  # don't need header now | 
| 502 | 6 |  |  |  |  | 25 | return $self->status; | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | sub read_body { | 
| 506 | 114 |  |  | 114 | 0 | 180 | my $self = shift; | 
| 507 | 114 |  |  |  |  | 130 | my $data; | 
| 508 | 114 |  |  |  |  | 4539 | my $result = sysread($self->{socket},$data,READ_UNIT); | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | # call do_body() if we read data | 
| 511 | 114 | 100 |  |  |  | 318 | if ($result) { | 
|  |  | 50 |  |  |  |  |  | 
| 512 | 108 | 50 |  |  |  | 353 | $self->do_body($data) or return; | 
| 513 | 108 |  |  |  |  | 527 | return $self->status; | 
| 514 |  |  |  |  |  |  | } | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | # call request's finish_body() method on normal EOF | 
| 517 |  |  |  |  |  |  | elsif (defined $result) { | 
| 518 | 6 | 50 | 50 |  |  | 16 | $self->request->finish_body or return if $self->request; | 
| 519 | 6 |  |  |  |  | 15 | $self->status(0); | 
| 520 | 6 |  |  |  |  | 16 | return 0; | 
| 521 |  |  |  |  |  |  | } | 
| 522 |  |  |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | # sysread() returned undef, so error out | 
| 524 |  |  |  |  |  |  | else { | 
| 525 | 0 | 0 |  |  |  | 0 | return $self->status if $! == EWOULDBLOCK;  # well, this is OK | 
| 526 | 0 | 0 |  |  |  | 0 | return $self->status if $self->{socket}->errstr =~ /SSL wants a write/; | 
| 527 | 0 |  |  |  |  | 0 | my $errmsg = "read error: $!"; | 
| 528 | 0 | 0 |  |  |  | 0 | if (my $cb = $self->request) { | 
| 529 | 0 |  |  |  |  | 0 | $cb->finish_body; | 
| 530 | 0 |  |  |  |  | 0 | $cb->error("412 Communications error: $errmsg"); | 
| 531 |  |  |  |  |  |  | } | 
| 532 | 0 |  |  |  |  | 0 | return $self->error("412 Communications error: $errmsg"); | 
| 533 |  |  |  |  |  |  | } | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | } | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | =item $http_request_string = $fetcher->format_request | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | This method generates the appropriate GET or POST HTTP request and the | 
| 540 |  |  |  |  |  |  | HTTP request headers. | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | =cut | 
| 543 |  |  |  |  |  |  |  | 
| 544 |  |  |  |  |  |  | # this generates the appropriate GET or POST request | 
| 545 |  |  |  |  |  |  | sub format_request { | 
| 546 | 6 |  |  | 6 | 1 | 16 | my $self    = shift; | 
| 547 | 6 |  |  |  |  | 27 | my $method  = $self->method; | 
| 548 | 6 |  |  |  |  | 29 | my $args    = $self->format_args; | 
| 549 | 6 |  |  |  |  | 29 | my $path    = $self->path; | 
| 550 | 6 |  |  |  |  | 26 | my $auth    = $self->auth; | 
| 551 | 6 |  |  |  |  | 26 | my $host    = $self->request->host; | 
| 552 |  |  |  |  |  |  |  | 
| 553 | 6 |  |  |  |  | 74 | my @additional_headers = ('User-agent' => join('/',__PACKAGE__,$VERSION), | 
| 554 |  |  |  |  |  |  | 'Host'       => $host); | 
| 555 | 6 | 50 |  |  |  | 17 | push @additional_headers,('Authorization' => "Basic $auth") if $auth; | 
| 556 | 6 | 100 | 66 |  |  | 47 | push @additional_headers,('Content-length' => length $args, | 
| 557 |  |  |  |  |  |  | 'Content-type'   => 'application/x-www-form-urlencoded') | 
| 558 |  |  |  |  |  |  | if $args && $method eq 'POST'; | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | # probably don't want to do this | 
| 561 | 6 | 100 | 66 |  |  | 49 | $method = 'GET' if $method eq 'POST' && !$args; | 
| 562 |  |  |  |  |  |  |  | 
| 563 |  |  |  |  |  |  | # there is an automatic CRLF pair at the bottom of headers, so don't add it | 
| 564 | 6 |  |  |  |  | 37 | my $headers = $self->format_headers(@additional_headers); | 
| 565 |  |  |  |  |  |  |  | 
| 566 | 6 |  |  |  |  | 48 | return join CRLF,"$method $path HTTP/1.0",$headers,$args; | 
| 567 |  |  |  |  |  |  | } | 
| 568 |  |  |  |  |  |  |  | 
| 569 |  |  |  |  |  |  | =item $cgi_query_string = $fetcher->format_args | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | This method generates the CGI query string. | 
| 572 |  |  |  |  |  |  |  | 
| 573 |  |  |  |  |  |  | =cut | 
| 574 |  |  |  |  |  |  |  | 
| 575 |  |  |  |  |  |  | # this creates the CGI request string | 
| 576 |  |  |  |  |  |  | sub format_args { | 
| 577 | 6 |  |  | 6 | 1 | 8 | my $self = shift; | 
| 578 | 6 |  |  |  |  | 10 | my @args; | 
| 579 | 6 | 50 |  |  |  | 19 | if (my $a = $self->outgoing_args) { | 
| 580 | 6 |  |  |  |  | 55 | foreach (keys %$a) { | 
| 581 | 14 | 100 |  |  |  | 51 | next unless defined $a->{$_}; | 
| 582 | 5 |  |  |  |  | 16 | my $key    = escape($_); | 
| 583 | 5 | 50 |  |  |  | 28 | my @values = ref($a->{$_}) eq 'ARRAY' ? map { escape($_) } @{$a->{$_}} | 
|  | 4 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 19 |  | 
| 584 |  |  |  |  |  |  | : $a->{$_}; | 
| 585 | 5 |  |  |  |  | 17 | push @args,"$key=$_" foreach (grep {$_ ne ''} @values); | 
|  | 4 |  |  |  |  | 36 |  | 
| 586 |  |  |  |  |  |  | } | 
| 587 |  |  |  |  |  |  | } | 
| 588 |  |  |  |  |  |  |  | 
| 589 |  |  |  |  |  |  | #print STDERR "ARGS: ",join (';',@args) , "\n"; | 
| 590 | 6 |  |  |  |  | 32 | return join ';',@args; | 
| 591 |  |  |  |  |  |  | } | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | =item $headers = $fetcher->format_headers | 
| 595 |  |  |  |  |  |  |  | 
| 596 |  |  |  |  |  |  | This method generates the outgoing HTTP request headers, for use by | 
| 597 |  |  |  |  |  |  | format_request(). | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | =cut | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | # this creates the request headers | 
| 602 |  |  |  |  |  |  | sub format_headers { | 
| 603 | 6 |  |  | 6 | 1 | 11 | my $self    = shift; | 
| 604 | 6 |  |  |  |  | 21 | my @additional_headers = @_; | 
| 605 |  |  |  |  |  |  |  | 
| 606 |  |  |  |  |  |  | # this order allows overriding | 
| 607 | 6 |  |  |  |  | 15 | my %headers = (@additional_headers,%{$self->outgoing_headers}); | 
|  | 6 |  |  |  |  | 23 |  | 
| 608 |  |  |  |  |  |  |  | 
| 609 |  |  |  |  |  |  | # clean up the headers | 
| 610 | 6 |  |  |  |  | 17 | my %clean_headers; | 
| 611 | 6 |  |  |  |  | 59 | for my $h (keys %headers) { | 
| 612 | 30 | 50 |  |  |  | 98 | next if $h =~ /\s/;  # no whitespace allowed - invalid header | 
| 613 | 30 | 50 |  |  |  | 111 | my @values = ref($headers{$h}) eq 'ARRAY' ? @{$headers{$h}} | 
|  | 0 |  |  |  |  | 0 |  | 
| 614 |  |  |  |  |  |  | : $headers{$h}; | 
| 615 | 30 |  |  |  |  | 69 | foreach (@values) { s/[\n\r\t]/ / }        # replace newlines and tabs with spaces | 
|  | 30 |  |  |  |  | 313 |  | 
| 616 | 30 |  |  |  |  | 524 | $clean_headers{canonicalize($h)} = \@values;  # canonicalize | 
| 617 |  |  |  |  |  |  | } | 
| 618 |  |  |  |  |  |  |  | 
| 619 | 6 |  |  |  |  | 15 | my @lines; | 
| 620 | 6 |  |  |  |  | 25 | for my $k (keys %clean_headers) { | 
| 621 | 30 |  |  |  |  | 35 | for my $v (@{$clean_headers{$k}}) { | 
|  | 30 |  |  |  |  | 52 |  | 
| 622 | 30 |  |  |  |  | 105 | push @lines,"$k: $v"; | 
| 623 |  |  |  |  |  |  | } | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  |  | 
| 626 | 6 |  |  |  |  | 67 | return join CRLF,@lines,''; | 
| 627 |  |  |  |  |  |  | } | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  |  | 
| 630 |  |  |  |  |  |  | =item $escaped_string = $fetcher->escape($unescaped_string) | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | This method performs URL escaping on the passed string. | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  | =cut | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  |  | 
| 637 |  |  |  |  |  |  | sub escape { | 
| 638 | 9 |  |  | 9 | 1 | 17 | my $s = shift; | 
| 639 | 9 |  |  |  |  | 68 | $s =~ s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; | 
|  | 8 |  |  |  |  | 64 |  | 
| 640 | 9 |  |  |  |  | 37 | $s; | 
| 641 |  |  |  |  |  |  | } | 
| 642 |  |  |  |  |  |  |  | 
| 643 |  |  |  |  |  |  | =item $canonicalized_string = $fetcher->canonicalize($uncanonicalized_string) | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | This method canonicalizes the case of HTTP headers. | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  | =cut | 
| 648 |  |  |  |  |  |  |  | 
| 649 |  |  |  |  |  |  | sub canonicalize { | 
| 650 | 72 |  |  | 72 | 1 | 339 | my $s = shift; | 
| 651 | 72 |  |  |  |  | 159 | $s = ucfirst lc $s; | 
| 652 | 72 |  |  |  |  | 260 | $s =~ s/(-\w)/uc $1/eg; | 
|  | 66 |  |  |  |  | 217 |  | 
| 653 | 72 |  |  |  |  | 402 | $s; | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  | =item $fetcher->do_headers(@header_lines) | 
| 657 |  |  |  |  |  |  |  | 
| 658 |  |  |  |  |  |  | This method parses the incoming HTTP header and saves the fields | 
| 659 |  |  |  |  |  |  | internally where they can be accessed using the headers() method. | 
| 660 |  |  |  |  |  |  |  | 
| 661 |  |  |  |  |  |  | =cut | 
| 662 |  |  |  |  |  |  |  | 
| 663 |  |  |  |  |  |  | sub do_headers { | 
| 664 | 6 |  |  | 6 | 1 | 14 | my $self = shift; | 
| 665 | 6 |  |  |  |  | 16 | my @header_lines = @_; | 
| 666 |  |  |  |  |  |  |  | 
| 667 |  |  |  |  |  |  | # split 'em into a hash, merge duplicates with semicolons | 
| 668 | 6 |  |  |  |  | 8 | my %headers; | 
| 669 | 6 |  |  |  |  | 17 | foreach (@header_lines) { | 
| 670 | 42 | 50 |  |  |  | 234 | my ($header,$value) = /^(\S+): (.+)$/ or next; | 
| 671 | 42 | 50 |  |  |  | 124 | $headers{canonicalize($header)} = $headers{$header} ? "; $value" : $value; | 
| 672 |  |  |  |  |  |  | } | 
| 673 |  |  |  |  |  |  |  | 
| 674 | 6 | 50 |  |  |  | 36 | if (my $request = $self->request) { | 
| 675 | 6 | 50 |  |  |  | 94 | $request->headers(\%headers) || return $self->error($request->error); | 
| 676 |  |  |  |  |  |  | } | 
| 677 | 6 |  |  |  |  | 45 | 1; | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  |  | 
| 680 |  |  |  |  |  |  | =item $result = $fetcher->do_body($body_data) | 
| 681 |  |  |  |  |  |  |  | 
| 682 |  |  |  |  |  |  | This method handles the parsing of the DAS document data by sending it | 
| 683 |  |  |  |  |  |  | to the Bio::Das::Request object.  It returns a true result if parsing | 
| 684 |  |  |  |  |  |  | was successful, or false otherwise. | 
| 685 |  |  |  |  |  |  |  | 
| 686 |  |  |  |  |  |  | =cut | 
| 687 |  |  |  |  |  |  | # this is called to read the body of the message and act on it | 
| 688 |  |  |  |  |  |  | sub do_body { | 
| 689 | 114 |  |  | 114 | 1 | 219 | my $self = shift; | 
| 690 | 114 |  |  |  |  | 246 | my $data = shift; | 
| 691 |  |  |  |  |  |  |  | 
| 692 | 114 | 50 |  |  |  | 343 | my $request = $self->request or return; | 
| 693 | 114 | 100 |  |  |  | 349 | if ($self->status eq 'reading body') { # transition | 
| 694 | 6 | 50 |  |  |  | 44 | $request->start_body or return; | 
| 695 | 6 |  |  |  |  | 19 | $self->status('parsing body'); | 
| 696 |  |  |  |  |  |  | } | 
| 697 |  |  |  |  |  |  |  | 
| 698 | 114 | 50 |  |  |  | 386 | warn "parsing()...." if $self->debug; | 
| 699 | 114 |  |  |  |  | 512 | return $request->body($data); | 
| 700 |  |  |  |  |  |  | } | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  | =item $error = $fetcher->error([$new_error]) | 
| 703 |  |  |  |  |  |  |  | 
| 704 |  |  |  |  |  |  | When called without arguments, error() returns the last error message | 
| 705 |  |  |  |  |  |  | generated by the module.  When called with arguments, error() sets the | 
| 706 |  |  |  |  |  |  | error message and returns undef. | 
| 707 |  |  |  |  |  |  |  | 
| 708 |  |  |  |  |  |  | =cut | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | # warn in case of error and return undef | 
| 711 |  |  |  |  |  |  | sub error { | 
| 712 | 120 |  |  | 120 | 1 | 211 | my $self = shift; | 
| 713 | 120 | 50 |  |  |  | 309 | if (@_) { | 
| 714 | 0 | 0 |  |  |  | 0 | unless (ref $self) { | 
| 715 | 0 |  |  |  |  | 0 | $ERROR = "@_"; | 
| 716 | 0 |  |  |  |  | 0 | return; | 
| 717 |  |  |  |  |  |  | } | 
| 718 | 0 | 0 |  |  |  | 0 | warn "$self->{url}: ",@_ if $self->debug; | 
| 719 | 0 |  |  |  |  | 0 | $self->{error} = "@_"; | 
| 720 | 0 |  |  |  |  | 0 | return; | 
| 721 |  |  |  |  |  |  | } else { | 
| 722 | 120 | 50 |  |  |  | 641 | return ref($self) ? $self->{error} : $ERROR; | 
| 723 |  |  |  |  |  |  | } | 
| 724 |  |  |  |  |  |  | } | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | =item $fetcher->load_ssl | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | This method performs initialization needed to use SSL/TLS transactions. | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | =cut | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  | sub load_ssl { | 
| 733 | 0 | 0 |  | 0 | 1 |  | eval 'require IO::Socket::SSL' or croak "Must have IO::Socket::SSL installed to use https: urls: $@"; | 
| 734 |  |  |  |  |  |  |  | 
| 735 |  |  |  |  |  |  | # cheating a bit -- IO::Socket::SSL doesn't have this function, and needs to! | 
| 736 | 0 | 0 |  |  |  |  | eval <<'END' unless defined &IO::Socket::SSL::pending; | 
| 737 |  |  |  |  |  |  | sub IO::Socket::SSL::pending { | 
| 738 |  |  |  |  |  |  | my $self = shift; | 
| 739 |  |  |  |  |  |  | my $ssl  = ${*$self}{'_SSL_object'}; | 
| 740 |  |  |  |  |  |  | return Net::SSLeay::pending($ssl); # * | 
| 741 |  |  |  |  |  |  | } | 
| 742 |  |  |  |  |  |  | END | 
| 743 |  |  |  |  |  |  |  | 
| 744 |  |  |  |  |  |  | } | 
| 745 |  |  |  |  |  |  |  | 
| 746 |  |  |  |  |  |  | =item $fetcher->complete_ssl_handshake($sock) | 
| 747 |  |  |  |  |  |  |  | 
| 748 |  |  |  |  |  |  | This method is called to complete the SSL handshake, which must be | 
| 749 |  |  |  |  |  |  | performed in blocking mode.  After completing the connection, the | 
| 750 |  |  |  |  |  |  | socket is set back to nonblocking. | 
| 751 |  |  |  |  |  |  |  | 
| 752 |  |  |  |  |  |  | =cut | 
| 753 |  |  |  |  |  |  |  | 
| 754 |  |  |  |  |  |  | sub complete_ssl_handshake { | 
| 755 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 756 | 0 |  |  |  |  |  | my $sock = shift; | 
| 757 | 0 |  |  |  |  |  | $sock->blocking(1);  # handshake requires nonblocking i/o | 
| 758 | 0 |  |  |  |  |  | my $result = $sock->connect_SSL($sock); | 
| 759 | 0 |  |  |  |  |  | $sock->blocking(0); | 
| 760 |  |  |  |  |  |  | } | 
| 761 |  |  |  |  |  |  |  | 
| 762 |  |  |  |  |  |  | # necessary to define these methods so that IO::Socket::INET objects will act like | 
| 763 |  |  |  |  |  |  | # IO::Socket::SSL objects. | 
| 764 | 0 |  |  | 0 | 0 |  | sub IO::Socket::INET::pending { 0     } | 
| 765 | 0 |  |  | 0 | 0 |  | sub IO::Socket::INET::errstr  { undef } | 
| 766 |  |  |  |  |  |  |  | 
| 767 |  |  |  |  |  |  |  | 
| 768 |  |  |  |  |  |  | =head1 AUTHOR | 
| 769 |  |  |  |  |  |  |  | 
| 770 |  |  |  |  |  |  | Lincoln Stein <lstein@cshl.org>. | 
| 771 |  |  |  |  |  |  |  | 
| 772 |  |  |  |  |  |  | Copyright (c) 2001 Cold Spring Harbor Laboratory | 
| 773 |  |  |  |  |  |  |  | 
| 774 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 775 |  |  |  |  |  |  | it under the same terms as Perl itself.  See DISCLAIMER.txt for | 
| 776 |  |  |  |  |  |  | disclaimers of warranty. | 
| 777 |  |  |  |  |  |  |  | 
| 778 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 779 |  |  |  |  |  |  |  | 
| 780 |  |  |  |  |  |  | L<Bio::Das::Request>, L<Bio::Das::HTTP::Fetch>, | 
| 781 |  |  |  |  |  |  | L<Bio::Das::Segment>, L<Bio::Das::Type>, L<Bio::Das::Stylesheet>, | 
| 782 |  |  |  |  |  |  | L<Bio::Das::Source>, L<Bio::RangeI> | 
| 783 |  |  |  |  |  |  |  | 
| 784 |  |  |  |  |  |  | =cut | 
| 785 |  |  |  |  |  |  |  | 
| 786 |  |  |  |  |  |  | 1; |