File Coverage

blib/lib/Perlbal/ClientHTTP.pm
Criterion Covered Total %
statement 246 267 92.1
branch 86 126 68.2
condition 32 52 61.5
subroutine 36 37 97.3
pod 3 16 18.7
total 403 498 80.9


line stmt bran cond sub pod time code
1             ######################################################################
2             # HTTP Connection from a reverse proxy client. GET/HEAD only.
3             # most functionality is implemented in the base class.
4             #
5             # Copyright 2004, Danga Interactive, Inc.
6             # Copyright 2005-2007, Six Apart, Ltd.
7             #
8              
9             package Perlbal::ClientHTTP;
10 22     22   412 use strict;
  22         45  
  22         885  
11 22     22   124 use warnings;
  22         50  
  22         1015  
12 22     22   188 no warnings qw(deprecated);
  22         49  
  22         786  
13              
14 22     22   119 use base "Perlbal::ClientHTTPBase";
  22         42  
  22         6436  
15 22     22   455 use Perlbal::Util;
  22         49  
  22         1211  
16              
17 22         165 use fields ('put_in_progress', # 1 when we're currently waiting for an async job to return
18             'put_fh', # file handle to use for writing data
19             'put_fh_filename', # filename of put_fh
20             'put_final_name', # final pathname of put_fh
21             'put_pos', # file offset to write next data at
22              
23             'content_length', # length of document being transferred
24             'content_length_remain', # bytes remaining to be read
25             'chunked_upload_state', # bool/obj: if processing a chunked upload, Perlbal::ChunkedUploadState object, else undef
26             'md5_ctx', # Digest::MD5 used to verify Content-MD5
27 22     22   124 );
  22         85  
28              
29 22     22   1960 use HTTP::Date ();
  22         49  
  22         393  
30 22     22   114 use File::Path;
  22         39  
  22         2541  
31              
32 22     22   132 use Errno qw( EPIPE );
  22         42  
  22         1133  
33 22     22   113 use POSIX qw( O_CREAT O_TRUNC O_WRONLY O_RDONLY O_EXCL ENOENT EEXIST );
  22         45  
  22         227  
34 22     22   2384 use Digest::MD5;
  22         49  
  22         99491  
35              
36             # class list of directories we know exist
37             our (%VerifiedDirs);
38              
39             sub new {
40 42     42 1 110 my $class = shift;
41              
42 42         248 my $self = fields::new($class);
43 42         26752 $self->SUPER::new(@_);
44 42         1590 $self->init;
45 42         258 return $self;
46             }
47              
48             # upcasting a generic ClientHTTPBase (from a service selector) to a
49             # "full-fledged" ClientHTTP.
50             sub new_from_base {
51 7     7 0 19 my $class = shift;
52 7         174 my Perlbal::ClientHTTPBase $cb = shift; # base object
53 7         38 Perlbal::Util::rebless($cb, $class);
54 7         36 $cb->init;
55              
56 7         43 $cb->watch_read(1); # enable our reads, so we can get PUT/POST data
57 7         228 $cb->handle_request; # this will disable reads, if GET/HEAD/etc
58 7         298 return $cb;
59             }
60              
61             sub init {
62 49     49 0 89 my Perlbal::ClientHTTP $self = shift;
63 49         127 $self->{put_in_progress} = 0;
64 49         108 $self->{put_fh} = undef;
65 49         113 $self->{put_pos} = 0;
66 49         101 $self->{chunked_upload_state} = undef;
67 49         101 $self->{md5_ctx} = undef;
68 49         103 $self->{put_final_name} = undef;
69             }
70              
71             sub close {
72 41     41 1 162 my Perlbal::ClientHTTP $self = shift;
73              
74             # don't close twice
75 41 50       146 return if $self->{closed};
76              
77 41         85 $self->{put_fh} = undef;
78 41         233 $self->SUPER::close(@_);
79             }
80              
81             sub setup_keepalive {
82 63     63 0 131 my Perlbal::ClientHTTP $self = $_[0];
83 63   100     302 my $not_done_reading = defined $self->{content_length_remain} && $self->{content_length_remain} > 0;
84              
85 63 100       771 return $self->SUPER::setup_keepalive($_[1], $not_done_reading ? 0 : undef);
86             }
87              
88             sub event_read {
89 99     99 1 812963 my Perlbal::ClientHTTP $self = shift;
90 99         302 $self->{alive_time} = $Perlbal::tick_time;
91              
92             # see if we have headers?
93 99 100       403 if ($self->{req_headers}) {
94 40 50       163 if ($self->{req_headers}->request_method eq 'PUT') {
95 40         117 $self->event_read_put;
96             } else {
97             # since we have headers and we're not doing any special
98             # handling above, let's just disable read notification, because
99             # we won't do anything with the data
100 0         0 $self->watch_read(0);
101             }
102 40         852 return;
103             }
104              
105             # try and get the headers, if they're all here
106 59 100       585 my $hd = $self->read_request_headers
107             or return;
108              
109 56         253 $self->handle_request;
110             }
111              
112             # one-time routing of new request to the right handlers
113             sub handle_request {
114 63     63 0 135 my Perlbal::ClientHTTP $self = shift;
115 63         150 my $hd = $self->{req_headers};
116              
117 63         401 $self->check_req_headers;
118              
119             # fully formed request received
120 63         139 $self->{requests}++;
121              
122             # notify that we're about to serve
123 63 50       739 return if $self->{service}->run_hook('start_web_request', $self);
124 63 100       239 return if $self->{service}->run_hook('start_http_request', $self);
125              
126             # GET/HEAD requests (local, from disk)
127 62 100 100     1162 if ($hd->request_method eq 'GET' || $hd->request_method eq 'HEAD') {
128             # and once we have it, start serving
129 45         390 $self->watch_read(0);
130 45         1439 return $self->_serve_request($hd);
131             }
132              
133             # PUT requests
134 17 100       54 return $self->handle_put if $hd->request_method eq 'PUT';
135              
136             # DELETE requests
137 3 50       14 return $self->handle_delete if $hd->request_method eq 'DELETE';
138              
139             # else, bad request
140 0         0 return $self->send_response(400);
141             }
142              
143             sub handle_put {
144 14     14 0 31 my Perlbal::ClientHTTP $self = shift;
145 14         35 my $hd = $self->{req_headers};
146              
147 14 100       73 return $self->send_response(403) unless $self->{service}->{enable_put};
148              
149 13 100 100     89 $self->{md5_ctx} = $self->{service}->{enable_md5} && $hd->header('Content-MD5') ? Digest::MD5->new : undef;
150              
151 13 100       53 return if $self->handle_put_chunked;
152              
153             # they want to put something, so let's setup and wait for more reads
154 12         38 my $clen =
155             $self->{content_length} =
156             $self->{content_length_remain} =
157             $hd->header('Content-length') + 0;
158              
159             # return a 400 (bad request) if we got no content length or if it's
160             # bigger than any specified max put size
161 12 50 33     129 return $self->send_response(400, "Content-length of $clen is invalid.")
      33        
      33        
162             if ! defined($clen) ||
163             $clen < 0 ||
164             ($self->{service}->{max_put_size} &&
165             $clen > $self->{service}->{max_put_size});
166              
167             # if we are supposed to read data and have some data already from a header over-read, note it
168 12 50 66     107 if ($clen && defined $self->{read_ahead} && $self->{read_ahead} > 0) {
      66        
169 0         0 $self->{content_length_remain} -= $self->{read_ahead};
170             }
171              
172 12 50       55 return if $self->{service}->run_hook('handle_put', $self);
173              
174             # error in filename? (any .. is an error)
175 12         50 my $uri = $self->{req_headers}->request_uri;
176 12 50       546 return $self->send_response(400, 'Invalid filename')
177             if $uri =~ /\.\./;
178              
179             # now we want to get the URI
180 12 50       127 return $self->send_response(400, 'Invalid filename')
181             unless $uri =~ m!^
182             ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is
183             # one+ conservative characters
184             / # path separator
185             ([\w\-\.]+) # $2: and the filename, one+ conservative characters
186             $!x;
187              
188             # sanitize uri into path and file into a disk path and filename
189 12   100     78 my ($path, $filename) = ($1 || '', $2);
190              
191             # the final action we'll be taking, eventually, is to start an async
192             # file open of the requested disk path. but we might need to verify
193             # the min_put_directory first.
194             my $start_open = sub {
195 11     11   53 my $disk_path = $self->{service}->{docroot} . '/' . $path;
196 11         49 $self->start_put_open($disk_path, $filename);
197 12         83 };
198              
199             # verify minput if necessary
200 12 100       71 if ($self->{service}->{min_put_directory}) {
201 3 50       13 my @elems = grep { defined $_ && length $_ } split '/', $path;
  9         45  
202 3 50       16 return $self->send_response(400, 'Does not meet minimum directory requirement')
203             unless scalar(@elems) >= $self->{service}->{min_put_directory};
204 3         16 my $req_path = '/' . join('/', splice(@elems, 0, $self->{service}->{min_put_directory}));
205 3         10 my $extra_path = '/' . join('/', @elems);
206 3         16 $self->validate_min_put_directory($req_path, $extra_path, $filename, $start_open);
207             } else {
208 9         19 $start_open->();
209             }
210              
211 12         235 return;
212             }
213              
214             sub handle_put_chunked {
215 13     13 0 30 my Perlbal::ClientHTTP $self = shift;
216 13         41 my $req_hd = $self->{req_headers};
217 13         43 my $te = $req_hd->header("Transfer-Encoding");
218 13 100 66     77 return unless $te && $te eq "chunked";
219              
220 1         3 my $eh = $req_hd->header("Expect");
221 1 50 33     12 if ($eh && $eh =~ /\b100-continue\b/) {
222 1         10 $self->write(\ "HTTP/1.1 100 Continue\r\n\r\n");
223             }
224              
225 1         3 my $max_size = $self->{service}{max_chunked_request_size};
226              
227             # error in filename? (any .. is an error)
228 1         6 my $uri = $self->{req_headers}->request_uri;
229 1 50       5 return $self->send_response(400, 'Invalid filename')
230             if $uri =~ /\.\./;
231              
232             # now we want to get the URI
233 1 50       16 return $self->send_response(400, 'Invalid filename')
234             unless $uri =~ m!^
235             ((?:/[\w\-\.]+)*) # $1: zero+ path components of /FOO where foo is
236             # one+ conservative characters
237             / # path separator
238             ([\w\-\.]+) # $2: and the filename, one+ conservative characters
239             $!x;
240              
241             # sanitize uri into path and file into a disk path and filename
242 1   50     9 my ($path, $filename) = ($1 || '', $2);
243              
244 1         4 my $disk_path = $self->{service}->{docroot} . '/' . $path;
245              
246 1         3 $self->{chunked_upload_state} = Perlbal::ChunkedUploadState->new(%{{
247             on_new_chunk => sub {
248 32     32   40 my $cref = shift;
249 32         43 my $len = length($$cref);
250 32         45 push @{$self->{read_buf}}, $cref;
  32         75  
251              
252 32         71 $self->{read_ahead} += $len;
253 32         54 $self->{content_length} += $len;
254              
255             # if too large, disconnect them...
256 32 50 33     180 if ($max_size && $self->{content_length} > $max_size) {
257             # TODO: delete file at this point? we're disconnecting them
258             # to prevent them from writing more, but do we care to keep
259             # what they already wrote?
260 0         0 $self->close;
261 0         0 return;
262             }
263              
264             # Reading too far ahead of our AIO subsystem will cause us to buffer it in memory.
265 32 50       71 $self->watch_read(0) if $self->{read_ahead} >= 1024 * 1024; # arbitrary
266             # ->put_writeout clears {read_ahead}, so we run it after we need that
267 32 100       123 $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary
268             },
269             on_disconnect => sub {
270 0     0   0 warn "Disconnect during chunked PUT.\n";
271              
272             # TODO: do we unlink the file here, since it wasn't a proper close
273             # ending in a zero-length chunk? perhaps a config option? for
274             # now we'll just leave it on disk with what we've got so far:
275 0         0 $self->close('remote_closure_during_chunked_put');
276             },
277             on_zero_chunk => sub {
278 1     1   3 $self->{chunked_upload_state} = undef;
279 1         5 $self->watch_read(0);
280              
281             # kick off any necessary aio writes:
282 1         31 $self->put_writeout;
283             # this will do nothing, if a put is already in progress:
284 1         5 $self->put_close;
285             },
286 1         30 }});
287              
288 1         6 $self->start_put_open($disk_path, $filename);
289              
290 1         20 return 1;
291             }
292              
293             # called when we're requested to do a delete
294             sub handle_delete {
295 3     3 0 7 my Perlbal::ClientHTTP $self = shift;
296              
297 3 100       20 return $self->send_response(403) unless $self->{service}->{enable_delete};
298              
299 2         12 $self->watch_read(0);
300              
301             # error in filename? (any .. is an error)
302 2         65 my $uri = $self->{req_headers}->request_uri;
303 2 50       10 return $self->send_response(400, 'Invalid filename')
304             if $uri =~ /\.\./;
305              
306             # now we want to get the URI
307 2 50       16 if ($uri =~ m!^(?:/[\w\-\.]+)+$!) {
308             # now attempt the unlink
309             Perlbal::AIO::aio_unlink($self->{service}->{docroot} . '/' . $uri, sub {
310 2     2   4 my $err = shift;
311 2 100       21 if ($err == 0) {
    50          
312             # delete was successful
313 1         8 return $self->send_response(204);
314             } elsif ($! == ENOENT) {
315             # no such file
316 1         7 return $self->send_response(404);
317             } else {
318             # failure...
319 0         0 return $self->send_response(400, "$!");
320             }
321 2         29 });
322             } else {
323             # bad URI, don't accept the delete
324 0         0 return $self->send_response(400, 'Invalid filename');
325             }
326             }
327              
328             sub event_read_put {
329 40     40 0 54 my Perlbal::ClientHTTP $self = shift;
330              
331 40 100       124 if (my $cus = $self->{chunked_upload_state}) {
332 30         96 $cus->on_readable($self);
333 30         196 return;
334             }
335              
336             # read in data and shove it on the read buffer
337 10         45 my $dataref = $self->read($self->{content_length_remain});
338              
339             # unless they disconnected prematurely
340 10 50       265 unless (defined $dataref) {
341 0         0 $self->close('remote_closure');
342 0         0 return;
343             }
344              
345             # got some data
346 10         19 push @{$self->{read_buf}}, $dataref;
  10         25  
347 10         22 my $clen = length($$dataref);
348 10         26 $self->{read_size} += $clen;
349 10         19 $self->{read_ahead} += $clen;
350 10         17 $self->{content_length_remain} -= $clen;
351              
352 10 50       29 if ($self->{content_length_remain}) {
353             # Reading too far ahead of our AIO subsystem will cause us to buffer it in memory.
354 0 0       0 $self->watch_read(0) if $self->{read_ahead} >= 1024 * 1024; # arbitrary
355             # ->put_writeout clears {read_ahead}, so we run it after we need that
356 0 0       0 $self->put_writeout if $self->{read_ahead} >= 8192; # arbitrary
357             } else {
358             # now, if we've filled the content of this put, we're done
359 10         37 $self->watch_read(0);
360 10         341 $self->put_writeout;
361             }
362             }
363              
364             # verify that a minimum put directory exists. if/when it's verified,
365             # perhaps cached, the provided callback will be run.
366             sub validate_min_put_directory {
367 3     3 0 7 my Perlbal::ClientHTTP $self = shift;
368 3         10 my ($req_path, $extra_path, $filename, $callback) = @_;
369              
370 3         13 my $disk_dir = $self->{service}->{docroot} . '/' . $req_path;
371 3 100       15 return $callback->() if $VerifiedDirs{$disk_dir};
372              
373 2         5 $self->{put_in_progress} = 1;
374              
375             Perlbal::AIO::aio_open($disk_dir, O_RDONLY, 0755, sub {
376 2     2   5 my $fh = shift;
377 2         7 $self->{put_in_progress} = 0;
378              
379             # if error return failure
380 2 100       12 return $self->send_response(404, "Base directory does not exist") unless $fh;
381 1         15 CORE::close($fh);
382              
383             # mindir existed, mark it as so and start the open for the rest of the path
384 1         4 $VerifiedDirs{$disk_dir} = 1;
385 1         4 $callback->();
386 2         23 });
387             }
388              
389             # attempt to open a file being PUT for writing to disk
390             sub start_put_open {
391 13     13 0 28 my Perlbal::ClientHTTP $self = shift;
392 13         34 my ($path, $file) = @_;
393 13         16 my ($fs_path, $open_flags);
394              
395 13         29 $self->{put_in_progress} = 1;
396 13 100       50 if ($self->{md5_ctx}) {
397 2         15 $fs_path = "$path/$file.$$." . int(rand(0xffffffff)) . '.tmp';
398 2         6 $self->{put_final_name} = "$path/$file";
399 2         4 $open_flags = O_CREAT | O_EXCL | O_WRONLY;
400             } else {
401 11         38 $fs_path = "$path/$file";
402 11         20 $open_flags = O_CREAT | O_TRUNC | O_WRONLY;
403             }
404              
405             Perlbal::AIO::aio_open($fs_path, $open_flags, 0644, sub {
406             # get the fd
407 13     13   33 my $fh = shift;
408              
409             # verify file was opened
410 13         30 $self->{put_in_progress} = 0;
411              
412 13 100       40 if (! $fh) {
413 1 50 0     16 if ($! == ENOENT) {
    0          
414             # directory doesn't exist, so let's manually create it
415 1         3 eval { File::Path::mkpath($path, 0, 0755); };
  1         928  
416 1 50       6 return $self->system_error("Unable to create directory", "path = $path, file = $file") if $@;
417              
418             # should be created, call self recursively to try
419 1         9 return $self->start_put_open($path, $file);
420             } elsif ($! == EEXIST && $self->{put_final_name}) {
421             # temp name collision, bail hard because this should be near impossible already
422 0         0 Perlbal::log('crit', "Failure to open exclusively $fs_path as temp file in PUT");
423 0         0 return $self->_simple_response(500);
424             } else {
425 0         0 return $self->system_error("Internal error", "error = $!, path = $path, file = $file");
426             }
427             }
428              
429 12         25 $self->{put_fh} = $fh;
430 12         31 $self->{put_pos} = 0;
431 12         34 $self->{put_fh_filename} = $fs_path;
432              
433             # We just opened the file, haven't read_ahead any bytes, are expecting 0 bytes for read and we're
434             # not in chunked mode, so close the file immediately, we're done.
435 12 100 66     95 unless ($self->{read_ahead} || $self->{content_length_remain} || $self->{chunked_upload_state}) {
      100        
436             # FIXME this should be done through AIO
437 1         6 $self->put_close;
438 1         6 return;
439             }
440              
441 11         54 $self->put_writeout;
442 13         148 });
443             }
444              
445             # called when we've got some put data to write out
446             sub put_writeout {
447 36     36 0 56 my Perlbal::ClientHTTP $self = shift;
448 36 50       109 Carp::confess("wrong class for $self") unless ref $self eq "Perlbal::ClientHTTP";
449              
450 36 50       151 return if $self->{service}->run_hook('put_writeout', $self);
451 36 50       93 return if $self->{put_in_progress};
452 36 50       91 return unless $self->{put_fh};
453 36 100       251 return unless $self->{read_ahead};
454              
455 24         38 my $data = join("", map { $$_ } @{$self->{read_buf}});
  42         833  
  24         56  
456 24         56 my $count = length $data;
457 24         38 my $md5_ctx = $self->{md5_ctx};
458 24 100       125 $md5_ctx->add($data) if $md5_ctx;
459              
460             # reset our input buffer
461 24         61 $self->{read_buf} = [];
462 24         66 $self->{read_ahead} = 0;
463              
464             # After copying out and clearing the buffer, turn reads back on again to fill up another buffer.
465 24 100 66     180 $self->watch_read(1) if $self->{content_length_remain} || $self->{chunked_upload_state};
466              
467             # okay, file is open, write some data
468 24         186 $self->{put_in_progress} = 1;
469              
470 24         113 Perlbal::AIO::set_file_for_channel($self->{put_fh_filename});
471             Perlbal::AIO::aio_write($self->{put_fh}, $self->{put_pos}, $count, $data, sub {
472 24 50   24   87 return if $self->{closed};
473              
474             # see how many bytes written
475 24         51 my $bytes = shift() + 0;
476              
477 24         46 $self->{put_pos} += $bytes;
478 24         43 $self->{put_in_progress} = 0;
479              
480             # now recursively call ourselves?
481 24 50       74 if ($self->{read_ahead}) {
482 0         0 $self->put_writeout;
483 0         0 return;
484             }
485              
486 24 100 66     157 return if $self->{content_length_remain} || $self->{chunked_upload_state};
487              
488             # we're done putting this file, so close it.
489             # FIXME this should be done through AIO
490 10         41 $self->put_close;
491 24         213 });
492             }
493              
494             sub put_check_md5 {
495 2     2 0 6 my Perlbal::ClientHTTP $self = shift;
496              
497 2         17 my $actual = $self->{md5_ctx}->b64digest;
498 2         10 my $expect = $self->{req_headers}->header("Content-MD5");
499 2         15 $expect =~ s/=+\s*\z//;
500 2 100       7 if ($actual eq $expect) {
501             Perlbal::AIO::aio_rename($self->{put_fh_filename}, $self->{put_final_name}, sub {
502 1     1   2 my $err = shift;
503 1         3 $self->{put_fh_filename} = undef;
504 1         3 $self->{put_final_name} = undef;
505 1 50       4 if ($err == 0) {
506 1         7 return $self->send_response(201);
507             } else {
508 0         0 return $self->system_error("Error renaming file", "error in rename: $!");
509             }
510 1         12 });
511             } else {
512             Perlbal::AIO::aio_unlink($self->{put_fh_filename}, sub {
513 1     1   3 my $err = shift;
514 1         4 $self->{put_fh_filename} = undef;
515 1         3 $self->{put_final_name} = undef;
516 1 50       5 if ($err == 0) {
517 1         11 return $self->send_response(400,
518             "Content-MD5 mismatch, expected: $expect actual: $actual");
519             } else {
520 0         0 return $self->system_error("Error unlinking file", "error in unlink: $!");
521             }
522 1         11 });
523             }
524             }
525              
526             sub put_close {
527 12     12 0 26 my Perlbal::ClientHTTP $self = shift;
528 12 50       36 return if $self->{put_in_progress};
529 12 50       36 return unless $self->{put_fh};
530              
531 12 50       189 if (CORE::close($self->{put_fh})) {
532 12         27 $self->{put_fh} = undef;
533              
534 12 100       169 return $self->put_check_md5 if $self->{md5_ctx};
535 10         84 return $self->send_response(200);
536             } else {
537 0           return $self->system_error("Error saving file", "error in close: $!");
538             }
539             }
540              
541             1;
542              
543             # Local Variables:
544             # mode: perl
545             # c-basic-indent: 4
546             # indent-tabs-mode: nil
547             # End: