File Coverage

blib/lib/POE/Filter/HTTPD.pm
Criterion Covered Total %
statement 201 214 93.9
branch 51 70 72.8
condition 23 37 62.1
subroutine 27 28 96.4
pod 5 6 83.3
total 307 355 86.4


line stmt bran cond sub pod time code
1             # Filter::HTTPD Copyright 1998 Artur Bergman .
2             # Thanks go to Gisle Aas for his excellent HTTP::Daemon. Some of the
3             # get code was copied out if, unfortunately HTTP::Daemon is not easily
4             # subclassed for POE because of the blocking nature.
5             # 2001-07-27 RCC: This filter will not support the newer get_one()
6             # interface. It gets single things by default, and it does not
7             # support filter switching. If someone absolutely needs to switch to
8             # and from HTTPD filters, they should submit their request as a patch.
9              
10             package POE::Filter::HTTPD;
11              
12 3     3   1445 use strict;
  3         4  
  3         153  
13 3     3   997 use POE::Filter;
  3         5  
  3         71  
14              
15 3     3   13 use vars qw($VERSION @ISA);
  3         4  
  3         568  
16             $VERSION = '1.365';
17             # NOTE - Should be #.### (three decimal places)
18             @ISA = qw(POE::Filter);
19              
20             sub DEBUG () { 0 }
21              
22             sub BUFFER () { 0 } # raw data buffer to build requests
23             sub STATE () { 1 } # built a full request
24             sub REQUEST () { 2 } # partial request being built
25             sub CLIENT_PROTO () { 3 } # client protocol version requested
26             sub CONTENT_LEN () { 4 } # expected content length
27             sub CONTENT_ADDED () { 5 } # amount of content added to request
28             sub CONTENT_MAX () { 6 } # max amount of content
29             sub STREAMING () { 7 } # we want to work in streaming mode
30             sub MAX_BUFFER () { 8 } # max size of framing buffer
31             sub FIRST_UNUSED () { 9 }
32              
33             sub ST_HEADERS () { 0x01 } # waiting for complete header block
34             sub ST_CONTENT () { 0x02 } # waiting for complete body
35              
36 3     3   14 use Carp qw(croak cluck carp);
  3         5  
  3         165  
37 3         417 use HTTP::Status qw( status_message RC_BAD_REQUEST RC_OK RC_LENGTH_REQUIRED
38 3     3   926 RC_REQUEST_ENTITY_TOO_LARGE );
  3         5282  
39 3     3   15 use HTTP::Request ();
  3         4  
  3         45  
40 3     3   1656 use HTTP::Response ();
  3         4974  
  3         73  
41 3     3   1649 use HTTP::Date qw(time2str);
  3         11330  
  3         212  
42 3     3   24 use URI ();
  3         5  
  3         153  
43              
44             my $HTTP_1_0 = _http_version("HTTP/1.0");
45             my $HTTP_1_1 = _http_version("HTTP/1.1");
46              
47 3     3   17 use base 'Exporter';
  3         6  
  3         918  
48             our @EXPORT_OK = qw( FIRST_UNUSED );
49              
50              
51              
52             #------------------------------------------------------------------------------
53             # Set up some routines for convert wide chars (which aren't allowed in HTTP headers)
54             # into MIME encoded equivalents.
55             # See ->headers_as_strings
56             BEGIN {
57 3     3   198 eval "use utf8";
  3     3   2144  
  3         104  
  3         18  
58 3 50       48 if( $@ ) {
59 0         0 DEBUG and warn "We don't have utf8.";
60 0         0 *HAVE_UTF8 = sub { 0 };
  0         0  
61             }
62             else {
63 3     2   19 *HAVE_UTF8 = sub { 1 };
  2         15  
64             my $downgrade = sub {
65 1         2 my $ret = $_[0];
66 1         4 utf8::downgrade( $ret );
67 1         2 return $ret
68 3         10 };
69 3     3   150 eval "use Email::MIME::RFC2047::Encoder";
  3         639  
  0         0  
  0         0  
70 3 50       14 if( $@ ) {
71 3         4 DEBUG and warn "We don't have Email::MIME::RFC2047::Encoder";
72             *encode_value = sub {
73 1     1   300 cluck(
74             "Downgrading wide characters in HTTP header. " .
75             "Consier installing Email::MIME::RFC2047::Encoder"
76             );
77 1         280 $downgrade->( @_ );
78 3         870 };
79             }
80             else {
81 0         0 my $encoder = Email::MIME::RFC2047::Encoder->new( encoding => 'iso-8859-1',
82             method => 'Q'
83             );
84 0         0 *encode_value = sub { $downgrade->( $encoder->encode_text( @_ ) ) };
  0         0  
85             }
86             }
87             }
88              
89              
90             #------------------------------------------------------------------------------
91              
92             sub new {
93 34     34 1 75298 my $type = shift;
94 34 50 66     131 croak "$type requires an even number of parameters" if @_ and @_ & 1;
95 34         82 my %params = @_;
96              
97 34         135 my $max_content = $type->__param_max( MaxContent => 1024*1024, \%params );
98 32         93 my $max_buffer = $type->__param_max( MaxBuffer => 512*1024*1024, \%params );
99 30   100     343 my $streaming = $params{Streaming} || 0;
100              
101 30 100       428 croak "MaxBuffer is not large enough for MaxContent"
102             unless $max_buffer >= $max_content + length( $max_content ) + 1;
103              
104 29         50 delete @params{qw(MaxContent MaxBuffer Streaming)};
105 29 50       63 carp("$type ignores unknown parameters: ", join(', ', sort keys %params))
106             if scalar keys %params;
107              
108 29         191 return bless(
109             [
110             '', # BUFFER
111             ST_HEADERS, # STATE
112             undef, # REQUEST
113             undef, # CLIENT_PROTO
114             0, # CONTENT_LEN
115             0, # CONTENT_ADDED
116             $max_content, # CONTENT_MAX
117             $streaming, # STREAMING
118             $max_buffer # MAX_BUFFER
119             ],
120             $type
121             );
122             }
123              
124             #------------------------------------------------------------------------------
125              
126             sub get_one_start {
127 63     63 1 68 my ($self, $stream) = @_;
128            
129 63         163 $self->[BUFFER] .= join( '', @$stream );
130 63         69 DEBUG and warn "$$:poe-filter-httpd: Buffered ".length( $self->[BUFFER] )." bytes";
131 63 100       327 die "Framing buffer exceeds the limit"
132             if $self->[MAX_BUFFER] < length( $self->[BUFFER] );
133             }
134              
135             sub get_one {
136 90     90 1 93 my ($self) = @_;
137              
138             # Need to check lengths in octets, not characters.
139 3 50   3   6 BEGIN { eval { require bytes } and bytes->import; }
  3         42  
140              
141             # Waiting for a complete suite of headers.
142 90 100       187 if ($self->[STATE] & ST_HEADERS) {
143 84         64 DEBUG and warn "$$:poe-filter-httpd: Looking for headers";
144             # Strip leading whitespace.
145 84         179 $self->[BUFFER] =~ s/^\s+//;
146              
147             # No blank line yet. Side effect: Raw headers block is extracted
148             # from the input buffer.
149 84 100       542 return [] unless (
150             $self->[BUFFER] =~
151             s/^(\S.*?(?:\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?))//s
152             );
153              
154             # Raw headers block from the input buffer.
155 28         82 my $rh = $1;
156              
157             # Parse the request line.
158 28 100       188 if ($rh !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) {
159             return [
160 2         11 $self->_build_error(RC_BAD_REQUEST, "Request line parse failure. ($rh)")
161             ];
162             }
163              
164             # Create an HTTP::Request object from values in the request line.
165 26   100     141 my ($method, $request_path, $proto) = ($1, $2, ($3 || "HTTP/0.9"));
166              
167             # Fix a double starting slash on the path. It happens.
168 26         35 $request_path =~ s!^//+!/!;
169              
170 26         104 my $r = HTTP::Request->new($method, URI->new($request_path));
171 26         11070 $r->protocol($proto);
172 26         196 $self->[CLIENT_PROTO] = $proto = _http_version($proto);
173              
174             # Parse headers.
175              
176 26         27 my ($key, $val);
177 26         146 HEADER: while ($rh =~ s/^([^\012]*)\012//) {
178 51         102 local $_ = $1;
179 51         66 s/\015$//;
180 51 100       168 if (/^([\w\-~]+)\s*:\s*(.*)/) {
    50          
181 25 100       96 $r->push_header($key, $val) if $key;
182 25         532 ($key, $val) = ($1, $2);
183             }
184             elsif (/^\s+(.*)/) {
185 0         0 $val .= " $1";
186             }
187             else {
188 26         49 last HEADER;
189             }
190             }
191              
192 26 100       72 $r->push_header($key, $val) if $key;
193              
194             # We got a full set of headers. Fall through to content if we
195             # have a content length.
196              
197 26         361 my $cl = $r->content_length();
198 26 100       848 if( defined $cl ) {
199 10 100       58 unless( $cl =~ /^\s*(\d+)\s*$/ ) {
200 1         6 $r = $self->_build_error(RC_BAD_REQUEST,
201             "Content-Length is not a number.",
202             $r);
203 1         3 $self->[BUFFER] = '';
204 1         5 $self->_reset();
205 1         5 return [ $r ];
206             }
207 9   50     38 $cl = $1 || 0;
208             }
209 25         83 my $ce = $r->content_encoding();
210            
211             # The presence of a message-body in a request is signaled by the
212             # inclusion of a Content-Length or Transfer-Encoding header field in
213             # the request's message-headers. A message-body MUST NOT be included in
214             # a request if the specification of the request method (section 5.1.1)
215             # does not allow sending an entity-body in requests. A server SHOULD
216             # read and forward a message-body on any request; if the request method
217             # does not include defined semantics for an entity-body, then the
218             # message-body SHOULD be ignored when handling the request.
219             # - RFC2616
220              
221 25 100 100     681 unless( defined $cl || defined $ce ) {
222             # warn "No body";
223 15         53 $self->_reset();
224 15         60 return [ $r ];
225             }
226            
227             # PG- GET shouldn't have a body. But RFC2616 talks about Content-Length
228             # for HEAD. And My reading of RFC2616 is that HEAD is the same as GET.
229             # So logically, GET can have a body. And RFC2616 says we SHOULD ignore
230             # it.
231             #
232             # What's more, in apache 1.3.28, a body on a GET or HEAD is read
233             # and discarded. See ap_discard_request_body() in http_protocol.c and
234             # default_handler() in http_core.c
235             #
236             # Neither Firefox 2.0 nor Lynx 2.8.5 set Content-Length on a GET
237              
238             # For compatibility with HTTP/1.0 applications, HTTP/1.1 requests
239             # containing a message-body MUST include a valid Content-Length header
240             # field unless the server is known to be HTTP/1.1 compliant. If a
241             # request contains a message-body and a Content-Length is not given,
242             # the server SHOULD respond with 400 (bad request) if it cannot
243             # determine the length of the message, or with 411 (length required) if
244             # it wishes to insist on receiving a valid Content-Length.
245             # - RFC2616
246             #
247             # PG- This seems to imply that we can either detect the length (but how
248             # would one do that?) or require a Content-Length header. We do the
249             # latter.
250             #
251             # PG- Dispite all the above, I'm not fully sure this implements RFC2616
252             # properly. There's something about transfer-coding that I don't fully
253             # understand.
254              
255 10 100       24 if ( not $cl) {
256             # assume a Content-Length of 0 is valid pre 1.1
257 1 50 33     8 if ($self->[CLIENT_PROTO] >= $HTTP_1_1 and not defined $cl) {
258             # We have Content-Encoding, but not Content-Length.
259 1         5 $r = $self->_build_error(RC_LENGTH_REQUIRED,
260             "No content length found.",
261             $r);
262             }
263 1         2 $self->[BUFFER] = '';
264 1         5 $self->_reset();
265 1         4 return [ $r ];
266             }
267              
268             # Prevent DOS of a server by malicious clients
269 9 100 100     57 if( not $self->[STREAMING] and $cl > $self->[CONTENT_MAX] ) {
270 2         11 $r = $self->_build_error(RC_REQUEST_ENTITY_TOO_LARGE,
271             "Content of $cl octets not accepted.",
272             $r);
273 2         5 $self->[BUFFER] = '';
274 2         6 $self->_reset();
275 2         8 return [ $r ];
276             }
277              
278 7         13 $self->[REQUEST] = $r;
279 7         11 $self->[CONTENT_LEN] = $cl;
280 7         14 $self->[STATE] = ST_CONTENT;
281             # Fall through to content.
282             }
283              
284             # Waiting for content.
285 13 50       29 if ($self->[STATE] & ST_CONTENT) {
286 13         16 my $r = $self->[REQUEST];
287 13         21 my $cl_needed = $self->[CONTENT_LEN] - $self->[CONTENT_ADDED];
288 13 50       25 die "already got enough content ($cl_needed needed)" if $cl_needed < 1;
289              
290 13 100       30 if( $self->[STREAMING] ) {
291 1         2 DEBUG and warn "$$:poe-filter-httpd: Streaming request content";
292 1         3 my @ret;
293             # do we have a request?
294 1 50       4 if( $self->[REQUEST] ) {
295 1         2 DEBUG and warn "$$:poe-filter-httpd: Sending request";
296 1         3 push @ret, $self->[REQUEST]; # send it to the wheel
297 1         3 $self->[REQUEST] = undef;
298             }
299             # do we have some content ?
300 1 50       4 if( length( $self->[BUFFER] ) ) { # send it to the wheel
301 1         4 my $more = substr($self->[BUFFER], 0, $cl_needed);
302 1         5 DEBUG and warn "$$:poe-filter-httpd: Sending content";
303 1         1 push @ret, $more;
304 1         3 $self->[CONTENT_ADDED] += length($more);
305 1         3 substr( $self->[BUFFER], 0, length($more) ) = "";
306             # is that enough content?
307 1 50       11 if( $self->[CONTENT_ADDED] >= $self->[CONTENT_LEN] ) {
308 1         2 DEBUG and warn "$$:poe-filter-httpd: All content received ($self->[CONTENT_ADDED] >= $self->[CONTENT_LEN])";
309             # Strip MSIE 5.01's extra CRLFs
310 1         3 $self->[BUFFER] =~ s/^\s+//;
311 1         3 $self->_reset;
312             }
313             }
314 1         5 return \@ret;
315             }
316              
317             # Not enough content to complete the request. Add it to the
318             # request content, and return an incomplete status.
319 12 100       29 if (length($self->[BUFFER]) < $cl_needed) {
320 6         15 $r->add_content($self->[BUFFER]);
321 6         62 $self->[CONTENT_ADDED] += length($self->[BUFFER]);
322 6         8 $self->[BUFFER] = "";
323 6         11 return [];
324             }
325              
326             # Enough data. Add it to the request content.
327             # PG- CGI.pm only reads Content-Length: bytes from STDIN.
328              
329             # Four-argument substr() would be ideal here, but it's not
330             # entirely backward compatible.
331 6         34 $r->add_content(substr($self->[BUFFER], 0, $cl_needed));
332 6         348 substr($self->[BUFFER], 0, $cl_needed) = "";
333              
334             # Some browsers (like MSIE 5.01) send extra CRLFs after the
335             # content. Shame on them.
336 6         17 $self->[BUFFER] =~ s/^\s+//;
337              
338             # XXX Should we throw the body away on a GET or HEAD? Probably not.
339              
340             # XXX Should we parse Multipart Types bodies?
341              
342             # Prepare for the next request, and return this one.
343 6         20 $self->_reset();
344 6         24 return [ $r ];
345             }
346              
347             # What are we waiting for?
348 0         0 die "unknown state $self->[STATE]";
349             }
350              
351             # Prepare for next request
352             sub _reset
353             {
354 26     26   34 my($self) = @_;
355 26         33 $self->[STATE] = ST_HEADERS;
356 26         48 @$self[REQUEST, CLIENT_PROTO] = (undef, undef);
357 26         53 @$self[CONTENT_LEN, CONTENT_ADDED] = (0, 0);
358             }
359              
360              
361             #------------------------------------------------------------------------------
362              
363             sub put {
364 2     2 1 14 my ($self, $responses) = @_;
365 2         3 my @raw;
366              
367             # HTTP::Response's as_string method returns the header lines
368             # terminated by "\n", which does not do the right thing if we want
369             # to send it to a client. Here I've stolen HTTP::Response's
370             # as_string's code and altered it to use network newlines so picky
371             # browsers like lynx get what they expect.
372             # PG- $r->as_string( "\x0D\x0A" ); would accomplish the same thing, no?
373              
374 2         3 foreach (@$responses) {
375 2         6 my $code = $_->code;
376 2   50     20 my $status_message = status_message($code) || "Unknown Error";
377 2   50     12 my $message = $_->message || "";
378 2   50     27 my $proto = $_->protocol || 'HTTP/1.0';
379              
380 2         21 my $status_line = "$proto $code";
381 2 100       11 $status_line .= " ($status_message)" if $status_message ne $message;
382 2 50       7 $status_line .= " $message" if length($message);
383              
384             # Use network newlines, and be sure not to mangle newlines in the
385             # response's content.
386              
387 2         2 my @headers;
388 2         4 push @headers, $status_line;
389              
390             # Perl can magically promote a string to UTF-8 if it is concatinated
391             # with another UTF-8 string. This behaviour changed between 5.8.8 and
392             # 5.10.1. This is normaly not a problem, but POE::Driver::SysRW uses
393             # syswrite(), which sends POE's internal buffer as-is.
394             # In other words, if the header contains UTF-8, the content will be
395             # promoted to UTF-8 and syswrite() will send those wide bytes, which
396             # will corrupt any images.
397             # For instance, 00 e7 ff 00 00 00 05
398             # will become, 00 c3 a7 c3 bf 00 00 00 05
399             #
400             # The real bug is in HTTP::Message->headers_as_string, which doesn't respect
401             # the following:
402             #
403             # "The TEXT rule is only used for descriptive field contents and values
404             # that are not intended to be interpreted by the message parser. Words
405             # of *TEXT MAY contain characters from character sets other than ISO-
406             # 8859-1 [22] only when encoded according to the rules of RFC 2047
407             # [14]. " -- RFC2616 section 2.2
408             # http://www.ietf.org/rfc/rfc2616.txt
409             # http://www.ietf.org/rfc/rfc2047.txt
410 2         3 my $endl = "\x0D\x0A";
411 2         6 push @headers, $self->headers_as_strings( $_->headers, $endl );
412 2         9 push @raw, join( $endl, @headers, "", "") . $_->content;
413             }
414              
415 2         28 \@raw;
416             }
417              
418             sub headers_as_strings
419             {
420 2     2 0 12 my( $self, $H, $endl ) = @_;
421 2         3 my @ret;
422             # $H is a HTTP::Headers object
423 2         6 foreach my $name ( $H->header_field_names ) {
424             # message-header = field-name ":" [ field-value ]
425             # field-name = token
426             # RFC2616 section 4.2
427             #
428             # token = 1*
429             # separators = "(" | ")" | "<" | ">" | "@"
430             # | "," | ";" | ":" | "\" | <">
431             # | "/" | "[" | "]" | "?" | "="
432             # | "{" | "}" | SP | HT
433             # CHAR =
434             # CTL =
435             # (octets 0 - 31) and DEL (127)>
436             # SP =
437             # HT =
438             # RFC2616 section 2.2
439              
440             # In other words, plain ascii text. HTTP::Headers doesn't check for
441             # this, of course. So if we complain here, the cluck ends up in
442             # the wrong place. Doing the simplest thing
443 1 50       20 utf8::downgrade( $name ) if HAVE_UTF8;
444              
445             # Deal with header values
446 1         3 foreach my $value ( $H->header( $name ) ) {
447 1 50 33     29 if( HAVE_UTF8 and utf8::is_utf8( $value ) ) {
448 1         1 DEBUG and warn "$$: Header $name is UTF-8";
449 1         4 $value = encode_value( $value );
450             }
451            
452 1         5 push @ret, join ": ", $name, _process_newline( $value, $endl );
453             }
454             }
455 2         15 return @ret;
456             }
457              
458             # This routine is lifted as-is from HTTP::Headers
459             sub _process_newline {
460 1     1   2 local $_ = shift;
461 1         2 my $endl = shift;
462             # must handle header values with embedded newlines with care
463 1         5 s/\s+$//; # trailing newlines and space must go
464 1         3 s/\n(\x0d?\n)+/\n/g; # no empty lines
465 1         2 s/\n([^\040\t])/\n $1/g; # initial space for continuation
466 1         2 s/\n/$endl/g; # substitute with requested line ending
467 1         7 $_;
468             }
469              
470             #------------------------------------------------------------------------------
471              
472             sub get_pending {
473 0     0 1 0 my $self = shift;
474 0 0       0 return [ $self->[BUFFER] ] if length $self->[BUFFER];
475 0         0 return undef;
476             }
477              
478             #------------------------------------------------------------------------------
479             # Functions specific to HTTPD;
480             #------------------------------------------------------------------------------
481              
482             # Internal function to parse an HTTP status line and return the HTTP
483             # protocol version.
484              
485             sub _http_version {
486 32     32   68 local($_) = shift;
487 32 50       200 return 0 unless m,^(?:HTTP/)?(\d+)\.(\d+)$,i;
488 32         140 $1 * 1000 + $2;
489             }
490              
491             # Build a basic response, given a status, a content type, and some
492             # content.
493              
494             sub _build_basic_response {
495 6     6   38 my ($self, $content, $content_type, $status, $message) = @_;
496              
497             # Need to check lengths in octets, not characters.
498 3 50   3   6284 BEGIN { eval { require bytes } and bytes->import; }
  3         47  
499              
500 6   50     15 $content_type ||= 'text/html';
501 6   50     9 $status ||= RC_OK;
502              
503 6         25 my $response = HTTP::Response->new($status, $message);
504              
505 6         241 $response->push_header( 'Content-Type', $content_type );
506 6         177 $response->push_header( 'Content-Length', length($content) );
507 6         141 $response->content($content);
508              
509 6         104 return $response;
510             }
511              
512             sub _build_error {
513 6     6   11 my($self, $status, $details, $req) = @_;
514              
515 6   50     13 $status ||= RC_BAD_REQUEST;
516 6   50     15 $details ||= '';
517 6   50     22 my $message = status_message($status) || "Unknown Error";
518              
519 6         69 my $resp = $self->_build_basic_response(
520             ( "" .
521             "" .
522             "Error $status: $message" .
523             "" .
524             "" .
525             "

Error $status: $message

" .
526             "

$details

" .
527             "" .
528             ""
529             ),
530             "text/html",
531             $status,
532             $message
533             );
534 6 100       27 $resp->request( $req ) if $req;
535 6         42 return $resp;
536             }
537              
538             1;
539              
540             __END__