File Coverage

blib/lib/HTTP/Message.pm
Criterion Covered Total %
statement 449 454 98.9
branch 291 312 93.2
condition 73 82 89.0
subroutine 37 37 100.0
pod 20 22 90.9
total 870 907 95.9


line stmt bran cond sub pod time code
1             package HTTP::Message;
2              
3 15     15   77112 use strict;
  15         42  
  15         437  
4 15     15   73 use warnings;
  15         29  
  15         83196  
5              
6             our $VERSION = '6.45';
7              
8             require HTTP::Headers;
9             require Carp;
10              
11             our $MAXIMUM_BODY_SIZE;
12              
13             my $CRLF = "\015\012"; # "\r\n" is not portable
14             unless ($HTTP::URI_CLASS) {
15             if ($ENV{PERL_HTTP_URI_CLASS}
16             && $ENV{PERL_HTTP_URI_CLASS} =~ /^([\w:]+)$/) {
17             $HTTP::URI_CLASS = $1;
18             } else {
19             $HTTP::URI_CLASS = "URI";
20             }
21             }
22             eval "require $HTTP::URI_CLASS"; die $@ if $@;
23              
24             *_utf8_downgrade = defined(&utf8::downgrade) ?
25             sub {
26 197 100   197   870 utf8::downgrade($_[0], 1) or
27             Carp::croak("HTTP::Message content must be bytes")
28             }
29             :
30             sub {
31             };
32              
33             sub new
34             {
35 155     155 1 21049 my($class, $header, $content) = @_;
36 155 100       368 if (defined $header) {
37 85 100       314 Carp::croak("Bad header argument") unless ref $header;
38 84 100       205 if (ref($header) eq "ARRAY") {
39 63         214 $header = HTTP::Headers->new(@$header);
40             }
41             else {
42 21         229 $header = $header->clone;
43             }
44             }
45             else {
46 70         241 $header = HTTP::Headers->new;
47             }
48 154 100       358 if (defined $content) {
49 80         188 _utf8_downgrade($content);
50             }
51             else {
52 74         122 $content = '';
53             }
54              
55 153         800 bless {
56             '_headers' => $header,
57             '_content' => $content,
58             '_max_body_size' => $HTTP::Message::MAXIMUM_BODY_SIZE,
59             }, $class;
60             }
61              
62             sub parse
63             {
64 31     31 1 86 my($class, $str) = @_;
65              
66 31         45 my @hdr;
67 31         48 while (1) {
68 65 100 100     361 if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
    100          
69 29         94 push(@hdr, $1, $2);
70 29         77 $hdr[-1] =~ s/\r\z//;
71             }
72             elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
73 5         13 $hdr[-1] .= "\n$1";
74 5         9 $hdr[-1] =~ s/\r\z//;
75             }
76             else {
77 31         91 $str =~ s/^\r?\n//;
78 31         55 last;
79             }
80             }
81 31         52 local $HTTP::Headers::TRANSLATE_UNDERSCORE;
82 31         69 new($class, \@hdr, $str);
83             }
84              
85              
86             sub clone
87             {
88 8     8 1 563 my $self = shift;
89 8         26 my $clone = HTTP::Message->new($self->headers,
90             $self->content);
91 8         50 $clone->protocol($self->protocol);
92 8         31 $clone;
93             }
94              
95              
96             sub clear {
97 4     4 1 728 my $self = shift;
98 4         14 $self->{_headers}->clear;
99 4         14 $self->content("");
100 4         7 delete $self->{_parts};
101 4         7 return;
102             }
103              
104              
105             sub protocol {
106 60     60 1 829 shift->_elem('_protocol', @_);
107             }
108              
109             sub headers {
110 1125     1125 1 3387 my $self = shift;
111              
112             # recalculation of _content might change headers, so we
113             # need to force it now
114 1125 100       2360 $self->_content unless exists $self->{_content};
115              
116 1125         3913 $self->{_headers};
117             }
118              
119             sub headers_as_string {
120 3     3 1 10 shift->headers->as_string(@_);
121             }
122              
123              
124             sub content {
125              
126 288     288 1 14174 my $self = $_[0];
127 288 100       653 if (defined(wantarray)) {
128 197 100       439 $self->_content unless exists $self->{_content};
129 197         308 my $old = $self->{_content};
130 197 100       442 $old = $$old if ref($old) eq "SCALAR";
131 197 100       449 &_set_content if @_ > 1;
132 197         743 return $old;
133             }
134              
135 91 100       222 if (@_ > 1) {
136 89         161 &_set_content;
137             }
138             else {
139 2 100       78 Carp::carp("Useless content call in void context") if $^W;
140             }
141             }
142              
143              
144             sub _set_content {
145 105     105   152 my $self = $_[0];
146 105         261 _utf8_downgrade($_[1]);
147 104 100 100     489 if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
148 3 50       8 ${$self->{_content}} = defined( $_[1] ) ? $_[1] : '';
  3         13  
149             }
150             else {
151 101 100       218 die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
152 100 100       229 $self->{_content} = defined( $_[1] ) ? $_[1] : '';
153 100         153 delete $self->{_content_ref};
154             }
155 102 100       316 delete $self->{_parts} unless $_[2];
156             }
157              
158              
159             sub add_content
160             {
161 12     12 1 44 my $self = shift;
162 12 50       33 $self->_content unless exists $self->{_content};
163 12         25 my $chunkref = \$_[0];
164 12 100       36 $chunkref = $$chunkref if ref($$chunkref); # legacy
165              
166 12         32 _utf8_downgrade($$chunkref);
167              
168 11         24 my $ref = ref($self->{_content});
169 11 100       43 if (!$ref) {
    100          
170 9         29 $self->{_content} .= $$chunkref;
171             }
172             elsif ($ref eq "SCALAR") {
173 1         2 ${$self->{_content}} .= $$chunkref;
  1         3  
174             }
175             else {
176 1         64 Carp::croak("Can't append to $ref content");
177             }
178 10         28 delete $self->{_parts};
179             }
180              
181             sub add_content_utf8 {
182 2     2 1 9 my($self, $buf) = @_;
183 2         8 utf8::upgrade($buf);
184 2         5 utf8::encode($buf);
185 2         5 $self->add_content($buf);
186             }
187              
188             sub content_ref
189             {
190 137     137 1 1598 my $self = shift;
191 137 50       344 $self->_content unless exists $self->{_content};
192 137         221 delete $self->{_parts};
193 137         243 my $old = \$self->{_content};
194 137         244 my $old_cref = $self->{_content_ref};
195 137 100       310 if (@_) {
196 6         8 my $new = shift;
197 6 100       78 Carp::croak("Setting content_ref to a non-ref") unless ref($new);
198 5         8 delete $self->{_content}; # avoid modifying $$old
199 5         18 $self->{_content} = $new;
200 5         7 $self->{_content_ref}++;
201             }
202 136 100       257 $old = $$old if $old_cref;
203 136         325 return $old;
204             }
205              
206              
207             sub content_charset
208             {
209 52     52 1 845 my $self = shift;
210 52 100       150 if (my $charset = $self->content_type_charset) {
211 1         5 return $charset;
212             }
213              
214             # time to start guessing
215 51         236 my $cref = $self->decoded_content(ref => 1, charset => "none");
216              
217             # Unicode BOM
218 51         113 for ($$cref) {
219 51 100       198 return "UTF-8" if /^\xEF\xBB\xBF/;
220 49 100       120 return "UTF-32LE" if /^\xFF\xFE\x00\x00/;
221 48 100       109 return "UTF-32BE" if /^\x00\x00\xFE\xFF/;
222 47 100       152 return "UTF-16LE" if /^\xFF\xFE/;
223 43 100       111 return "UTF-16BE" if /^\xFE\xFF/;
224             }
225              
226 42 100       138 if ($self->content_is_xml) {
    100          
    100          
227             # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
228             # XML entity not accompanied by external encoding information and not
229             # in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
230             # in which the first characters must be '
231 15         41 for ($$cref) {
232 15 100       50 return "UTF-32BE" if /^\x00\x00\x00
233 14 100       44 return "UTF-32LE" if /^<\x00\x00\x00/;
234 13 100       14120 return "UTF-16BE" if /^(?:\x00\s)*\x00
235 12 100       11343 return "UTF-16LE" if /^(?:\s\x00)*<\x00/;
236 11 100       11007 if (/^\s*(<\?xml[^\x00]*?\?>)/) {
237 4 50       31 if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) {
238 4         10 my $enc = $2;
239 4         11 $enc =~ s/^\s+//; $enc =~ s/\s+\z//;
  4         11  
240 4 100       24 return $enc if $enc;
241             }
242             }
243             }
244 8         8036 return "UTF-8";
245             }
246             elsif ($self->content_is_html) {
247             # look for or
248             # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
249 4         543 require IO::HTML;
250             # Use relaxed search to match previous versions of HTTP::Message:
251 4         2167 my $encoding = IO::HTML::find_charset_in($$cref, { encoding => 1,
252             need_pragma => 0 });
253 4 100       543 return $encoding->mime_name if $encoding;
254             }
255             elsif ($self->content_type eq "application/json") {
256 6         13 for ($$cref) {
257             # RFC 4627, ch 3
258 6 100       21 return "UTF-32BE" if /^\x00\x00\x00./s;
259 5 100       19 return "UTF-32LE" if /^.\x00\x00\x00/s;
260 3 100       11 return "UTF-16BE" if /^\x00.\x00./s;
261 2 100       9 return "UTF-16LE" if /^.\x00.\x00/s;
262 1         5 return "UTF-8";
263             }
264             }
265 18 100       59 if ($self->content_type =~ /^text\//) {
266 17         34 for ($$cref) {
267 17 100       44 if (length) {
268 16 100       99 return "US-ASCII" unless /[\x80-\xFF]/;
269 3         609 require Encode;
270 3         14841 eval {
271 3         36 Encode::decode_utf8($_, Encode::FB_CROAK() | Encode::LEAVE_SRC());
272             };
273 3 100       47 return "UTF-8" unless $@;
274 1         7 return "ISO-8859-1";
275             }
276             }
277             }
278              
279 2         10 return undef;
280             }
281              
282             sub max_body_size {
283 133     133 0 1761 my $self = $_[0];
284 133         211 my $old = $self->{_max_body_size};
285 133 100       312 $self->_set_max_body_size($_[1]) if @_ > 1;
286 133         331 return $old;
287             }
288              
289             sub _set_max_body_size {
290 6     6   12 my $self = $_[0];
291 6         17 $self->{_max_body_size} = $_[1];
292             }
293              
294             sub decoded_content
295             {
296 118     118 1 380326 my($self, %opt) = @_;
297 118         227 my $content_ref;
298             my $content_ref_iscopy;
299              
300 118         203 eval {
301 118         293 $content_ref = $self->content_ref;
302 118 50       349 die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
303              
304             my $content_limit = exists $opt{ max_body_size } ? $opt{ max_body_size }
305 118 100       365 : defined $self->max_body_size ? $self->max_body_size
    100          
306             : undef
307             ;
308 118         164 my %limiter_options;
309 118 100       242 if( defined $content_limit ) {
310 6         28 %limiter_options = (LimitOutput => 1, Bufsize => $content_limit);
311             };
312 118 100       484 if (my $h = $self->header("Content-Encoding")) {
313 60         210 $h =~ s/^\s+//;
314 60         171 $h =~ s/\s+$//;
315 60         332 for my $ce (reverse split(/\s*,\s*/, lc($h))) {
316 105 50       231 next unless $ce;
317 105 100 100     375 next if $ce eq "identity" || $ce eq "none";
318 101 100 100     546 if ($ce eq "gzip" || $ce eq "x-gzip") {
    100 100        
    100 100        
    100          
    100          
    100          
    100          
319 30         828 require Compress::Raw::Zlib; # 'WANT_GZIP_OR_ZLIB', 'Z_BUF_ERROR';
320              
321 30 100 100     6061 if( ! $content_ref_iscopy and keys %limiter_options) {
322             # Create a copy of the input because Zlib will overwrite it
323             # :-(
324 2         8 my $input = "$$content_ref";
325 2         4 $content_ref = \$input;
326 2         5 $content_ref_iscopy++;
327             };
328 30         166 my ($i, $status) = Compress::Raw::Zlib::Inflate->new(
329             %limiter_options,
330             ConsumeInput => 0, # overridden by Zlib if we have %limiter_options :-(
331             WindowBits => Compress::Raw::Zlib::WANT_GZIP_OR_ZLIB(),
332             );
333 30         596101 my $res = $i->inflate( $content_ref, \my $output );
334 30 100       169 $res == Compress::Raw::Zlib::Z_BUF_ERROR()
335             and Carp::croak("Decoded content would be larger than $content_limit octets");
336 28 50 33     238 $res == Compress::Raw::Zlib::Z_OK()
337             or $res == Compress::Raw::Zlib::Z_STREAM_END()
338             or die "Can't gunzip content: $res";
339 28         338 $content_ref = \$output;
340 28         176 $content_ref_iscopy++;
341             }
342             elsif ($ce eq 'br') {
343 12         71 require IO::Uncompress::Brotli;
344 12         157 my $bro = IO::Uncompress::Brotli->create;
345              
346 12         24 my $output;
347 12 100       31 if( defined $content_limit ) {
348 2         5 $output = eval { $bro->decompress( $$content_ref, $content_limit ); }
  2         34  
349             } else {
350 10         16 $output = eval { $bro->decompress($$content_ref) };
  10         254436  
351             }
352              
353 12 100       118 $@ and die "Can't unbrotli content: $@";
354 10         31 $content_ref = \$output;
355 10         100 $content_ref_iscopy++;
356             }
357             elsif ($ce eq "x-bzip2" or $ce eq "bzip2") {
358 28         140 require Compress::Raw::Bzip2;
359              
360 28 100       80 if( ! $content_ref_iscopy ) {
361             # Create a copy of the input because Bzlib2 will overwrite it
362             # :-(
363 12         32 my $input = "$$content_ref";
364 12         19 $content_ref = \$input;
365 12         29 $content_ref_iscopy++;
366             };
367             my ($i, $status) = Compress::Raw::Bunzip2->new(
368             1, # appendInput
369             0, # consumeInput
370             0, # small
371 28   100     308 $limiter_options{ LimitOutput } || 0,
372             );
373 28         56 my $output;
374             $output = "\0" x $limiter_options{ Bufsize }
375 28 100       298 if $limiter_options{ Bufsize };
376 28         364969 my $res = $i->bzinflate( $content_ref, \$output );
377 28 50       192 $res == Compress::Raw::Bzip2::BZ_OUTBUFF_FULL()
378             and Carp::croak("Decoded content would be larger than $content_limit octets");
379 28 100 66     255 $res == Compress::Raw::Bzip2::BZ_OK()
380             or $res == Compress::Raw::Bzip2::BZ_STREAM_END()
381             or die "Can't bunzip content: $res";
382 26         331 $content_ref = \$output;
383 26         243 $content_ref_iscopy++;
384             }
385             elsif ($ce eq "deflate") {
386 6         23 require IO::Uncompress::Inflate;
387 6         11 my $output;
388 6         18 my $status = IO::Uncompress::Inflate::inflate($content_ref, \$output, Transparent => 0);
389 6         9134 my $error = $IO::Uncompress::Inflate::InflateError;
390 6 100       17 unless ($status) {
391             # "Content-Encoding: deflate" is supposed to mean the
392             # "zlib" format of RFC 1950, but Microsoft got that
393             # wrong, so some servers sends the raw compressed
394             # "deflate" data. This tries to inflate this format.
395 2         6 $output = undef;
396 2         11 require IO::Uncompress::RawInflate;
397 2 50       10 unless (IO::Uncompress::RawInflate::rawinflate($content_ref, \$output)) {
398 0         0 $self->push_header("Client-Warning" =>
399             "Could not raw inflate content: $IO::Uncompress::RawInflate::RawInflateError");
400 0         0 $output = undef;
401             }
402             }
403 6 50       2695 die "Can't inflate content: $error" unless defined $output;
404 6         13 $content_ref = \$output;
405 6         16 $content_ref_iscopy++;
406             }
407             elsif ($ce eq "compress" || $ce eq "x-compress") {
408 2         19 die "Can't uncompress content";
409             }
410             elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
411 19         557 require MIME::Base64;
412 19         701 $content_ref = \MIME::Base64::decode($$content_ref);
413 19         44 $content_ref_iscopy++;
414             }
415             elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
416 1         495 require MIME::QuotedPrint;
417 1         297 $content_ref = \MIME::QuotedPrint::decode($$content_ref);
418 1         3 $content_ref_iscopy++;
419             }
420             else {
421 3         28 die "Don't know how to decode Content-Encoding '$ce'";
422             }
423             }
424             }
425              
426 107 100 100     494 if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) {
427             my $charset = lc(
428             $opt{charset} ||
429             $self->content_type_charset ||
430             $opt{default_charset} ||
431 98   50     431 $self->content_charset ||
432             "ISO-8859-1"
433             );
434 98 100 100     488 if ($charset eq "none") {
    100          
435             # leave it as is
436             }
437             elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1") {
438 14 100 66     66 if ($$content_ref =~ /[^\x00-\x7F]/ && defined &utf8::upgrade) {
439 3 50       5 unless ($content_ref_iscopy) {
440 3         6 my $copy = $$content_ref;
441 3         5 $content_ref = \$copy;
442 3         6 $content_ref_iscopy++;
443             }
444 3         13 utf8::upgrade($$content_ref);
445             }
446             }
447             else {
448 27         3311 require Encode;
449 27         57643 eval {
450             $content_ref = \Encode::decode($charset, $$content_ref,
451 27 100       265 ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
452             };
453 27 100       302464 if ($@) {
454 4         8 my $retried;
455 4 100       22 if ($@ =~ /^Unknown encoding/) {
456 3   100     13 my $alt_charset = lc($opt{alt_charset} || "");
457 3 100 66     15 if ($alt_charset && $charset ne $alt_charset) {
458             # Retry decoding with the alternative charset
459             $content_ref = \Encode::decode($alt_charset, $$content_ref,
460 2 50       14 ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC())
    100          
461             unless $alt_charset eq "none";
462 2         48 $retried++;
463             }
464             }
465 4 100       24 die unless $retried;
466             }
467 25 50       96 die "Encode::decode() returned undef improperly" unless defined $$content_ref;
468 25 100       84 if ($is_xml) {
469             # Get rid of the XML encoding declaration if present
470 10         534699 $$content_ref =~ s/^\x{FEFF}//;
471 10 100       15279 if ($$content_ref =~ /^(\s*<\?xml[^\x00]*?\?>)/) {
472 4         82 substr($$content_ref, 0, length($1)) =~ s/\sencoding\s*=\s*(["']).*?\1//;
473             }
474             }
475             }
476             }
477             };
478 118 100       604 if ($@) {
479 13 100       1161 Carp::croak($@) if $opt{raise_error};
480 4         18 return undef;
481             }
482              
483 105 100       264638 return $opt{ref} ? $content_ref : $$content_ref;
484             }
485              
486              
487             sub decodable
488             {
489             # should match the Content-Encoding values that decoded_content can deal with
490 4     4 1 17 my $self = shift;
491 4         7 my @enc;
492 4         7 local $@;
493             # XXX preferably we should determine if the modules are available without loading
494             # them here
495 4         10 eval {
496 4         661 require Compress::Raw::Zlib;
497 4         5440 push(@enc, "gzip", "x-gzip");
498             };
499 4         10 eval {
500 4         1757 require IO::Uncompress::Inflate;
501 4         74619 require IO::Uncompress::RawInflate;
502 4         13 push(@enc, "deflate");
503             };
504 4         7 eval {
505 4         1004 require Compress::Raw::Bzip2;
506 4         2729 push(@enc, "x-bzip2", "bzip2");
507             };
508 4         14 eval {
509 4         933 require IO::Uncompress::Brotli;
510 4         1249 push(@enc, 'br');
511             };
512             # we don't care about announcing the 'identity', 'base64' and
513             # 'quoted-printable' stuff
514 4 100       49 return wantarray ? @enc : join(", ", @enc);
515             }
516              
517              
518             sub decode
519             {
520 12     12 1 38 my $self = shift;
521 12 100       31 return 1 unless $self->header("Content-Encoding");
522 11 100       34 if (defined(my $content = $self->decoded_content(charset => "none"))) {
523 10         45 $self->remove_header("Content-Encoding", "Content-Length", "Content-MD5");
524 10         27 $self->content($content);
525 10         48 return 1;
526             }
527 1         5 return 0;
528             }
529              
530              
531             sub encode
532             {
533 13     13 1 68 my($self, @enc) = @_;
534              
535 13 100       30 Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,;
536 12 100       31 Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,;
537              
538 11 100       38 return 1 unless @enc; # nothing to do
539              
540 10         85 my $content = $self->content;
541 10         24 for my $encoding (@enc) {
542 12 100 100     99 if ($encoding eq "identity" || $encoding eq "none") {
    100 100        
    100 100        
    100          
    100          
    100          
    100          
543             # nothing to do
544             }
545             elsif ($encoding eq "base64") {
546 2         11 require MIME::Base64;
547 2         14 $content = MIME::Base64::encode($content);
548             }
549             elsif ($encoding eq "gzip" || $encoding eq "x-gzip") {
550 2         1432 require IO::Compress::Gzip;
551 2         16286 my $output;
552 2 50       7 IO::Compress::Gzip::gzip(\$content, \$output, Minimal => 1)
553             or die "Can't gzip content: $IO::Compress::Gzip::GzipError";
554 2         4150 $content = $output;
555             }
556             elsif ($encoding eq "deflate") {
557 1         1149 require IO::Compress::Deflate;
558 1         1477 my $output;
559 1 50       5 IO::Compress::Deflate::deflate(\$content, \$output)
560             or die "Can't deflate content: $IO::Compress::Deflate::DeflateError";
561 1         1745 $content = $output;
562             }
563             elsif ($encoding eq "x-bzip2" || $encoding eq "bzip2") {
564 2         12 require IO::Compress::Bzip2;
565 2         6 my $output;
566 2 50       7 IO::Compress::Bzip2::bzip2(\$content, \$output)
567             or die "Can't bzip2 content: $IO::Compress::Bzip2::Bzip2Error";
568 2         1593 $content = $output;
569             }
570             elsif ($encoding eq "br") {
571 1         6 require IO::Compress::Brotli;
572 1         2 my $output;
573 1 50       1 eval { $output = IO::Compress::Brotli::bro($content) }
  1         1562  
574             or die "Can't brotli content: $@";
575 1         6 $content = $output;
576             }
577             elsif ($encoding eq "rot13") { # for the fun of it
578 1         4 $content =~ tr/A-Za-z/N-ZA-Mn-za-m/;
579             }
580             else {
581 1         5 return 0;
582             }
583             }
584 9         26 my $h = $self->header("Content-Encoding");
585 9 100       28 unshift(@enc, $h) if $h;
586 9         42 $self->header("Content-Encoding", join(", ", @enc));
587 9         32 $self->remove_header("Content-Length", "Content-MD5");
588 9         30 $self->content($content);
589 9         37 return 1;
590             }
591              
592              
593             sub as_string
594             {
595 73     73 1 924 my($self, $eol) = @_;
596 73 100       183 $eol = "\n" unless defined $eol;
597              
598             # The calculation of content might update the headers
599             # so we need to do that first.
600 73         164 my $content = $self->content;
601              
602 73 100 100     226 return join("", $self->{'_headers'}->as_string($eol),
603             $eol,
604             $content,
605             (@_ == 1 && length($content) &&
606             $content !~ /\n\z/) ? "\n" : "",
607             );
608             }
609              
610              
611             sub dump
612             {
613 14     14 1 57 my($self, %opt) = @_;
614 14         58 my $content = $self->content;
615 14         38 my $chopped = 0;
616 14 50       42 if (!ref($content)) {
617 14         28 my $maxlen = $opt{maxlength};
618 14 100       39 $maxlen = 512 unless defined($maxlen);
619 14 100 100     77 if ($maxlen && length($content) > $maxlen * 1.1 + 3) {
620 1         3 $chopped = length($content) - $maxlen;
621 1         3 $content = substr($content, 0, $maxlen) . "...";
622             }
623              
624 14         35 $content =~ s/\\/\\\\/g;
625 14         25 $content =~ s/\t/\\t/g;
626 14         23 $content =~ s/\r/\\r/g;
627              
628             # no need for 3 digits in escape for these
629 14         31 $content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
  8         30  
630              
631 14         27 $content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
  14         37  
632 14         25 $content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
  0         0  
633              
634             # remaining whitespace
635 14         21 $content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg;
  0         0  
636 14         23 $content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg;
  0         0  
637 14         18 $content =~ s/\n\z/\\n/;
638              
639 14         24 my $no_content = $opt{no_content};
640 14 100       33 $no_content = "(no content)" unless defined $no_content;
641 14 100       51 if ($content eq $no_content) {
    100          
642             # escape our $no_content marker
643 2         6 $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg;
  1         7  
644             }
645             elsif ($content eq "") {
646 7         12 $content = $no_content;
647             }
648             }
649              
650 14         25 my @dump;
651 14 100       33 push(@dump, $opt{preheader}) if $opt{preheader};
652 14         54 push(@dump, $self->{_headers}->as_string, $content);
653 14 100       38 push(@dump, "(+ $chopped more bytes not shown)") if $chopped;
654              
655 14         47 my $dump = join("\n", @dump, "");
656 14 100       66 $dump =~ s/^/$opt{prefix}/gm if $opt{prefix};
657              
658 14 100       140 print $dump unless defined wantarray;
659 14         86 return $dump;
660             }
661              
662             # allow subclasses to override what will handle individual parts
663             sub _part_class {
664 9     9   25 return __PACKAGE__;
665             }
666              
667             sub parts {
668 27     27 1 2490 my $self = shift;
669 27 100 100     115 if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
      100        
670 13         37 $self->_parts;
671             }
672 27         44 my $old = $self->{_parts};
673 27 100       55 if (@_) {
674 8 100       19 my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
  12         43  
675 8   100     21 my $ct = $self->content_type || "";
676 8 100       31 if ($ct =~ m,^message/,) {
    100          
677 3 100       192 Carp::croak("Only one part allowed for $ct content")
678             if @parts > 1;
679             }
680             elsif ($ct !~ m,^multipart/,) {
681 3         23 $self->remove_content_headers;
682 3         8 $self->content_type("multipart/mixed");
683             }
684 7         16 $self->{_parts} = \@parts;
685 7         14 _stale_content($self);
686             }
687 26 100       108 return @$old if wantarray;
688 11         41 return $old->[0];
689             }
690              
691             sub add_part {
692 4     4 1 7 my $self = shift;
693 4 100 100     9 if (($self->content_type || "") !~ m,^multipart/,) {
    100 66        
694 2         8 my $p = $self->_part_class->new(
695             $self->remove_content_headers,
696             $self->content(""),
697             );
698 2         7 $self->content_type("multipart/mixed");
699 2         4 $self->{_parts} = [];
700 2 100 66     4 if ($p->headers->header_field_names || $p->content ne "") {
701 1         4 push(@{$self->{_parts}}, $p);
  1         3  
702             }
703             }
704             elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
705 1         7 $self->_parts;
706             }
707              
708 4         8 push(@{$self->{_parts}}, @_);
  4         9  
709 4         8 _stale_content($self);
710 4         7 return;
711             }
712              
713             sub _stale_content {
714 11     11   19 my $self = shift;
715 11 100       35 if (ref($self->{_content}) eq "SCALAR") {
716             # must recalculate now
717 1         3 $self->_content;
718             }
719             else {
720             # just invalidate cache
721 10         18 delete $self->{_content};
722 10         21 delete $self->{_content_ref};
723             }
724             }
725              
726             # delegate all other method calls to the headers object.
727             our $AUTOLOAD;
728              
729             sub AUTOLOAD {
730 73     73   3679 my ( $package, $method ) = $AUTOLOAD =~ m/\A(.+)::([^:]*)\z/;
731 73         322 my $code = $_[0]->can($method);
732 73 100       428 Carp::croak(
733             qq(Can't locate object method "$method" via package "$package"))
734             unless $code;
735 70         213 goto &$code;
736             }
737              
738             sub can {
739 125     125 0 2947 my ( $self, $method ) = @_;
740              
741 125 100       630 if ( my $own_method = $self->SUPER::can($method) ) {
742 20         88 return $own_method;
743             }
744              
745 105 100       423 my $headers = ref($self) ? $self->headers : 'HTTP::Headers';
746 105 100       478 if ( $headers->can($method) ) {
747              
748             # We create the function here so that it will not need to be
749             # autoloaded or recreated the next time.
750 15     15   138 no strict 'refs';
  15         31  
  15         16745  
751             *$method = sub {
752 1003     1003   17402 local $Carp::Internal{ +__PACKAGE__ } = 1;
753 1003         1986 shift->headers->$method(@_);
754 72         633 };
755 72         263 return \&$method;
756             }
757              
758 33         449 return undef;
759             }
760              
761       1     sub DESTROY { } # avoid AUTOLOADing it
762              
763             # Private method to access members in %$self
764             sub _elem
765             {
766 266     266   434 my $self = shift;
767 266         380 my $elem = shift;
768 266         510 my $old = $self->{$elem};
769 266 100       637 $self->{$elem} = $_[0] if @_;
770 266         721 return $old;
771             }
772              
773              
774             # Create private _parts attribute from current _content
775             sub _parts {
776 14     14   22 my $self = shift;
777 14         54 my $ct = $self->content_type;
778 14 100       61 if ($ct =~ m,^multipart/,) {
    100          
    100          
779 4         24 require HTTP::Headers::Util;
780 4         14 my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
781 4 50       16 die "Assert" unless @h;
782 4         8 my %h = @{$h[0]};
  4         16  
783 4 100       13 if (defined(my $b = $h{boundary})) {
784 3         9 my $str = $self->content;
785 3         41 $str =~ s/\r?\n--\Q$b\E--.*//s;
786 3 50       53 if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
787 3         40 $self->{_parts} = [map $self->_part_class->parse($_),
788             split(/\r?\n--\Q$b\E\r?\n/, $str)]
789             }
790             }
791             }
792             elsif ($ct eq "message/http") {
793 4         700 require HTTP::Request;
794 4         1233 require HTTP::Response;
795 4         14 my $content = $self->content;
796 4 100       26 my $class = ($content =~ m,^(HTTP/.*)\n,) ?
797             "HTTP::Response" : "HTTP::Request";
798 4         27 $self->{_parts} = [$class->parse($content)];
799             }
800             elsif ($ct =~ m,^message/,) {
801 2         10 $self->{_parts} = [ $self->_part_class->parse($self->content) ];
802             }
803              
804 14   100     57 $self->{_parts} ||= [];
805             }
806              
807              
808             # Create private _content attribute from current _parts
809             sub _content {
810 10     10   15 my $self = shift;
811 10   50     28 my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed";
812 10 100       47 if ($ct =~ m,^\s*message/,i) {
813 2         20 _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
814 2         4 return;
815             }
816              
817 8         755 require HTTP::Headers::Util;
818 8         24 my @v = HTTP::Headers::Util::split_header_words($ct);
819 8 50       18 Carp::carp("Multiple Content-Type headers") if @v > 1;
820 8         12 @v = @{$v[0]};
  8         18  
821              
822 8         17 my $boundary;
823             my $boundary_index;
824 8         44 for (my @tmp = @v; @tmp;) {
825 10         26 my($k, $v) = splice(@tmp, 0, 2);
826 10 100       30 if ($k eq "boundary") {
827 2         3 $boundary = $v;
828 2         3 $boundary_index = @v - @tmp - 1;
829 2         4 last;
830             }
831             }
832              
833 8         12 my @parts = map $_->as_string($CRLF), @{$self->{_parts}};
  8         31  
834              
835 8         15 my $bno = 0;
836 8 100       30 $boundary = _boundary() unless defined $boundary;
837             CHECK_BOUNDARY:
838             {
839 8         12 for (@parts) {
  9         16  
840 15 100       42 if (index($_, $boundary) >= 0) {
841             # must have a better boundary
842 1         4 $boundary = _boundary(++$bno);
843 1         3 redo CHECK_BOUNDARY;
844             }
845             }
846             }
847              
848 8 100       28 if ($boundary_index) {
849 2         4 $v[$boundary_index] = $boundary;
850             }
851             else {
852 6         15 push(@v, boundary => $boundary);
853             }
854              
855 8         24 $ct = HTTP::Headers::Util::join_header_words(@v);
856 8         31 $self->{_headers}->header("Content-Type", $ct);
857              
858 8         48 _set_content($self, "--$boundary$CRLF" .
859             join("$CRLF--$boundary$CRLF", @parts) .
860             "$CRLF--$boundary--$CRLF",
861             1);
862             }
863              
864              
865             sub _boundary
866             {
867 7   100 7   18 my $size = shift || return "xYzZY";
868 1         476 require MIME::Base64;
869 1         709 my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
870 1         6 $b =~ s/[\W]/X/g; # ensure alnum only
871 1         3 $b;
872             }
873              
874              
875             1;
876              
877             =pod
878              
879             =encoding UTF-8
880              
881             =head1 NAME
882              
883             HTTP::Message - HTTP style message (base class)
884              
885             =head1 VERSION
886              
887             version 6.45
888              
889             =head1 SYNOPSIS
890              
891             use parent 'HTTP::Message';
892              
893             =head1 DESCRIPTION
894              
895             An C object contains some headers and a content body.
896             The following methods are available:
897              
898             =over 4
899              
900             =item $mess = HTTP::Message->new
901              
902             =item $mess = HTTP::Message->new( $headers )
903              
904             =item $mess = HTTP::Message->new( $headers, $content )
905              
906             This constructs a new message object. Normally you would want
907             construct C or C objects instead.
908              
909             The optional $header argument should be a reference to an
910             C object or a plain array reference of key/value pairs.
911             If an C object is provided then a copy of it will be
912             embedded into the constructed message, i.e. it will not be owned and
913             can be modified afterwards without affecting the message.
914              
915             The optional $content argument should be a string of bytes.
916              
917             =item $mess = HTTP::Message->parse( $str )
918              
919             This constructs a new message object by parsing the given string.
920              
921             =item $mess->headers
922              
923             Returns the embedded C object.
924              
925             =item $mess->headers_as_string
926              
927             =item $mess->headers_as_string( $eol )
928              
929             Call the as_string() method for the headers in the
930             message. This will be the same as
931              
932             $mess->headers->as_string
933              
934             but it will make your program a whole character shorter :-)
935              
936             =item $mess->content
937              
938             =item $mess->content( $bytes )
939              
940             The content() method sets the raw content if an argument is given. If no
941             argument is given the content is not touched. In either case the
942             original raw content is returned.
943              
944             If the C argument is given, the content is reset to its default value,
945             which is an empty string.
946              
947             Note that the content should be a string of bytes. Strings in perl
948             can contain characters outside the range of a byte. The C
949             module can be used to turn such strings into a string of bytes.
950              
951             =item $mess->add_content( $bytes )
952              
953             The add_content() methods appends more data bytes to the end of the
954             current content buffer.
955              
956             =item $mess->add_content_utf8( $string )
957              
958             The add_content_utf8() method appends the UTF-8 bytes representing the
959             string to the end of the current content buffer.
960              
961             =item $mess->content_ref
962              
963             =item $mess->content_ref( \$bytes )
964              
965             The content_ref() method will return a reference to content buffer string.
966             It can be more efficient to access the content this way if the content
967             is huge, and it can even be used for direct manipulation of the content,
968             for instance:
969              
970             ${$res->content_ref} =~ s/\bfoo\b/bar/g;
971              
972             This example would modify the content buffer in-place.
973              
974             If an argument is passed it will setup the content to reference some
975             external source. The content() and add_content() methods
976             will automatically dereference scalar references passed this way. For
977             other references content() will return the reference itself and
978             add_content() will refuse to do anything.
979              
980             =item $mess->content_charset
981              
982             This returns the charset used by the content in the message. The
983             charset is either found as the charset attribute of the
984             C header or by guessing.
985              
986             See L
987             for details about how charset is determined.
988              
989             =item $mess->decoded_content( %options )
990              
991             Returns the content with any C undone and, for textual content
992             (C values starting with C, exactly matching
993             C, or ending with C<+xml>), the raw content's character set
994             decoded into Perl's Unicode string format. Note that this
995             L
996             attempt to decode declared character sets for any other content types like
997             C or C. If the C
998             or C of the message is unknown, this method will fail by returning
999             C.
1000              
1001             The following options can be specified.
1002              
1003             =over
1004              
1005             =item C
1006              
1007             This overrides the charset parameter for text content. The value
1008             C can used to suppress decoding of the charset.
1009              
1010             =item C
1011              
1012             This overrides the default charset guessed by content_charset() or
1013             if that fails "ISO-8859-1".
1014              
1015             =item C
1016              
1017             If decoding fails because the charset specified in the Content-Type header
1018             isn't recognized by Perl's Encode module, then try decoding using this charset
1019             instead of failing. The C might be specified as C to simply
1020             return the string without any decoding of charset as alternative.
1021              
1022             =item C
1023              
1024             Abort decoding if malformed characters is found in the content. By
1025             default you get the substitution character ("\x{FFFD}") in place of
1026             malformed characters.
1027              
1028             =item C
1029              
1030             If TRUE then raise an exception if not able to decode content. Reason
1031             might be that the specified C or C is not
1032             supported. If this option is FALSE, then decoded_content() will return
1033             C on errors, but will still set $@.
1034              
1035             =item C
1036              
1037             If TRUE then a reference to decoded content is returned. This might
1038             be more efficient in cases where the decoded content is identical to
1039             the raw content as no data copying is required in this case.
1040              
1041             =back
1042              
1043             =item $mess->decodable
1044              
1045             =item HTTP::Message::decodable()
1046              
1047             This returns the encoding identifiers that decoded_content() can
1048             process. In scalar context returns a comma separated string of
1049             identifiers.
1050              
1051             This value is suitable for initializing the C request
1052             header field.
1053              
1054             =item $mess->decode
1055              
1056             This method tries to replace the content of the message with the
1057             decoded version and removes the C header. Returns
1058             TRUE if successful and FALSE if not.
1059              
1060             If the message does not have a C header this method
1061             does nothing and returns TRUE.
1062              
1063             Note that the content of the message is still bytes after this method
1064             has been called and you still need to call decoded_content() if you
1065             want to process its content as a string.
1066              
1067             =item $mess->encode( $encoding, ... )
1068              
1069             Apply the given encodings to the content of the message. Returns TRUE
1070             if successful. The "identity" (non-)encoding is always supported; other
1071             currently supported encodings, subject to availability of required
1072             additional modules, are "gzip", "deflate", "x-bzip2", "base64" and "br".
1073              
1074             A successful call to this function will set the C
1075             header.
1076              
1077             Note that C or C messages can't be encoded and
1078             this method will croak if you try.
1079              
1080             =item $mess->parts
1081              
1082             =item $mess->parts( @parts )
1083              
1084             =item $mess->parts( \@parts )
1085              
1086             Messages can be composite, i.e. contain other messages. The composite
1087             messages have a content type of C or C. This
1088             method give access to the contained messages.
1089              
1090             The argumentless form will return a list of C objects.
1091             If the content type of $msg is not C or C then
1092             this will return the empty list. In scalar context only the first
1093             object is returned. The returned message parts should be regarded as
1094             read-only (future versions of this library might make it possible
1095             to modify the parent by modifying the parts).
1096              
1097             If the content type of $msg is C then there will only be
1098             one part returned.
1099              
1100             If the content type is C, then the return value will be
1101             either an C or an C object.
1102              
1103             If a @parts argument is given, then the content of the message will be
1104             modified. The array reference form is provided so that an empty list
1105             can be provided. The @parts array should contain C
1106             objects. The @parts objects are owned by $mess after this call and
1107             should not be modified or made part of other messages.
1108              
1109             When updating the message with this method and the old content type of
1110             $mess is not C or C, then the content type is
1111             set to C and all other content headers are cleared.
1112              
1113             This method will croak if the content type is C and more
1114             than one part is provided.
1115              
1116             =item $mess->add_part( $part )
1117              
1118             This will add a part to a message. The $part argument should be
1119             another C object. If the previous content type of
1120             $mess is not C then the old content (together with all
1121             content headers) will be made part #1 and the content type made
1122             C before the new part is added. The $part object is
1123             owned by $mess after this call and should not be modified or made part
1124             of other messages.
1125              
1126             There is no return value.
1127              
1128             =item $mess->clear
1129              
1130             Will clear the headers and set the content to the empty string. There
1131             is no return value
1132              
1133             =item $mess->protocol
1134              
1135             =item $mess->protocol( $proto )
1136              
1137             Sets the HTTP protocol used for the message. The protocol() is a string
1138             like C or C.
1139              
1140             =item $mess->clone
1141              
1142             Returns a copy of the message object.
1143              
1144             =item $mess->as_string
1145              
1146             =item $mess->as_string( $eol )
1147              
1148             Returns the message formatted as a single string.
1149              
1150             The optional $eol parameter specifies the line ending sequence to use.
1151             The default is "\n". If no $eol is given then as_string will ensure
1152             that the returned string is newline terminated (even when the message
1153             content is not). No extra newline is appended if an explicit $eol is
1154             passed.
1155              
1156             =item $mess->dump( %opt )
1157              
1158             Returns the message formatted as a string. In void context print the string.
1159              
1160             This differs from C<< $mess->as_string >> in that it escapes the bytes
1161             of the content so that it's safe to print them and it limits how much
1162             content to print. The escapes syntax used is the same as for Perl's
1163             double quoted strings. If there is no content the string "(no
1164             content)" is shown in its place.
1165              
1166             Options to influence the output can be passed as key/value pairs. The
1167             following options are recognized:
1168              
1169             =over
1170              
1171             =item maxlength => $num
1172              
1173             How much of the content to show. The default is 512. Set this to 0
1174             for unlimited.
1175              
1176             If the content is longer then the string is chopped at the limit and
1177             the string "...\n(### more bytes not shown)" appended.
1178              
1179             =item no_content => $str
1180              
1181             Replaces the "(no content)" marker.
1182              
1183             =item prefix => $str
1184              
1185             A string that will be prefixed to each line of the dump.
1186              
1187             =back
1188              
1189             =back
1190              
1191             All methods unknown to C itself are delegated to the
1192             C object that is part of every message. This allows
1193             convenient access to these methods. Refer to L for
1194             details of these methods:
1195              
1196             $mess->header( $field => $val )
1197             $mess->push_header( $field => $val )
1198             $mess->init_header( $field => $val )
1199             $mess->remove_header( $field )
1200             $mess->remove_content_headers
1201             $mess->header_field_names
1202             $mess->scan( \&doit )
1203              
1204             $mess->date
1205             $mess->expires
1206             $mess->if_modified_since
1207             $mess->if_unmodified_since
1208             $mess->last_modified
1209             $mess->content_type
1210             $mess->content_encoding
1211             $mess->content_length
1212             $mess->content_language
1213             $mess->title
1214             $mess->user_agent
1215             $mess->server
1216             $mess->from
1217             $mess->referer
1218             $mess->www_authenticate
1219             $mess->authorization
1220             $mess->proxy_authorization
1221             $mess->authorization_basic
1222             $mess->proxy_authorization_basic
1223              
1224             =head1 AUTHOR
1225              
1226             Gisle Aas
1227              
1228             =head1 COPYRIGHT AND LICENSE
1229              
1230             This software is copyright (c) 1994 by Gisle Aas.
1231              
1232             This is free software; you can redistribute it and/or modify it under
1233             the same terms as the Perl 5 programming language system itself.
1234              
1235             =cut
1236              
1237             __END__