File Coverage

blib/lib/WARC/Record/Replay/HTTP/Response.pm
Criterion Covered Total %
statement 55 56 98.2
branch 13 14 92.8
condition 3 3 100.0
subroutine 8 8 100.0
pod 2 2 100.0
total 81 83 97.5


line stmt bran cond sub pod time code
1             package WARC::Record::Replay::HTTP::Response; # -*- CPerl -*-
2              
3 2     2   12 use strict;
  2         6  
  2         49  
4 2     2   9 use warnings;
  2         3  
  2         89  
5              
6             require HTTP::Response;
7             require WARC::Record::Replay::HTTP::Message;
8             our @ISA = qw(WARC::Record::Replay::HTTP::Message HTTP::Response);
9              
10 2     2   10 use WARC; *WARC::Record::Replay::HTTP::Response::VERSION = \$WARC::VERSION;
  2         6  
  2         282  
11              
12             require WARC::Record::Replay;
13              
14             WARC::Record::Replay::register
15             { $_->field('Content-Type') =~ m|^application/http; msgtype=response| }
16             \&_load_record;
17             WARC::Record::Replay::register
18             { $_->field('Content-Type') =~ m|^application/http| && $_->type eq 'response' }
19             \&_load_record;
20              
21             BEGIN {
22 2     2   12 use WARC::Record::Replay::HTTP;
  2         4  
  2         87  
23             $WARC::Record::Replay::HTTP::Response::{$_} =
24             $WARC::Record::Replay::HTTP::{$_}
25 2     2   822 for WARC::Record::Replay::HTTP::HTTP_PARSE_REs;
26             }
27              
28             sub _load_record {
29 28     28   44 my $record = shift;
30              
31 28         63 my $handle = $record->open_continued;
32 28         39 my $ob;
33              
34 28 100       74 if ($record->field('Content-Length')
35             < $WARC::Record::Replay::HTTP::Content_Deferred_Loading_Threshold) {
36             # The entire WARC block is smaller than the deferred loading threshold;
37             # this is an easy special case.
38 15         24 my $block;
39             {
40 15         20 local $/ = undef; # slurp
  15         52  
41 15         60 $block = <$handle>;
42             }
43             # Work around a bug in LWP that can append a trailing CR to the status
44             # message in an HTTP response parsed from a string.
45             #
46             # The response status message can contain spaces and LWP uses the
47             # LIMIT parameter to split to collect all text after the status code.
48             # This causes the parsed message to include the trailing CR.
49             #
50             # This problem does not occur with requests; the protocol version in a
51             # request cannot contain spaces, so the trailing CR is counted as
52             # whitespace and removed as the delimiter for a trailing empty field.
53 15         97 my $response = HTTP::Response->parse($block);
54 15         3488 { my $m;
  15         22  
55 15 50       28 if (($m = $response->message) =~ s/\015\z//) { $response->message($m) } }
  0         0  
56 15 100       154 if ($response->protocol =~ $HTTP__Version)
57 14         173 { $ob = $response; close $handle; $handle = undef }
  14         59  
  14         74  
58             else
59 1         28 { return undef }
60             } else {
61 13         47 my $code; my $reason; my $http_version;
  13         0  
62             {
63 13         17 local $/ = "\012";
  13         46  
64 13         61 my $line = <$handle>;
65 13         87 $line =~ s/[[:space:]]+$//; # trim trailing CR if present
66 13 100       118 return undef unless $line =~ $HTTP__Status_Line;
67             # $1 -- HTTP-Version $2 -- Status-Code $3 -- Reason-Phrase
68 11         35 $http_version = $1; $code = $2; $reason = $3;
  11         18  
  11         35  
69             }
70              
71 11         61 $ob = HTTP::Response->new($code, $reason);
72 11         476 $ob->protocol($http_version);
73             }
74              
75 25         144 $ob->{_warc_defer}{request} = 1;
76              
77 25         82 WARC::Record::Replay::HTTP::Message::_load_record($ob, $record, $handle);
78             }
79              
80             ## overridden methods for deferred loading of requests and redirects
81             sub request {
82 9     9 1 5411 my $self = shift;
83              
84 9 100 100     41 if ($self->{_warc_defer}{request} && $self->{_warc_record}{collection}) {
85             # replay other record
86 7         20 my $timestamp = $self->{_warc_record}->date;
87 9 100       72 my @requests = grep { $_->type eq 'request' and $_->date <= $timestamp }
88             ($self->{_warc_record}{collection}->search
89 7         29 (record_id => $self->{_warc_record}->fields->{'WARC-Concurrent-To'},
90             time => $timestamp)); # sorted by nearest timestamp
91             # filter removes all records after "this" record
92             # Therefore the list is sorted/filtered to "timestamp descending"
93 7         209 my $record = $requests[0]; # use latest record from the list
94              
95 7 100       26 $self->SUPER::request($record->replay) if $record;
96 7         79 $self->{_warc_defer}{request} = 0;
97             }
98              
99 9         31 return $self->SUPER::request(@_)
100             }
101              
102             sub previous {
103 1     1 1 2 my $self = shift;
104              
105             # This is a stub until a good way to find these is found.
106             # Additional metadata and/or index support will probably be needed.
107             # Lack of support is documented in WARC::Record::Replay::HTTP.
108              
109 1         1292 return $self->SUPER::previous(@_)
110             }
111              
112             1;
113             __END__