File Coverage

blib/lib/WARC/Record/Replay/HTTP/Response.pm
Criterion Covered Total %
statement 55 56 98.2
branch 13 14 92.8
condition 3 3 100.0
subroutine 8 8 100.0
pod 2 2 100.0
total 81 83 97.5


line stmt bran cond sub pod time code
1             package WARC::Record::Replay::HTTP::Response; # -*- CPerl -*-
2              
3 4     4   25 use strict;
  4         8  
  4         105  
4 4     4   18 use warnings;
  4         8  
  4         186  
5              
6             require HTTP::Response;
7             require WARC::Record::Replay::HTTP::Message;
8             our @ISA = qw(WARC::Record::Replay::HTTP::Message HTTP::Response);
9              
10 4     4   20 use WARC; *WARC::Record::Replay::HTTP::Response::VERSION = \$WARC::VERSION;
  4         7  
  4         584  
11              
12             require WARC::Record::Replay;
13              
14             WARC::Record::Replay::register
15             { $_->field('Content-Type') =~ m|^application/http; msgtype=response| }
16             \&_load_record;
17             WARC::Record::Replay::register
18             { $_->field('Content-Type') =~ m|^application/http| && $_->type eq 'response' }
19             \&_load_record;
20              
21             BEGIN {
22 4     4   34 use WARC::Record::Replay::HTTP;
  4         10  
  4         185  
23             $WARC::Record::Replay::HTTP::Response::{$_} =
24             $WARC::Record::Replay::HTTP::{$_}
25 4     4   1910 for WARC::Record::Replay::HTTP::HTTP_PARSE_REs;
26             }
27              
28             sub _load_record {
29 68     68   116 my $record = shift;
30              
31 68         170 my $handle = $record->open_continued;
32 68         128 my $ob;
33              
34 68 100       202 if ($record->field('Content-Length')
35             < $WARC::Record::Replay::HTTP::Content_Deferred_Loading_Threshold) {
36             # The entire WARC block is smaller than the deferred loading threshold;
37             # this is an easy special case.
38 53         91 my $block;
39             {
40 53         81 local $/ = undef; # slurp
  53         202  
41 53         238 $block = <$handle>;
42             }
43             # Work around a bug in LWP that can append a trailing CR to the status
44             # message in an HTTP response parsed from a string.
45             #
46             # The response status message can contain spaces and LWP uses the
47             # LIMIT parameter to split to collect all text after the status code.
48             # This causes the parsed message to include the trailing CR.
49             #
50             # This problem does not occur with requests; the protocol version in a
51             # request cannot contain spaces, so the trailing CR is counted as
52             # whitespace and removed as the delimiter for a trailing empty field.
53 53         352 my $response = HTTP::Response->parse($block);
54 53         13138 { my $m;
  53         87  
55 53 50       100 if (($m = $response->message) =~ s/\015\z//) { $response->message($m) } }
  0         0  
56 53 100       609 if ($response->protocol =~ $HTTP__Version)
57 49         673 { $ob = $response; close $handle; $handle = undef }
  49         211  
  49         306  
58             else
59 4         123 { return undef }
60             } else {
61 15         44 my $code; my $reason; my $http_version;
  15         0  
62             {
63 15         24 local $/ = "\012";
  15         61  
64 15         65 my $line = <$handle>;
65 15         113 $line =~ s/[[:space:]]+$//; # trim trailing CR if present
66 15 100       146 return undef unless $line =~ $HTTP__Status_Line;
67             # $1 -- HTTP-Version $2 -- Status-Code $3 -- Reason-Phrase
68 13         41 $http_version = $1; $code = $2; $reason = $3;
  13         22  
  13         43  
69             }
70              
71 13         93 $ob = HTTP::Response->new($code, $reason);
72 13         612 $ob->protocol($http_version);
73             }
74              
75 62         276 $ob->{_warc_defer}{request} = 1;
76              
77 62         204 WARC::Record::Replay::HTTP::Message::_load_record($ob, $record, $handle);
78             }
79              
80             ## overridden methods for deferred loading of requests and redirects
81             sub request {
82 9     9 1 5260 my $self = shift;
83              
84 9 100 100     42 if ($self->{_warc_defer}{request} && $self->{_warc_record}{collection}) {
85             # replay other record
86 7         21 my $timestamp = $self->{_warc_record}->date;
87 9 100       95 my @requests = grep { $_->type eq 'request' and $_->date <= $timestamp }
88             ($self->{_warc_record}{collection}->search
89 7         57 (record_id => $self->{_warc_record}->fields->{'WARC-Concurrent-To'},
90             time => $timestamp)); # sorted by nearest timestamp
91             # filter removes all records after "this" record
92             # Therefore the list is sorted/filtered to "timestamp descending"
93 7         145 my $record = $requests[0]; # use latest record from the list
94              
95 7 100       25 $self->SUPER::request($record->replay) if $record;
96 7         78 $self->{_warc_defer}{request} = 0;
97             }
98              
99 9         36 return $self->SUPER::request(@_)
100             }
101              
102             sub previous {
103 1     1 1 2 my $self = shift;
104              
105             # This is a stub until a good way to find these is found.
106             # Additional metadata and/or index support will probably be needed.
107             # Lack of support is documented in WARC::Record::Replay::HTTP.
108              
109 1         10 return $self->SUPER::previous(@_)
110             }
111              
112             1;
113             __END__