line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package LWP::UserAgent::Cached; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
140606
|
use strict; |
|
2
|
|
|
|
|
14
|
|
|
2
|
|
|
|
|
49
|
|
4
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
90
|
|
5
|
2
|
|
|
2
|
|
9
|
use Digest::MD5; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
46
|
|
6
|
2
|
|
|
2
|
|
421
|
use HTTP::Response; |
|
2
|
|
|
|
|
22465
|
|
|
2
|
|
|
|
|
74
|
|
7
|
2
|
|
|
2
|
|
12
|
use base 'LWP::UserAgent'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1257
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub new { |
12
|
3
|
|
|
3
|
1
|
13501
|
my ($class, %opts) = @_; |
13
|
|
|
|
|
|
|
|
14
|
3
|
|
|
|
|
10
|
my $cache_dir = delete $opts{cache_dir}; |
15
|
3
|
|
|
|
|
7
|
my $nocache_if = delete $opts{nocache_if}; |
16
|
3
|
|
|
|
|
8
|
my $recache_if = delete $opts{recache_if}; |
17
|
3
|
|
|
|
|
6
|
my $on_uncached = delete $opts{on_uncached}; |
18
|
3
|
|
|
|
|
8
|
my $cachename_spec = delete $opts{cachename_spec}; |
19
|
3
|
|
|
|
|
26
|
my $self = $class->SUPER::new(%opts, parse_head => 0); |
20
|
|
|
|
|
|
|
|
21
|
3
|
|
|
|
|
149
|
$self->{cache_dir} = $cache_dir; |
22
|
3
|
|
|
|
|
7
|
$self->{nocache_if} = $nocache_if; |
23
|
3
|
|
|
|
|
6
|
$self->{recache_if} = $recache_if; |
24
|
3
|
|
|
|
|
17
|
$self->{on_uncached} = $on_uncached; |
25
|
3
|
|
|
|
|
6
|
$self->{cachename_spec} = $cachename_spec; |
26
|
|
|
|
|
|
|
|
27
|
3
|
|
|
|
|
32
|
return $self; |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# generate getters and setters |
31
|
|
|
|
|
|
|
foreach my $opt_name (qw(cache_dir nocache_if recache_if on_uncached cachename_spec)) { |
32
|
2
|
|
|
2
|
|
32572
|
no strict 'refs'; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
1986
|
|
33
|
|
|
|
|
|
|
*$opt_name = sub { |
34
|
21
|
|
|
21
|
|
6967
|
my $self = shift; |
35
|
21
|
100
|
|
|
|
59
|
if (@_) { |
36
|
13
|
|
|
|
|
40
|
my $opt_val = $self->{$opt_name}; |
37
|
13
|
|
|
|
|
22
|
$self->{$opt_name} = shift; |
38
|
13
|
|
|
|
|
205
|
return $opt_val; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
8
|
|
|
|
|
29
|
return $self->{$opt_name}; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub parse_head { |
46
|
3
|
|
|
3
|
1
|
10042
|
my ($self, $bool) = @_; |
47
|
|
|
|
|
|
|
|
48
|
3
|
50
|
|
|
|
21
|
if ($bool) { |
49
|
0
|
|
|
|
|
0
|
die "parse_head() is disabled, because it may cause encoding troubles while saving cache"; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
3
|
|
|
|
|
24
|
$self->SUPER::parse_head($bool); |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub simple_request { |
56
|
34
|
|
|
34
|
1
|
36965
|
my $self = shift; |
57
|
34
|
50
|
|
|
|
101
|
unless (defined $self->{cache_dir}) { |
58
|
0
|
|
|
|
|
0
|
return $self->SUPER::simple_request(@_); |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
34
|
|
|
|
|
45
|
my $request = $_[0]; |
62
|
34
|
|
|
|
|
222
|
$request = $self->prepare_request($request); |
63
|
34
|
|
|
|
|
21165
|
my $fpath = $self->_get_cache_name($request); |
64
|
34
|
|
|
|
|
4878
|
my $response; |
65
|
|
|
|
|
|
|
my $no_collision_suffix; |
66
|
|
|
|
|
|
|
|
67
|
34
|
100
|
|
|
|
91
|
unless ($self->{was_redirect}) { |
68
|
28
|
|
|
|
|
47
|
@{$self->{last_cached}} = (); |
|
28
|
|
|
|
|
58
|
|
69
|
28
|
|
|
|
|
36
|
@{$self->{last_used_cache}} = (); |
|
28
|
|
|
|
|
58
|
|
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
34
|
100
|
|
|
|
1122
|
if (-e $fpath) { |
73
|
19
|
100
|
|
|
|
81
|
unless ($response = $self->_parse_cached_response($fpath, $request)) { |
74
|
|
|
|
|
|
|
# collision |
75
|
3
|
100
|
|
|
|
233
|
if (my @cache_list = <$fpath-*>) { |
76
|
2
|
|
|
|
|
8
|
foreach my $cache_file (@cache_list) { |
77
|
3
|
100
|
|
|
|
9
|
if ($response = $self->_parse_cached_response($cache_file, $request)) { |
78
|
1
|
|
|
|
|
3
|
$fpath = $cache_file; |
79
|
1
|
|
|
|
|
2
|
last; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
2
|
100
|
|
|
|
6
|
unless ($response) { |
84
|
1
|
|
|
|
|
8
|
$no_collision_suffix = sprintf('-%03d', substr($cache_list[-1], -3) + 1); |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
else { |
88
|
1
|
|
|
|
|
5
|
$no_collision_suffix = '-001'; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
19
|
100
|
100
|
|
|
105
|
if ($response && defined($self->{recache_if}) && $self->{recache_if}->($response, $fpath, $request)) { |
|
|
|
66
|
|
|
|
|
93
|
1
|
|
|
|
|
1733
|
$response = undef; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
34
|
100
|
|
|
|
138
|
unless ($response) { |
98
|
18
|
100
|
|
|
|
57
|
if (defined $self->{on_uncached}) { |
99
|
1
|
|
|
|
|
5
|
$self->{on_uncached}->($request); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
18
|
|
|
|
|
310
|
$response = $self->send_request(@_); |
103
|
|
|
|
|
|
|
|
104
|
18
|
100
|
100
|
|
|
764
|
if (!defined($self->{nocache_if}) || !$self->{nocache_if}->($response)) { |
105
|
17
|
100
|
|
|
|
71
|
if (defined $no_collision_suffix) { |
106
|
2
|
|
|
|
|
161
|
$fpath .= $no_collision_suffix; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
17
|
50
|
|
|
|
1893
|
if (open my $fh, '>:raw', $fpath) { |
110
|
17
|
|
|
|
|
102
|
print $fh $request->url, "\n"; |
111
|
17
|
|
|
|
|
793
|
print $fh $response->as_string("\n"); |
112
|
17
|
|
|
|
|
2521
|
close $fh; |
113
|
|
|
|
|
|
|
|
114
|
17
|
|
|
|
|
333
|
push @{$self->{last_cached}}, $fpath; |
|
17
|
|
|
|
|
70
|
|
115
|
17
|
|
|
|
|
32
|
push @{$self->{last_used_cache}}, $fpath; |
|
17
|
|
|
|
|
81
|
|
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
else { |
118
|
0
|
|
|
|
|
0
|
carp "open('$fpath', 'w'): $!"; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
else { |
123
|
16
|
|
|
|
|
22
|
push @{$self->{last_used_cache}}, $fpath; |
|
16
|
|
|
|
|
38
|
|
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
34
|
|
66
|
|
|
131
|
$self->{was_redirect} = $response->is_redirect && _in($request->method, $self->requests_redirectable); |
127
|
34
|
|
|
|
|
368
|
return $response; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub last_cached { |
131
|
3
|
|
|
3
|
1
|
6445
|
my $self = shift; |
132
|
|
|
|
|
|
|
return exists $self->{last_cached} ? |
133
|
3
|
50
|
|
|
|
55
|
@{$self->{last_cached}} : (); |
|
3
|
|
|
|
|
75
|
|
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub last_used_cache { |
137
|
2
|
|
|
2
|
1
|
7
|
my $self = shift; |
138
|
|
|
|
|
|
|
return exists $self->{last_used_cache} ? |
139
|
2
|
50
|
|
|
|
7
|
@{$self->{last_used_cache}} : (); |
|
2
|
|
|
|
|
11
|
|
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub uncache { |
143
|
1
|
|
|
1
|
1
|
48
|
my $self = shift; |
144
|
1
|
|
|
|
|
4
|
unlink $_ for $self->last_cached; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub _get_cache_name { |
148
|
35
|
|
|
35
|
|
319
|
my ($self, $request) = @_; |
149
|
|
|
|
|
|
|
|
150
|
35
|
100
|
66
|
|
|
123
|
if (defined($self->{cachename_spec}) && %{$self->{cachename_spec}}) { |
|
5
|
|
|
|
|
19
|
|
151
|
5
|
|
|
|
|
14
|
my $tmp_request = $request->clone(); |
152
|
5
|
|
|
|
|
1230
|
my $leave_only_specified; |
153
|
5
|
100
|
|
|
|
19
|
if (exists $self->{cachename_spec}{_headers}) { |
154
|
3
|
50
|
|
|
|
10
|
ref $self->{cachename_spec}{_headers} eq 'ARRAY' |
155
|
|
|
|
|
|
|
or croak 'cachename_spec->{_headers} should be array ref'; |
156
|
3
|
|
|
|
|
5
|
$leave_only_specified = 1; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
5
|
|
|
|
|
12
|
foreach my $hname ($tmp_request->headers->header_field_names) { |
160
|
15
|
100
|
66
|
|
|
352
|
if (exists $self->{cachename_spec}{$hname}) { |
|
|
100
|
|
|
|
|
|
161
|
5
|
100
|
|
|
|
11
|
if (defined $self->{cachename_spec}{$hname}) { |
162
|
4
|
|
|
|
|
11
|
$tmp_request->headers->header($hname, $self->{cachename_spec}{$hname}); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
else { |
165
|
1
|
|
|
|
|
3
|
$tmp_request->headers->remove_header($hname); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
elsif ($leave_only_specified && !_in($hname, $self->{cachename_spec}{_headers})) { |
169
|
9
|
|
|
|
|
18
|
$tmp_request->headers->remove_header($hname); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
5
|
100
|
|
|
|
164
|
if (exists $self->{cachename_spec}{_body}) { |
174
|
3
|
|
|
|
|
8
|
$tmp_request->content($self->{cachename_spec}{_body}); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
5
|
|
|
|
|
58
|
return $self->{cache_dir} . '/' . Digest::MD5::md5_hex($tmp_request->as_string("\n")); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
30
|
|
|
|
|
162
|
return $self->{cache_dir} . '/' . Digest::MD5::md5_hex($request->as_string("\n")); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub _parse_cached_response { |
184
|
22
|
|
|
22
|
|
56
|
my ($self, $cache_file, $request) = @_; |
185
|
|
|
|
|
|
|
|
186
|
22
|
|
|
|
|
107
|
my $fh; |
187
|
22
|
50
|
|
|
|
810
|
unless (open $fh, '<:raw', $cache_file) { |
188
|
0
|
|
|
|
|
0
|
carp "open('$cache_file', 'r'): $!"; |
189
|
0
|
|
|
|
|
0
|
return; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
22
|
|
|
|
|
288
|
my $url = <$fh>; |
193
|
22
|
|
|
|
|
168
|
$url =~ s/\s+$//; |
194
|
22
|
100
|
|
|
|
88
|
if ($url ne $request->url) { |
195
|
5
|
|
|
|
|
116
|
close $fh; |
196
|
5
|
|
|
|
|
32
|
return; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
17
|
|
|
|
|
577
|
local $/ = undef; |
200
|
17
|
|
|
|
|
218
|
my $response_str = <$fh>; |
201
|
17
|
|
|
|
|
181
|
close $fh; |
202
|
|
|
|
|
|
|
|
203
|
17
|
|
|
|
|
283
|
my $response = HTTP::Response->parse($response_str); |
204
|
17
|
|
|
|
|
3086
|
$response->request($request); |
205
|
|
|
|
|
|
|
|
206
|
17
|
50
|
|
|
|
167
|
if ($self->cookie_jar) { |
207
|
17
|
|
|
|
|
182
|
$self->cookie_jar->extract_cookies($response); |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
17
|
|
|
|
|
3427
|
return $response; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _in($$) { |
214
|
15
|
|
|
15
|
|
202
|
my ($what, $where) = @_; |
215
|
|
|
|
|
|
|
|
216
|
15
|
|
|
|
|
24
|
foreach my $item (@$where) { |
217
|
7
|
100
|
|
|
|
161
|
return 1 if ($what eq $item); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
9
|
|
|
|
|
22
|
return 0; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
1; |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=pod |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head1 NAME |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
LWP::UserAgent::Cached - LWP::UserAgent with simple caching mechanism |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head1 SYNOPSIS |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
use LWP::UserAgent::Cached; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
my $ua = LWP::UserAgent::Cached->new(cache_dir => '/tmp/lwp-cache'); |
236
|
|
|
|
|
|
|
my $resp = $ua->get('http://google.com/'); # makes http request |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
... |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
$resp = $ua->get('http://google.com/'); # no http request - will get it from the cache |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 DESCRIPTION |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
When you process content from some website, you will get page one by one and extract some data from this |
245
|
|
|
|
|
|
|
page with regexp, DOM parser or smth else. Sometimes we makes errors in our data extractors and realize this |
246
|
|
|
|
|
|
|
only when all 1_000_000 pages were processed. We should fix our extraction logic and start all process from the |
247
|
|
|
|
|
|
|
beginning. Please STOP! How about cache? Yes, you can cache all responses and second, third and other attempts will |
248
|
|
|
|
|
|
|
be very fast. |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
LWP::UserAgent::Cached is yet another LWP::UserAgent subclass with cache support. It stores |
251
|
|
|
|
|
|
|
cache in the files on local filesystem and if response already available in the cache returns it instead of making HTTP request. |
252
|
|
|
|
|
|
|
This module was writed because other available alternatives didn't meet my needs: |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=over |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=item L |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
caches responses on local filesystem and gets it from the cache only if online document was not modified |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item L |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
same as above but stores cache in memory |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item L |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
can record responses in the cache or get responses from the cache, but not both for one useragent |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=item L |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
seems it may cache responses and get responses from the cache, but has too much dependencies and unclear |
271
|
|
|
|
|
|
|
`delay' parameter |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=back |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 METHODS |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
All LWP::UserAgent methods and several new. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 new(...) |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
Creates new LWP::UserAgent::Cached object. Since LWP::UserAgent::Cached is LWP::UserAgent subclass it has all same |
282
|
|
|
|
|
|
|
parameters, but in additional it has some new optional pararmeters: |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
L |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
L |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
L |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
L |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
L |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
LWP::UserAgent::Cached creation example: |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
my $ua = LWP::UserAgent::Cached->new(cache_dir => 'cache/lwp', nocache_if => sub { |
297
|
|
|
|
|
|
|
my $response = shift; |
298
|
|
|
|
|
|
|
return $response->code >= 500; # do not cache any bad response |
299
|
|
|
|
|
|
|
}, recache_if => sub { |
300
|
|
|
|
|
|
|
my ($response, $path, $request) = @_; |
301
|
|
|
|
|
|
|
return $response->code == 404 && -M $path > 1; # recache any 404 response older than 1 day |
302
|
|
|
|
|
|
|
}, on_uncached => sub { |
303
|
|
|
|
|
|
|
my $request = shift; |
304
|
|
|
|
|
|
|
sleep 5 if $request->uri =~ '/category/\d+'; # delay before http requests inside "/category" |
305
|
|
|
|
|
|
|
}, cachename_spec => { |
306
|
|
|
|
|
|
|
'User-Agent' => undef, # omit agent while calculating cache name |
307
|
|
|
|
|
|
|
}); |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head2 cache_dir() or cache_dir($dir) |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Gets or sets path to the directory where cache will be stored. |
312
|
|
|
|
|
|
|
If not set useragent will behaves as LWP::UserAgent without cache support. |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head2 nocache_if() or nocache_if($sub) |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called after receiving each non-cached response. First parameter |
317
|
|
|
|
|
|
|
of this subroutine will be HTTP::Response object. This subroutine should return true if this response should |
318
|
|
|
|
|
|
|
not be cached and false otherwise. If not set all responses will be cached. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 recache_if() or recache_if($sub) |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called for each response available in the cache. First parameter |
323
|
|
|
|
|
|
|
of this subroutine will be HTTP::Response object, second - path to file with cache, third - HTTP::Request object. |
324
|
|
|
|
|
|
|
This subroutine should return true if response needs to be recached (new HTTP request will be made) and false otherwise. |
325
|
|
|
|
|
|
|
This $sub will be called only if response already available in the cache. Here you can also modify request for your needs. |
326
|
|
|
|
|
|
|
This will not change name of the file with cache. |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 on_uncached() or on_uncached($sub) |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Gets or sets reference to subroutine which will be called for each non-cached http request, before actually request. |
331
|
|
|
|
|
|
|
First parameter of this subroutine will be HTTP::Request object. Here you can also modify request for your needs. |
332
|
|
|
|
|
|
|
This will not change name of the file with cache. |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 cachename_spec() or cachename_spec($spec) |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Gets or sets hash reference to cache naming specification. In fact cache naming for each request based on request content. |
337
|
|
|
|
|
|
|
Internally it is md5_hex($request->as_string("\n")). But what if some of request headers in your program changed dinamically, e.g. |
338
|
|
|
|
|
|
|
User-Agent or Cookie? In such case caching will not work properly for you. We need some way to omit this headers when calculating |
339
|
|
|
|
|
|
|
cache name. This option is what you need. Specification hash should contain header name and header value which will be used |
340
|
|
|
|
|
|
|
(instead of values in request) while calculating cache name. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
For example we already have cache where 'User-Agent' value in the headers was 'Mozilla/5.0', but in the current version of the program |
343
|
|
|
|
|
|
|
it will be changed for each request. So we force specified that for cache name calculation 'User-Agent' should be 'Mozilla/5.0'. Cached |
344
|
|
|
|
|
|
|
request had not 'Accept' header, but in the current version it has. So we force specified do not include this header for cache name |
345
|
|
|
|
|
|
|
calculation. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
cachename_spec => { |
348
|
|
|
|
|
|
|
'User-Agent' => 'Mozilla/5.0', |
349
|
|
|
|
|
|
|
'Accept' => undef |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Specification hash may contain two special keys: '_body' and '_headers'. With '_body' key you can specify body content in the request |
353
|
|
|
|
|
|
|
for cache name calculation. For example to not include body content in cache name calculation set '_body' to undef or empty string. |
354
|
|
|
|
|
|
|
With '_headers' key you can specify which headers should be included in $request for cache name calculation. For example you can say to |
355
|
|
|
|
|
|
|
include only 'Host' and 'Referer'. '_headers' value should be array reference: |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
cachename_spec => { |
358
|
|
|
|
|
|
|
_body => undef, # omit body |
359
|
|
|
|
|
|
|
_headers => ['Host'], # include only host with value from request |
360
|
|
|
|
|
|
|
# It will be smth like: |
361
|
|
|
|
|
|
|
# md5_hex("METHOD url\r\nHost: host\r\n\r\n") |
362
|
|
|
|
|
|
|
# method and url will be included in any case |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Another example. Omit body, include only 'Host' and 'User-Agent' headers, use 'Host' value from request and specified 'User-Agent' value, |
366
|
|
|
|
|
|
|
in addition include referrer with specified value ('Referer' not specified in '_headers', but values from main specification hash has |
367
|
|
|
|
|
|
|
higher priority): |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
cachename_spec => { |
370
|
|
|
|
|
|
|
_body => '', |
371
|
|
|
|
|
|
|
_headers => ['Host', 'User-Agent'], |
372
|
|
|
|
|
|
|
'User-Agent' => 'Mozilla/5.0', |
373
|
|
|
|
|
|
|
'Referer' => 'http://www.com' |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
One more example. Calculate cache name based only on method and url: |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
cachename_spec => { |
379
|
|
|
|
|
|
|
_body =>'', |
380
|
|
|
|
|
|
|
_headers => [] |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 last_cached() |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Returns list with pathes to files with cache stored by last noncached response. List may contain more than one |
386
|
|
|
|
|
|
|
element if there was redirect. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 last_used_cache() |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Returns list with pathes to files with cache used in last response. This includes files just stored (last_cached) |
391
|
|
|
|
|
|
|
and files that may be already exists (cached earlier). List may contain more than one element if there was redirect. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 uncache() |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Removes last response from the cache. Use case example: |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
my $page = $ua->get($url)->decoded_content; |
398
|
|
|
|
|
|
|
if ($page =~ /Access for this ip was blocked/) { |
399
|
|
|
|
|
|
|
$ua->uncache(); |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=head1 Proxy and cache name |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
Here you can see how changing of proxy for useragent will affect cache name |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=head2 HTTP proxy |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
HTTP proxy support works out of the box and causes no problems. Changing of proxy server will not affect cache name |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head2 HTTPS proxy |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Proper HTTPS proxy support added in LWP since 6.06 and causes no problems. Changing of proxy server will not affect cache name |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 CONNECT proxy |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
CONNECT proxy support may be added using L. The problem is that this module uses |
417
|
|
|
|
|
|
|
LWP's request() for creation of CONNECT tunnel, so this response will be cached. But in fact it shouldn't. To workaround this |
418
|
|
|
|
|
|
|
you need to install C hook |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
$ua->nocache_if(sub { |
421
|
|
|
|
|
|
|
my $resp = shift; |
422
|
|
|
|
|
|
|
# do not cache creation of tunnel |
423
|
|
|
|
|
|
|
$resp->request->method eq 'CONNECT'; |
424
|
|
|
|
|
|
|
}); |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
After that it works without problems. Changing of proxy server will not affect cache name |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head2 SOCKS proxy |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
SOCKS proxy support may be added using L and causes no problems. |
431
|
|
|
|
|
|
|
Changing of proxy server will not affect cache name |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 SEE ALSO |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
L |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 COPYRIGHT |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Copyright Oleg G . |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
442
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |