File Coverage

blib/lib/HTTP/Caching.pm
Criterion Covered Total %
statement 287 344 83.4
branch 117 170 68.8
condition 20 28 71.4
subroutine 37 43 86.0
pod 1 2 50.0
total 462 587 78.7


line stmt bran cond sub pod time code
1             package HTTP::Caching;
2              
3             =head1 NAME
4              
5             HTTP::Caching - The RFC 7234 compliant brains to do caching right
6              
7             =head1 VERSION
8              
9             Version 0.05
10              
11             =cut
12              
13             our $VERSION = '0.08';
14              
15 8     8   652246 use strict;
  8         12  
  8         200  
16 8     8   28 use warnings;
  8         11  
  8         157  
17              
18 8     8   27 use Carp;
  8         8  
  8         331  
19 8     8   31 use Digest::MD5;
  8         9  
  8         199  
20 8     8   3335 use HTTP::Method;
  8         5047  
  8         27  
21 8     8   2994 use HTTP::Status ':constants';
  8         21001  
  8         2774  
22 8     8   3978 use List::MoreUtils qw{ any };
  8         58730  
  8         40  
23 8     8   7101 use Monkey::Patch::Action;
  8         19851  
  8         300  
24 8     8   40 use Time::HiRes;
  8         12  
  8         57  
25 8     8   3928 use URI;
  8         22457  
  8         206  
26              
27 8     8   3800 use Moo;
  8         72524  
  8         41  
28 8     8   13561 use MooX::Types::MooseLike::Base ':all';
  8         35608  
  8         2440  
29              
30             our $DEBUG = 0;
31              
32 8     8   3767 use Readonly;
  8         22702  
  8         22578  
33              
34             Readonly my $REUSE_NO_MATCH => 0; # mismatch of headers etc
35             Readonly my $REUSE_IS_OK => 1;
36             Readonly my $REUSE_IS_STALE => 2;
37             Readonly my $REUSE_REVALIDATE => 4;
38             Readonly my $REUSE_IS_STALE_OK => $REUSE_IS_STALE | $REUSE_IS_OK;
39             Readonly my $REUSE_IS_STALE_REVALIDATE => $REUSE_IS_STALE | $REUSE_REVALIDATE;
40              
41             =head1 SYNOPSIS
42              
43             my $chi_cache = CHI->new(
44             driver => 'File',
45             root_dir => '/tmp/HTTP_Caching',
46             file_extension => '.cache',
47             l1_cache => {
48             driver => 'Memory',
49             global => 1,
50             max_size => 1024*1024
51             }
52             );
53            
54             my $ua = LWP::UserAgent->new();
55            
56             my $http_caching = HTTP::Caching->new(
57             cache => $chi_cache,
58             cache_type => 'private',
59             forwarder => sub { return $ua->request(shift) }
60             );
61            
62             my $rqst = HTTP::Request->new( GET => 'http://example.com' );
63            
64             my $resp = $http_caching->make_request( $rqst );
65            
66             =cut
67              
68             has cache => (
69             is => 'ro',
70             required => 0,
71             isa => Maybe[ HasMethods['set', 'get'] ],
72             builder => sub {
73 0     0   0 warn __PACKAGE__ . " without cache, forwards requests and responses\n";
74             return undef
75 0         0 },
76             );
77              
78             has cache_meta => (
79             is => 'ro',
80             required => 0,
81             isa => Maybe[ HasMethods['set', 'get'] ],
82             lazy => 1,
83             builder => sub {
84             return shift->cache
85 5     5   1542 },
86             );
87              
88             has cache_type => (
89             is => 'ro',
90             required => 1,
91             isa => Maybe[ Enum['private', 'public'] ],
92             );
93              
94             has cache_control_request => (
95             is => 'ro',
96             required => 0,
97             isa => Str,
98             );
99              
100             has cache_control_response => (
101             is => 'ro',
102             required => 0,
103             isa => Str,
104             );
105              
106             has forwarder => (
107             is => 'ro',
108             required => 1,
109             isa => CodeRef,
110             );
111              
112             sub is_shared {
113 63     63 0 51 my $self = shift;
114            
115 63 100       147 return unless $self->cache_type;
116 32         73 return $self->cache_type eq 'public'
117             }
118             =head1 DESCRIPTION
119              
120             This module tries to provide caching for HTTP responses based on
121             L1.1): Caching|
122             http://tools.ietf.org/html/rfc7234>.
123              
124             Basicly it looks like the following steps below:
125              
126             =over
127              
128             =item
129              
130             For a presented request, it will check with the cache if there is a suitable
131             response available AND if it can be served or that it needs to be revalidated
132             with an upstream server.
133              
134             =item
135              
136             If there was no response available at all, or non were suitable, the (modified)
137             request will simply be forwarded.
138              
139             =item
140              
141             Depending on the response it gets back, it will do one of the following
142             dependingon the response status code:
143              
144             =over
145              
146             =item 200 OK
147              
148             it will update the cache and serve the response as is
149              
150             =item 304 Not Modified
151              
152             the cached version is valid, update the cache with new header info and serve the
153             cached response
154              
155             =item 500 Server Error
156              
157             in general, this is an error, and pass that onto the caller, however, in some
158             cases it could be fine to serve a (stale) cached response
159              
160             =back
161              
162             =back
163              
164             The above is a over-simplified version of the RFC
165              
166             =cut
167              
168             =head1 CONSTRUCTORS
169              
170             =head2 new
171              
172             my $http_caching = HTTP::Caching->new(
173             cache => $chi_cache,
174             cache_type => 'private',
175             cache_request => 'max-age=86400, min-fresh=60',
176             forwarder => sub { return $ua->request(shift) }
177             );
178              
179             Constructs a new C object that knows how to find cached responses
180             and will forward if needed.
181              
182             =head1 ATRRIBUTES
183              
184             =head2 cache
185              
186             Cache must be an object that MUST implement two methods
187              
188             =over
189              
190             =item sub set ($key, $data)
191              
192             to store data in the cache
193              
194             =item sub get ($key)
195              
196             to retrieve the data stored under the key
197              
198             =back
199              
200             This can be as simple as a hash, like we use in the tests:
201              
202             use Test::MockObject;
203            
204             my %cache;
205             my $mocked_cache = Test::MockObject->new;
206             $mocked_cache->mock( set => sub { $cache{$_[1]} = $_[2] } );
207             $mocked_cache->mock( get => sub { return $cache{$_[1]} } );
208              
209             But very convenient is to use L, which implements both required methods and
210             also has the option to use a L1 cache to speed things up even more. See the
211             SYNOPSIS for an example
212              
213             =head2 cache_type
214              
215             This must either be C<'private'> or C<'public'>. For most L, it
216             can be C<'private'> as it will probably not be shared with other processes on
217             the same macine. If this module is being used at the serverside in a
218             L then the cache will be used by all other clients connecting
219             to the server, and thus should be set to C<'public'>.
220              
221             Responses to Authenticated request should not be held in public caches and also
222             those responses that specifacally have their cache-control headerfield set to
223             C<'private'>.
224              
225             =head2 cache_control_request
226              
227             A string that contains the Cache-control header-field settings that will be sent
228             as default with the request. So you do not have to set those each time. See
229             RFC 7234 Section 5.2.1 for the list of available cache-control directives.
230              
231             =head2 cache_control_response
232              
233             Like the above, but those will be set for each response. This is useful for
234             server side caching. See RFC 7234 Section 5.2.2.
235              
236             =head2 forwarder
237              
238             This CodeRef must be a callback function that accepts a L and
239             returns a L. Since this module does not know how to do a request
240             it will use the C. It will be used to sent of validation requests
241             with C and/or C header-fields. Or if it does
242             not have a stored response it will send the original full request (with the
243             extra directives from C).
244              
245             Failing to return a C might cause the module to die or generate
246             a response itself with status code C<502 Bad Gateway>.
247              
248             =head1 METHODS
249              
250             =head2 make_request
251              
252             This is the only public provided method and will take a L. Like
253             described above, it might have to forward the (modified) request throug the
254             CodeRef in the C attribute.
255              
256             It will return a L from cache or a new retrieved one. This might
257             be a HTTP respons with a C<500 Error> message.
258              
259             In other cases it might die and let the caller know what was wrong, or send
260             another 5XX Error.
261              
262             =cut
263              
264             sub make_request {
265 18     18 1 56939 my $self = shift;
266            
267 18 100       69 croak __PACKAGE__
268             . " missing request"
269             unless defined $_[0];
270 17 100       66 croak __PACKAGE__
271             . " request is not a HTTP::Request [$_[0]]"
272             unless UNIVERSAL::isa($_[0],'HTTP::Request');
273            
274 16         123 my $presented_request = shift->clone;
275            
276 16         1581 my @params = @_;
277              
278             # add the default Cache-Control request header-field
279 16 100       67 $presented_request->headers->push_header(
280             cache_control => $self->cache_control_request,
281             ) if $self->cache_control_request();
282            
283 16         62 my $response;
284            
285 16 100       87 unless ($self->cache) {
    50          
286 5         12 $response = $self->_forward($presented_request, @params);
287             } elsif ( $self->_non_safe($presented_request) ) {
288            
289             # always forwad requests with unsafe methods
290 0         0 $response = $self->_forward($presented_request, @params);
291            
292             # when returned with a non-err, invalidate the cache
293 0 0 0     0 if ( $response->is_success or $response->is_redirect ) {
294 0         0 $self->_invalidate($presented_request)
295             }
296             } else {
297 11 100       80 if (my $cache_resp =
298             $self->_retrieve($presented_request)
299             ) {
300 5         7 $response = $cache_resp;
301             } else {
302 6         14 $response = $self->_forward($presented_request, @params);
303 6         14 $self->_store($presented_request, $response);
304             }
305             }
306            
307             # add the default Cache-Control response header-field
308 16 100       60 $response->headers->push_header(
309             cache_control => $self->cache_control_response,
310             ) if $self->cache_control_request;
311            
312 16         127 return $response;
313            
314             }
315              
316             sub _forward {
317 14     14   14 my $self = shift;
318            
319 14         18 my $forwarded_rqst = shift;
320            
321 14         41 my $forwarded_resp = $self->forwarder->($forwarded_rqst, @_);
322            
323 14 100       953 unless ( UNIVERSAL::isa($forwarded_resp,'HTTP::Response') ) {
324 1         12 carp __PACKAGE__
325             . " response is not a HTTP::Response [$forwarded_resp]";
326             # rescue from a failed forwarding, HTTP::Caching should not break
327 1         416 $forwarded_resp = HTTP::Response->new(502); # Bad Gateway
328             }
329            
330 14         40 return $forwarded_resp;
331             }
332              
333              
334             =head1 ABOUT CACHING
335              
336             If one would read the RFC7234 Section 2. Overview of Cache Operation, it becomes
337             clear that a cache can hold multiple responses for the same URI. Caches that
338             conform to CHI and many others, typically use a key / value storage. But this
339             will become a problem as that it can not use the URI as a key to the various
340             responses.
341              
342             The way it is solved is to create an intermediate meta-dictionary. This can be
343             stored by URI as key. Each response will simply be stored with a unique key and
344             these keys will be used as the entries in the dictionary.
345              
346             The meta-dictionary entries will hold (relevant) request and response headers so
347             that it willbe more quick to figure wich entrie can be used. Otherwise we would
348             had to read the entire responses to analyze them.
349              
350             =cut
351              
352             # _store may or may not store the response into the cache
353             #
354             # depending on the response it _may_store_in_cache()
355             #
356             sub _store {
357 9     9   9 my $self = shift;
358 9         9 my $rqst = shift;
359 9         11 my $resp = shift;
360            
361 9 50       15 return unless $self->_may_store_in_cache($rqst, $resp);
362            
363 9 50       25 if ( my $resp_key = $self->_store_response($resp) ) {
364 9         29 my $rqst_key = Digest::MD5::md5_hex($rqst->uri()->as_string);
365 9         75 my $rsqt_stripped = $rqst->clone; $rsqt_stripped->content(undef);
  9         796  
366 9         108 my $resp_stripped = $resp->clone; $resp_stripped->content(undef);
  9         598  
367 9         97 $self->_insert_meta_dict(
368             $rqst_key,
369             $resp_key,
370             {
371             resp_stripped => $resp_stripped,
372             rqst_stripped => $rsqt_stripped,
373             },
374             );
375 9         19 return $resp_key;
376             }
377            
378             return
379 0         0 }
380              
381             sub _store_response {
382 9     9   9 my $self = shift;
383 9         6 my $resp = shift;
384            
385 9         104 my $resp_key = Digest::MD5::md5_hex(Time::HiRes::time());
386            
387 9         11 eval { $self->cache->set( $resp_key => $resp ) };
  9         53  
388 9 50       407 return $resp_key unless $@;
389            
390 0         0 croak __PACKAGE__
391             . " could not store response in cache with key [$resp_key], $@";
392            
393             return
394 0         0 }
395              
396             sub _insert_meta_dict {
397 9     9   9 my $self = shift;
398 9         12 my $rqst_key = shift;
399 9         8 my $resp_key = shift;
400 9         8 my $meta_data = shift;
401            
402 9   100     178 my $meta_dict = $self->cache_meta->get($rqst_key) || {};
403 9         396 $meta_dict->{$resp_key} = $meta_data;
404 9         133 $self->cache_meta->set( $rqst_key => $meta_dict );
405            
406 9         346 return $meta_dict;
407             }
408              
409             sub _retrieve {
410 11     11   12 my $self = shift;
411 11         10 my $rqst_presented = shift;
412            
413 11         24 my $rqst_key = Digest::MD5::md5_hex($rqst_presented->uri()->as_string);
414 11         110 my $meta_dict = $self->_retrieve_meta_dict($rqst_key);
415            
416 11 100       35 return unless $meta_dict;
417            
418 5         14 my @meta_keys = keys %$meta_dict;
419            
420 5         10 foreach my $meta_key (@meta_keys) {
421             my $reuse_status = $self->_may_reuse_from_cache(
422             $rqst_presented,
423             $meta_dict->{$meta_key}{resp_stripped},
424             $meta_dict->{$meta_key}{rqst_stripped}
425 5         18 );
426 5         36 $meta_dict->{$meta_key}{reuse_status} = $reuse_status
427             }
428            
429             my @okay_keys =
430 5         38 grep { $meta_dict->{$_}{reuse_status} & $REUSE_IS_OK} @meta_keys;
  5         17  
431            
432 5 100       30 if (scalar @okay_keys) {
433             #
434             # TODO: do content negotiation if possible
435             #
436             # TODO: Sort to select lates response
437             #
438 2         4 my ($resp_key) = @okay_keys;
439 2         9 my $resp = $self->_retrieve_response($resp_key);
440 2         10 return $resp
441             }
442            
443             my @vldt_keys =
444 3         4 grep { $meta_dict->{$_}{reuse_status} & $REUSE_REVALIDATE} @meta_keys;
  3         8  
445            
446 3 50       14 if (scalar @vldt_keys) {
447             #
448             # RFC 7234 Section 4.3.1
449             #
450             # Sending a Validation Request
451             #
452 3         3 my ($resp_key) = @vldt_keys;
453 3         3 my $resp_stripped = $meta_dict->{$resp_key}{resp_stripped};
454            
455             # Assume we have validation headers, otherwise we'll need a HEAD request
456             #
457 3         6 my $etag = $resp_stripped->header('ETag');
458 3         67 my $last = $resp_stripped->header('Last-Modified');
459            
460 3         60 my $rqst_forwarded = $rqst_presented->clone;
461 3 100       273 $rqst_forwarded->header('If-None-Match' => $etag) if $etag;
462 3 100       67 $rqst_forwarded->header('If-Modified-Since' => $last) if $last;
463            
464 3         54 my $resp_forwarded = $self->_forward($rqst_forwarded);
465            
466             # RFC 7234 Section 4.3.3.
467             #
468             # Handling a Validation Response
469             #
470             # Cache handling of a response to a conditional request is dependent
471             # upon its status code:
472            
473            
474             # A 304 (Not Modified) response status code indicates that the
475             # stored response can be updated and reused; see Section 4.3.4.
476             #
477 3 50       9 if ($resp_forwarded->code == HTTP_NOT_MODIFIED) {
478 0         0 my $resp = $self->_retrieve_response($resp_key);
479 0         0 return $resp
480             #
481             # TODO: make it all compliant with Section 4.3.4 on how to select
482             # which stored responses need update
483             # TODO: ade 'Age' header
484             }
485            
486            
487             # A full response (i.e., one with a payload body) indicates that
488             # none of the stored responses nominated in the conditional request
489             # is suitable. Instead, the cache MUST use the full response to
490             # satisfy the request and MAY replace the stored response(s).
491             #
492 3 50       27 if ( not HTTP::Status::is_server_error($resp_forwarded->code) ) {
493 3         30 $self->_store($rqst_presented, $resp_forwarded);
494 3         10 return $resp_forwarded;
495             }
496            
497            
498             # However, if a cache receives a 5xx (Server Error) response while
499             # attempting to validate a response, it can either forward this
500             # response to the requesting client, or act as if the server failed
501             # to respond. In the latter case, the cache MAY send a previously
502             # stored response (see Section 4.2.4).
503             #
504 0 0       0 if ( HTTP::Status::is_server_error($resp_forwarded->code) ) {
505 0         0 return $resp_forwarded;
506             }
507             #
508             # TODO: check if we can use a cached stale version
509            
510            
511 0         0 return undef;
512             }
513            
514 0         0 return undef;
515             }
516              
517             sub _retrieve_meta_dict {
518 11     11   11 my $self = shift;
519 11         11 my $rqst_key = shift;
520            
521 11         168 my $meta_dict = $self->cache_meta->get($rqst_key);
522            
523 11         924 return $meta_dict;
524             }
525              
526             sub _retrieve_response {
527 2     2   24 my $self = shift;
528 2         5 my $resp_key = shift;
529            
530 2 50       4 if (my $resp = eval { $self->cache->get( $resp_key ) } ) {
  2         21  
531 2         133 return $resp
532             }
533            
534 0         0 carp __PACKAGE__
535             . " could not retrieve response from cache with key [$resp_key], $@";
536            
537             return
538 0         0 }
539              
540             sub _invalidate {
541 0     0   0 my $self = shift;
542 0         0 my $rqst_presented = shift;
543            
544 0         0 my $rqst_key = Digest::MD5::md5_hex($rqst_presented->uri()->as_string);
545 0         0 my $meta_dict = $self->_retrieve_meta_dict($rqst_key);
546            
547 0 0       0 return unless $meta_dict;
548            
549 0         0 my @meta_keys = keys %$meta_dict;
550            
551 0         0 foreach my $meta_key (@meta_keys) {
552 0         0 $self->_invalidate_response($meta_key);
553             }
554            
555 0         0 $self->_invalidate_meta_dict($rqst_key);
556            
557 0         0 return;
558             }
559              
560             sub _invalidate_meta_dict {
561 0     0   0 my $self = shift;
562 0         0 my $rqst_key = shift;
563            
564 0         0 $self->cache_meta->remove($rqst_key);
565            
566             return
567 0         0 }
568              
569             sub _invalidate_response {
570 0     0   0 my $self = shift;
571 0         0 my $resp_key = shift;
572            
573 0         0 $self->cache->remove($resp_key);
574            
575             return
576 0         0 }
577              
578             sub _non_safe {
579 11     11   12 my $self = shift;
580 11         10 my $rqst = shift;
581            
582 11         12 my $method = eval { HTTP::Method->new( uc $rqst->method ) };
  11         22  
583 11 50       246 return 1 unless $method; #safety can not be guaranteed
584            
585 11         70 return not $method->is_method_safe
586             }
587              
588             # _may_store_in_cache()
589             #
590             # based on some headers in the request, but mostly on those in the new response
591             # the cache can hold a copy of it or not.
592             #
593             # see RFC 7234 Section 3: Storing Responses in Caches
594             #
595             sub _may_store_in_cache {
596 29     29   40092 my $self = shift;
597 29         31 my $rqst = shift;
598 29         28 my $resp = shift;
599            
600             # $msg->header('cache-control) is supposed to return a list, but only works
601             # if it has been generated as a list, not as string with 'comma'
602             # $msg->header in scalar context gives a ', ' joined string
603             # which we now split and trim whitespace
604             my @rqst_directives =
605 29   100     104 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  2         27  
  2         5  
  2         3  
  2         4  
606             split ',', scalar $rqst->header('cache-control') || '';
607             my @resp_directives =
608 29   100     1008 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  12         262  
  12         28  
  12         19  
  12         32  
609             split ',', scalar $resp->header('cache-control') || '';
610            
611            
612             # RFC 7234 Section 3
613             #
614             # A cache MUST NOT store a response to any request, unless:
615            
616             # RFC 7234 Section 3 #1
617             #
618             # the request method is understood by the cache and defined as being
619             # cacheable
620             #
621 29         479 do {
622 29         83 my $string = $rqst->method;
623 29         184 my $method = eval { HTTP::Method->new($string) };
  29         77  
624            
625 29 100       521 unless ($method) {
626 1 50       10 carp "NO CACHE: method is not understood: '$string'\n"
627             if $DEBUG;
628 1         337 return 0
629             }
630 28 100       159 unless ($method->is_method_cachable) { # XXX Fix cacheable
631 1 50       14 carp "NO CACHE: method is not cacheable: '$string'\n"
632             if $DEBUG;
633 1         336 return 0
634             }
635             };
636            
637             # RFC 7234 Section 3 #2
638             #
639             # the response status code is understood by the cache
640             #
641 27         111 do {
642 27         57 my $code = $resp->code;
643 27         151 my $message = eval { HTTP::Status::status_message($code) };
  27         60  
644            
645 27 100       102 unless ($message) {
646 1 50       19 carp "NO CACHE: response status code is not understood: '$code'\n"
647             if $DEBUG;
648 1         357 return 0
649             }
650             };
651            
652            
653             # RFC 7234 Section 3 #3
654             #
655             # the "no-store" cache directive (see Section 5.2) does not appear
656             # in request or response header fields
657             #
658 26         20 do {
659 26 100   1   251 if (any { lc $_ eq 'no-store' } @rqst_directives) {
  1         3  
660 1 50       11 carp "NO CACHE: 'no-store' appears in request cache directives\n"
661             if $DEBUG;
662 1         324 return 0
663             }
664 25 100   11   118 if (any { lc $_ eq 'no-store' } @resp_directives) {
  11         27  
665 1 50       10 carp "NO CACHE: 'no-store' appears in response cache directives\n"
666             if $DEBUG;
667 1         298 return 0
668             }
669             };
670            
671             # RFC 7234 Section 3 #4
672             #
673             # the "private" response directive (see Section 5.2.2.6) does not
674             # appear in the response, if the cache is shared
675             #
676 24 100       64 if ($self->is_shared) {
677 6 100   5   18 if (any { lc $_ eq 'private' } @resp_directives) {
  5         13  
678 1 50       12 carp "NO CACHE: 'private' appears in cache directives when shared\n"
679             if $DEBUG;
680 1         318 return 0
681             }
682             };
683            
684             # RFC 7234 Section 3 #5
685             #
686             # the Authorization header field (see Section 4.2 of [RFC7235]) does
687             # not appear in the request, if the cache is shared, unless the
688             # response explicitly allows it (see Section 3.2)
689             #
690 23 100       66 if ($self->is_shared) {
691 5 100       16 if ($rqst->header('Authorization')) {
692 4 100   3   105 if (any { lc $_ eq 'must-revalidate' } @resp_directives) {
  3         7  
693 1 50       12 carp "DO CACHE: 'Authorization' appears: must-revalidate\n"
694             if $DEBUG;
695 1         339 return 1
696             }
697 3 100   2   10 if (any { lc $_ eq 'public' } @resp_directives) {
  2         4  
698 1 50       11 carp "DO CACHE: 'Authorization' appears: public\n"
699             if $DEBUG;
700 1         291 return 1
701             }
702 2 100   1   7 if (any { lc $_ =~ m/^s-maxage=\d+$/ } @resp_directives) {
  1         5  
703 1 50       12 carp "DO CACHE: 'Authorization' appears: s-maxage\n"
704             if $DEBUG;
705 1         307 return 1
706             }
707 1 50       10 carp "NO CACHE: 'Authorization' appears in request when shared\n"
708             if $DEBUG;
709 1         323 return 0
710             }
711             };
712            
713            
714             # RFC 7234 Section 3 #6
715             #
716             # the response either:
717             #
718             # - contains an Expires header field (see Section 5.3)
719             #
720 19         43 do {
721 19         34 my $expires_at = $resp->header('Expires');
722            
723 19 100       463 if ($expires_at) {
724 1 50       14 carp "OK CACHE: 'Expires' at: $expires_at\n"
725             if $DEBUG;
726 1         305 return 1
727             }
728             };
729            
730             # - contains a max-age response directive (see Section 5.2.2.8)
731             #
732 18         15 do {
733 18 100   6   62 if (any { lc $_ =~ m/^max-age=\d+$/ } @resp_directives) {
  6         28  
734 2 100       33 carp "DO CACHE: 'max-age' appears in response cache directives\n"
735             if $DEBUG;
736 2         314 return 1
737             }
738             };
739            
740             # - contains a s-maxage response directive (see Section 5.2.2.9)
741             # and the cache is shared
742             #
743 16 100       38 if ($self->is_shared) {
744 1 50   1   5 if (any { lc $_ =~ m/^s-maxage=\d+$/ } @resp_directives) {
  1         10  
745 1 50       13 carp "DO CACHE: 's-maxage' appears in response cache directives\n"
746             if $DEBUG;
747 1         375 return 1
748             }
749             };
750            
751            
752             # - contains a Cache Control Extension (see Section 5.2.3) that
753             # allows it to be cache
754             #
755             # TODO it looks like this is only used for special defined cache-control
756             # directives. As such, those need special treatment.
757             # It does not seem a good idea to hardcode those here, a config would
758             # be a better solution.
759            
760            
761             # - has a status code that is defined as cacheable by default (see
762             # Section 4.2.2)
763             #
764 15         14 do {
765 15         34 my $code = $resp->code;
766            
767 15 100       92 if (HTTP::Status::is_cacheable_by_default($code)) {
768 9 100       41 carp "DO CACHE: status code is cacheable by default: '$code'\n"
769             if $DEBUG;
770 9         499 return 1
771             }
772             };
773            
774             # - contains a public response directive (see Section 5.2.2.5)
775             #
776 6         10 do {
777 6 100   3   19 if (any { lc $_ eq 'public' } @resp_directives) {
  3         7  
778 1 50       12 carp "DO CACHE: 'public' appears in response cache directives\n"
779             if $DEBUG;
780 1         363 return 1
781             }
782             };
783            
784             # Falls trough ... SHOULD NOT store in cache
785             #
786 5 50       58 carp "NO CACHE: Does not match the six criteria above\n"
787             if $DEBUG;
788            
789 5         1699 return undef;
790             }
791              
792              
793             # _may_reuse_from_cache
794             #
795             # my $status = _may_reuse_from_cache (
796             # $presented_request,
797             # $stored_response,
798             # $associated_request,
799             # )
800             #
801             # will return false if the stored response can not be used for this request at
802             # all. In all other cases, it either
803             # - can be used, because it matches all the criteria and os fresh
804             # - is stale and can be used if needed
805             # - needs revalidation
806             #
807             # see RFC 7234 Section 4: Constructing Responses from Caches
808             #
809             sub _may_reuse_from_cache {
810 23     23   34038 my $self = shift;
811 23         26 my $rqst_presented = shift;
812 23         19 my $resp_stored = shift;
813 23         36 my $rqst_associated = shift;
814            
815             # RFC 7234 Section 4
816             #
817             # When presented with a request, a cache MUST NOT reuse a stored
818             # response, unless:
819            
820            
821             # RFC 7234 Section 4 #1
822             #
823             # The presented effective request URI (Section 5.5 of [RFC7230]) and
824             # that of the stored response match
825             #
826 23         21 do {
827 23 100       54 unless ( URI::eq($rqst_presented->uri, $rqst_associated->uri) ) {
828 1 50       122 carp "NO REUSE: URI's do not match\n"
829             if $DEBUG;
830 1         330 return $REUSE_NO_MATCH
831             }
832             };
833            
834            
835             # RFC 7234 Section 4 #2
836             #
837             # the request method associated with the stored response allows it
838             # to be used for the presented request
839             #
840 22         2877 do {
841 22 100       48 unless ( $rqst_presented->method eq $rqst_associated->method ) {
842 2 50       36 carp "NO REUSE: Methods do not match\n"
843             if $DEBUG;
844 2         668 return $REUSE_NO_MATCH
845             }
846             };
847             #
848             # NOTE: We did not make the test case insensitive, according to RFC 7231.
849             #
850             # NOTE: We might want to extend it so that we can serve a chopped response
851             # where the presented request is a HEAD request
852            
853            
854             # RFC 7234 Section 4 #3
855             #
856             # selecting header fields nominated by the stored response (if any)
857             # match those presented (see Section 4.1)
858 20 100       240 if ( $resp_stored->header('Vary') ) {
859            
860 5 100       150 if ( scalar $resp_stored->header('Vary') eq '*' ) {
861 1 50       30 carp "NO REUSE: 'Vary' equals '*'\n"
862             if $DEBUG;
863 1         300 return $REUSE_NO_MATCH
864             }
865            
866             #create an array with nominated headers
867             my @vary_headers =
868 4   50     90 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  4         81  
  4         8  
  4         6  
  4         9  
869             split ',', scalar $resp_stored->header('Vary') || '';
870            
871 4         21 foreach my $header ( @vary_headers ) {
872 4   100     9 my $header_presented = $rqst_presented->header($header) || '';
873 4   100     108 my $header_associated = $rqst_associated->header($header) || '';
874 4 100       86 unless ( $header_presented eq $header_associated ) {
875 2 50       20 carp "NO REUSE: Nominated headers in 'Vary' do not match\n"
876             if $DEBUG;
877 2         655 return $REUSE_NO_MATCH
878             }
879             }
880             };
881             #
882             # TODO: According to Section 4.1, we could do normalization and reordering
883             # This requires further investigation and is worth doing to increase
884             # the hit chance
885            
886            
887             # RFC 7234 Section 4 #4
888             #
889             # the presented request does not contain the no-cache pragma
890             # (Section 5.4), nor the no-cache cache directive (Section 5.2.1),
891             # unless the stored response is successfully validated
892             # (Section 4.3)
893             #
894 17         490 do {
895             # generate an array with cache-control directives
896             my @rqst_directives =
897 17   100     51 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  1         25  
  1         2  
  1         4  
  1         2  
898             split ',', scalar $rqst_presented->header('cache-control') || '';
899            
900 17 100   1   480 if (any { lc $_ eq 'no-cache' } @rqst_directives) {
  1         4  
901 1 50       11 carp "NO REUSE: 'no-cache' appears in request cache directives\n"
902             if $DEBUG;
903 1         380 return $REUSE_REVALIDATE
904             }
905            
906 16 50 100     55 if (
907             $rqst_presented->header('Pragma')
908             and
909             scalar $rqst_presented->header('Pragma') =~ /no-cache/
910             ) {
911 1 50       52 carp "NO REUSE: Pragma: 'no-cache' appears in request\n"
912             if $DEBUG;
913 1         294 return $REUSE_REVALIDATE
914             }
915             };
916            
917            
918             # RFC 7234 Section 4 #5
919             #
920             # the stored response does not contain the no-cache cache directive
921             # (Section 5.2.2.2), unless it is successfully validated
922             # (Section 4.3)
923             #
924 15         318 do {
925             # generate an array with cache-control directives
926             my @resp_directives =
927 15   100     35 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  4         97  
  4         11  
  4         9  
  4         12  
928             split ',', scalar $resp_stored->header('cache-control') || '';
929            
930 15 100   4   289 if (any { lc $_ eq 'no-cache' } @resp_directives) {
  4         15  
931 1 50       11 carp "NO REUSE: 'no-cache' appears in response cache directives\n"
932             if $DEBUG;
933 1         329 return $REUSE_REVALIDATE
934             }
935             };
936            
937             # RFC 7234 Section 4 #6
938             #
939             # the stored response is either:
940             #
941             # - fresh (see Section 4.2), or
942             #
943 14         25 do {
944 14 100       44 if ($resp_stored->is_fresh(heuristic_expiry => undef)) {
945 4 100       8997 carp "DO REUSE: Response is fresh\n"
946             if $DEBUG;
947 4         652 return $REUSE_IS_OK
948             }
949             };
950             #
951             # TODO: heuristic_expiry => undef should be a option, not hardcoded
952            
953             # - allowed to be served stale (see Section 4.2.4), or
954             #
955 10         7806 do {
956             # generate an array with cache-control directives
957             my @resp_directives =
958 10   50     25 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  0         0  
  0         0  
  0         0  
  0         0  
959             split ',', scalar $resp_stored->header('cache-control') || '';
960            
961             # RFC 7234 Section 5.2.2.1
962             #
963             # must-revalidate
964             #
965 10 50   0   300 if (any { lc $_ eq 'must-revalidate' } @resp_directives) {
  0         0  
966 0 0       0 carp "NO REUSE: Stale but 'must-revalidate'\n"
967             if $DEBUG;
968 0         0 return $REUSE_IS_STALE_REVALIDATE
969             }
970            
971             # RFC 7234 Section 5.2.2.7
972             #
973             # proxy-revalidate
974             #
975 10 50 33     47 if (
976 0     0   0 any { lc $_ eq 'proxy-revalidate' } @resp_directives
977             and
978             $self->is_shared
979             ) {
980 0 0       0 carp "NO REUSE: Stale but 'proxy-revalidate'\n"
981             if $DEBUG;
982 0         0 return $REUSE_IS_STALE_REVALIDATE
983             }
984            
985            
986             # RFC 7234 Section 5.2.1.2
987             #
988             # max-stale = ...
989             #
990             my @rqst_directives =
991 10   50     30 map { my $str = $_; $str =~ s/^\s+//; $str =~ s/\s+$//; $str }
  0         0  
  0         0  
  0         0  
  0         0  
992             split ',', scalar $rqst_presented->header('cache-control') || '';
993            
994             my ($directive) =
995 10         224 grep { $_ =~ /^max-stale\s*=?\s*\d*$/ } @rqst_directives;
  0         0  
996            
997 10 50       22 if ($directive) {
998 0         0 my ($max_stale) = $directive =~ /:(\d+)$/;
999 0 0       0 unless ($max_stale) {
1000 0 0       0 carp "DO REUSE: 'max-stale' for unlimited time\n"
1001             if $DEBUG;
1002 0         0 return $REUSE_IS_STALE_OK
1003             }
1004 0         0 my $freshness = # not fresh!!! so, this is a negative number
1005             $resp_stored->freshness_lifetime(heuristic_expiry => undef);
1006 0 0       0 if ( abs($freshness) < $max_stale ) {
1007 0 0       0 carp "DO REUSE: 'max-stale' not exceeded\n"
1008             if $DEBUG;
1009 0         0 return $REUSE_IS_STALE_OK
1010             }
1011             }
1012            
1013             };
1014            
1015             # - successfully validated (see Section 4.3).
1016             #
1017 10         9 do {
1018 10 100       92 carp "NO REUSE: must successfully validated"
1019             if $DEBUG;
1020 10         2622 return $REUSE_IS_STALE_REVALIDATE
1021             };
1022            
1023             }
1024              
1025             # HTTP::Status::is_cacheable_by_default
1026             #
1027             # that subroutine is missing. Until it's added there, it's been monkey-patched
1028             # here.
1029             #
1030             # RFC 7231 - HTTP/1.1 Semantics and Content
1031             # Section 6.1. Overview of Status Codes
1032             #
1033             # Responses with status codes that are defined as cacheable by default
1034             # (e.g., 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, and 501 in
1035             # this specification) can be reused by a cache with heuristic
1036             # expiration unless otherwise indicated by the method definition or
1037             # explicit cache controls [RFC7234]; all other status codes are not
1038             # cacheable by default.
1039             #
1040             my $handle = Monkey::Patch::Action::patch_package (
1041             'HTTP::Status', 'is_cacheable_by_default', 'add', sub {
1042             my $code = shift;
1043             $code = $code +0;
1044            
1045             return any {$_ == $code} (200,203,204,206,300,301,404,405,410,414,501)
1046             }
1047             );
1048              
1049              
1050             1;