File Coverage

blib/lib/FCGI/Buffer.pm
Criterion Covered Total %
statement 385 846 45.5
branch 177 530 33.4
condition 81 275 29.4
subroutine 27 29 93.1
pod 5 5 100.0
total 675 1685 40.0


line stmt bran cond sub pod time code
1             package FCGI::Buffer;
2              
3 12     12   3768657 use strict;
  12         100  
  12         352  
4 12     12   77 use warnings;
  12         24  
  12         339  
5              
6             # FIXME: save_to treats ?arg1=a&arg2=b and ?arg2=b&arg1=a as different
7             # FIXME: save_to treats /cgi-bin/foo.fcgi and /cgi-bin2/foo.fcgi as the same
8              
9 12     12   69 use Digest::MD5;
  12         24  
  12         394  
10 12     12   63 use File::Path;
  12         24  
  12         698  
11 12     12   72 use File::Spec;
  12         38  
  12         314  
12 12     12   4840 use IO::String;
  12         33201  
  12         349  
13 12     12   5634 use CGI::Info;
  12         730936  
  12         736  
14 12     12   128 use Carp;
  12         28  
  12         730  
15 12     12   5504 use HTTP::Date;
  12         46241  
  12         714  
16 12     12   20787 use DBI;
  12         226626  
  12         1009  
17              
18             =head1 NAME
19              
20             FCGI::Buffer - Verify, Cache and Optimise FCGI Output
21              
22             =head1 VERSION
23              
24             Version 0.19
25              
26             =cut
27              
28             our $VERSION = '0.19';
29              
30             =head1 SYNOPSIS
31              
32             FCGI::Buffer verifies the HTML that you produce by passing it through
33             C<HTML::Lint>.
34              
35             FCGI::Buffer optimises FCGI programs by reducing, filtering and compressing
36             output to speed up the transmission and by nearly seamlessly making use of
37             client and server caches.
38              
39             To make use of client caches, that is to say to reduce needless calls
40             to your server asking for the same data:
41              
42             use FCGI;
43             use FCGI::Buffer;
44             # ...
45             my $request = FCGI::Request();
46             while($request->FCGI::Accept() >= 0) {
47             my $buffer = FCGI::Buffer->new();
48             $buffer->init(
49             optimise_content => 1,
50             lint_content => 0,
51             );
52             # ...
53             }
54              
55             To also make use of server caches, that is to say to save regenerating
56             output when different clients ask you for the same data, you will need
57             to create a cache.
58             But that's simple:
59              
60             use FCGI;
61             use CHI;
62             use FCGI::Buffer;
63              
64             # ...
65             my $request = FCGI::Request();
66             while($request->FCGI::Accept() >= 0) {
67             my $buffer = FCGI::Buffer->new();
68             $buffer->init(
69             optimise_content => 1,
70             lint_content => 0,
71             cache => CHI->new(driver => 'File')
72             );
73             if($buffer->is_cached()) {
74             # Nothing has changed - use the version in the cache
75             $request->Finish();
76             next;
77             # ...
78             }
79             }
80              
81             To temporarily prevent the use of server-side caches, for example whilst
82             debugging before publishing a code change, set the NO_CACHE environment variable
83             to any non-zero value.
84             This will also stop ETag being added to the header.
85             If you get errors about Wide characters in print it means that you've
86             forgotten to emit pure HTML on non-ASCII characters.
87             See L<HTML::Entities>.
88             As a hack work around you could also remove accents and the like by using
89             L<Text::Unidecode>,
90             which works well but isn't really what you want.
91              
92             =head1 SUBROUTINES/METHODS
93              
94             =cut
95              
96 12     12   95 use constant MIN_GZIP_LEN => 32;
  12         25  
  12         130789  
97              
98             =head2 new
99              
100             Create an FCGI::Buffer object. Do one of these for each FCGI::Accept.
101              
102             =cut
103              
104             # FIXME: Call init() on any arguments that are given
105             sub new {
106 39     39 1 190204 my $proto = shift;
107 39   66     229 my $class = ref($proto) || $proto;
108              
109             # Use FCGI::Buffer->new(), not FCGI::Buffer::new()
110 39 100       120 if(!defined($class)) {
111 1         33 carp(__PACKAGE__, ' use ->new() not ::new() to instantiate');
112 1         246 return;
113             }
114              
115 38         210 my $buf = IO::String->new();
116              
117 38         2035 my $rc = {
118             buf => $buf,
119             old_buf => select($buf),
120             generate_304 => 1,
121             generate_last_modified => 1,
122             compress_content => 1,
123             optimise_content => 0,
124             lint_content => 0,
125             };
126             # $rc->{o} = ();
127              
128 38 100 100     239 if($ENV{'SERVER_PROTOCOL'} &&
      100        
129             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0'))) {
130 20         63 $rc->{generate_etag} = 1;
131             } else {
132 18         63 $rc->{generate_etag} = 0;
133             }
134              
135 38         158 return bless $rc, $class;
136             }
137              
138             sub DESTROY {
139 37 50 33 37   6033 if(defined($^V) && ($^V ge 'v5.14.0')) {
140 37 50       153 return if ${^GLOBAL_PHASE} eq 'DESTRUCT'; # >= 5.14.0 only
141             }
142 37         131 my $self = shift;
143              
144 37 100       134 if($self->{'logger'}) {
145 1         6 $self->{'logger'}->info('In DESTROY');
146             }
147 37         125 select($self->{old_buf});
148 37 50 33     182 if((!defined($self->{buf})) || (!defined($self->{buf}->getpos()))) {
149             # Unlikely
150 0 0       0 if($self->{'logger'}) {
151 0         0 $self->{'logger'}->info('Nothing to send');
152             }
153 0         0 return;
154             }
155 37         481 my $pos = $self->{buf}->getpos();
156 37         368 $self->{buf}->setpos(0);
157 37         588 my $buf;
158 37         130 read($self->{buf}, $buf, $pos);
159 37         761 my $headers;
160 37         334 ($headers, $self->{body}) = split /\r?\n\r?\n/, $buf, 2;
161              
162 37 100       122 if($self->{'logger'}) {
163 1 50       5 if($ENV{'HTTP_IF_NONE_MATCH'}) {
164 0         0 $self->{logger}->debug("HTTP_IF_NONE_MATCH: $ENV{HTTP_IF_NONE_MATCH}");
165             }
166 1 50       4 if($ENV{'HTTP_IF_MODIFIED_SINCE'}) {
167 0         0 $self->{logger}->debug("HTTP_IF_MODIFIED_SINCE: $ENV{HTTP_IF_MODIFIED_SINCE}");
168             }
169 1         10 $self->{logger}->debug("Generate_etag = $self->{generate_etag}, ",
170             "Generate_304 = $self->{generate_304}, ",
171             "Generate_last_modified = $self->{generate_last_modified}");
172             }
173 37 100 66     119 unless($headers || $self->is_cached()) {
174 3 50       14 if($self->{'logger'}) {
175 0         0 $self->{'logger'}->debug('There was no output');
176             }
177 3 50       13 if(!defined($headers)) {
178 3         950 require HTTP::Status;
179 3         10841 HTTP::Status->import();
180              
181 3 50       29 if(!defined($self->{'status'})) {
182 3         11 $self->{'status'} = 200;
183             }
184             print 'Status: ', $self->{status}, ' ',
185 3         19 HTTP::Status::status_message($self->{status}),
186             "\n\n";
187             }
188 3         324 return;
189             }
190 34 100 100     186 if($ENV{'REQUEST_METHOD'} && ($ENV{'REQUEST_METHOD'} eq 'HEAD')) {
191 1         7 $self->{send_body} = 0;
192             } else {
193 33         66 $self->{send_body} = 1;
194             }
195              
196 34 50       91 if($headers) {
197 34         92 $self->_set_content_type($headers);
198             }
199              
200 34 100 66     207 if(defined($self->{body}) && ($self->{body} eq '')) {
    100          
201             # E.g. if header of Location is given with no body, for
202             # redirection
203 2         10 delete $self->{body};
204 2 50       13 if($self->{cache}) {
205             # Don't try to retrieve it below from the cache
206 0         0 $self->{send_body} = 0;
207             }
208             } elsif(defined($self->{content_type})) {
209 29         48 my @content_type = @{$self->{content_type}};
  29         89  
210 29 50 33     304 if(defined($content_type[0]) && (lc($content_type[0]) eq 'text') && (lc($content_type[1]) =~ /^html/) && defined($self->{body})) {
      33        
      33        
211 29 100       74 if($self->{optimise_content}) {
212             # require HTML::Clean;
213 25         1153 require HTML::Packer; # Overkill using HTML::Clean and HTML::Packer...
214              
215 25 50       22930 if($self->{'logger'}) {
216 0         0 $self->{'logger'}->trace('Packer');
217             }
218              
219 25         47 my $oldlength = length($self->{body});
220 25         36 my $newlength;
221              
222 25 100       87 if($self->{optimise_content} == 1) {
223 24         60 $self->_optimise_content();
224             } else {
225 1         2 while(1) {
226 3         9 $self->_optimise_content();
227 3         4 $newlength = length($self->{body});
228 3 100       9 last if ($newlength >= $oldlength);
229 2         4 $oldlength = $newlength;
230             }
231             }
232              
233             # If we're on http://www.example.com and have a link
234             # to http://www.example.com/foo/bar.htm, change the
235             # link to /foo/bar.htm - there's no need to include
236             # the site name in the link
237 25 100       76 unless(defined($self->{info})) {
238 22 50       51 if($self->{cache}) {
239 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
240             } else {
241 22         135 $self->{info} = CGI::Info->new();
242             }
243             }
244              
245 25         517 my $href = $self->{info}->host_name();
246 25         10103 my $protocol = $self->{info}->protocol();
247              
248 25 100       260 unless($protocol) {
249 10         18 $protocol = 'http';
250             }
251              
252 25         228 $self->{body} =~ s/<a\s+?href="$protocol:\/\/$href"/<a href="\/"/gim;
253 25         145 $self->{body} =~ s/<a\s+?href="$protocol:\/\/$href/<a href="/gim;
254 25         104 $self->{body} =~ s/<a\s+?href="$protocol:\/\//<a href="\/\//gim;
255              
256             # If we're in "/cgi-bin/foo.cgi?arg1=a" replace
257             # "/cgi-bin/foo.cgi?arg2=b" with "?arg2=b"
258              
259 25 100       86 if(my $script_name = $ENV{'SCRIPT_NAME'}) {
260 18 50       58 if($script_name =~ /^\//) {
261 18         104 $self->{body} =~ s/<a\s+?href="$script_name(\?.+)?"/<a href="$1"/gim;
262             }
263             }
264              
265             # TODO use URI->path_segments to change links in
266             # /aa/bb/cc/dd.htm which point to /aa/bb/ff.htm to
267             # ../ff.htm
268              
269             # TODO: <img border=0 src=...>
270 25         154 $self->{body} =~ s/<img\s+?src="$protocol:\/\/$href"/<img src="\/"/gim;
271 25         103 $self->{body} =~ s/<img\s+?src="$protocol:\/\/$href/<img src="/gim;
272              
273             # Don't use HTML::Clean because of RT402
274             # my $h = new HTML::Clean(\$self->{body});
275             # # $h->compat();
276             # $h->strip();
277             # my $ref = $h->data();
278              
279             # Don't always do javascript 'best' since it's confused
280             # by the common <!-- HIDE technique.
281 25         88 my $options = {
282             remove_comments => 1,
283             remove_newlines => 0,
284             do_stylesheet => 'minify'
285             };
286 25 100       68 if($self->{optimise_content} >= 2) {
287 1         3 $options->{do_javascript} = 'best';
288 1         3 $self->{body} =~ s/(<script.*?>)\s*<!--/$1/gi;
289 1         5 $self->{body} =~ s/\/\/-->\s*<\/script>/<\/script>/gi;
290 1         17 $self->{body} =~ s/(<script.*?>)\s+/$1/gi;
291             }
292 25         132 $self->{body} = HTML::Packer->init()->minify(\$self->{body}, $options);
293 25 100       341658 if($self->{optimise_content} >= 2) {
294             # Change document.write("a"); document.write("b")
295             # into document.write("a"+"b");
296 1         2 while(1) {
297 1         26 $self->{body} =~ s/<script\s*?type\s*?=\s*?"text\/javascript"\s*?>(.*?)document\.write\((.+?)\);\s*?document\.write\((.+?)\)/<script type="text\/JavaScript">${1}document.write($2+$3)/igs;
298 1         3 $newlength = length($self->{body});
299 1 50       5 last if ($newlength >= $oldlength);
300 0         0 $oldlength = $newlength;
301             }
302             }
303             }
304 29 100       144 if($self->{lint_content}) {
305 3         23 require HTML::Lint;
306 3         25 HTML::Lint->import;
307              
308 3 50       19 if($self->{'logger'}) {
309 0         0 $self->{'logger'}->trace('Lint');
310             }
311 3         16 my $lint = HTML::Lint->new();
312 3         64 $lint->parse($self->{body});
313 3         4612 $lint->eof();
314              
315 3 100       374 if($lint->errors) {
316 1         14 $headers = 'Status: 500 Internal Server Error';
317 1         3 @{$self->{o}} = ('Content-type: text/plain');
  1         4  
318 1         4 $self->{body} = '';
319 1         3 foreach my $error ($lint->errors) {
320 3         22 my $errtext = $error->where() . ': ' . $error->errtext() . "\n";
321 3 50       91 if($self->{logger}) {
322 0         0 $self->{logger}->warn($errtext);
323             } else {
324 3         95 warn($errtext);
325             }
326 3         22 $self->{body} .= $errtext;
327             }
328             }
329             }
330             }
331             }
332              
333 34 100 66     327 if(defined($headers) && ($headers =~ /^Status: (\d+)/m)) {
    100          
334 1         6 $self->{status} = $1;
335             } elsif(defined($self->{info})) {
336 25         116 $self->{status} = $self->{info}->status();
337             } else {
338 8         22 $self->{status} = 200;
339             }
340              
341 34 100       431 if($self->{'logger'}) {
342 1         6 $self->{'logger'}->debug("Initial status = $self->{status}");
343             }
344              
345             # Generate the eTag before compressing, since the compressed data
346             # includes the mtime field which changes thus causing a different
347             # Etag to be generated
348 34 100 100     229 if($ENV{'SERVER_PROTOCOL'} &&
      100        
      100        
      66        
349             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
350             $self->{generate_etag} && defined($self->{body})) {
351 11         39 $self->_generate_etag();
352              
353 11 100 100     58 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{generate_304} && ($self->{status} == 200)) {
      66        
354 2         8 $self->_check_if_none_match();
355             }
356             }
357              
358 34         63 my $dbh;
359 34 50       102 if(my $save_to = $self->{save_to}) {
360 0         0 my $sqlite_file = File::Spec->catfile($save_to->{directory}, 'fcgi.buffer.sql');
361 0 0       0 if($self->{logger}) {
362 0         0 $self->{logger}->debug("save_to sqlite file: $sqlite_file");
363             }
364 0 0       0 if(!-r $sqlite_file) {
365 0 0       0 if(!-d $save_to->{directory}) {
366 0         0 mkdir $save_to->{directory};
367             }
368 0         0 $dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef);
369 0 0       0 if($self->{save_to}->{create_table}) {
370 0         0 $dbh->prepare('CREATE TABLE fcgi_buffer(key char PRIMARY KEY, language char, browser_type char, path char UNIQUE NOT NULL, uri char NOT NULL, creation timestamp NOT NULL)')->execute();
371             }
372             } else {
373 0         0 $dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef);
374             }
375             }
376              
377 34         96 my $encoding = $self->_should_gzip();
378 34         65 my $unzipped_body = $self->{body};
379              
380 34 100       89 if(defined($unzipped_body)) {
381 32 50       91 my $range = $ENV{'Range'} ? $ENV{'Range'} : $ENV{'HTTP_RANGE'};
382              
383 32 100 66     106 if($range && !$self->{cache}) {
384             # TODO: Partials
385 3 50       23 if($range =~ /^bytes=(\d*)-(\d*)/) {
386 3 100 100     26 if($1 && $2) {
    100          
    50          
387 1         8 $self->{body} = substr($self->{body}, $1, $2-$1);
388             } elsif($1) {
389 1         6 $self->{body} = substr($self->{body}, $1);
390             } elsif($2) {
391 1         5 $self->{body} = substr($self->{body}, 0, $2);
392             }
393 3         8 $unzipped_body = $self->{body};
394 3         5 $self->{'status'} = 206;
395             }
396             }
397 32         126 $self->_compress({ encoding => $encoding });
398             }
399              
400 34 50       150 if($self->{cache}) {
    100          
401 0         0 require Storable;
402              
403 0         0 my $cache_hash;
404 0         0 my $key = $self->_generate_key();
405              
406             # Cache unzipped version
407 0 0       0 if(!defined($self->{body})) {
408 0 0       0 if($self->{send_body}) {
409 0         0 $self->{cobject} = $self->{cache}->get_object($key);
410 0 0       0 if(defined($self->{cobject})) {
    0          
411 0         0 $cache_hash = Storable::thaw($self->{cobject}->value());
412 0         0 $headers = $cache_hash->{'headers'};
413 0         0 $self->_set_content_type($headers);
414 0         0 @{$self->{o}} = ("X-FCGI-Buffer-$VERSION: Hit");
  0         0  
415 0 0       0 if($self->{info}) {
416 0         0 my $host_name = $self->{info}->host_name();
417 0         0 push @{$self->{o}}, "X-Cache: HIT from $host_name";
  0         0  
418 0         0 push @{$self->{o}}, "X-Cache-Lookup: HIT from $host_name";
  0         0  
419             } else {
420 0         0 push @{$self->{o}}, 'X-Cache: HIT';
  0         0  
421 0         0 push @{$self->{o}}, 'X-Cache-Lookup: HIT';
  0         0  
422             }
423             } elsif($self->{logger}) {
424 0         0 $self->{logger}->warn("Error retrieving data for key $key");
425             } else {
426 0         0 carp(__PACKAGE__, ": error retrieving data for key $key");
427             }
428             }
429              
430             # Nothing has been output yet, so we can check if it's
431             # OK to send 304 if possible
432 0 0 0     0 if($self->{send_body} && $ENV{'SERVER_PROTOCOL'} &&
      0        
      0        
      0        
      0        
433             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
434             $self->{generate_304} && ($self->{status} == 200)) {
435 0 0       0 if($ENV{'HTTP_IF_MODIFIED_SINCE'}) {
436             $self->_check_modified_since({
437             since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
438             modified => $self->{cobject}->created_at()
439 0         0 });
440             }
441             }
442 0 0 0     0 if($self->{send_body} && ($self->{status} == 200) && defined($cache_hash)) {
      0        
443 0         0 $self->{body} = $cache_hash->{'body'};
444 0 0       0 if($dbh) {
445 0         0 my $changes = $self->_save_to($self->{body}, $dbh);
446 0 0 0     0 if($changes && (my $ttl = $self->{save_to}->{ttl})) {
447 0         0 push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str(time + $ttl);
  0         0  
448             }
449             }
450 0 0       0 if(!defined($self->{body})) {
451             # Panic
452 0         0 $headers = 'Status: 500 Internal Server Error';
453 0         0 @{$self->{o}} = ('Content-type: text/plain');
  0         0  
454 0         0 $self->{body} = "Can't retrieve body for key $key, cache_hash contains:\n";
455 0         0 foreach my $k (keys %{$cache_hash}) {
  0         0  
456 0         0 $self->{body} .= "\t$k\n";
457             }
458              
459 0 0       0 if($dbh) {
460 0         0 my $query = "SELECT DISTINCT path FROM fcgi_buffer WHERE key = '$key'";
461 0         0 my $sth = $dbh->prepare($query);
462 0 0       0 if($self->{logger}) {
463 0         0 $self->{logger}->debug($query);
464             }
465 0 0 0     0 if($sth->execute() && (my $href = $sth->fetchrow_hashref())) {
466 0 0       0 if(my $path = $href->{'path'}) {
467 0         0 unlink($path);
468             }
469             }
470 0         0 $query = "DELETE FROM fcgi_buffer WHERE key = '$key'";
471 0         0 $dbh->do($query);
472 0 0       0 if($self->{logger}) {
473 0         0 $self->{logger}->debug($query);
474             }
475             }
476              
477 0         0 $self->{cache}->remove($key);
478              
479 0 0       0 if($self->{logger}) {
480 0         0 $self->{logger}->error("Can't retrieve body for key $key");
481 0         0 $self->{logger}->warn($self->{body});
482             } else {
483 0         0 carp("Can't retrieve body for key $key");
484 0         0 warn($self->{body});
485             }
486 0         0 $self->{send_body} = 0;
487 0         0 $self->{status} = 500;
488             }
489             }
490 0 0 0     0 if($self->{send_body} && $ENV{'SERVER_PROTOCOL'} &&
      0        
      0        
      0        
491             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
492             ($self->{status} == 200)) {
493 0 0 0     0 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{generate_etag}) {
494 0 0       0 if(!defined($self->{etag})) {
495 0         0 $self->_generate_etag();
496             }
497 0         0 $self->_check_if_none_match();
498             }
499             }
500 0 0       0 if($self->{status} == 200) {
501 0         0 $encoding = $self->_should_gzip();
502 0 0       0 if($self->{send_body}) {
503 0 0 0     0 if($self->{generate_etag} && !defined($self->{etag}) && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
      0        
      0        
504 0         0 $self->_generate_etag();
505             }
506 0         0 $self->_compress({ encoding => $encoding });
507             }
508             }
509 0         0 my $cannot_304 = !$self->{generate_304};
510 0 0       0 unless($self->{etag}) {
511 0 0 0     0 if(defined($headers) && ($headers =~ /^ETag: "([a-z0-9]{32})"/m)) {
512 0         0 $self->{etag} = $1;
513             } else {
514 0         0 $self->{etag} = $cache_hash->{'etag'};
515             }
516             }
517 0 0 0     0 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{send_body} && ($self->{status} != 304) && $self->{generate_304}) {
      0        
      0        
518 0 0       0 if(!$self->_check_if_none_match()) {
519 0         0 $cannot_304 = 1;
520             }
521             }
522 0 0       0 if($self->{cobject}) {
523 0 0 0     0 if($ENV{'HTTP_IF_MODIFIED_SINCE'} && ($self->{status} != 304) && !$cannot_304) {
      0        
524             $self->_check_modified_since({
525             since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
526             modified => $self->{cobject}->created_at()
527 0         0 });
528             }
529 0 0 0     0 if(($self->{status} == 200) && $self->{generate_last_modified}) {
530 0 0       0 if($self->{logger}) {
531 0         0 $self->{logger}->debug('Set Last-Modified to ', HTTP::Date::time2str($self->{cobject}->created_at()));
532             }
533 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($self->{cobject}->created_at());
  0         0  
534             }
535             }
536             } else {
537             # Not in the server side cache
538 0 0       0 if($self->{status} == 200) {
539 0         0 my $changes = $self->_save_to($unzipped_body, $dbh);
540              
541 0 0       0 unless($self->{cache_age}) {
542             # It would be great if CHI::set()
543             # allowed the time to be 'lru' for least
544             # recently used.
545 0         0 $self->{cache_age} = '10 minutes';
546             }
547 0         0 $cache_hash->{'body'} = $unzipped_body;
548 0 0 0     0 if($changes && $encoding) {
549 0         0 $self->_compress({ encoding => $encoding });
550             }
551 0 0 0     0 if($self->{o} && scalar(@{$self->{o}})) {
  0 0 0     0  
552             # Remember, we're storing the UNzipped
553             # version in the cache
554 0         0 my $c;
555 0 0 0     0 if(defined($headers) && length($headers)) {
556 0         0 $c = "$headers\r\n" . join("\r\n", @{$self->{o}});
  0         0  
557             } else {
558 0         0 $c = join("\r\n", @{$self->{o}});
  0         0  
559             }
560 0         0 $c =~ s/^Content-Encoding: .+$//mg;
561 0         0 $c =~ s/^Vary: Accept-Encoding.*\r?$//mg;
562 0         0 $c =~ s/\n+/\n/gs;
563 0 0       0 if(length($c)) {
564 0         0 $cache_hash->{'headers'} = $c;
565             }
566             } elsif(defined($headers) && length($headers)) {
567 0         0 $headers =~ s/^Content-Encoding: .+$//mg;
568 0         0 $headers =~ s/^Vary: Accept-Encoding.*\r?$//mg;
569 0         0 $headers =~ s/\n+/\n/gs;
570 0 0       0 if(length($headers)) {
571 0         0 $cache_hash->{'headers'} = $headers;
572             }
573             }
574 0 0       0 if($self->{generate_etag}) {
575 0 0       0 if(!defined($self->{etag})) {
576 0         0 $self->_generate_etag();
577             }
578 0         0 $cache_hash->{'etag'} = $self->{etag};
579             }
580             # TODO: Support the Expires header
581             # if($headers !~ /^Expires: /m))) {
582             # }
583 0 0       0 if($self->{logger}) {
584 0         0 $self->{logger}->debug("Store $key in the cache, age = ", $self->{cache_age}, ' ', length($cache_hash->{'body'}), ' bytes');
585             }
586 0         0 $self->{cache}->set($key, Storable::freeze($cache_hash), $self->{cache_age});
587              
588             # Create a static page with the information and link to that in the output
589             # HTML
590 0 0 0     0 if($dbh && $self->{info} && $self->{save_to} && (my $request_uri = $ENV{'REQUEST_URI'})) {
      0        
      0        
591 0         0 my $query = "SELECT DISTINCT creation FROM fcgi_buffer WHERE key = ?";
592 0 0       0 if($self->{logger}) {
593 0         0 $self->{logger}->debug("$query: $key");
594             }
595 0         0 my $sth = $dbh->prepare($query);
596 0         0 $sth->execute($key);
597 0 0       0 if(my $href = $sth->fetchrow_hashref()) {
598 0 0       0 if(my $ttl = $self->{save_to}->{ttl}) {
599 0         0 push @{$self->{o}}, 'Expires: ' .
600 0         0 HTTP::Date::time2str($href->{'creation'} + $ttl);
601             }
602             } else {
603 0         0 my $dir = $self->{save_to}->{directory};
604 0         0 my $browser_type = $self->{info}->browser_type();
605 0         0 my $language;
606 0 0       0 if($self->{'lingua'}) {
607 0         0 $language = $self->{lingua}->language();
608 0 0       0 if($language =~ /([\w\s]+)/i) {
609 0         0 $language = $1; # Untaint
610             }
611             } else {
612 0         0 $language = 'default';
613             }
614 0         0 my $bdir = File::Spec->catfile($dir, $browser_type);
615 0 0       0 if($bdir =~ /^([\/\\])(.+)$/) {
616 0         0 $bdir = "$1$2"; # Untaint
617             }
618 0         0 my $ldir = File::Spec->catfile($bdir, $language);
619 0         0 my $sdir = File::Spec->catfile($ldir, $self->{info}->script_name());
620 0 0       0 if($self->{logger}) {
621 0         0 $self->{logger}->debug("Create paths to $sdir");
622             }
623 0         0 File::Path::make_path($sdir);
624 0         0 my $file = $self->{info}->as_string();
625 0         0 $file =~ tr/\//_/;
626 0         0 my $path = File::Spec->catfile($sdir, "$file.html");
627 0 0       0 if($path =~ /^(.+)$/) {
628 0         0 $path = $1; # Untaint
629 0         0 $path =~ tr/[\|;]/_/;
630             }
631 0 0       0 if(open(my $fout, '>', $path)) {
632 0         0 my $u = $request_uri;
633 0         0 $u =~ s/\?/\\?/g;
634 0         0 $u =~ s/\)/\\)/g;
635 0         0 my $copy = $unzipped_body;
636 0         0 my $changes = ($copy =~ s/<a\s+href="$u"/<a href="$path"/gi);
637              
638             # handle <a href="?arg3=4">Call self with different args</a>
639 0         0 my $script_name = $ENV{'SCRIPT_NAME'};
640 0         0 $copy =~ s/<a\s+href="(\?.+?)"/<a href="$script_name$1"/gi;
641              
642             # Avoid Wide character
643 0 0       0 unless($self->{_encode_loaded}) {
644 0         0 require Encode;
645 0         0 $self->{_encode_loaded} = 1;
646             }
647 0         0 print $fout Encode::encode_utf8($copy);
648 0         0 close $fout;
649             # Do INSERT OR REPLACE in case another program has
650             # got in first,
651 0         0 $query = "INSERT OR REPLACE INTO fcgi_buffer(key, language, browser_type, path, uri, creation) VALUES('$key', '$language', '$browser_type', '$path', '$request_uri', strftime('\%s','now'))";
652 0 0       0 if($self->{logger}) {
653 0         0 $self->{logger}->debug($query);
654             }
655 0         0 $dbh->prepare($query)->execute();
656              
657 0 0 0     0 if($changes && (my $ttl = $self->{save_to}->{ttl})) {
658 0         0 push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str(time + $ttl);
  0         0  
659             }
660             } else {
661 0 0       0 if($self->{logger}) {
662 0         0 $self->{logger}->warn("Can't create $path");
663             }
664             }
665             }
666             }
667 0 0       0 if($self->{generate_last_modified}) {
668 0         0 $self->{cobject} = $self->{cache}->get_object($key);
669 0 0       0 if(defined($self->{cobject})) {
670 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($self->{cobject}->created_at());
  0         0  
671             } else {
672 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str(time);
  0         0  
673             }
674             }
675             }
676 0 0       0 if($self->{info}) {
677 0         0 my $host_name = $self->{info}->host_name();
678 0 0       0 if(defined($self->{x_cache})) {
679 0         0 push @{$self->{o}}, 'X-Cache: ' . $self->{x_cache} . " from $host_name";
  0         0  
680             } else {
681 0         0 push @{$self->{o}}, "X-Cache: MISS from $host_name";
  0         0  
682             }
683 0         0 push @{$self->{o}}, "X-Cache-Lookup: MISS from $host_name";
  0         0  
684             } else {
685 0 0       0 if(defined($self->{x_cache})) {
686 0         0 push @{$self->{o}}, 'X-Cache: ' . $self->{x_cache};
  0         0  
687             } else {
688 0         0 push @{$self->{o}}, 'X-Cache: MISS';
  0         0  
689             }
690 0         0 push @{$self->{o}}, 'X-Cache-Lookup: MISS';
  0         0  
691             }
692 0         0 push @{$self->{o}}, "X-FCGI-Buffer-$VERSION: Miss";
  0         0  
693             }
694             # We don't need it any more, so give Perl a chance to
695             # tidy it up seeing as we're in the destructor
696 0         0 delete $self->{cache};
697             } elsif($self->{info}) {
698 26         86 my $host_name = $self->{info}->host_name();
699 26         252 push @{$self->{o}}, ("X-Cache: MISS from $host_name", "X-Cache-Lookup: MISS from $host_name");
  26         126  
700 26 50       84 if($self->{generate_last_modified}) {
701 26 100       65 if(my $age = $self->_my_age()) {
702 8         16 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($age);
  8         67  
703             }
704             }
705 26 50 66     298 if($ENV{'HTTP_IF_MODIFIED_SINCE'} && ($self->{status} != 304) && $self->{generate_304}) {
      66        
706             $self->_check_modified_since({
707 3         16 since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
708             modified => $self->_my_age()
709             });
710             }
711 26 50 33     102 if($self->_save_to($unzipped_body, $dbh) && $encoding) {
712 0         0 $self->_compress({ encoding => $encoding });
713             }
714             } else {
715 8         16 push @{$self->{o}}, ('X-Cache: MISS', 'X-Cache-Lookup: MISS');
  8         38  
716             }
717              
718             # if($self->{generate_etag} && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
719             # if((!defined($self->{etag})) &&
720             # (($self->{status} == 200) || $self->{status} == 304) &&
721             # $self->{body} && (!$ENV{'NO_CACHE'}) &&
722             # !$self->is_cached()) {
723             # unless($self->{_encode_loaded}) {
724             # require Encode;
725             # $self->{_encode_loaded} = 1;
726             # }
727             # $self->{etag} = '"' . Digest::MD5->new->add(Encode::encode_utf8($unzipped_body))->hexdigest() . '"';
728             # }
729             # if(defined($self->{etag})) {
730             # push @{$self->{o}}, "ETag: $self->{etag}";
731             # if($self->{logger}) {
732             # $self->{logger}->debug("Set ETag to $self->{etag}");
733             # }
734             # } else {
735             # open(my $fout, '>>', '/tmp/FCGI-bug');
736             # print $fout "BUG: ETag not generated, status $self->{status}:\n",
737             # "$headers\n",
738             # 'x' x 40, "\n",
739             # # defined($self->{body}) ? $self->{body} : "body is empty\n",
740             # defined($unzipped_body) ? "$unzipped_body\n" : "body is empty\n",
741             # 'x' x 40,
742             # "\n";
743             # print $fout "ENV:\n";
744             # while(my ($key, $value) = each %ENV) {
745             # print $fout "$key = $value\n";
746             # }
747             # close $fout;
748             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
749             # }
750             # }
751 34 100 33     196 if($self->{generate_etag} && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
      66        
752 11 50 0     33 if(defined($self->{etag})) {
    0 0        
      0        
      0        
      0        
753 11         16 push @{$self->{o}}, "ETag: $self->{etag}";
  11         39  
754 11 50       31 if($self->{logger}) {
755 0         0 $self->{logger}->debug("Set ETag to $self->{etag}");
756             }
757             } elsif($self->{logger} && (($self->{status} == 200) || $self->{status} == 304) && $self->{body} && (!$ENV{'NO_CACHE'}) && !$self->is_cached()) {
758             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
759             # open(my $fout, '>>', '/tmp/FCGI-bug');
760             # print $fout "BUG: ETag not generated, status $self->{status}:\n",
761             # $headers,
762             # 'x' x 40,
763             # defined($self->{body}) ? $self->{body} : "body is empty\n",
764             # 'x' x 40,
765             # "\n";
766             # print $fout "ENV:\n";
767             # while(my ($key, $value) = each %ENV) {
768             # print $fout "$key = $value\n;
769             # }
770             # print $fout 'x' x 40, "\n";
771             # close $fout;
772             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
773             }
774             }
775              
776 34         59 my $body_length;
777 34 100       95 if(defined($self->{body})) {
778 32 100       129 if(utf8::is_utf8($self->{body})) {
779 1         5 utf8::encode($self->{body});
780             }
781 32         66 $body_length = length($self->{body});
782             } else {
783 2         5 $body_length = 0;
784             }
785              
786 34 50 33     154 if(defined($headers) && length($headers)) {
787             # Put the original headers first, then those generated within
788             # FCGI::Buffer
789 34         55 unshift @{$self->{o}}, split(/\r\n/, $headers);
  34         162  
790 34 100 100     179 if($self->{body} && $self->{send_body}) {
791 30 50       39 unless(grep(/^Content-Length: \d/, @{$self->{o}})) {
  30         150  
792 30         53 push @{$self->{o}}, "Content-Length: $body_length";
  30         109  
793             }
794             }
795 34 100       60 unless(grep(/^Status: \d/, @{$self->{o}})) {
  34         159  
796 31         1778 require HTTP::Status;
797 31         26771 HTTP::Status->import();
798              
799 31         83 push @{$self->{o}}, 'Status: ' . $self->{status} . ' ' . HTTP::Status::status_message($self->{status});
  31         215  
800             }
801             } else {
802 0         0 push @{$self->{o}}, "X-FCGI-Buffer-$VERSION: No headers";
  0         0  
803             }
804              
805 34 100 100     297 if($body_length && $self->{send_body}) {
806 30         49 push @{$self->{o}}, ('', $self->{body});
  30         79  
807             }
808              
809             # XXXXXXXXXXXXXXXXXXXXXXX
810 34 50       60 if(0) {
811             # This code helps to debug Wide character prints
812             my $wideCharWarningsIssued = 0;
813             my $widemess;
814             $SIG{__WARN__} = sub {
815 0     0   0 $wideCharWarningsIssued += "@_" =~ /Wide character in .../;
816 0         0 $widemess = "@_";
817 0 0       0 if($self->{logger}) {
818 0         0 $self->{logger}->fatal($widemess);
819 0         0 my $i = 1;
820 0         0 $self->{logger}->trace('Stack Trace');
821 0         0 while((my @call_details = (caller($i++)))) {
822 0         0 $self->{logger}->trace($call_details[1], ':', $call_details[2], ' in function ', $call_details[3]);
823             }
824             }
825 0         0 CORE::warn(@_); # call the builtin warn as usual
826             };
827              
828             if(scalar @{$self->{o}}) {
829             print join("\r\n", @{$self->{o}});
830             if($wideCharWarningsIssued) {
831             my $mess = join("\r\n", @{$self->{o}});
832             $mess =~ /[^\x00-\xFF]/;
833             open(my $fout, '>>', '/tmp/NJH');
834             print $fout "$widemess:\n",
835             $mess,
836             'x' x 40,
837             "\n";
838             close $fout;
839             }
840             }
841 0         0 } elsif(scalar @{$self->{o}}) {
  34         82  
842 34         50 print join("\r\n", @{$self->{o}});
  34         2220  
843             }
844             # XXXXXXXXXXXXXXXXXXXXXXX
845              
846 34 100 100     800 if((!$self->{send_body}) || !defined($self->{body})) {
847 4         101 print "\r\n\r\n";
848             }
849             }
850              
851             sub _generate_etag {
852 11     11   20 my $self = shift;
853              
854 11 50       32 return if defined($self->{'etag'});
855 11 50       30 return unless defined($self->{'body'});
856              
857 11 50       30 if(!defined($self->{_encode_loaded})) {
858             # encode to avoid "Wide character in subroutine entry"
859 11         62 require Encode;
860 11         38 $self->{_encode_loaded} = 1;
861             }
862 11         180 $self->{etag} = '"' . Digest::MD5->new->add(Encode::encode_utf8($self->{body}))->hexdigest() . '"';
863 11 50       77 if($self->{'logger'}) {
864 0         0 $self->{'logger'}->debug('Etag = ', $self->{'etag'});
865             }
866             }
867              
868             sub _check_modified_since {
869 3     3   7 my $self = shift;
870              
871 3 50       9 if($self->{logger}) {
872 0         0 $self->{logger}->trace('In _check_modified_since');
873             }
874              
875 3 50       8 if(!$self->{generate_304}) {
876 0         0 return;
877             }
878 3         7 my $params = shift;
879              
880 3 50       11 if(!defined($$params{since})) {
881 0         0 return;
882             }
883 3         13 my $s = HTTP::Date::str2time($$params{since});
884 3 100       423 if(!defined($s)) {
885 1 50       5 if($self->{logger}) {
886 0         0 $self->{logger}->info("$$params{since} is not a valid date");
887             }
888 1         4 return;
889             }
890              
891 2         7 my $age = $self->_my_age();
892 2 50       13 if(!defined($age)) {
893 2 50       9 if($self->{logger}) {
894 0         0 $self->{logger}->info("Can't determine my age");
895             }
896 2         6 return;
897             }
898 0 0       0 if($age > $s) {
899 0 0       0 if($self->{logger}) {
900 0         0 $self->{logger}->debug('_check_modified_since: script has been modified');
901             }
902             # Script has been updated so it may produce different output
903 0         0 return;
904             }
905              
906 0 0       0 if($self->{logger}) {
907 0         0 $self->{logger}->debug("_check_modified_since: Compare $$params{modified} with $s");
908             }
909 0 0       0 if($$params{modified} <= $s) {
910 0         0 push @{$self->{o}}, 'Status: 304 Not Modified';
  0         0  
911 0         0 $self->{status} = 304;
912 0         0 $self->{send_body} = 0;
913 0 0       0 if($self->{logger}) {
914 0         0 $self->{logger}->debug('Set status to 304');
915             }
916             }
917             }
918              
919             # Reduce output, e.g. remove superfluous white-space.
920             sub _optimise_content {
921 27     27   43 my $self = shift;
922              
923             # FIXME: regex bad, HTML parser good
924             # Regexp::List - wow!
925 27         1127 $self->{body} =~ s/(((\s+|\r)\n|\n(\s+|\+)))/\n/g;
926             # $self->{body} =~ s/\r\n/\n/gs;
927             # $self->{body} =~ s/\s+\n/\n/gs;
928             # $self->{body} =~ s/\n+/\n/gs;
929             # $self->{body} =~ s/\n\s+|\s+\n/\n/g;
930 27         120 $self->{body} =~ s/\<\/div\>\s+\<div/\<\/div\>\<div/gis;
931             # $self->{body} =~ s/\<\/p\>\s\<\/div/\<\/p\>\<\/div/gis;
932             # $self->{body} =~ s/\<div\>\s+/\<div\>/gis; # Remove spaces after <div>
933 27         8404 $self->{body} =~ s/(<div>\s+|\s+<div>)/<div>/gis;
934 27         117 $self->{body} =~ s/\s+<\/div\>/\<\/div\>/gis; # Remove spaces before </div>
935 27         528 $self->{body} =~ s/\s+\<p\>|\<p\>\s+/\<p\>/im; # TODO <p class=
936 27         556 $self->{body} =~ s/\s+\<\/p\>|\<\/p\>\s+/\<\/p\>/gis;
937 27         88 $self->{body} =~ s/<html>\s+<head>/<html><head>/is;
938 27         148 $self->{body} =~ s/\s*<\/head>\s+<body>\s*/<\/head><body>/is;
939 27         71 $self->{body} =~ s/<html>\s+<body>/<html><body>/is;
940 27         75 $self->{body} =~ s/<body>\s+/<body>/is;
941 27         99 $self->{body} =~ s/\s+\<\/html/\<\/html/is;
942 27         85 $self->{body} =~ s/\s+\<\/body/\<\/body/is;
943 27         165 $self->{body} =~ s/\s(\<.+?\>\s\<.+?\>)/$1/;
944             # $self->{body} =~ s/(\<.+?\>\s\<.+?\>)\s/$1/g;
945 27         80 $self->{body} =~ s/\<p\>\s/\<p\>/gi;
946 27         66 $self->{body} =~ s/\<\/p\>\s\<p\>/\<\/p\>\<p\>/gi;
947 27         66 $self->{body} =~ s/\<\/tr\>\s\<tr\>/\<\/tr\>\<tr\>/gi;
948 27         71 $self->{body} =~ s/\<\/td\>\s\<\/tr\>/\<\/td\>\<\/tr\>/gi;
949 27         98 $self->{body} =~ s/\<\/td\>\s*\<td\>/\<\/td\>\<td\>/gis;
950 27         68 $self->{body} =~ s/\<\/tr\>\s\<\/table\>/\<\/tr\>\<\/table\>/gi;
951 27         88 $self->{body} =~ s/\<br\s?\/?\>\s?\<p\>/\<p\>/gi;
952 27         69 $self->{body} =~ s/\<br\>\s+/\<br\>/gi;
953 27         78 $self->{body} =~ s/\s+\<br/\<br/gi;
954 27         53 $self->{body} =~ s/\<br\s?\/\>\s/\<br \/\>/gi;
955 27         147 $self->{body} =~ s/[ \t]+/ /gs; # Remove duplicate space, don't use \s+ it breaks JavaScript
956 27         84 $self->{body} =~ s/\s\<p\>/\<p\>/gi;
957 27         77 $self->{body} =~ s/\s\<script/\<script/gi;
958 27         427 $self->{body} =~ s/(<script>\s|\s<script>)/<script>/gis;
959 27         425 $self->{body} =~ s/(<\/script>\s|\s<\/script>)/<\/script>/gis;
960 27         74 $self->{body} =~ s/\<td\>\s/\<td\>/gi;
961 27         111 $self->{body} =~ s/\s+\<a\shref="(.+?)"\>\s?/ <a href="$1">/gis;
962 27         117 $self->{body} =~ s/\s?<a\shref=\s"(.+?)"\>/ <a href="$1">/gis;
963 27         86 $self->{body} =~ s/\s+<\/a\>\s+/<\/a> /gis;
964 27         682 $self->{body} =~ s/(\s?<hr>\s+|\s+<hr>\s?)/<hr>/gis;
965             # $self->{body} =~ s/\s<hr>/<hr>/gis;
966             # $self->{body} =~ s/<hr>\s/<hr>/gis;
967 27         67 $self->{body} =~ s/<\/li>\s+<li>/<\/li><li>/gis;
968 27         68 $self->{body} =~ s/<\/li>\s+<\/ul>/<\/li><\/ul>/gis;
969 27         54 $self->{body} =~ s/<ul>\s+<li>/<ul><li>/gis;
970 27         79 $self->{body} =~ s/\s+<\/li>/<\/li>/gis;
971 27         59 $self->{body} =~ s/\<\/option\>\s+\<option/\<\/option\>\<option/gis;
972 27         74 $self->{body} =~ s/<title>\s*(.+?)\s*<\/title>/<title>$1<\/title>/is;
973 27         110 $self->{body} =~ s/<\/center>\s+<center>/ /gis;
974             }
975              
976             # Create a key for the cache
977             sub _generate_key {
978 0     0   0 my $self = shift;
979 0 0       0 if($self->{cache_key}) {
980 0         0 return $self->{cache_key};
981             }
982 0 0       0 unless(defined($self->{info})) {
983 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
984             }
985              
986 0         0 my $key = $self->{info}->browser_type() . '::' . $self->{info}->domain_name() . '::' . $self->{info}->script_name() . '::' . $self->{info}->as_string();
987              
988 0 0       0 if($self->{lingua}) {
989 0         0 $key .= '::' . $self->{lingua}->language();
990             }
991 0 0       0 if($ENV{'HTTP_COOKIE'}) {
992             # Different states of the client are stored in different caches
993             # Don't put different Google Analytics in different caches, and anyway they
994             # would be wrong
995 0         0 foreach my $cookie(split(/;/, $ENV{'HTTP_COOKIE'})) {
996 0 0       0 unless($cookie =~ /^__utm[abcz]/) {
997 0         0 $key .= "::$cookie";
998             }
999             }
1000             }
1001              
1002             # Honour the Vary headers
1003 0         0 my $headers = $self->{'headers'};
1004 0 0 0     0 if($headers && ($headers =~ /^Vary: .*$/m)) {
1005 0 0       0 if(defined($self->{logger})) {
1006 0         0 $self->{logger}->debug('Found Vary header');
1007             }
1008 0         0 foreach my $h1(split(/\r?\n/, $headers)) {
1009 0         0 my ($h1_name, $h1_value) = split /\:\s*/, $h1, 2;
1010 0 0       0 if(lc($h1_name) eq 'vary') {
1011 0         0 foreach my $h2(split(/\r?\n/, $headers)) {
1012 0         0 my ($h2_name, $h2_value) = split /\:\s*/, $h2, 2;
1013 0 0       0 if($h2_name eq $h1_value) {
1014 0         0 $key .= "::$h2_value";
1015 0         0 last;
1016             }
1017             }
1018             }
1019             }
1020             }
1021 0         0 $key =~ s/\//::/g;
1022 0         0 $key =~ s/::::/::/g;
1023 0         0 $key =~ s/::$//;
1024 0 0       0 if(defined($self->{logger})) {
1025 0         0 $self->{logger}->trace("Returning $key");
1026             }
1027 0         0 $self->{cache_key} = $key;
1028 0         0 return $key;
1029             }
1030              
1031             =head2 init
1032              
1033             Set various options and override default values.
1034              
1035             # Put this toward the top of your program before you do anything
1036             # By default, generate_tag, generate_304 and compress_content are ON,
1037             # optimise_content and lint_content are OFF. Set optimise_content to 2 to
1038             # do aggressive JavaScript optimisations which may fail.
1039             use FCGI::Buffer;
1040              
1041             my $buffer = FCGI::Buffer->new()->init({
1042             generate_etag => 1, # make good use of client's cache
1043             generate_last_modified => 1, # more use of client's cache
1044             compress_content => 1, # if gzip the output
1045             optimise_content => 0, # optimise your program's HTML, CSS and JavaScript
1046             cache => CHI->new(driver => 'File'), # cache requests
1047             cache_key => 'string', # key for the cache
1048             cache_age => '10 minutes', # how long to store responses in the cache
1049             logger => $self->{logger},
1050             lint_content => 0, # Pass through HTML::Lint
1051             generate_304 => 1, # When appropriate, generate 304: Not modified
1052             save_to => { directory => '/var/www/htdocs/save_to', ttl => 600, create_table => 1 },
1053             info => CGI::Info->new(),
1054             lingua => CGI::Lingua->new(),
1055             });
1056              
1057             If no cache_key is given, one will be generated which may not be unique.
1058             The cache_key should be a unique value dependent upon the values set by the
1059             browser.
1060              
1061             The cache object will be an object that understands get_object(),
1062             set(), remove() and created_at() messages, such as an L<CHI> object. It is
1063             used as a server-side cache to reduce the need to rerun database accesses.
1064              
1065             Items stay in the server-side cache by default for 10 minutes.
1066             This can be overridden by the cache_control HTTP header in the request, and
1067             the default can be changed by the cache_age argument to init().
1068              
1069             Save_to is feature which stores output of dynamic pages to your
1070             htdocs tree and replaces future links that point to that page with static links
1071             to avoid going through CGI at all.
1072             Ttl is set to the number of seconds that the static pages are deemed to
1073             be live for, the default is 10 minutes.
1074             If set to 0, the page is live forever.
1075             To enable save_to, a info and lingua arguments must also be given.
1076             It works best when cache is also given.
1077             Only use where output is guaranteed to be the same with a given set of arguments
1078             (the same criteria for enabling generate_304).
1079             You can turn it off on a case by case basis thus:
1080              
1081             my $params = CGI::Info->new()->params();
1082             if($params->{'send_private_email'}) {
1083             $buffer->init('save_to' => undef);
1084             }
1085              
1086             Info is an optional argument to give information about the FCGI environment, e.g.
1087             a L<CGI::Info> object.
1088              
1089             Logger will be an object that understands debug() such as an L<Log::Log4perl>
1090             object.
1091              
1092             To generate a last_modified header, you must give a cache object.
1093              
1094             Init allows a reference of the options to be passed. So both of these work:
1095             use FCGI::Buffer;
1096             #...
1097             my $buffer = FCGI::Buffer->new();
1098             $b->init(generate_etag => 1);
1099             $b->init({ generate_etag => 1, info => CGI::Info->new() });
1100              
1101             Generally speaking, passing by reference is better since it copies less on to
1102             the stack.
1103              
1104             If you give a cache to init() then later give cache => undef,
1105             the server side cache is no longer used.
1106             This is useful when you find an error condition when creating your HTML
1107             and decide that you no longer wish to store the output in the cache.
1108              
1109             =cut
1110              
1111             sub init {
1112 32     32 1 8572 my $self = shift;
1113 32 100       125 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  22         79  
1114              
1115             # Safe options - can be called at any time
1116 32 100       124 if(defined($params{generate_etag})) {
1117 9         19 $self->{generate_etag} = $params{generate_etag};
1118             }
1119 32 50       81 if(defined($params{generate_last_modified})) {
1120 0         0 $self->{generate_last_modified} = $params{generate_last_modified};
1121             }
1122 32 100       75 if(defined($params{compress_content})) {
1123 1         3 $self->{compress_content} = $params{compress_content};
1124             }
1125 32 100       78 if(defined($params{optimise_content})) {
1126 31         61 $self->{optimise_content} = $params{optimise_content};
1127             }
1128 32 100       69 if(defined($params{lint_content})) {
1129 4         19 $self->{lint_content} = $params{lint_content};
1130             }
1131 32 100       76 if(defined($params{logger})) {
1132 1         5 $self->{logger} = $params{logger};
1133             }
1134 32 50       64 if(defined($params{lingua})) {
1135 0         0 $self->{lingua} = $params{lingua};
1136             }
1137              
1138 32 100 66     146 if(defined($params{save_to}) && $self->can_cache()) {
    50 33        
1139 1 50       3 if(my $dir = $params{'save_to'}->{'directory'}) {
1140 1 50       28 if(! -d $dir) {
1141 1         13 mkdir $dir;
1142 1 50       12 if(! -d $dir) {
1143 1         6 Carp::carp("$dir isn't a directory");
1144 1         545 return;
1145             }
1146             }
1147 0 0       0 if(! -w $dir) {
1148 0         0 Carp::carp("$dir isn't writeable");
1149 0         0 return;
1150             }
1151             }
1152 0         0 $self->{save_to} = $params{save_to};
1153 0 0       0 if(!exists($params{save_to}->{'ttl'})) {
1154 0         0 $self->{save_to}->{'ttl'} = 600;
1155             }
1156             } elsif(exists($params{'save_to'}) && !defined($params{'save_to'})) {
1157 0         0 delete $self->{'save_to'};
1158             }
1159 31 100       70 if(defined($params{generate_304})) {
1160 3         18 $self->{generate_304} = $params{generate_304};
1161             }
1162 31 100 66     118 if(defined($params{info}) && (!defined($self->{info}))) {
1163 4         8 $self->{info} = $params{info};
1164             }
1165              
1166             # Unsafe options - must be called before output has been started
1167 31         112 my $pos = $self->{buf}->getpos;
1168 31 50       350 if($pos > 0) {
1169 0 0       0 if(defined($self->{logger})) {
1170 0         0 my @call_details = caller(0);
1171 0         0 $self->{logger}->warn("Too late to call init, $pos characters have been printed, caller line $call_details[2] of $call_details[1]");
1172             } else {
1173             # Must do Carp::carp instead of carp for Test::Carp
1174 0         0 Carp::carp "Too late to call init, $pos characters have been printed";
1175             }
1176             }
1177 31 50 33     86 if(exists($params{cache}) && $self->can_cache()) {
1178 0 0       0 if(defined($ENV{'HTTP_CACHE_CONTROL'})) {
1179 0         0 my $control = $ENV{'HTTP_CACHE_CONTROL'};
1180 0 0       0 if(defined($self->{logger})) {
1181 0         0 $self->{logger}->debug("cache_control = $control");
1182             }
1183 0 0       0 if($control =~ /^max-age\s*=\s*(\d+)$/) {
1184             # There is an argument not to do this
1185             # since one client will affect others
1186 0         0 $self->{cache_age} = "$1 seconds";
1187 0 0       0 if(defined($self->{logger})) {
1188 0         0 $self->{logger}->debug("cache_age = $self->{cache_age}");
1189             }
1190             }
1191             }
1192 0   0     0 $self->{cache_age} ||= $params{cache_age};
1193 0 0 0     0 if((!defined($params{cache})) && defined($self->{cache})) {
1194 0 0       0 if(defined($self->{logger})) {
1195 0 0       0 if($self->{cache_key}) {
1196 0         0 $self->{logger}->debug('disabling cache ', $self->{cache_key});
1197             } else {
1198 0         0 $self->{logger}->debug('disabling cache');
1199             }
1200             }
1201 0         0 delete $self->{cache};
1202             } else {
1203 0         0 $self->{cache} = $params{cache};
1204             }
1205 0 0       0 if(defined($params{cache_key})) {
1206 0         0 $self->{cache_key} = $params{cache_key};
1207             }
1208             }
1209              
1210 31         113 return $self;
1211             }
1212              
1213             sub import {
1214             # my $class = shift;
1215 11     11   172 shift;
1216              
1217 11 50       2111 return unless scalar(@_);
1218              
1219 0         0 init(@_);
1220             }
1221              
1222             =head2 set_options
1223              
1224             Synonym for init, kept for historical reasons.
1225              
1226             =cut
1227              
1228             sub set_options {
1229 11     11 1 6953 my $self = shift;
1230 11 100       42 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  8         39  
1231              
1232 11         40 $self->init(\%params);
1233             }
1234              
1235             =head2 can_cache
1236              
1237             Returns true if the server is allowed to store the results locally.
1238             This is the value of X-Cache in the returned header.
1239              
1240             =cut
1241              
1242             sub can_cache {
1243 9     9 1 2785 my $self = shift;
1244              
1245 9 100       45 if(defined($self->{x_cache})) {
1246 1         6 return ($self->{x_cache} eq 'HIT');
1247             }
1248 8 50 33     63 if(defined($ENV{'NO_CACHE'}) || defined($ENV{'NO_STORE'})) {
1249 0         0 $self->{x_cache} = 'MISS';
1250 0         0 return 0;
1251             }
1252 8 50       44 if(defined($ENV{'HTTP_CACHE_CONTROL'})) {
1253 0         0 my $control = $ENV{'HTTP_CACHE_CONTROL'};
1254 0 0       0 if(defined($self->{logger})) {
1255 0         0 $self->{logger}->debug("cache_control = $control");
1256             }
1257             # TODO: check Authorization header not present
1258 0 0 0     0 if(($control eq 'no-store') ||
      0        
      0        
1259             ($control eq 'no-cache') ||
1260             ($control eq 'max-age=0') ||
1261             ($control eq 'private')) {
1262 0         0 $self->{x_cache} = 'MISS';
1263 0         0 return 0;
1264             }
1265             }
1266 8         23 $self->{x_cache} = 'HIT';
1267 8         39 return 1;
1268             }
1269              
1270             =head2 is_cached
1271              
1272             Returns true if the output is cached. If it is then it means that all of the
1273             expensive routines in the FCGI script can be by-passed because we already have
1274             the result stored in the cache.
1275              
1276             # Put this toward the top of your program before you do anything
1277              
1278             # Example key generation - use whatever you want as something
1279             # unique for this call, so that subsequent calls with the same
1280             # values match something in the cache
1281             use CGI::Info;
1282             use CGI::Lingua;
1283             use FCGI::Buffer;
1284              
1285             my $i = CGI::Info->new();
1286             my $l = CGI::Lingua->new(supported => ['en']);
1287              
1288             # To use server side caching you must give the cache argument, however
1289             # the cache_key argument is optional - if you don't give one then one will
1290             # be generated for you
1291             my $buffer = FCGI::Buffer->new();
1292             if($buffer->can_cache()) {
1293             $buffer->init(
1294             cache => CHI->new(driver => 'File'),
1295             cache_key => $i->domain_name() . '/' . $i->script_name() . '/' . $i->as_string() . '/' . $l->language()
1296             );
1297             if($buffer->is_cached()) {
1298             # Output will be retrieved from the cache and sent automatically
1299             exit;
1300             }
1301             }
1302             # Not in the cache, so now do our expensive computing to generate the
1303             # results
1304             print "Content-type: text/html\n";
1305             # ...
1306              
1307             =cut
1308              
1309             sub is_cached {
1310 12     12 1 1512 my $self = shift;
1311              
1312 12 50       51 unless($self->{cache}) {
1313 12 50       36 if($self->{logger}) {
1314 0         0 $self->{logger}->debug("is_cached: cache hasn't been enabled");
1315 0         0 my $i = 0;
1316 0         0 while((my @call_details = (caller($i++)))) {
1317 0         0 $self->{logger}->debug($call_details[1], ':', $call_details[2], ' calling function ', $call_details[3]);
1318             }
1319             }
1320 12         53 return 0;
1321             }
1322              
1323 0         0 my $key = $self->_generate_key();
1324              
1325 0 0       0 if($self->{logger}) {
1326 0         0 $self->{logger}->debug("is_cached: looking for key = $key");
1327             }
1328 0         0 $self->{cobject} = $self->{cache}->get_object($key);
1329 0 0       0 unless($self->{cobject}) {
1330 0 0       0 if($self->{logger}) {
1331 0         0 $self->{logger}->debug('not found in cache');
1332             }
1333 0         0 return 0;
1334             }
1335 0 0       0 unless($self->{cobject}->value($key)) {
1336 0 0       0 if($self->{logger}) {
1337 0         0 $self->{logger}->warn('is_cached: object is in the cache but not the data');
1338             }
1339 0         0 delete $self->{cobject};
1340 0         0 return 0;
1341             }
1342              
1343             # If the script has changed, don't use the cache since we may produce
1344             # different output
1345 0         0 my $age = $self->_my_age();
1346 0 0       0 unless(defined($age)) {
1347 0 0       0 if($self->{logger}) {
1348 0         0 $self->{logger}->debug("Can't determine script's age");
1349             }
1350             # Can't determine the age. Play it safe an assume we're not
1351             # cached
1352 0         0 delete $self->{cobject};
1353 0         0 return 0;
1354             }
1355 0 0       0 if($age > $self->{cobject}->created_at()) {
1356             # Script has been updated so it may produce different output
1357 0 0       0 if($self->{logger}) {
1358 0         0 $self->{logger}->debug('Script has been updated');
1359             }
1360 0         0 delete $self->{cobject};
1361             # Nothing will be in date and all new searches would miss
1362             # anyway, so may as well clear it all
1363             # FIXME: RT104471
1364             # $self->{cache}->clear();
1365 0         0 return 0;
1366             }
1367 0 0       0 if($self->{logger}) {
1368 0         0 $self->{logger}->debug('Script is in the cache');
1369             }
1370 0         0 return 1;
1371             }
1372              
1373             sub _my_age {
1374 31     31   47 my $self = shift;
1375              
1376 31 50       83 if($self->{script_mtime}) {
1377 0         0 return $self->{script_mtime};
1378             }
1379 31 50       102 unless(defined($self->{info})) {
1380 0 0       0 if($self->{cache}) {
1381 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
1382             } else {
1383 0         0 $self->{info} = CGI::Info->new();
1384             }
1385             }
1386              
1387 31         79 my $path = $self->{info}->script_path();
1388 31 50       5479 unless(defined($path)) {
1389 0         0 return;
1390             }
1391              
1392 31         465 my @statb = stat($path);
1393 31         119 $self->{script_mtime} = $statb[9];
1394 31         158 return $self->{script_mtime};
1395             }
1396              
1397             sub _should_gzip {
1398 34     34   63 my $self = shift;
1399              
1400 34 100 100     223 if($self->{compress_content} && ($ENV{'HTTP_ACCEPT_ENCODING'} || $ENV{'HTTP_TE'})) {
      100        
1401 2 50       9 if(defined($self->{content_type})) {
1402 2         5 my @content_type = @{$self->{content_type}};
  2         8  
1403 2 50       10 if($content_type[0] ne 'text') {
1404 0         0 return '';
1405             }
1406             }
1407 2 100       14 my $accept = lc($ENV{'HTTP_ACCEPT_ENCODING'} ? $ENV{'HTTP_ACCEPT_ENCODING'} : $ENV{'HTTP_TE'});
1408 2         15 foreach my $method(split(/,\s?/, $accept)) {
1409 2 50 66     15 if(($method eq 'gzip') || ($method eq 'x-gzip') || ($method eq 'br')) {
      66        
1410 2         8 return $method;
1411             }
1412             }
1413             }
1414              
1415 32         81 return '';
1416             }
1417              
1418             sub _set_content_type {
1419 34     34   58 my $self = shift;
1420 34         59 my $headers = shift;
1421              
1422 34         115 foreach my $header (split(/\r?\n/, $headers)) {
1423 34         154 my ($header_name, $header_value) = split /\:\s*/, $header, 2;
1424 34 100       143 if (lc($header_name) eq 'content-type') {
1425 31         110 my @content_type = split /\//, $header_value, 2;
1426 31         77 $self->{content_type} = \@content_type;
1427 31         99 return;
1428             }
1429             }
1430             }
1431              
1432             sub _compress()
1433             {
1434 32     32   60 my $self = shift;
1435 32 50       95 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  32         128  
1436              
1437 32 50       96 return unless(defined($self->{'body'}));
1438 32         63 my $encoding = $params{encoding};
1439              
1440 32 100 66     107 if((length($encoding) == 0) || (length($self->{body}) < MIN_GZIP_LEN)) {
1441 30         97 return;
1442             }
1443              
1444 2 100       13 if($encoding eq 'gzip') {
    50          
1445 1         733 require Compress::Zlib;
1446 1         57345 Compress::Zlib->import;
1447              
1448             # Avoid 'Wide character in memGzip'
1449 1 50       14 unless($self->{_encode_loaded}) {
1450 1         616 require Encode;
1451 1         10829 $self->{_encode_loaded} = 1;
1452             }
1453 1         9 my $nbody = Compress::Zlib::memGzip(\Encode::encode_utf8($self->{body}));
1454 1 50       508 if(length($nbody) < length($self->{body})) {
1455 0         0 $self->{body} = $nbody;
1456 0 0       0 unless(grep(/^Content-Encoding: gzip/, @{$self->{o}})) {
  0         0  
1457 0         0 push @{$self->{o}}, 'Content-Encoding: gzip';
  0         0  
1458             }
1459 0 0       0 unless(grep(/^Vary: Accept-Encoding/, @{$self->{o}})) {
  0         0  
1460 0         0 push @{$self->{o}}, 'Vary: Accept-Encoding';
  0         0  
1461             }
1462             }
1463             } elsif($encoding eq 'br') {
1464 1         458 require IO::Compress::Brotli;
1465 1         1446 IO::Compress::Brotli->import();
1466              
1467 1 50       9 unless($self->{_encode_loaded}) {
1468 0         0 require Encode;
1469 0         0 $self->{_encode_loaded} = 1;
1470             }
1471 1         3365 my $nbody = IO::Compress::Brotli::bro(Encode::encode_utf8($self->{body}));
1472 1 50       14 if(length($nbody) < length($self->{body})) {
1473 1         4 $self->{body} = $nbody;
1474 1 50       3 unless(grep(/^Content-Encoding: br/, @{$self->{o}})) {
  1         7  
1475 1         3 push @{$self->{o}}, 'Content-Encoding: br';
  1         5  
1476             }
1477 1 50       3 unless(grep(/^Vary: Accept-Encoding/, @{$self->{o}})) {
  1         6  
1478 1         3 push @{$self->{o}}, 'Vary: Accept-Encoding';
  1         7  
1479             }
1480             }
1481             }
1482             }
1483              
1484             sub _check_if_none_match {
1485 2     2   5 my $self = shift;
1486              
1487 2 50       8 if($self->{logger}) {
1488 0         0 $self->{logger}->debug("Compare $ENV{HTTP_IF_NONE_MATCH} with $self->{etag}");
1489             }
1490 2 50       8 if($ENV{'HTTP_IF_NONE_MATCH'} eq $self->{etag}) {
1491 2         4 push @{$self->{o}}, 'Status: 304 Not Modified';
  2         9  
1492 2         4 $self->{send_body} = 0;
1493 2         4 $self->{status} = 304;
1494 2 50       7 if($self->{logger}) {
1495 0         0 $self->{logger}->debug('Set status to 304');
1496             }
1497 2         5 return 1;
1498             }
1499 0 0 0     0 if($self->{cache} && $self->{logger} && $self->{logger}->is_debug()) {
      0        
1500 0         0 my $cached_copy = $self->{cache}->get($self->_generate_key());
1501              
1502 0 0 0     0 if($cached_copy && $self->{body}) {
1503 0         0 require Text::Diff;
1504 0         0 Text::Diff->import();
1505              
1506 0         0 $cached_copy = Storable::thaw($cached_copy)->{body};
1507 0         0 my $diffs = diff(\$self->{body}, \$cached_copy);
1508 0         0 $self->{logger}->debug('diffs: ', $diffs);
1509             } else {
1510 0         0 $self->{logger}->debug('Nothing to compare');
1511             }
1512             }
1513 0         0 return 0;
1514             }
1515              
1516             # replace dynamic links with static links
1517             sub _save_to {
1518 26     26   69 my ($self, $unzipped_body, $dbh) = @_;
1519              
1520 26 0 33     124 return 0 unless($dbh && $self->{info} && (my $request_uri = $ENV{'REQUEST_URI'}));
      0        
1521 0 0         return 0 if(!defined($unzipped_body));
1522              
1523 0           my $query;
1524 0           my $copy = $unzipped_body;
1525 0           my $changes = 0;
1526 0           my $creation;
1527             my %seen_links;
1528 0           while($unzipped_body =~ /<a\shref="(.+?)"/gis) {
1529 0           my $link = $1;
1530 0 0         next if($seen_links{$link}); # Already updated in the copy
1531 0           $seen_links{$link} = 1;
1532 0           $link =~ tr/[\|;]/_/;
1533              
1534 0           my $search_uri = $link;
1535 0 0         if($search_uri =~ /^\?/) {
1536             # CGI script has links to itself
1537             # $search_uri = "${request_uri}${link}";
1538 0           my $r = $request_uri;
1539 0           $r =~ s/\?.*$//;
1540 0           $search_uri = "${r}$link";
1541             } else {
1542 0 0         next if($link =~ /^https?:\/\//); # FIXME: skips full URLs to ourself
1543             # Though optimise_content fixes that
1544 0 0         next if($link =~ /.html?$/);
1545 0 0         next if($link =~ /.jpg$/);
1546 0 0         next if($link =~ /.gif$/);
1547             }
1548 0 0         if($self->{save_to}->{ttl}) {
1549 0           $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE uri = ? AND language = ? AND browser_type = ? AND creation >= strftime('\%s','now') - " . $self->{save_to}->{ttl};
1550             } else {
1551 0           $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE uri = ? AND language = ? AND browser_type = ?";
1552             }
1553 0 0         if($self->{logger}) {
1554 0           $self->{logger}->debug("$query: $search_uri, ", $self->{lingua}->language(), ', ', $self->{info}->browser_type());
1555             }
1556 0 0         if(defined(my $sth = $dbh->prepare($query))) {
    0          
1557 0           $sth->execute($search_uri, $self->{lingua}->language(), $self->{info}->browser_type());
1558 0 0         if(my $href = $sth->fetchrow_hashref()) {
1559 0 0         if(my $path = $href->{'path'}) {
1560 0 0         if(-r $path) {
1561 0 0         if($self->{logger}) {
1562 0           $self->{logger}->debug("Changing links from $link to $path");
1563             }
1564 0           $link =~ s/\?/\\?/g;
1565 0           my $rootdir = $self->{info}->rootdir();
1566 0           $path = substr($path, length($rootdir));
1567 0           $changes += ($copy =~ s/<a\s+href="$link">/<a href="$path">/gis);
1568             # Find the first link that will expire and use that
1569 0 0 0       if((!defined($creation)) || ($href->{'creation'} < $creation)) {
1570 0           $creation = $href->{'creation'};
1571             }
1572             } else {
1573 0           $query = "DELETE FROM fcgi_buffer WHERE path = ?";
1574 0           $dbh->prepare($query)->execute($path);
1575 0 0         if($self->{logger}) {
1576 0           $self->{logger}->warn("Remove entry for non-existant file $path");
1577             }
1578             }
1579             }
1580             }
1581             } elsif($self->{logger}) {
1582 0           $self->{logger}->warn("failed to prepare '$query'");
1583             }
1584             }
1585 0           my $expiration = 0;
1586 0 0 0       if(defined($creation) && (my $ttl = $self->{save_to}->{ttl})) {
1587 0           $expiration = $creation + $ttl;
1588             }
1589 0 0 0       if($changes && (($expiration == 0) || ($expiration >= time))) {
    0 0        
      0        
1590 0 0         if($self->{logger}) {
1591             # $self->{logger}->debug("$changes links now point to static pages");
1592 0 0         if($changes == 1) {
1593 0 0         if($self->{'save_to'}->{'ttl'}) {
1594 0           $self->{logger}->info('1 link now points to a static page for ', $expiration - time, 's');
1595             } else {
1596 0           $self->{logger}->info('1 link now points to a static page');
1597             }
1598             } else {
1599 0           $self->{logger}->info("$changes links now point to static pages");
1600             }
1601             }
1602 0           $unzipped_body = $copy;
1603 0           $self->{'body'} = $unzipped_body;
1604 0 0         if(my $ttl = $self->{save_to}->{ttl}) {
1605 0           push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str($creation + $ttl);
  0            
1606             }
1607             } elsif($expiration && ($expiration < time)) {
1608             # Delete the save_to files
1609 0 0         if($self->{save_to}->{ttl}) {
1610 0           $query = "SELECT path FROM fcgi_buffer WHERE creation < strftime('\%s','now') - " . $self->{save_to}->{ttl};
1611             } else {
1612 0           $query = 'SELECT path FROM fcgi_buffer'; # Hmm, I suspect this is overkill
1613             }
1614 0           my $sth = $dbh->prepare($query);
1615 0           $sth->execute();
1616 0           while(my $href = $sth->fetchrow_hashref()) {
1617 0 0         if(my $path = $href->{'path'}) {
1618 0 0         if($self->{logger}) {
1619 0           $self->{logger}->debug("unlink $path");
1620             }
1621 0           unlink $path;
1622             }
1623             }
1624 0 0         if($self->{save_to}->{ttl}) {
1625 0           $query = "DELETE FROM fcgi_buffer WHERE creation < strftime('\%s','now') - " . $self->{save_to}->{ttl};
1626             } else {
1627 0           $query = 'DELETE FROM fcgi_buffer'; # Hmm, I suspect this is overkill
1628             }
1629 0 0         if($self->{logger}) {
1630 0           $self->{logger}->debug($query);
1631             }
1632 0           $dbh->prepare($query)->execute();
1633             # } else {
1634             # Old code
1635             # if($self->{save_to}->{ttl}) {
1636             # $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE key = '$key' AND creation >= strftime('\%s','now') - " . $self->{save_to}->{ttl};
1637             # } else {
1638             # $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE key = '$key'";
1639             # }
1640             # my $sth = $dbh->prepare($query);
1641             # $sth->execute();
1642             # my $href = $sth->fetchrow_hashref();
1643             # if(my $path = $href->{'path'}) {
1644             # # FIXME: don't do this if we've passed the TTL, and if we are clean
1645             # # up the database and remove the static page
1646             # $request_uri =~ s/\?/\\?/g;
1647             # if(($unzipped_body =~ s/<a href="$request_uri"/<a href="$path"/gi) > 0) {
1648             # $self->{'body'} = $unzipped_body;
1649             # if(my $ttl = $self->{save_to}->{ttl}) {
1650             # push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str($href->{creation} + $ttl);
1651             # }
1652             # }
1653             # }
1654             }
1655 0           return $changes;
1656             }
1657              
1658             =head1 AUTHOR
1659              
1660             Nigel Horne, C<< <njh at bandsman.co.uk> >>
1661              
1662             =head1 BUGS
1663              
1664             FCGI::Buffer should be safe even in scripts which produce lots of different
1665             output, e.g. e-commerce situations.
1666             On such pages, however, I strongly urge to setting generate_304 to 0 and
1667             sending the HTTP header "Cache-Control: no-cache".
1668              
1669             When using L<Template>, ensure that you don't use it to output to STDOUT,
1670             instead you will need to capture into a variable and print that.
1671             For example:
1672              
1673             my $output;
1674             $template->process($input, $vars, \$output) || ($output = $template->error());
1675             print $output;
1676              
1677             Can produce buggy JavaScript if you use the <!-- HIDING technique.
1678             This is a bug in L<JavaScript::Packer>, not FCGI::Buffer.
1679              
1680             Mod_deflate can confuse this when compressing output.
1681             Ensure that deflation is off for .pl files:
1682              
1683             SetEnvIfNoCase Request_URI \.(?:gif|jpe?g|png|pl)$ no-gzip dont-vary
1684              
1685             If you request compressed output then uncompressed output (or vice
1686             versa) on input that produces the same output, the status will be 304.
1687             The letter of the spec says that's wrong, so I'm noting it here, but
1688             in practice you should not see this happen or have any difficulties
1689             because of it.
1690              
1691             FCGI::Buffer has not been tested against FastCGI.
1692              
1693             I advise adding FCGI::Buffer as the last use statement so that it is
1694             cleared up first. In particular it should be loaded after
1695             L<Log::Log4perl>, if you're using that, so that any messages it
1696             produces are printed after the HTTP headers have been sent by
1697             FCGI::Buffer;
1698              
1699             Save_to doesn't understand links in JavaScript, which means that if you use self-calling
1700             CGIs which are loaded as a static page they may point to the wrong place.
1701             The workaround is to avoid self-calling CGIs in JavaScript
1702              
1703             Please report any bugs or feature requests to C<bug-fcgi-buffer at rt.cpan.org>,
1704             or through the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=FCGI-Buffer>.
1705             I will be notified, and then you'll automatically be notified of progress on
1706             your bug as I make changes.
1707              
1708             The lint operation only works on HTML4, because of a restriction in L<HTML::Lint>.
1709              
1710             =head1 SEE ALSO
1711              
1712             CGI::Buffer, HTML::Packer, HTML::Lint
1713              
1714             =head1 SUPPORT
1715              
1716             You can find documentation for this module with the perldoc command.
1717              
1718             perldoc FCGI::Buffer
1719              
1720             You can also look for information at:
1721              
1722             =over 4
1723              
1724             =item * RT: CPAN's request tracker
1725              
1726             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=FCGI-Buffer>
1727              
1728             =item * CPAN Ratings
1729              
1730             L<http://cpanratings.perl.org/d/FCGI-Buffer>
1731              
1732             =item * Search CPAN
1733              
1734             L<http://search.cpan.org/dist/FCGI-Buffer/>
1735              
1736             =back
1737              
1738             =head1 ACKNOWLEDGEMENTS
1739              
1740             The inspiration and code for some of this is cgi_buffer by Mark
1741             Nottingham: L<https://www.mnot.net/blog/2003/04/24/etags>.
1742              
1743             =head1 LICENSE AND COPYRIGHT
1744              
1745             The licence for cgi_buffer is:
1746              
1747             "(c) 2000 Copyright Mark Nottingham <mnot@pobox.com>
1748              
1749             This software may be freely distributed, modified and used,
1750             provided that this copyright notice remain intact.
1751              
1752             This software is provided 'as is' without warranty of any kind."
1753              
1754             The rest of the program is Copyright 2015-2023 Nigel Horne,
1755             and is released under the following licence: GPL2
1756              
1757             =cut
1758              
1759             1; # End of FCGI::Buffer