File Coverage

blib/lib/FCGI/Buffer.pm
Criterion Covered Total %
statement 378 839 45.0
branch 174 524 33.2
condition 81 275 29.4
subroutine 27 29 93.1
pod 5 5 100.0
total 665 1672 39.7


line stmt bran cond sub pod time code
1             package FCGI::Buffer;
2              
3 12     12   3719968 use strict;
  12         91  
  12         341  
4 12     12   61 use warnings;
  12         34  
  12         347  
5              
6             # FIXME: save_to treats ?arg1=a&arg2=b and ?arg2=b&arg1=a as different
7             # FIXME: save_to treats /cgi-bin/foo.fcgi and /cgi-bin2/foo.fcgi as the same
8              
9 12     12   74 use Digest::MD5;
  12         33  
  12         421  
10 12     12   71 use File::Path;
  12         25  
  12         737  
11 12     12   77 use File::Spec;
  12         22  
  12         328  
12 12     12   4629 use IO::String;
  12         32054  
  12         433  
13 12     12   5529 use CGI::Info;
  12         719099  
  12         673  
14 12     12   138 use Carp;
  12         23  
  12         756  
15 12     12   5541 use HTTP::Date;
  12         44292  
  12         691  
16 12     12   19514 use DBI;
  12         220163  
  12         1140  
17              
18             =head1 NAME
19              
20             FCGI::Buffer - Verify, Cache and Optimise FCGI Output
21              
22             =head1 VERSION
23              
24             Version 0.18
25              
26             =cut
27              
28             our $VERSION = '0.18';
29              
30             =head1 SYNOPSIS
31              
32             FCGI::Buffer verifies the HTML that you produce by passing it through
33             C<HTML::Lint>.
34              
35             FCGI::Buffer optimises FCGI programs by reducing, filtering and compressing
36             output to speed up the transmission and by nearly seamlessly making use of
37             client and server caches.
38              
39             To make use of client caches, that is to say to reduce needless calls
40             to your server asking for the same data:
41              
42             use FCGI;
43             use FCGI::Buffer;
44             # ...
45             my $request = FCGI::Request();
46             while($request->FCGI::Accept() >= 0) {
47             my $buffer = FCGI::Buffer->new();
48             $buffer->init(
49             optimise_content => 1,
50             lint_content => 0,
51             );
52             # ...
53             }
54              
55             To also make use of server caches, that is to say to save regenerating
56             output when different clients ask you for the same data, you will need
57             to create a cache.
58             But that's simple:
59              
60             use FCGI;
61             use CHI;
62             use FCGI::Buffer;
63              
64             # ...
65             my $request = FCGI::Request();
66             while($request->FCGI::Accept() >= 0) {
67             my $buffer = FCGI::Buffer->new();
68             $buffer->init(
69             optimise_content => 1,
70             lint_content => 0,
71             cache => CHI->new(driver => 'File')
72             );
73             if($buffer->is_cached()) {
74             # Nothing has changed - use the version in the cache
75             $request->Finish();
76             next;
77             # ...
78             }
79             }
80              
81             To temporarily prevent the use of server-side caches, for example whilst
82             debugging before publishing a code change, set the NO_CACHE environment variable
83             to any non-zero value.
84             This will also stop ETag being added to the header.
85             If you get errors about Wide characters in print it means that you've
86             forgotten to emit pure HTML on non-ASCII characters.
87             See L<HTML::Entities>.
88             As a hack work around you could also remove accents and the like by using
89             L<Text::Unidecode>,
90             which works well but isn't really what you want.
91              
92             =head1 SUBROUTINES/METHODS
93              
94             =cut
95              
96 12     12   105 use constant MIN_GZIP_LEN => 32;
  12         33  
  12         125902  
97              
98             =head2 new
99              
100             Create an FCGI::Buffer object. Do one of these for each FCGI::Accept.
101              
102             =cut
103              
104             # FIXME: Call init() on any arguments that are given
105             sub new {
106 39     39 1 162193 my $proto = shift;
107 39   66     223 my $class = ref($proto) || $proto;
108              
109             # Use FCGI::Buffer->new(), not FCGI::Buffer::new()
110 39 100       142 if(!defined($class)) {
111 1         21 carp(__PACKAGE__, ' use ->new() not ::new() to instantiate');
112 1         278 return;
113             }
114              
115 38         190 my $buf = IO::String->new();
116              
117 38         2053 my $rc = {
118             buf => $buf,
119             old_buf => select($buf),
120             generate_304 => 1,
121             generate_last_modified => 1,
122             compress_content => 1,
123             optimise_content => 0,
124             lint_content => 0,
125             };
126             # $rc->{o} = ();
127              
128 38 100 100     250 if($ENV{'SERVER_PROTOCOL'} &&
      100        
129             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0'))) {
130 20         62 $rc->{generate_etag} = 1;
131             } else {
132 18         61 $rc->{generate_etag} = 0;
133             }
134              
135 38         165 return bless $rc, $class;
136             }
137              
138             sub DESTROY {
139 37 50 33 37   5581 if(defined($^V) && ($^V ge 'v5.14.0')) {
140 37 50       162 return if ${^GLOBAL_PHASE} eq 'DESTRUCT'; # >= 5.14.0 only
141             }
142 37         123 my $self = shift;
143              
144 37 100       128 if($self->{'logger'}) {
145 1         5 $self->{'logger'}->info('In DESTROY');
146             }
147 37         125 select($self->{old_buf});
148 37 50 33     194 if((!defined($self->{buf})) || (!defined($self->{buf}->getpos()))) {
149             # Unlikely
150 0 0       0 if($self->{'logger'}) {
151 0         0 $self->{'logger'}->info('Nothing to send');
152             }
153 0         0 return;
154             }
155 37         450 my $pos = $self->{buf}->getpos();
156 37         383 $self->{buf}->setpos(0);
157 37         526 my $buf;
158 37         162 read($self->{buf}, $buf, $pos);
159 37         736 my $headers;
160 37         309 ($headers, $self->{body}) = split /\r?\n\r?\n/, $buf, 2;
161              
162 37 100       127 if($self->{'logger'}) {
163 1 50       4 if($ENV{'HTTP_IF_NONE_MATCH'}) {
164 0         0 $self->{logger}->debug("HTTP_IF_NONE_MATCH: $ENV{HTTP_IF_NONE_MATCH}");
165             }
166 1 50       4 if($ENV{'HTTP_IF_MODIFIED_SINCE'}) {
167 0         0 $self->{logger}->debug("HTTP_IF_MODIFIED_SINCE: $ENV{HTTP_IF_MODIFIED_SINCE}");
168             }
169 1         10 $self->{logger}->debug("Generate_etag = $self->{generate_etag}, ",
170             "Generate_304 = $self->{generate_304}, ",
171             "Generate_last_modified = $self->{generate_last_modified}");
172             }
173 37 100 66     150 unless($headers || $self->is_cached()) {
174 3 50       12 if($self->{'logger'}) {
175 0         0 $self->{'logger'}->debug('There was no output');
176             }
177 3         20 return;
178             }
179 34 100 100     158 if($ENV{'REQUEST_METHOD'} && ($ENV{'REQUEST_METHOD'} eq 'HEAD')) {
180 1         4 $self->{send_body} = 0;
181             } else {
182 33         68 $self->{send_body} = 1;
183             }
184              
185 34 50       100 if($headers) {
186 34         90 $self->_set_content_type($headers);
187             }
188              
189 34 100 66     197 if(defined($self->{body}) && ($self->{body} eq '')) {
    100          
190             # E.g. if header of Location is given with no body, for
191             # redirection
192 2         16 delete $self->{body};
193 2 50       12 if($self->{cache}) {
194             # Don't try to retrieve it below from the cache
195 0         0 $self->{send_body} = 0;
196             }
197             } elsif(defined($self->{content_type})) {
198 29         88 my @content_type = @{$self->{content_type}};
  29         108  
199 29 50 33     342 if(defined($content_type[0]) && (lc($content_type[0]) eq 'text') && (lc($content_type[1]) =~ /^html/) && defined($self->{body})) {
      33        
      33        
200 29 100       84 if($self->{optimise_content}) {
201             # require HTML::Clean;
202 25         1179 require HTML::Packer; # Overkill using HTML::Clean and HTML::Packer...
203              
204 25 50       21678 if($self->{'logger'}) {
205 0         0 $self->{'logger'}->trace('Packer');
206             }
207              
208 25         54 my $oldlength = length($self->{body});
209 25         36 my $newlength;
210              
211 25 100       73 if($self->{optimise_content} == 1) {
212 24         85 $self->_optimise_content();
213             } else {
214 1         3 while(1) {
215 3         9 $self->_optimise_content();
216 3         5 $newlength = length($self->{body});
217 3 100       9 last if ($newlength >= $oldlength);
218 2         14 $oldlength = $newlength;
219             }
220             }
221              
222             # If we're on http://www.example.com and have a link
223             # to http://www.example.com/foo/bar.htm, change the
224             # link to /foo/bar.htm - there's no need to include
225             # the site name in the link
226 25 100       73 unless(defined($self->{info})) {
227 22 50       53 if($self->{cache}) {
228 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
229             } else {
230 22         143 $self->{info} = CGI::Info->new();
231             }
232             }
233              
234 25         532 my $href = $self->{info}->host_name();
235 25         10002 my $protocol = $self->{info}->protocol();
236              
237 25 100       271 unless($protocol) {
238 10         25 $protocol = 'http';
239             }
240              
241 25         219 $self->{body} =~ s/<a\s+?href="$protocol:\/\/$href"/<a href="\/"/gim;
242 25         174 $self->{body} =~ s/<a\s+?href="$protocol:\/\/$href/<a href="/gim;
243 25         126 $self->{body} =~ s/<a\s+?href="$protocol:\/\//<a href="\/\//gim;
244              
245             # If we're in "/cgi-bin/foo.cgi?arg1=a" replace
246             # "/cgi-bin/foo.cgi?arg2=b" with "?arg2=b"
247              
248 25 100       83 if(my $script_name = $ENV{'SCRIPT_NAME'}) {
249 18 50       57 if($script_name =~ /^\//) {
250 18         128 $self->{body} =~ s/<a\s+?href="$script_name(\?.+)?"/<a href="$1"/gim;
251             }
252             }
253              
254             # TODO use URI->path_segments to change links in
255             # /aa/bb/cc/dd.htm which point to /aa/bb/ff.htm to
256             # ../ff.htm
257              
258             # TODO: <img border=0 src=...>
259 25         121 $self->{body} =~ s/<img\s+?src="$protocol:\/\/$href"/<img src="\/"/gim;
260 25         108 $self->{body} =~ s/<img\s+?src="$protocol:\/\/$href/<img src="/gim;
261              
262             # Don't use HTML::Clean because of RT402
263             # my $h = new HTML::Clean(\$self->{body});
264             # # $h->compat();
265             # $h->strip();
266             # my $ref = $h->data();
267              
268             # Don't always do javascript 'best' since it's confused
269             # by the common <!-- HIDE technique.
270 25         102 my $options = {
271             remove_comments => 1,
272             remove_newlines => 0,
273             do_stylesheet => 'minify'
274             };
275 25 100       90 if($self->{optimise_content} >= 2) {
276 1         4 $options->{do_javascript} = 'best';
277 1         5 $self->{body} =~ s/(<script.*?>)\s*<!--/$1/gi;
278 1         3 $self->{body} =~ s/\/\/-->\s*<\/script>/<\/script>/gi;
279 1         20 $self->{body} =~ s/(<script.*?>)\s+/$1/gi;
280             }
281 25         153 $self->{body} = HTML::Packer->init()->minify(\$self->{body}, $options);
282 25 100       345873 if($self->{optimise_content} >= 2) {
283             # Change document.write("a"); document.write("b")
284             # into document.write("a"+"b");
285 1         3 while(1) {
286 1         29 $self->{body} =~ s/<script\s*?type\s*?=\s*?"text\/javascript"\s*?>(.*?)document\.write\((.+?)\);\s*?document\.write\((.+?)\)/<script type="text\/JavaScript">${1}document.write($2+$3)/igs;
287 1         9 $newlength = length($self->{body});
288 1 50       9 last if ($newlength >= $oldlength);
289 0         0 $oldlength = $newlength;
290             }
291             }
292             }
293 29 100       124 if($self->{lint_content}) {
294 3         27 require HTML::Lint;
295 3         53 HTML::Lint->import;
296              
297 3 50       16 if($self->{'logger'}) {
298 0         0 $self->{'logger'}->trace('Lint');
299             }
300 3         18 my $lint = HTML::Lint->new();
301 3         71 $lint->parse($self->{body});
302 3         4675 $lint->eof();
303              
304 3 100       410 if($lint->errors) {
305 1         16 $headers = 'Status: 500 Internal Server Error';
306 1         3 @{$self->{o}} = ('Content-type: text/plain');
  1         5  
307 1         5 $self->{body} = '';
308 1         4 foreach my $error ($lint->errors) {
309 3         24 my $errtext = $error->where() . ': ' . $error->errtext() . "\n";
310 3 50       100 if($self->{logger}) {
311 0         0 $self->{logger}->warn($errtext);
312             } else {
313 3         97 warn($errtext);
314             }
315 3         22 $self->{body} .= $errtext;
316             }
317             }
318             }
319             }
320             }
321              
322 34 100 66     297 if(defined($headers) && ($headers =~ /^Status: (\d+)/m)) {
    100          
323 1         6 $self->{status} = $1;
324             } elsif(defined($self->{info})) {
325 25         123 $self->{status} = $self->{info}->status();
326             } else {
327 8         20 $self->{status} = 200;
328             }
329              
330 34 100       434 if($self->{'logger'}) {
331 1         9 $self->{'logger'}->debug("Initial status = $self->{status}");
332             }
333              
334             # Generate the eTag before compressing, since the compressed data
335             # includes the mtime field which changes thus causing a different
336             # Etag to be generated
337 34 100 100     308 if($ENV{'SERVER_PROTOCOL'} &&
      100        
      100        
      66        
338             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
339             $self->{generate_etag} && defined($self->{body})) {
340 11         38 $self->_generate_etag();
341              
342 11 100 100     82 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{generate_304} && ($self->{status} == 200)) {
      66        
343 2         10 $self->_check_if_none_match();
344             }
345             }
346              
347 34         57 my $dbh;
348 34 50       136 if(my $save_to = $self->{save_to}) {
349 0         0 my $sqlite_file = File::Spec->catfile($save_to->{directory}, 'fcgi.buffer.sql');
350 0 0       0 if($self->{logger}) {
351 0         0 $self->{logger}->debug("save_to sqlite file: $sqlite_file");
352             }
353 0 0       0 if(!-r $sqlite_file) {
354 0 0       0 if(!-d $save_to->{directory}) {
355 0         0 mkdir $save_to->{directory};
356             }
357 0         0 $dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef);
358 0 0       0 if($self->{save_to}->{create_table}) {
359 0         0 $dbh->prepare('CREATE TABLE fcgi_buffer(key char PRIMARY KEY, language char, browser_type char, path char UNIQUE NOT NULL, uri char NOT NULL, creation timestamp NOT NULL)')->execute();
360             }
361             } else {
362 0         0 $dbh = DBI->connect("dbi:SQLite:dbname=$sqlite_file", undef, undef);
363             }
364             }
365              
366 34         109 my $encoding = $self->_should_gzip();
367 34         74 my $unzipped_body = $self->{body};
368              
369 34 100       80 if(defined($unzipped_body)) {
370 32 50       96 my $range = $ENV{'Range'} ? $ENV{'Range'} : $ENV{'HTTP_RANGE'};
371              
372 32 100 66     95 if($range && !$self->{cache}) {
373             # TODO: Partials
374 3 50       22 if($range =~ /^bytes=(\d*)-(\d*)/) {
375 3 100 100     24 if($1 && $2) {
    100          
    50          
376 1         6 $self->{body} = substr($self->{body}, $1, $2-$1);
377             } elsif($1) {
378 1         7 $self->{body} = substr($self->{body}, $1);
379             } elsif($2) {
380 1         6 $self->{body} = substr($self->{body}, 0, $2);
381             }
382 3         7 $unzipped_body = $self->{body};
383 3         7 $self->{'status'} = 206;
384             }
385             }
386 32         124 $self->_compress({ encoding => $encoding });
387             }
388              
389 34 50       152 if($self->{cache}) {
    100          
390 0         0 require Storable;
391              
392 0         0 my $cache_hash;
393 0         0 my $key = $self->_generate_key();
394              
395             # Cache unzipped version
396 0 0       0 if(!defined($self->{body})) {
397 0 0       0 if($self->{send_body}) {
398 0         0 $self->{cobject} = $self->{cache}->get_object($key);
399 0 0       0 if(defined($self->{cobject})) {
    0          
400 0         0 $cache_hash = Storable::thaw($self->{cobject}->value());
401 0         0 $headers = $cache_hash->{'headers'};
402 0         0 $self->_set_content_type($headers);
403 0         0 @{$self->{o}} = ("X-FCGI-Buffer-$VERSION: Hit");
  0         0  
404 0 0       0 if($self->{info}) {
405 0         0 my $host_name = $self->{info}->host_name();
406 0         0 push @{$self->{o}}, "X-Cache: HIT from $host_name";
  0         0  
407 0         0 push @{$self->{o}}, "X-Cache-Lookup: HIT from $host_name";
  0         0  
408             } else {
409 0         0 push @{$self->{o}}, 'X-Cache: HIT';
  0         0  
410 0         0 push @{$self->{o}}, 'X-Cache-Lookup: HIT';
  0         0  
411             }
412             } elsif($self->{logger}) {
413 0         0 $self->{logger}->warn("Error retrieving data for key $key");
414             } else {
415 0         0 carp(__PACKAGE__, ": error retrieving data for key $key");
416             }
417             }
418              
419             # Nothing has been output yet, so we can check if it's
420             # OK to send 304 if possible
421 0 0 0     0 if($self->{send_body} && $ENV{'SERVER_PROTOCOL'} &&
      0        
      0        
      0        
      0        
422             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
423             $self->{generate_304} && ($self->{status} == 200)) {
424 0 0       0 if($ENV{'HTTP_IF_MODIFIED_SINCE'}) {
425             $self->_check_modified_since({
426             since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
427             modified => $self->{cobject}->created_at()
428 0         0 });
429             }
430             }
431 0 0 0     0 if($self->{send_body} && ($self->{status} == 200) && defined($cache_hash)) {
      0        
432 0         0 $self->{body} = $cache_hash->{'body'};
433 0 0       0 if($dbh) {
434 0         0 my $changes = $self->_save_to($self->{body}, $dbh);
435 0 0 0     0 if($changes && (my $ttl = $self->{save_to}->{ttl})) {
436 0         0 push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str(time + $ttl);
  0         0  
437             }
438             }
439 0 0       0 if(!defined($self->{body})) {
440             # Panic
441 0         0 $headers = 'Status: 500 Internal Server Error';
442 0         0 @{$self->{o}} = ('Content-type: text/plain');
  0         0  
443 0         0 $self->{body} = "Can't retrieve body for key $key, cache_hash contains:\n";
444 0         0 foreach my $k (keys %{$cache_hash}) {
  0         0  
445 0         0 $self->{body} .= "\t$k\n";
446             }
447              
448 0 0       0 if($dbh) {
449 0         0 my $query = "SELECT DISTINCT path FROM fcgi_buffer WHERE key = '$key'";
450 0         0 my $sth = $dbh->prepare($query);
451 0 0       0 if($self->{logger}) {
452 0         0 $self->{logger}->debug($query);
453             }
454 0 0 0     0 if($sth->execute() && (my $href = $sth->fetchrow_hashref())) {
455 0 0       0 if(my $path = $href->{'path'}) {
456 0         0 unlink($path);
457             }
458             }
459 0         0 $query = "DELETE FROM fcgi_buffer WHERE key = '$key'";
460 0         0 $dbh->do($query);
461 0 0       0 if($self->{logger}) {
462 0         0 $self->{logger}->debug($query);
463             }
464             }
465              
466 0         0 $self->{cache}->remove($key);
467              
468 0 0       0 if($self->{logger}) {
469 0         0 $self->{logger}->error("Can't retrieve body for key $key");
470 0         0 $self->{logger}->warn($self->{body});
471             } else {
472 0         0 carp("Can't retrieve body for key $key");
473 0         0 warn($self->{body});
474             }
475 0         0 $self->{send_body} = 0;
476 0         0 $self->{status} = 500;
477             }
478             }
479 0 0 0     0 if($self->{send_body} && $ENV{'SERVER_PROTOCOL'} &&
      0        
      0        
      0        
480             (($ENV{'SERVER_PROTOCOL'} eq 'HTTP/1.1') || ($ENV{'SERVER_PROTOCOL'} eq 'HTTP/2.0')) &&
481             ($self->{status} == 200)) {
482 0 0 0     0 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{generate_etag}) {
483 0 0       0 if(!defined($self->{etag})) {
484 0         0 $self->_generate_etag();
485             }
486 0         0 $self->_check_if_none_match();
487             }
488             }
489 0 0       0 if($self->{status} == 200) {
490 0         0 $encoding = $self->_should_gzip();
491 0 0       0 if($self->{send_body}) {
492 0 0 0     0 if($self->{generate_etag} && !defined($self->{etag}) && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
      0        
      0        
493 0         0 $self->_generate_etag();
494             }
495 0         0 $self->_compress({ encoding => $encoding });
496             }
497             }
498 0         0 my $cannot_304 = !$self->{generate_304};
499 0 0       0 unless($self->{etag}) {
500 0 0 0     0 if(defined($headers) && ($headers =~ /^ETag: "([a-z0-9]{32})"/m)) {
501 0         0 $self->{etag} = $1;
502             } else {
503 0         0 $self->{etag} = $cache_hash->{'etag'};
504             }
505             }
506 0 0 0     0 if($ENV{'HTTP_IF_NONE_MATCH'} && $self->{send_body} && ($self->{status} != 304) && $self->{generate_304}) {
      0        
      0        
507 0 0       0 if(!$self->_check_if_none_match()) {
508 0         0 $cannot_304 = 1;
509             }
510             }
511 0 0       0 if($self->{cobject}) {
512 0 0 0     0 if($ENV{'HTTP_IF_MODIFIED_SINCE'} && ($self->{status} != 304) && !$cannot_304) {
      0        
513             $self->_check_modified_since({
514             since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
515             modified => $self->{cobject}->created_at()
516 0         0 });
517             }
518 0 0 0     0 if(($self->{status} == 200) && $self->{generate_last_modified}) {
519 0 0       0 if($self->{logger}) {
520 0         0 $self->{logger}->debug('Set Last-Modified to ', HTTP::Date::time2str($self->{cobject}->created_at()));
521             }
522 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($self->{cobject}->created_at());
  0         0  
523             }
524             }
525             } else {
526             # Not in the server side cache
527 0 0       0 if($self->{status} == 200) {
528 0         0 my $changes = $self->_save_to($unzipped_body, $dbh);
529              
530 0 0       0 unless($self->{cache_age}) {
531             # It would be great if CHI::set()
532             # allowed the time to be 'lru' for least
533             # recently used.
534 0         0 $self->{cache_age} = '10 minutes';
535             }
536 0         0 $cache_hash->{'body'} = $unzipped_body;
537 0 0 0     0 if($changes && $encoding) {
538 0         0 $self->_compress({ encoding => $encoding });
539             }
540 0 0 0     0 if($self->{o} && scalar(@{$self->{o}})) {
  0 0 0     0  
541             # Remember, we're storing the UNzipped
542             # version in the cache
543 0         0 my $c;
544 0 0 0     0 if(defined($headers) && length($headers)) {
545 0         0 $c = "$headers\r\n" . join("\r\n", @{$self->{o}});
  0         0  
546             } else {
547 0         0 $c = join("\r\n", @{$self->{o}});
  0         0  
548             }
549 0         0 $c =~ s/^Content-Encoding: .+$//mg;
550 0         0 $c =~ s/^Vary: Accept-Encoding.*\r?$//mg;
551 0         0 $c =~ s/\n+/\n/gs;
552 0 0       0 if(length($c)) {
553 0         0 $cache_hash->{'headers'} = $c;
554             }
555             } elsif(defined($headers) && length($headers)) {
556 0         0 $headers =~ s/^Content-Encoding: .+$//mg;
557 0         0 $headers =~ s/^Vary: Accept-Encoding.*\r?$//mg;
558 0         0 $headers =~ s/\n+/\n/gs;
559 0 0       0 if(length($headers)) {
560 0         0 $cache_hash->{'headers'} = $headers;
561             }
562             }
563 0 0       0 if($self->{generate_etag}) {
564 0 0       0 if(!defined($self->{etag})) {
565 0         0 $self->_generate_etag();
566             }
567 0         0 $cache_hash->{'etag'} = $self->{etag};
568             }
569             # TODO: Support the Expires header
570             # if($headers !~ /^Expires: /m))) {
571             # }
572 0 0       0 if($self->{logger}) {
573 0         0 $self->{logger}->debug("Store $key in the cache, age = ", $self->{cache_age}, ' ', length($cache_hash->{'body'}), ' bytes');
574             }
575 0         0 $self->{cache}->set($key, Storable::freeze($cache_hash), $self->{cache_age});
576              
577             # Create a static page with the information and link to that in the output
578             # HTML
579 0 0 0     0 if($dbh && $self->{info} && $self->{save_to} && (my $request_uri = $ENV{'REQUEST_URI'})) {
      0        
      0        
580 0         0 my $query = "SELECT DISTINCT creation FROM fcgi_buffer WHERE key = ?";
581 0 0       0 if($self->{logger}) {
582 0         0 $self->{logger}->debug("$query: $key");
583             }
584 0         0 my $sth = $dbh->prepare($query);
585 0         0 $sth->execute($key);
586 0 0       0 if(my $href = $sth->fetchrow_hashref()) {
587 0 0       0 if(my $ttl = $self->{save_to}->{ttl}) {
588 0         0 push @{$self->{o}}, 'Expires: ' .
589 0         0 HTTP::Date::time2str($href->{'creation'} + $ttl);
590             }
591             } else {
592 0         0 my $dir = $self->{save_to}->{directory};
593 0         0 my $browser_type = $self->{info}->browser_type();
594 0         0 my $language;
595 0 0       0 if($self->{'lingua'}) {
596 0         0 $language = $self->{lingua}->language();
597 0 0       0 if($language =~ /([\w\s]+)/i) {
598 0         0 $language = $1; # Untaint
599             }
600             } else {
601 0         0 $language = 'default';
602             }
603 0         0 my $bdir = File::Spec->catfile($dir, $browser_type);
604 0 0       0 if($bdir =~ /^([\/\\])(.+)$/) {
605 0         0 $bdir = "$1$2"; # Untaint
606             }
607 0         0 my $ldir = File::Spec->catfile($bdir, $language);
608 0         0 my $sdir = File::Spec->catfile($ldir, $self->{info}->script_name());
609 0 0       0 if($self->{logger}) {
610 0         0 $self->{logger}->debug("Create paths to $sdir");
611             }
612 0         0 File::Path::make_path($sdir);
613 0         0 my $file = $self->{info}->as_string();
614 0         0 $file =~ tr/\//_/;
615 0         0 my $path = File::Spec->catfile($sdir, "$file.html");
616 0 0       0 if($path =~ /^(.+)$/) {
617 0         0 $path = $1; # Untaint
618 0         0 $path =~ tr/[\|;]/_/;
619             }
620 0 0       0 if(open(my $fout, '>', $path)) {
621 0         0 my $u = $request_uri;
622 0         0 $u =~ s/\?/\\?/g;
623 0         0 $u =~ s/\)/\\)/g;
624 0         0 my $copy = $unzipped_body;
625 0         0 my $changes = ($copy =~ s/<a\s+href="$u"/<a href="$path"/gi);
626              
627             # handle <a href="?arg3=4">Call self with different args</a>
628 0         0 my $script_name = $ENV{'SCRIPT_NAME'};
629 0         0 $copy =~ s/<a\s+href="(\?.+?)"/<a href="$script_name$1"/gi;
630              
631             # Avoid Wide character
632 0 0       0 unless($self->{_encode_loaded}) {
633 0         0 require Encode;
634 0         0 $self->{_encode_loaded} = 1;
635             }
636 0         0 print $fout Encode::encode_utf8($copy);
637 0         0 close $fout;
638             # Do INSERT OR REPLACE in case another program has
639             # got in first,
640 0         0 $query = "INSERT OR REPLACE INTO fcgi_buffer(key, language, browser_type, path, uri, creation) VALUES('$key', '$language', '$browser_type', '$path', '$request_uri', strftime('\%s','now'))";
641 0 0       0 if($self->{logger}) {
642 0         0 $self->{logger}->debug($query);
643             }
644 0         0 $dbh->prepare($query)->execute();
645              
646 0 0 0     0 if($changes && (my $ttl = $self->{save_to}->{ttl})) {
647 0         0 push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str(time + $ttl);
  0         0  
648             }
649             } else {
650 0 0       0 if($self->{logger}) {
651 0         0 $self->{logger}->warn("Can't create $path");
652             }
653             }
654             }
655             }
656 0 0       0 if($self->{generate_last_modified}) {
657 0         0 $self->{cobject} = $self->{cache}->get_object($key);
658 0 0       0 if(defined($self->{cobject})) {
659 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($self->{cobject}->created_at());
  0         0  
660             } else {
661 0         0 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str(time);
  0         0  
662             }
663             }
664             }
665 0 0       0 if($self->{info}) {
666 0         0 my $host_name = $self->{info}->host_name();
667 0 0       0 if(defined($self->{x_cache})) {
668 0         0 push @{$self->{o}}, 'X-Cache: ' . $self->{x_cache} . " from $host_name";
  0         0  
669             } else {
670 0         0 push @{$self->{o}}, "X-Cache: MISS from $host_name";
  0         0  
671             }
672 0         0 push @{$self->{o}}, "X-Cache-Lookup: MISS from $host_name";
  0         0  
673             } else {
674 0 0       0 if(defined($self->{x_cache})) {
675 0         0 push @{$self->{o}}, 'X-Cache: ' . $self->{x_cache};
  0         0  
676             } else {
677 0         0 push @{$self->{o}}, 'X-Cache: MISS';
  0         0  
678             }
679 0         0 push @{$self->{o}}, 'X-Cache-Lookup: MISS';
  0         0  
680             }
681 0         0 push @{$self->{o}}, "X-FCGI-Buffer-$VERSION: Miss";
  0         0  
682             }
683             # We don't need it any more, so give Perl a chance to
684             # tidy it up seeing as we're in the destructor
685 0         0 delete $self->{cache};
686             } elsif($self->{info}) {
687 26         88 my $host_name = $self->{info}->host_name();
688 26         247 push @{$self->{o}}, ("X-Cache: MISS from $host_name", "X-Cache-Lookup: MISS from $host_name");
  26         128  
689 26 50       75 if($self->{generate_last_modified}) {
690 26 100       71 if(my $age = $self->_my_age()) {
691 8         15 push @{$self->{o}}, 'Last-Modified: ' . HTTP::Date::time2str($age);
  8         73  
692             }
693             }
694 26 50 66     281 if($ENV{'HTTP_IF_MODIFIED_SINCE'} && ($self->{status} != 304) && $self->{generate_304}) {
      66        
695             $self->_check_modified_since({
696 3         12 since => $ENV{'HTTP_IF_MODIFIED_SINCE'},
697             modified => $self->_my_age()
698             });
699             }
700 26 50 33     85 if($self->_save_to($unzipped_body, $dbh) && $encoding) {
701 0         0 $self->_compress({ encoding => $encoding });
702             }
703             } else {
704 8         22 push @{$self->{o}}, ('X-Cache: MISS', 'X-Cache-Lookup: MISS');
  8         30  
705             }
706              
707             # if($self->{generate_etag} && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
708             # if((!defined($self->{etag})) &&
709             # (($self->{status} == 200) || $self->{status} == 304) &&
710             # $self->{body} && (!$ENV{'NO_CACHE'}) &&
711             # !$self->is_cached()) {
712             # unless($self->{_encode_loaded}) {
713             # require Encode;
714             # $self->{_encode_loaded} = 1;
715             # }
716             # $self->{etag} = '"' . Digest::MD5->new->add(Encode::encode_utf8($unzipped_body))->hexdigest() . '"';
717             # }
718             # if(defined($self->{etag})) {
719             # push @{$self->{o}}, "ETag: $self->{etag}";
720             # if($self->{logger}) {
721             # $self->{logger}->debug("Set ETag to $self->{etag}");
722             # }
723             # } else {
724             # open(my $fout, '>>', '/tmp/FCGI-bug');
725             # print $fout "BUG: ETag not generated, status $self->{status}:\n",
726             # "$headers\n",
727             # 'x' x 40, "\n",
728             # # defined($self->{body}) ? $self->{body} : "body is empty\n",
729             # defined($unzipped_body) ? "$unzipped_body\n" : "body is empty\n",
730             # 'x' x 40,
731             # "\n";
732             # print $fout "ENV:\n";
733             # while(my ($key, $value) = each %ENV) {
734             # print $fout "$key = $value\n";
735             # }
736             # close $fout;
737             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
738             # }
739             # }
740 34 100 33     186 if($self->{generate_etag} && ((!defined($headers)) || ($headers !~ /^ETag: /m))) {
      66        
741 11 50 0     32 if(defined($self->{etag})) {
    0 0        
      0        
      0        
      0        
742 11         14 push @{$self->{o}}, "ETag: $self->{etag}";
  11         42  
743 11 50       35 if($self->{logger}) {
744 0         0 $self->{logger}->debug("Set ETag to $self->{etag}");
745             }
746             } elsif($self->{logger} && (($self->{status} == 200) || $self->{status} == 304) && $self->{body} && (!$ENV{'NO_CACHE'}) && !$self->is_cached()) {
747             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
748             # open(my $fout, '>>', '/tmp/FCGI-bug');
749             # print $fout "BUG: ETag not generated, status $self->{status}:\n",
750             # $headers,
751             # 'x' x 40,
752             # defined($self->{body}) ? $self->{body} : "body is empty\n",
753             # 'x' x 40,
754             # "\n";
755             # print $fout "ENV:\n";
756             # while(my ($key, $value) = each %ENV) {
757             # print $fout "$key = $value\n;
758             # }
759             # print $fout 'x' x 40, "\n";
760             # close $fout;
761             # $self->{logger}->warn("BUG: ETag not generated, status $self->{status}");
762             }
763             }
764              
765 34         51 my $body_length;
766 34 100       88 if(defined($self->{body})) {
767 32 100       107 if(utf8::is_utf8($self->{body})) {
768 1         6 utf8::encode($self->{body});
769             }
770 32         58 $body_length = length($self->{body});
771             } else {
772 2         5 $body_length = 0;
773             }
774              
775 34 50 33     164 if(defined($headers) && length($headers)) {
776             # Put the original headers first, then those generated within
777             # FCGI::Buffer
778 34         60 unshift @{$self->{o}}, split(/\r\n/, $headers);
  34         164  
779 34 100 100     170 if($self->{body} && $self->{send_body}) {
780 30 50       51 unless(grep(/^Content-Length: \d/, @{$self->{o}})) {
  30         146  
781 30         46 push @{$self->{o}}, "Content-Length: $body_length";
  30         111  
782             }
783             }
784 34 100       67 unless(grep(/^Status: \d/, @{$self->{o}})) {
  34         162  
785 31         1981 require HTTP::Status;
786 31         26416 HTTP::Status->import();
787              
788 31         87 push @{$self->{o}}, 'Status: ' . $self->{status} . ' ' . HTTP::Status::status_message($self->{status});
  31         190  
789             }
790             } else {
791 0         0 push @{$self->{o}}, "X-FCGI-Buffer-$VERSION: No headers";
  0         0  
792             }
793              
794 34 100 100     291 if($body_length && $self->{send_body}) {
795 30         48 push @{$self->{o}}, ('', $self->{body});
  30         77  
796             }
797              
798             # XXXXXXXXXXXXXXXXXXXXXXX
799 34 50       49 if(0) {
800             # This code helps to debug Wide character prints
801             my $wideCharWarningsIssued = 0;
802             my $widemess;
803             $SIG{__WARN__} = sub {
804 0     0   0 $wideCharWarningsIssued += "@_" =~ /Wide character in .../;
805 0         0 $widemess = "@_";
806 0 0       0 if($self->{logger}) {
807 0         0 $self->{logger}->fatal($widemess);
808 0         0 my $i = 1;
809 0         0 $self->{logger}->trace('Stack Trace');
810 0         0 while((my @call_details = (caller($i++)))) {
811 0         0 $self->{logger}->trace($call_details[1], ':', $call_details[2], ' in function ', $call_details[3]);
812             }
813             }
814 0         0 CORE::warn(@_); # call the builtin warn as usual
815             };
816              
817             if(scalar @{$self->{o}}) {
818             print join("\r\n", @{$self->{o}});
819             if($wideCharWarningsIssued) {
820             my $mess = join("\r\n", @{$self->{o}});
821             $mess =~ /[^\x00-\xFF]/;
822             open(my $fout, '>>', '/tmp/NJH');
823             print $fout "$widemess:\n",
824             $mess,
825             'x' x 40,
826             "\n";
827             close $fout;
828             }
829             }
830 0         0 } elsif(scalar @{$self->{o}}) {
  34         96  
831 34         60 print join("\r\n", @{$self->{o}});
  34         1853  
832             }
833             # XXXXXXXXXXXXXXXXXXXXXXX
834              
835 34 100 100     701 if((!$self->{send_body}) || !defined($self->{body})) {
836 4         102 print "\r\n\r\n";
837             }
838             }
839              
840             sub _generate_etag {
841 11     11   19 my $self = shift;
842              
843 11 50       29 return if defined($self->{'etag'});
844 11 50       27 return unless defined($self->{'body'});
845              
846 11 50       28 if(!defined($self->{_encode_loaded})) {
847             # encode to avoid "Wide character in subroutine entry"
848 11         72 require Encode;
849 11         25 $self->{_encode_loaded} = 1;
850             }
851 11         154 $self->{etag} = '"' . Digest::MD5->new->add(Encode::encode_utf8($self->{body}))->hexdigest() . '"';
852 11 50       74 if($self->{'logger'}) {
853 0         0 $self->{'logger'}->debug('Etag = ', $self->{'etag'});
854             }
855             }
856              
857             sub _check_modified_since {
858 3     3   8 my $self = shift;
859              
860 3 50       10 if($self->{logger}) {
861 0         0 $self->{logger}->trace('In _check_modified_since');
862             }
863              
864 3 50       8 if(!$self->{generate_304}) {
865 0         0 return;
866             }
867 3         6 my $params = shift;
868              
869 3 50       10 if(!defined($$params{since})) {
870 0         0 return;
871             }
872 3         13 my $s = HTTP::Date::str2time($$params{since});
873 3 100       357 if(!defined($s)) {
874 1 50       5 if($self->{logger}) {
875 0         0 $self->{logger}->info("$$params{since} is not a valid date");
876             }
877 1         3 return;
878             }
879              
880 2         7 my $age = $self->_my_age();
881 2 50       9 if(!defined($age)) {
882 2 50       11 if($self->{logger}) {
883 0         0 $self->{logger}->info("Can't determine my age");
884             }
885 2         5 return;
886             }
887 0 0       0 if($age > $s) {
888 0 0       0 if($self->{logger}) {
889 0         0 $self->{logger}->debug('_check_modified_since: script has been modified');
890             }
891             # Script has been updated so it may produce different output
892 0         0 return;
893             }
894              
895 0 0       0 if($self->{logger}) {
896 0         0 $self->{logger}->debug("_check_modified_since: Compare $$params{modified} with $s");
897             }
898 0 0       0 if($$params{modified} <= $s) {
899 0         0 push @{$self->{o}}, 'Status: 304 Not Modified';
  0         0  
900 0         0 $self->{status} = 304;
901 0         0 $self->{send_body} = 0;
902 0 0       0 if($self->{logger}) {
903 0         0 $self->{logger}->debug('Set status to 304');
904             }
905             }
906             }
907              
908             # Reduce output, e.g. remove superfluous white-space.
909             sub _optimise_content {
910 27     27   51 my $self = shift;
911              
912             # FIXME: regex bad, HTML parser good
913             # Regexp::List - wow!
914 27         1136 $self->{body} =~ s/(((\s+|\r)\n|\n(\s+|\+)))/\n/g;
915             # $self->{body} =~ s/\r\n/\n/gs;
916             # $self->{body} =~ s/\s+\n/\n/gs;
917             # $self->{body} =~ s/\n+/\n/gs;
918             # $self->{body} =~ s/\n\s+|\s+\n/\n/g;
919 27         134 $self->{body} =~ s/\<\/div\>\s+\<div/\<\/div\>\<div/gis;
920             # $self->{body} =~ s/\<\/p\>\s\<\/div/\<\/p\>\<\/div/gis;
921             # $self->{body} =~ s/\<div\>\s+/\<div\>/gis; # Remove spaces after <div>
922 27         8603 $self->{body} =~ s/(<div>\s+|\s+<div>)/<div>/gis;
923 27         116 $self->{body} =~ s/\s+<\/div\>/\<\/div\>/gis; # Remove spaces before </div>
924 27         531 $self->{body} =~ s/\s+\<p\>|\<p\>\s+/\<p\>/im; # TODO <p class=
925 27         545 $self->{body} =~ s/\s+\<\/p\>|\<\/p\>\s+/\<\/p\>/gis;
926 27         95 $self->{body} =~ s/<html>\s+<head>/<html><head>/is;
927 27         140 $self->{body} =~ s/\s*<\/head>\s+<body>\s*/<\/head><body>/is;
928 27         77 $self->{body} =~ s/<html>\s+<body>/<html><body>/is;
929 27         93 $self->{body} =~ s/<body>\s+/<body>/is;
930 27         97 $self->{body} =~ s/\s+\<\/html/\<\/html/is;
931 27         89 $self->{body} =~ s/\s+\<\/body/\<\/body/is;
932 27         173 $self->{body} =~ s/\s(\<.+?\>\s\<.+?\>)/$1/;
933             # $self->{body} =~ s/(\<.+?\>\s\<.+?\>)\s/$1/g;
934 27         75 $self->{body} =~ s/\<p\>\s/\<p\>/gi;
935 27         76 $self->{body} =~ s/\<\/p\>\s\<p\>/\<\/p\>\<p\>/gi;
936 27         63 $self->{body} =~ s/\<\/tr\>\s\<tr\>/\<\/tr\>\<tr\>/gi;
937 27         68 $self->{body} =~ s/\<\/td\>\s\<\/tr\>/\<\/td\>\<\/tr\>/gi;
938 27         104 $self->{body} =~ s/\<\/td\>\s*\<td\>/\<\/td\>\<td\>/gis;
939 27         71 $self->{body} =~ s/\<\/tr\>\s\<\/table\>/\<\/tr\>\<\/table\>/gi;
940 27         88 $self->{body} =~ s/\<br\s?\/?\>\s?\<p\>/\<p\>/gi;
941 27         74 $self->{body} =~ s/\<br\>\s+/\<br\>/gi;
942 27         87 $self->{body} =~ s/\s+\<br/\<br/gi;
943 27         63 $self->{body} =~ s/\<br\s?\/\>\s/\<br \/\>/gi;
944 27         144 $self->{body} =~ s/[ \t]+/ /gs; # Remove duplicate space, don't use \s+ it breaks JavaScript
945 27         90 $self->{body} =~ s/\s\<p\>/\<p\>/gi;
946 27         81 $self->{body} =~ s/\s\<script/\<script/gi;
947 27         443 $self->{body} =~ s/(<script>\s|\s<script>)/<script>/gis;
948 27         435 $self->{body} =~ s/(<\/script>\s|\s<\/script>)/<\/script>/gis;
949 27         71 $self->{body} =~ s/\<td\>\s/\<td\>/gi;
950 27         129 $self->{body} =~ s/\s+\<a\shref="(.+?)"\>\s?/ <a href="$1">/gis;
951 27         121 $self->{body} =~ s/\s?<a\shref=\s"(.+?)"\>/ <a href="$1">/gis;
952 27         94 $self->{body} =~ s/\s+<\/a\>\s+/<\/a> /gis;
953 27         691 $self->{body} =~ s/(\s?<hr>\s+|\s+<hr>\s?)/<hr>/gis;
954             # $self->{body} =~ s/\s<hr>/<hr>/gis;
955             # $self->{body} =~ s/<hr>\s/<hr>/gis;
956 27         71 $self->{body} =~ s/<\/li>\s+<li>/<\/li><li>/gis;
957 27         64 $self->{body} =~ s/<\/li>\s+<\/ul>/<\/li><\/ul>/gis;
958 27         63 $self->{body} =~ s/<ul>\s+<li>/<ul><li>/gis;
959 27         75 $self->{body} =~ s/\s+<\/li>/<\/li>/gis;
960 27         69 $self->{body} =~ s/\<\/option\>\s+\<option/\<\/option\>\<option/gis;
961 27         83 $self->{body} =~ s/<title>\s*(.+?)\s*<\/title>/<title>$1<\/title>/is;
962 27         135 $self->{body} =~ s/<\/center>\s+<center>/ /gis;
963             }
964              
965             # Create a key for the cache
966             sub _generate_key {
967 0     0   0 my $self = shift;
968 0 0       0 if($self->{cache_key}) {
969 0         0 return $self->{cache_key};
970             }
971 0 0       0 unless(defined($self->{info})) {
972 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
973             }
974              
975 0         0 my $key = $self->{info}->browser_type() . '::' . $self->{info}->domain_name() . '::' . $self->{info}->script_name() . '::' . $self->{info}->as_string();
976              
977 0 0       0 if($self->{lingua}) {
978 0         0 $key .= '::' . $self->{lingua}->language();
979             }
980 0 0       0 if($ENV{'HTTP_COOKIE'}) {
981             # Different states of the client are stored in different caches
982             # Don't put different Google Analytics in different caches, and anyway they
983             # would be wrong
984 0         0 foreach my $cookie(split(/;/, $ENV{'HTTP_COOKIE'})) {
985 0 0       0 unless($cookie =~ /^__utm[abcz]/) {
986 0         0 $key .= "::$cookie";
987             }
988             }
989             }
990              
991             # Honour the Vary headers
992 0         0 my $headers = $self->{'headers'};
993 0 0 0     0 if($headers && ($headers =~ /^Vary: .*$/m)) {
994 0 0       0 if(defined($self->{logger})) {
995 0         0 $self->{logger}->debug('Found Vary header');
996             }
997 0         0 foreach my $h1(split(/\r?\n/, $headers)) {
998 0         0 my ($h1_name, $h1_value) = split /\:\s*/, $h1, 2;
999 0 0       0 if(lc($h1_name) eq 'vary') {
1000 0         0 foreach my $h2(split(/\r?\n/, $headers)) {
1001 0         0 my ($h2_name, $h2_value) = split /\:\s*/, $h2, 2;
1002 0 0       0 if($h2_name eq $h1_value) {
1003 0         0 $key .= "::$h2_value";
1004 0         0 last;
1005             }
1006             }
1007             }
1008             }
1009             }
1010 0         0 $key =~ s/\//::/g;
1011 0         0 $key =~ s/::::/::/g;
1012 0         0 $key =~ s/::$//;
1013 0 0       0 if(defined($self->{logger})) {
1014 0         0 $self->{logger}->trace("Returning $key");
1015             }
1016 0         0 $self->{cache_key} = $key;
1017 0         0 return $key;
1018             }
1019              
1020             =head2 init
1021              
1022             Set various options and override default values.
1023              
1024             # Put this toward the top of your program before you do anything
1025             # By default, generate_tag, generate_304 and compress_content are ON,
1026             # optimise_content and lint_content are OFF. Set optimise_content to 2 to
1027             # do aggressive JavaScript optimisations which may fail.
1028             use FCGI::Buffer;
1029              
1030             my $buffer = FCGI::Buffer->new()->init({
1031             generate_etag => 1, # make good use of client's cache
1032             generate_last_modified => 1, # more use of client's cache
1033             compress_content => 1, # if gzip the output
1034             optimise_content => 0, # optimise your program's HTML, CSS and JavaScript
1035             cache => CHI->new(driver => 'File'), # cache requests
1036             cache_key => 'string', # key for the cache
1037             cache_age => '10 minutes', # how long to store responses in the cache
1038             logger => $self->{logger},
1039             lint_content => 0, # Pass through HTML::Lint
1040             generate_304 => 1, # When appropriate, generate 304: Not modified
1041             save_to => { directory => '/var/www/htdocs/save_to', ttl => 600, create_table => 1 },
1042             info => CGI::Info->new(),
1043             lingua => CGI::Lingua->new(),
1044             });
1045              
1046             If no cache_key is given, one will be generated which may not be unique.
1047             The cache_key should be a unique value dependent upon the values set by the
1048             browser.
1049              
1050             The cache object will be an object that understands get_object(),
1051             set(), remove() and created_at() messages, such as an L<CHI> object. It is
1052             used as a server-side cache to reduce the need to rerun database accesses.
1053              
1054             Items stay in the server-side cache by default for 10 minutes.
1055             This can be overridden by the cache_control HTTP header in the request, and
1056             the default can be changed by the cache_age argument to init().
1057              
1058             Save_to is feature which stores output of dynamic pages to your
1059             htdocs tree and replaces future links that point to that page with static links
1060             to avoid going through CGI at all.
1061             Ttl is set to the number of seconds that the static pages are deemed to
1062             be live for, the default is 10 minutes.
1063             If set to 0, the page is live forever.
1064             To enable save_to, a info and lingua arguments must also be given.
1065             It works best when cache is also given.
1066             Only use where output is guaranteed to be the same with a given set of arguments
1067             (the same criteria for enabling generate_304).
1068             You can turn it off on a case by case basis thus:
1069              
1070             my $params = CGI::Info->new()->params();
1071             if($params->{'send_private_email'}) {
1072             $buffer->init('save_to' => undef);
1073             }
1074              
1075             Info is an optional argument to give information about the FCGI environment, e.g.
1076             a L<CGI::Info> object.
1077              
1078             Logger will be an object that understands debug() such as an L<Log::Log4perl>
1079             object.
1080              
1081             To generate a last_modified header, you must give a cache object.
1082              
1083             Init allows a reference of the options to be passed. So both of these work:
1084             use FCGI::Buffer;
1085             #...
1086             my $buffer = FCGI::Buffer->new();
1087             $b->init(generate_etag => 1);
1088             $b->init({ generate_etag => 1, info => CGI::Info->new() });
1089              
1090             Generally speaking, passing by reference is better since it copies less on to
1091             the stack.
1092              
1093             If you give a cache to init() then later give cache => undef,
1094             the server side cache is no longer used.
1095             This is useful when you find an error condition when creating your HTML
1096             and decide that you no longer wish to store the output in the cache.
1097              
1098             =cut
1099              
1100             sub init {
1101 32     32 1 8408 my $self = shift;
1102 32 100       114 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  22         89  
1103              
1104             # Safe options - can be called at any time
1105 32 100       109 if(defined($params{generate_etag})) {
1106 9         22 $self->{generate_etag} = $params{generate_etag};
1107             }
1108 32 50       80 if(defined($params{generate_last_modified})) {
1109 0         0 $self->{generate_last_modified} = $params{generate_last_modified};
1110             }
1111 32 100       75 if(defined($params{compress_content})) {
1112 1         4 $self->{compress_content} = $params{compress_content};
1113             }
1114 32 100       74 if(defined($params{optimise_content})) {
1115 31         72 $self->{optimise_content} = $params{optimise_content};
1116             }
1117 32 100       72 if(defined($params{lint_content})) {
1118 4         10 $self->{lint_content} = $params{lint_content};
1119             }
1120 32 100       72 if(defined($params{logger})) {
1121 1         2 $self->{logger} = $params{logger};
1122             }
1123 32 50       73 if(defined($params{lingua})) {
1124 0         0 $self->{lingua} = $params{lingua};
1125             }
1126              
1127 32 100 66     153 if(defined($params{save_to}) && $self->can_cache()) {
    50 33        
1128 1 50       3 if(my $dir = $params{'save_to'}->{'directory'}) {
1129 1 50       18 if(! -d $dir) {
1130 1         16 mkdir $dir;
1131 1 50       11 if(! -d $dir) {
1132 1         6 Carp::carp("$dir isn't a directory");
1133 1         486 return;
1134             }
1135             }
1136 0 0       0 if(! -w $dir) {
1137 0         0 Carp::carp("$dir isn't writeable");
1138 0         0 return;
1139             }
1140             }
1141 0         0 $self->{save_to} = $params{save_to};
1142 0 0       0 if(!exists($params{save_to}->{'ttl'})) {
1143 0         0 $self->{save_to}->{'ttl'} = 600;
1144             }
1145             } elsif(exists($params{'save_to'}) && !defined($params{'save_to'})) {
1146 0         0 delete $self->{'save_to'};
1147             }
1148 31 100       67 if(defined($params{generate_304})) {
1149 3         23 $self->{generate_304} = $params{generate_304};
1150             }
1151 31 100 66     87 if(defined($params{info}) && (!defined($self->{info}))) {
1152 4         10 $self->{info} = $params{info};
1153             }
1154              
1155             # Unsafe options - must be called before output has been started
1156 31         105 my $pos = $self->{buf}->getpos;
1157 31 50       341 if($pos > 0) {
1158 0 0       0 if(defined($self->{logger})) {
1159 0         0 my @call_details = caller(0);
1160 0         0 $self->{logger}->warn("Too late to call init, $pos characters have been printed, caller line $call_details[2] of $call_details[1]");
1161             } else {
1162             # Must do Carp::carp instead of carp for Test::Carp
1163 0         0 Carp::carp "Too late to call init, $pos characters have been printed";
1164             }
1165             }
1166 31 50 33     93 if(exists($params{cache}) && $self->can_cache()) {
1167 0 0       0 if(defined($ENV{'HTTP_CACHE_CONTROL'})) {
1168 0         0 my $control = $ENV{'HTTP_CACHE_CONTROL'};
1169 0 0       0 if(defined($self->{logger})) {
1170 0         0 $self->{logger}->debug("cache_control = $control");
1171             }
1172 0 0       0 if($control =~ /^max-age\s*=\s*(\d+)$/) {
1173             # There is an argument not to do this
1174             # since one client will affect others
1175 0         0 $self->{cache_age} = "$1 seconds";
1176 0 0       0 if(defined($self->{logger})) {
1177 0         0 $self->{logger}->debug("cache_age = $self->{cache_age}");
1178             }
1179             }
1180             }
1181 0   0     0 $self->{cache_age} ||= $params{cache_age};
1182 0 0 0     0 if((!defined($params{cache})) && defined($self->{cache})) {
1183 0 0       0 if(defined($self->{logger})) {
1184 0 0       0 if($self->{cache_key}) {
1185 0         0 $self->{logger}->debug('disabling cache ', $self->{cache_key});
1186             } else {
1187 0         0 $self->{logger}->debug('disabling cache');
1188             }
1189             }
1190 0         0 delete $self->{cache};
1191             } else {
1192 0         0 $self->{cache} = $params{cache};
1193             }
1194 0 0       0 if(defined($params{cache_key})) {
1195 0         0 $self->{cache_key} = $params{cache_key};
1196             }
1197             }
1198              
1199 31         114 return $self;
1200             }
1201              
1202             sub import {
1203             # my $class = shift;
1204 11     11   181 shift;
1205              
1206 11 50       2418 return unless scalar(@_);
1207              
1208 0         0 init(@_);
1209             }
1210              
1211             =head2 set_options
1212              
1213             Synonym for init, kept for historical reasons.
1214              
1215             =cut
1216              
1217             sub set_options {
1218 11     11 1 7167 my $self = shift;
1219 11 100       44 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  8         38  
1220              
1221 11         42 $self->init(\%params);
1222             }
1223              
1224             =head2 can_cache
1225              
1226             Returns true if the server is allowed to store the results locally.
1227             This is the value of X-Cache in the returned header.
1228              
1229             =cut
1230              
1231             sub can_cache {
1232 9     9 1 2697 my $self = shift;
1233              
1234 9 100       52 if(defined($self->{x_cache})) {
1235 1         6 return ($self->{x_cache} eq 'HIT');
1236             }
1237 8 50 33     75 if(defined($ENV{'NO_CACHE'}) || defined($ENV{'NO_STORE'})) {
1238 0         0 $self->{x_cache} = 'MISS';
1239 0         0 return 0;
1240             }
1241 8 50       26 if(defined($ENV{'HTTP_CACHE_CONTROL'})) {
1242 0         0 my $control = $ENV{'HTTP_CACHE_CONTROL'};
1243 0 0       0 if(defined($self->{logger})) {
1244 0         0 $self->{logger}->debug("cache_control = $control");
1245             }
1246             # TODO: check Authorization header not present
1247 0 0 0     0 if(($control eq 'no-store') ||
      0        
      0        
1248             ($control eq 'no-cache') ||
1249             ($control eq 'max-age=0') ||
1250             ($control eq 'private')) {
1251 0         0 $self->{x_cache} = 'MISS';
1252 0         0 return 0;
1253             }
1254             }
1255 8         24 $self->{x_cache} = 'HIT';
1256 8         34 return 1;
1257             }
1258              
1259             =head2 is_cached
1260              
1261             Returns true if the output is cached. If it is then it means that all of the
1262             expensive routines in the FCGI script can be by-passed because we already have
1263             the result stored in the cache.
1264              
1265             # Put this toward the top of your program before you do anything
1266              
1267             # Example key generation - use whatever you want as something
1268             # unique for this call, so that subsequent calls with the same
1269             # values match something in the cache
1270             use CGI::Info;
1271             use CGI::Lingua;
1272             use FCGI::Buffer;
1273              
1274             my $i = CGI::Info->new();
1275             my $l = CGI::Lingua->new(supported => ['en']);
1276              
1277             # To use server side caching you must give the cache argument, however
1278             # the cache_key argument is optional - if you don't give one then one will
1279             # be generated for you
1280             my $buffer = FCGI::Buffer->new();
1281             if($buffer->can_cache()) {
1282             $buffer->init(
1283             cache => CHI->new(driver => 'File'),
1284             cache_key => $i->domain_name() . '/' . $i->script_name() . '/' . $i->as_string() . '/' . $l->language()
1285             );
1286             if($buffer->is_cached()) {
1287             # Output will be retrieved from the cache and sent automatically
1288             exit;
1289             }
1290             }
1291             # Not in the cache, so now do our expensive computing to generate the
1292             # results
1293             print "Content-type: text/html\n";
1294             # ...
1295              
1296             =cut
1297              
1298             sub is_cached {
1299 12     12 1 1594 my $self = shift;
1300              
1301 12 50       86 unless($self->{cache}) {
1302 12 50       37 if($self->{logger}) {
1303 0         0 $self->{logger}->debug("is_cached: cache hasn't been enabled");
1304 0         0 my $i = 0;
1305 0         0 while((my @call_details = (caller($i++)))) {
1306 0         0 $self->{logger}->debug($call_details[1], ':', $call_details[2], ' calling function ', $call_details[3]);
1307             }
1308             }
1309 12         63 return 0;
1310             }
1311              
1312 0         0 my $key = $self->_generate_key();
1313              
1314 0 0       0 if($self->{logger}) {
1315 0         0 $self->{logger}->debug("is_cached: looking for key = $key");
1316             }
1317 0         0 $self->{cobject} = $self->{cache}->get_object($key);
1318 0 0       0 unless($self->{cobject}) {
1319 0 0       0 if($self->{logger}) {
1320 0         0 $self->{logger}->debug('not found in cache');
1321             }
1322 0         0 return 0;
1323             }
1324 0 0       0 unless($self->{cobject}->value($key)) {
1325 0 0       0 if($self->{logger}) {
1326 0         0 $self->{logger}->warn('is_cached: object is in the cache but not the data');
1327             }
1328 0         0 delete $self->{cobject};
1329 0         0 return 0;
1330             }
1331              
1332             # If the script has changed, don't use the cache since we may produce
1333             # different output
1334 0         0 my $age = $self->_my_age();
1335 0 0       0 unless(defined($age)) {
1336 0 0       0 if($self->{logger}) {
1337 0         0 $self->{logger}->debug("Can't determine script's age");
1338             }
1339             # Can't determine the age. Play it safe an assume we're not
1340             # cached
1341 0         0 delete $self->{cobject};
1342 0         0 return 0;
1343             }
1344 0 0       0 if($age > $self->{cobject}->created_at()) {
1345             # Script has been updated so it may produce different output
1346 0 0       0 if($self->{logger}) {
1347 0         0 $self->{logger}->debug('Script has been updated');
1348             }
1349 0         0 delete $self->{cobject};
1350             # Nothing will be in date and all new searches would miss
1351             # anyway, so may as well clear it all
1352             # FIXME: RT104471
1353             # $self->{cache}->clear();
1354 0         0 return 0;
1355             }
1356 0 0       0 if($self->{logger}) {
1357 0         0 $self->{logger}->debug('Script is in the cache');
1358             }
1359 0         0 return 1;
1360             }
1361              
1362             sub _my_age {
1363 31     31   53 my $self = shift;
1364              
1365 31 50       73 if($self->{script_mtime}) {
1366 0         0 return $self->{script_mtime};
1367             }
1368 31 50       72 unless(defined($self->{info})) {
1369 0 0       0 if($self->{cache}) {
1370 0         0 $self->{info} = CGI::Info->new({ cache => $self->{cache} });
1371             } else {
1372 0         0 $self->{info} = CGI::Info->new();
1373             }
1374             }
1375              
1376 31         83 my $path = $self->{info}->script_path();
1377 31 50       5143 unless(defined($path)) {
1378 0         0 return;
1379             }
1380              
1381 31         459 my @statb = stat($path);
1382 31         109 $self->{script_mtime} = $statb[9];
1383 31         134 return $self->{script_mtime};
1384             }
1385              
1386             sub _should_gzip {
1387 34     34   60 my $self = shift;
1388              
1389 34 100 100     196 if($self->{compress_content} && ($ENV{'HTTP_ACCEPT_ENCODING'} || $ENV{'HTTP_TE'})) {
      100        
1390 2 50       12 if(defined($self->{content_type})) {
1391 2         10 my @content_type = @{$self->{content_type}};
  2         9  
1392 2 50       10 if($content_type[0] ne 'text') {
1393 0         0 return '';
1394             }
1395             }
1396 2 100       14 my $accept = lc($ENV{'HTTP_ACCEPT_ENCODING'} ? $ENV{'HTTP_ACCEPT_ENCODING'} : $ENV{'HTTP_TE'});
1397 2         16 foreach my $method(split(/,\s?/, $accept)) {
1398 2 50 66     17 if(($method eq 'gzip') || ($method eq 'x-gzip') || ($method eq 'br')) {
      66        
1399 2         8 return $method;
1400             }
1401             }
1402             }
1403              
1404 32         90 return '';
1405             }
1406              
1407             sub _set_content_type {
1408 34     34   70 my $self = shift;
1409 34         52 my $headers = shift;
1410              
1411 34         128 foreach my $header (split(/\r?\n/, $headers)) {
1412 34         170 my ($header_name, $header_value) = split /\:\s*/, $header, 2;
1413 34 100       144 if (lc($header_name) eq 'content-type') {
1414 31         123 my @content_type = split /\//, $header_value, 2;
1415 31         85 $self->{content_type} = \@content_type;
1416 31         95 return;
1417             }
1418             }
1419             }
1420              
1421             sub _compress()
1422             {
1423 32     32   52 my $self = shift;
1424 32 50       94 my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  32         135  
1425              
1426 32         71 my $encoding = $params{encoding};
1427              
1428 32 100 66     109 if((length($encoding) == 0) || (length($self->{body}) < MIN_GZIP_LEN)) {
1429 30         128 return;
1430             }
1431              
1432 2 100       11 if($encoding eq 'gzip') {
    50          
1433 1         731 require Compress::Zlib;
1434 1         57542 Compress::Zlib->import;
1435              
1436             # Avoid 'Wide character in memGzip'
1437 1 50       13 unless($self->{_encode_loaded}) {
1438 1         626 require Encode;
1439 1         10865 $self->{_encode_loaded} = 1;
1440             }
1441 1         13 my $nbody = Compress::Zlib::memGzip(\Encode::encode_utf8($self->{body}));
1442 1 50       533 if(length($nbody) < length($self->{body})) {
1443 0         0 $self->{body} = $nbody;
1444 0 0       0 unless(grep(/^Content-Encoding: gzip/, @{$self->{o}})) {
  0         0  
1445 0         0 push @{$self->{o}}, 'Content-Encoding: gzip';
  0         0  
1446             }
1447 0 0       0 unless(grep(/^Vary: Accept-Encoding/, @{$self->{o}})) {
  0         0  
1448 0         0 push @{$self->{o}}, 'Vary: Accept-Encoding';
  0         0  
1449             }
1450             }
1451             } elsif($encoding eq 'br') {
1452 1         487 require IO::Compress::Brotli;
1453 1         1531 IO::Compress::Brotli->import();
1454              
1455 1 50       16 unless($self->{_encode_loaded}) {
1456 0         0 require Encode;
1457 0         0 $self->{_encode_loaded} = 1;
1458             }
1459 1         2384 my $nbody = IO::Compress::Brotli::bro(Encode::encode_utf8($self->{body}));
1460 1 50       12 if(length($nbody) < length($self->{body})) {
1461 1         7 $self->{body} = $nbody;
1462 1 50       3 unless(grep(/^Content-Encoding: br/, @{$self->{o}})) {
  1         7  
1463 1         2 push @{$self->{o}}, 'Content-Encoding: br';
  1         4  
1464             }
1465 1 50       4 unless(grep(/^Vary: Accept-Encoding/, @{$self->{o}})) {
  1         7  
1466 1         2 push @{$self->{o}}, 'Vary: Accept-Encoding';
  1         5  
1467             }
1468             }
1469             }
1470             }
1471              
1472             sub _check_if_none_match {
1473 2     2   3 my $self = shift;
1474              
1475 2 50       6 if($self->{logger}) {
1476 0         0 $self->{logger}->debug("Compare $ENV{HTTP_IF_NONE_MATCH} with $self->{etag}");
1477             }
1478 2 50       9 if($ENV{'HTTP_IF_NONE_MATCH'} eq $self->{etag}) {
1479 2         3 push @{$self->{o}}, 'Status: 304 Not Modified';
  2         7  
1480 2         5 $self->{send_body} = 0;
1481 2         5 $self->{status} = 304;
1482 2 50       8 if($self->{logger}) {
1483 0         0 $self->{logger}->debug('Set status to 304');
1484             }
1485 2         5 return 1;
1486             }
1487 0 0 0     0 if($self->{cache} && $self->{logger} && $self->{logger}->is_debug()) {
      0        
1488 0         0 my $cached_copy = $self->{cache}->get($self->_generate_key());
1489              
1490 0 0 0     0 if($cached_copy && $self->{body}) {
1491 0         0 require Text::Diff;
1492 0         0 Text::Diff->import();
1493              
1494 0         0 $cached_copy = Storable::thaw($cached_copy)->{body};
1495 0         0 my $diffs = diff(\$self->{body}, \$cached_copy);
1496 0         0 $self->{logger}->debug('diffs: ', $diffs);
1497             } else {
1498 0         0 $self->{logger}->debug('Nothing to compare');
1499             }
1500             }
1501 0         0 return 0;
1502             }
1503              
1504             # replace dynamic links with static links
1505             sub _save_to {
1506 26     26   68 my ($self, $unzipped_body, $dbh) = @_;
1507              
1508 26 0 33     115 return 0 unless($dbh && $self->{info} && (my $request_uri = $ENV{'REQUEST_URI'}));
      0        
1509 0 0         return 0 if(!defined($unzipped_body));
1510              
1511 0           my $query;
1512 0           my $copy = $unzipped_body;
1513 0           my $changes = 0;
1514 0           my $creation;
1515             my %seen_links;
1516 0           while($unzipped_body =~ /<a\shref="(.+?)"/gis) {
1517 0           my $link = $1;
1518 0 0         next if($seen_links{$link}); # Already updated in the copy
1519 0           $seen_links{$link} = 1;
1520 0           $link =~ tr/[\|;]/_/;
1521              
1522 0           my $search_uri = $link;
1523 0 0         if($search_uri =~ /^\?/) {
1524             # CGI script has links to itself
1525             # $search_uri = "${request_uri}${link}";
1526 0           my $r = $request_uri;
1527 0           $r =~ s/\?.*$//;
1528 0           $search_uri = "${r}$link";
1529             } else {
1530 0 0         next if($link =~ /^https?:\/\//); # FIXME: skips full URLs to ourself
1531             # Though optimise_content fixes that
1532 0 0         next if($link =~ /.html?$/);
1533 0 0         next if($link =~ /.jpg$/);
1534 0 0         next if($link =~ /.gif$/);
1535             }
1536 0 0         if($self->{save_to}->{ttl}) {
1537 0           $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE uri = ? AND language = ? AND browser_type = ? AND creation >= strftime('\%s','now') - " . $self->{save_to}->{ttl};
1538             } else {
1539 0           $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE uri = ? AND language = ? AND browser_type = ?";
1540             }
1541 0 0         if($self->{logger}) {
1542 0           $self->{logger}->debug("$query: $search_uri, ", $self->{lingua}->language(), ', ', $self->{info}->browser_type());
1543             }
1544 0 0         if(defined(my $sth = $dbh->prepare($query))) {
    0          
1545 0           $sth->execute($search_uri, $self->{lingua}->language(), $self->{info}->browser_type());
1546 0 0         if(my $href = $sth->fetchrow_hashref()) {
1547 0 0         if(my $path = $href->{'path'}) {
1548 0 0         if(-r $path) {
1549 0 0         if($self->{logger}) {
1550 0           $self->{logger}->debug("Changing links from $link to $path");
1551             }
1552 0           $link =~ s/\?/\\?/g;
1553 0           my $rootdir = $self->{info}->rootdir();
1554 0           $path = substr($path, length($rootdir));
1555 0           $changes += ($copy =~ s/<a\s+href="$link">/<a href="$path">/gis);
1556             # Find the first link that will expire and use that
1557 0 0 0       if((!defined($creation)) || ($href->{'creation'} < $creation)) {
1558 0           $creation = $href->{'creation'};
1559             }
1560             } else {
1561 0           $query = "DELETE FROM fcgi_buffer WHERE path = ?";
1562 0           $dbh->prepare($query)->execute($path);
1563 0 0         if($self->{logger}) {
1564 0           $self->{logger}->warn("Remove entry for non-existant file $path");
1565             }
1566             }
1567             }
1568             }
1569             } elsif($self->{logger}) {
1570 0           $self->{logger}->warn("failed to prepare '$query'");
1571             }
1572             }
1573 0           my $expiration = 0;
1574 0 0 0       if(defined($creation) && (my $ttl = $self->{save_to}->{ttl})) {
1575 0           $expiration = $creation + $ttl;
1576             }
1577 0 0 0       if($changes && (($expiration == 0) || ($expiration >= time))) {
    0 0        
      0        
1578 0 0         if($self->{logger}) {
1579             # $self->{logger}->debug("$changes links now point to static pages");
1580 0 0         if($changes == 1) {
1581 0 0         if($self->{'save_to'}->{'ttl'}) {
1582 0           $self->{logger}->info('1 link now points to a static page for ', $expiration - time, 's');
1583             } else {
1584 0           $self->{logger}->info('1 link now points to a static page');
1585             }
1586             } else {
1587 0           $self->{logger}->info("$changes links now point to static pages");
1588             }
1589             }
1590 0           $unzipped_body = $copy;
1591 0           $self->{'body'} = $unzipped_body;
1592 0 0         if(my $ttl = $self->{save_to}->{ttl}) {
1593 0           push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str($creation + $ttl);
  0            
1594             }
1595             } elsif($expiration && ($expiration < time)) {
1596             # Delete the save_to files
1597 0 0         if($self->{save_to}->{ttl}) {
1598 0           $query = "SELECT path FROM fcgi_buffer WHERE creation < strftime('\%s','now') - " . $self->{save_to}->{ttl};
1599             } else {
1600 0           $query = 'SELECT path FROM fcgi_buffer'; # Hmm, I suspect this is overkill
1601             }
1602 0           my $sth = $dbh->prepare($query);
1603 0           $sth->execute();
1604 0           while(my $href = $sth->fetchrow_hashref()) {
1605 0 0         if(my $path = $href->{'path'}) {
1606 0 0         if($self->{logger}) {
1607 0           $self->{logger}->debug("unlink $path");
1608             }
1609 0           unlink $path;
1610             }
1611             }
1612 0 0         if($self->{save_to}->{ttl}) {
1613 0           $query = "DELETE FROM fcgi_buffer WHERE creation < strftime('\%s','now') - " . $self->{save_to}->{ttl};
1614             } else {
1615 0           $query = 'DELETE FROM fcgi_buffer'; # Hmm, I suspect this is overkill
1616             }
1617 0 0         if($self->{logger}) {
1618 0           $self->{logger}->debug($query);
1619             }
1620 0           $dbh->prepare($query)->execute();
1621             # } else {
1622             # Old code
1623             # if($self->{save_to}->{ttl}) {
1624             # $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE key = '$key' AND creation >= strftime('\%s','now') - " . $self->{save_to}->{ttl};
1625             # } else {
1626             # $query = "SELECT DISTINCT path, creation FROM fcgi_buffer WHERE key = '$key'";
1627             # }
1628             # my $sth = $dbh->prepare($query);
1629             # $sth->execute();
1630             # my $href = $sth->fetchrow_hashref();
1631             # if(my $path = $href->{'path'}) {
1632             # # FIXME: don't do this if we've passed the TTL, and if we are clean
1633             # # up the database and remove the static page
1634             # $request_uri =~ s/\?/\\?/g;
1635             # if(($unzipped_body =~ s/<a href="$request_uri"/<a href="$path"/gi) > 0) {
1636             # $self->{'body'} = $unzipped_body;
1637             # if(my $ttl = $self->{save_to}->{ttl}) {
1638             # push @{$self->{o}}, 'Expires: ' . HTTP::Date::time2str($href->{creation} + $ttl);
1639             # }
1640             # }
1641             # }
1642             }
1643 0           return $changes;
1644             }
1645              
1646             =head1 AUTHOR
1647              
1648             Nigel Horne, C<< <njh at bandsman.co.uk> >>
1649              
1650             =head1 BUGS
1651              
1652             FCGI::Buffer should be safe even in scripts which produce lots of different
1653             output, e.g. e-commerce situations.
1654             On such pages, however, I strongly urge to setting generate_304 to 0 and
1655             sending the HTTP header "Cache-Control: no-cache".
1656              
1657             When using L<Template>, ensure that you don't use it to output to STDOUT,
1658             instead you will need to capture into a variable and print that.
1659             For example:
1660              
1661             my $output;
1662             $template->process($input, $vars, \$output) || ($output = $template->error());
1663             print $output;
1664              
1665             Can produce buggy JavaScript if you use the <!-- HIDING technique.
1666             This is a bug in L<JavaScript::Packer>, not FCGI::Buffer.
1667              
1668             Mod_deflate can confuse this when compressing output.
1669             Ensure that deflation is off for .pl files:
1670              
1671             SetEnvIfNoCase Request_URI \.(?:gif|jpe?g|png|pl)$ no-gzip dont-vary
1672              
1673             If you request compressed output then uncompressed output (or vice
1674             versa) on input that produces the same output, the status will be 304.
1675             The letter of the spec says that's wrong, so I'm noting it here, but
1676             in practice you should not see this happen or have any difficulties
1677             because of it.
1678              
1679             FCGI::Buffer has not been tested against FastCGI.
1680              
1681             I advise adding FCGI::Buffer as the last use statement so that it is
1682             cleared up first. In particular it should be loaded after
1683             L<Log::Log4perl>, if you're using that, so that any messages it
1684             produces are printed after the HTTP headers have been sent by
1685             FCGI::Buffer;
1686              
1687             Save_to doesn't understand links in JavaScript, which means that if you use self-calling
1688             CGIs which are loaded as a static page they may point to the wrong place.
1689             The workaround is to avoid self-calling CGIs in JavaScript
1690              
1691             Please report any bugs or feature requests to C<bug-fcgi-buffer at rt.cpan.org>,
1692             or through the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=FCGI-Buffer>.
1693             I will be notified, and then you'll automatically be notified of progress on
1694             your bug as I make changes.
1695              
1696             The lint operation only works on HTML4, because of a restriction in L<HTML::Lint>.
1697              
1698             =head1 SEE ALSO
1699              
1700             CGI::Buffer, HTML::Packer, HTML::Lint
1701              
1702             =head1 SUPPORT
1703              
1704             You can find documentation for this module with the perldoc command.
1705              
1706             perldoc FCGI::Buffer
1707              
1708             You can also look for information at:
1709              
1710             =over 4
1711              
1712             =item * RT: CPAN's request tracker
1713              
1714             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=FCGI-Buffer>
1715              
1716             =item * CPAN Ratings
1717              
1718             L<http://cpanratings.perl.org/d/FCGI-Buffer>
1719              
1720             =item * Search CPAN
1721              
1722             L<http://search.cpan.org/dist/FCGI-Buffer/>
1723              
1724             =back
1725              
1726             =head1 ACKNOWLEDGEMENTS
1727              
1728             The inspiration and code for some of this is cgi_buffer by Mark
1729             Nottingham: L<https://www.mnot.net/blog/2003/04/24/etags>.
1730              
1731             =head1 LICENSE AND COPYRIGHT
1732              
1733             The licence for cgi_buffer is:
1734              
1735             "(c) 2000 Copyright Mark Nottingham <mnot@pobox.com>
1736              
1737             This software may be freely distributed, modified and used,
1738             provided that this copyright notice remain intact.
1739              
1740             This software is provided 'as is' without warranty of any kind."
1741              
1742             The rest of the program is Copyright 2015-2023 Nigel Horne,
1743             and is released under the following licence: GPL2
1744              
1745             =cut
1746              
1747             1; # End of FCGI::Buffer