File Coverage

blib/lib/CPAN/CachingProxy.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             package CPAN::CachingProxy;
4              
5 1     1   1445 use strict;
  1         4  
  1         42  
6 1     1   5 use Carp;
  1         2  
  1         76  
7 1     1   10119 use URI;
  1         18449  
  1         39  
8 1     1   694 use Cache::File;
  0            
  0            
9             use Data::Dumper;
10             use LWP::UserAgent;
11             use Fcntl qw(:flock);
12             use Digest::SHA1 qw(sha1_hex);
13              
14             our $VERSION = "1.6500";
15              
16             # wget -O MIRRORED.BY http://www.cpan.org/MIRRORED.BY
17              
18             # new {{{
19             sub new {
20             my $class = shift;
21             my $this = bless {@_}, $class;
22              
23             unless( $this->{cgi} ) {
24             require CGI or die $@;
25             $this->{cgi} = new CGI;
26             }
27              
28             unless( $this->{cache_object} ) {
29             $this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root};
30             $this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root};
31             $this->{default_expire} = "2 day" unless exists $this->{default_expire};
32             $this->{index_expire} = "3 hour" unless exists $this->{index_expire};
33             $this->{error_expire} = "15 minute" unless exists $this->{error_expire};
34             $this->{url_lockfile_dir} = "/tmp" unless exists $this->{url_lockfile_dir};
35              
36             $this->{index_regexp} = qr/(?:03modlist\.data|02packages\.details\.txt|01mailrc\.txt)/ unless exists $this->{index_regexp};
37             $this->{cache_object} = Cache::File->new(cache_root=>$this->{cache_root}, default_expires => $this->{default_expire} );
38             }
39              
40             $this->{key_space} = "CK" unless $this->{key_space};
41              
42             unless( $this->{ua} ) {
43             my $ua = $this->{ua} = new LWP::UserAgent;
44             $ua->agent($this->{agent} ? $this->{agent} : "CCP/$VERSION (Paul's CPAN caching proxy / perlmonks-id=16186)");
45             if( exists $this->{activity_timeout} ) {
46             if( defined (my $at = $this->{activity_timeout}) ) {
47             $ua->timeout($at);
48             }
49              
50             } else {
51             $ua->timeout(12);
52             }
53             }
54              
55             $this->{ua}->timeout( $this->{activity_timeout} ) if defined $this->{activity_timeout};
56              
57             croak "there are no default mirrors, they must be set" unless $this->{mirrors};
58              
59             return $this;
60             }
61             # }}}
62             # run {{{
63             sub run {
64             my $this = shift;
65             my $cgi = $this->{cgi};
66             my $mirror = $this->{mirrors}[ rand @{$this->{mirrors}} ];
67             my $pinfo = $cgi->path_info || return print $cgi->redirect( $cgi->url . "/" );
68              
69             $pinfo =~ s/^\///;
70             $mirror=~ s/\/$//;
71              
72             my $CK = "$this->{key_space}:$pinfo";
73             my $URL = "$mirror/$pinfo";
74             # $URL =~ s/\/{2,}/\//g;
75              
76             if( $pinfo =~ s{^___/}{} ) {
77             # NOTE: undocumented special case. If the path begins with ___, it
78             # probably came from a 404 handler. in which case, the real pinfo was
79             # probably an absolute url. replace the entire path portion of our
80             # mirror url with the non ___'d part of the pinfo.
81              
82             my $nurl = URI->new($mirror);
83             $nurl->path($pinfo);
84             # arguably we should use URI for all our path manips, but this section is new and the old stuff works fine
85             $URL = "$nurl";
86             }
87              
88             my $lockfile_fh;
89             my $uld = $this->{url_lockfile_dir};
90             if( $uld and -d $uld ) {
91             for(glob("$uld/.CP_FILE*")) {
92             open my $tlf, "<", $_ or next;
93             next unless flock $tlf, (LOCK_NB|LOCK_EX);
94             warn "[DEBUG] unlinking old URL-lockfile $_\n" if $this->{debug};
95             unlink $_;
96             }
97              
98             # NOTE: sha1 is not for security, as I think timing attacks on this
99             # have little value really, aside from DoS, and then the local
100             # attackers probably have better things to do. These are simply here
101             # because my /tmp is tmpfs, which has surprisingly low file name length
102             # restrictions.
103             my $converted = join("/", $uld, ".CP_FILE_" . sha1_hex($URL));
104              
105             warn "[DEBUG] locking $URL using $converted lockfile\n" if $this->{debug};
106             open $lockfile_fh, ">", $converted or die "error opening lockfile for $URL: $!";
107             flock $lockfile_fh, LOCK_EX or die "failed to lock lockfile for $URL: $!";
108             }
109              
110             else {
111             die "as of version 1.6, url_locking_dir is a required option.";
112             }
113              
114             my $cache = $this->{cache_object};
115             if( $cache->exists($CK) and $cache->exists("$CK.hdr") ) { our $VAR1;
116             my $res = eval $cache->get( "$CK.hdr" ); die "problem finding cache entry\n" if $@;
117              
118             unless( $this->{ignore_last_modified} ) {
119             if( my $lm = $res->header('last_modified') ) {
120             my $_lm = eval { $this->{ua}->head($URL)->header('last_modified') };
121              
122             # $lm = "hehe, random failure time" if (int rand(7)) == 0;
123              
124             if( $_lm and $lm ne $_lm ) {
125             warn "[DEBUG] last_modified differs ($lm vs $_lm), forcing cache miss\n" if $this->{debug};
126             goto FORCE_CACHE_MISS;
127             }
128             }
129             }
130              
131             my $start = $this->my_copy_hdr($res, "cache hit");
132              
133             # XXX: is it the right thing to do to close the lockfile here?
134             # Probably. At this point, we should have the whole file, and we sure
135             # don't mind serving similtaneous requests, right?
136              
137             close $lockfile_fh;
138              
139             ###
140              
141             my $fh = $cache->handle( $CK, "<" ) or die "problem finding cache entry\n";
142             my $buf;
143             BUF: while( read $fh, $buf, 4096 ) {
144             if( $start > 0 ) {
145             if( $start > length $buf ) {
146             $start -= length $buf;
147             next BUF;
148              
149             } else {
150             substr $buf, 0, $start, "";
151             $start = 0;
152             }
153             }
154             print $buf;
155             }
156             close $fh;
157              
158             } else {
159             FORCE_CACHE_MISS:
160             my $expire = $this->{default_expire};
161             $expire = $this->{index_expire} if $pinfo =~ $this->{index_regexp};
162              
163             $cache->set($CK, 1, $expire ); # doesn't seem like we should have to do this, but apparently we do
164              
165             warn "[DEBUG] getting $URL\n" if $this->{debug};
166              
167             my $fh = $cache->handle( $CK, ">", $expire );
168             my $request = HTTP::Request->new(GET => $URL);
169              
170             my $announced_header;
171             my $response = $this->{ua}->request($request, sub {
172             my $chunk = shift;
173              
174             unless( $announced_header ) {
175             my $res = shift;
176             $announced_header = 1;
177             $this->my_copy_hdr($res, "cache miss");
178             }
179              
180             print $fh $chunk;
181             print $chunk;
182             });
183             close $fh;
184              
185             unless( $response->is_success ) {
186             my $my_fail = "FAIL: " . $response->status_line . "\n";
187             $cache->set($CK => $my_fail, $expire);
188             $response->header(content_length=>length $my_fail); # fix content length so we don't lie to clients
189              
190             $this->my_copy_hdr($response, "cache miss [fail]");
191             print $my_fail;
192             }
193              
194             warn "[DEBUG] setting $CK\n" if $this->{debug};
195             $cache->set("$CK.hdr", Dumper($response), $expire);
196              
197             # if there was an error (which we don't know until ex post facto), go back and fix the expiry
198             if( defined $this->{error_expire} and not $response->is_success ) {
199             $cache->set_expiry( $CK => $this->{error_expire} );
200             $cache->set_expiry( "$CK.hdr" => $this->{error_expire} );
201             }
202             }
203             }
204             # }}}
205              
206             # {{{ sub my_copy_hdr
207             sub my_copy_hdr {
208             my ($this, $res, $hit) = @_;
209             my $cgi = $this->{cgi};
210              
211             my $status = $res->status_line;
212             warn "[DEBUG] cache status: $hit; status: $status\n" if $this->{debug};
213              
214             my %more_headers = (qw(accept_ranges bytes));
215              
216             for(qw(content_length), $this->{ignore_last_modified} ? ():(qw(last_modified))) {
217             my $v = $res->header($_);
218              
219             if( $v ) {
220             my $k = lc $_;
221             $k =~ s/-/_/g;
222              
223             $more_headers{$k} = $v;
224             }
225             }
226              
227             my $start = 0;
228              
229             if( my $r = $cgi->http("Range") ) {
230             if( ($start) = $r =~ m/^bytes=(\d+)-$/ ) {
231             my $len = $more_headers{content_length};
232             my $new = $len - $start;
233             my $end = $len - 1; # this is the byte number, not a number of bytes or something
234              
235             $more_headers{content_range} = "bytes $start-$end/$len";
236             $more_headers{content_length} = $new;
237             }
238             }
239              
240             print $cgi->header(-status=>$status, -charset=>"", -type=>$res->header( 'content-type' ), %more_headers);
241              
242             return $start;
243             }
244              
245             # }}}