File Coverage

blib/lib/CPAN/Site/Index.pm
Criterion Covered Total %
statement 101 282 35.8
branch 33 128 25.7
condition 22 56 39.2
subroutine 21 34 61.7
pod 0 18 0.0
total 177 518 34.1


line stmt bran cond sub pod time code
1             # Copyrights 1998,2005-2018 by [Mark Overmeer].
2             # For other contributors see ChangeLog.
3             # See the manual pages for details on the licensing terms.
4             # Pod stripped from pm file by OODoc 2.02.
5             # This code is part of distribution CPAN::Site.
6             # Copyright Mark Overmeer. Licensed under the same terms as Perl itself.
7              
8             package CPAN::Site::Index;
9 1     1   599 use vars '$VERSION';
  1         2  
  1         46  
10             $VERSION = '1.15';
11              
12 1     1   5 use base 'Exporter';
  1         2  
  1         72  
13              
14 1     1   5 use warnings;
  1         1  
  1         29  
15 1     1   5 use strict;
  1         1  
  1         40  
16              
17             our @EXPORT_OK = qw/cpan_index cpan_mirror/;
18             our $VERSION; # required in test-env
19              
20 1     1   394 use Log::Report 'cpan-site', syntax => 'SHORT';
  1         97675  
  1         6  
21              
22 1     1   733 use version;
  1         1556  
  1         4  
23 1     1   64 use File::Find qw/find/;
  1         2  
  1         52  
24 1     1   417 use File::Copy qw/copy/;
  1         1933  
  1         50  
25 1     1   7 use File::Basename qw/basename dirname/;
  1         2  
  1         60  
26 1     1   458 use HTTP::Date qw/time2str/;
  1         1413  
  1         56  
27 1     1   418 use File::Spec::Functions qw/catfile catdir splitdir/;
  1         636  
  1         55  
28 1     1   546 use LWP::UserAgent ();
  1         34426  
  1         24  
29 1     1   639 use Archive::Tar ();
  1         68202  
  1         29  
30 1     1   581 use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
  1         45680  
  1         121  
31 1     1   386 use CPAN::Checksums ();
  1         23829  
  1         25  
32 1     1   7 use IO::Zlib ();
  1         11  
  1         3141  
33              
34             my $tar_gz = qr/ \.tar\.gz$ | \.tar\.Z$ | \.tgz$/xi;
35             my $zip = qr/ \.zip$ /xi;
36             my $cpan_update = 0.04; # days between reload of full CPAN index
37             my $ua;
38              
39             sub safe_copy($$);
40             sub cpan_index($@);
41             sub register($$$);
42             sub package_inventory($$;$);
43             sub package_on_usual_location($);
44             sub inspect_archive;
45             sub inspect_tar_archive($$);
46             sub inspect_zip_archive($$);
47             sub collect_package_details($$$);
48             sub update_global_cpan($$);
49             sub load_file($$);
50             sub merge_global_cpan($$$);
51             sub create_details($$$$$);
52             sub calculate_checksums($);
53             sub read_details($);
54             sub remove_expired_details($$$);
55             sub mkdirhier(@);
56             sub cpan_mirror($$$@);
57              
58             sub safe_copy($$)
59 0     0 0 0 { my ($from, $to) = @_;
60 0         0 trace "copy $from to $to";
61 0 0       0 copy $from, $to
62             or fault __x"cannot copy {from} to {to}", from => $from, to => $to;
63             }
64              
65             sub cpan_index($@)
66 0     0 0 0 { my ($mycpan, $globalcpan, %opts) = @_;
67 0         0 my $lazy = $opts{lazy};
68 0         0 my $fallback = $opts{fallback};
69 0 0       0 my $undefs = exists $opts{undefs} ? $opts{undefs} : 1;
70              
71 0 0       0 unless($ua)
72 0         0 { $ua = LWP::UserAgent->new;
73 0 0       0 $ua->env_proxy if $opts{env_proxy};
74             }
75              
76 0 0       0 -d $mycpan
77             or error __x"archive top '{dir}' is not a directory"
78             , dir => $mycpan;
79              
80 0         0 my $program = basename $0;
81 0   0     0 $VERSION ||= 'undef'; # test env at home
82 0         0 trace "$program version $VERSION";
83              
84 0         0 my $global = catdir $mycpan, 'global';
85 0         0 my $mods = catdir $mycpan, 'modules';
86 0         0 my $authors = catdir $mycpan, 'authors';
87 0         0 mkdirhier $global, $mods, $authors;
88              
89 0         0 my $globdetails = update_global_cpan $mycpan, $globalcpan;
90              
91             # Create mailrc and modlist
92              
93 0         0 safe_copy catfile($global, '01mailrc.txt.gz')
94             , catfile($authors, '01mailrc.txt.gz');
95              
96 0         0 safe_copy catfile($global, '03modlist.data.gz')
97             , catfile($mods, '03modlist.data.gz');
98              
99             # Create packages details
100              
101 0         0 my $details = catfile $mods, '02packages.details.txt.gz';
102 0         0 my $newlist = catfile $mods, '02packages.details.tmp.gz';
103 0         0 my $newer;
104              
105 0         0 my $reuse_dists = {};
106 0 0 0     0 if($lazy && -f $details)
107 0         0 { $reuse_dists = read_details $details;
108 0         0 $newer = -M $details;
109 0         0 remove_expired_details $mycpan, $reuse_dists, $newer;
110             }
111              
112 0         0 my ($mypkgs, $distdirs)
113             = package_inventory $mycpan, $reuse_dists, $newer;
114              
115 0 0       0 merge_global_cpan $mycpan, $mypkgs, $globdetails
116             if $fallback;
117              
118 0         0 create_details $details, $newlist, $mypkgs, $lazy, $undefs;
119              
120 0 0       0 if(-f $details)
121 0         0 { trace "backup old details file to $details.bak";
122 0         0 safe_copy $details, "$details.bak";
123             }
124              
125 0 0       0 if(-f $newlist)
126 0         0 { trace "promoting $newlist to current";
127 0 0       0 rename $newlist, $details
128             or error __x"cannot rename '{from}' in '{to}'"
129             , from => $newlist, to => $details;
130             }
131              
132 0         0 calculate_checksums $distdirs;
133             }
134              
135             #
136             # Package Inventory
137             #
138              
139             # global variables for testing purposes (sorry)
140             our ($topdir, $findpkgs, %finddirs, $olddists, $index_age);
141              
142             sub register($$$)
143 28     28 0 56 { my ($package, $this_version, $dist) = @_;
144              
145             # warn "register $package, " . (defined $this_version ? $this_version : 'undef');
146              
147 28 50       42 if(ref $this_version)
148 0         0 { eval { $this_version = version->parse($this_version) };
  0         0  
149 0 0       0 if($@)
150 0         0 { alert __x"error when creating version object for {pkg}: {err}"
151             , pkg => $package, err => $@;
152 0         0 return;
153             }
154             }
155            
156 28 50       61 my $registered_version = exists $findpkgs->{$package} ? $findpkgs->{$package}[0] : undef;
157 28 100       48 $this_version =~ s/^v// if $this_version;
158              
159 28 50 33     41 return if defined $registered_version
160             && $registered_version > $this_version;
161              
162 28         111 $findpkgs->{$package} = [ $this_version, $dist ];
163             }
164              
165             sub package_inventory($$;$)
166 0     0 0 0 { (my $mycpan, $olddists, $index_age) = @_; #!!! see "my"
167 0         0 $topdir = catdir $mycpan, 'authors', 'id';
168 0         0 mkdirhier $topdir;
169              
170 0         0 $findpkgs = {};
171 0         0 trace "creating inventory from $topdir";
172              
173 0         0 find {wanted => \&inspect_archive, no_chdir => 1}, $topdir;
174 0         0 ($findpkgs, \%finddirs);
175             }
176              
177             sub package_on_usual_location($)
178 22     22 0 349 { my $file = shift;
179 22         61 my ($top, $subdir, @rest) = splitdir $file;
180 22 50       155 defined $subdir or return 0;
181              
182             !@rest # path is at top-level of distro
183 22 100       104 || $subdir eq 'lib'; # inside lib
184             }
185              
186             sub inspect_archive
187 3     3 0 15519 { my $fn = $File::Find::name;
188 3 50 33     105 return unless -f $fn && ($fn =~ $tar_gz || $fn =~ $zip);
      33        
189              
190 3         35 (my $dist = $fn) =~ s!^\Q$topdir\E[\\/]!!;
191              
192 3 50 33     10 if(defined $index_age && -M $fn > $index_age)
193             {
194 0 0       0 unless(exists $olddists->{$dist})
195 0         0 { trace "not the latest: $dist";
196 0         0 return;
197             }
198              
199 0         0 trace "latest older than index: $dist";
200 0         0 foreach (@{$olddists->{$dist}})
  0         0  
201 0         0 { my ($pkg, $version) = @$_;
202 0         0 register $pkg, $version, $dist;
203             }
204 0         0 return;
205             }
206              
207 3         18 trace "inspecting archive $fn";
208 3         84 $finddirs{$File::Find::dir}++;
209              
210 3 50       27 return inspect_tar_archive $dist, $fn
211             if $fn =~ $tar_gz;
212              
213 0 0       0 return inspect_zip_archive $dist, $fn
214             if $fn =~ $zip;
215             }
216              
217             sub inspect_tar_archive($$)
218 3     3 0 16 { my ($dist, $fn) = @_;
219              
220 3         22 my $arch = Archive::Tar->new;
221 3 50       38 $arch->read($fn, 1)
222             or error __x"no files in tar archive '{fn}': {err}"
223             , fn => $fn, err => $arch->error;
224              
225 3         53105 foreach my $file ($arch->get_files)
226 60         570 { my $fn = $file->full_path;
227 60 100 100     1473 $file->is_file && $fn =~ m/\.pm$/i && package_on_usual_location $fn
      100        
228             or next;
229 20         49 collect_package_details $fn, $dist, $file->get_content_by_ref;
230             }
231             }
232              
233             sub inspect_zip_archive($$)
234 0     0 0 0 { my ($dist, $fn) = @_;
235              
236 0         0 my $arch = Archive::Zip->new;
237 0 0       0 $arch->read($fn)==AZ_OK
238             or error __x"no files in zip archive '{fn}': {err}"
239             , fn => $fn, err => $arch->error;
240              
241 0         0 foreach my $member ($arch->membersMatching( qr/\.pm$/i ))
242 0         0 { my $fn = $member->fileName;
243 0 0 0     0 $member->isTextFile && package_on_usual_location $fn
244             or next;
245 0         0 my ($contents, $status) = $member->contents;
246 0 0       0 $status==AZ_OK
247             or error "error in zip file {fn}: {err}"
248             , fn => $fn, err => $status;
249 0         0 collect_package_details $fn, $dist, \$contents;
250             }
251             }
252              
253             sub collect_package_details($$$)
254 20     20 0 93 { my ($fn, $dist) = (shift, shift);
255 20         26 my @lines = split /\r?\n/, ${shift()};
  20         2247  
256 20         47 my $in_pod = 0;
257 20         22 my $package;
258 20         29 local $VERSION = undef; # may get destroyed by eval
259              
260 20         41 while(@lines)
261 4623         5939 { local $_ = shift @lines;
262 4623 50       6415 last if m/^__(?:END|DATA)__$/;
263              
264 4623 100       6340 $in_pod = ($1 ne 'cut') if m/^=(\w+)/;
265 4623 100 100     9658 next if $in_pod || m/^\s*#/;
266              
267 3388   100     9055 $_ .= shift @lines
      100        
268             while @lines && m/package|use|VERSION/ && !m/[;{]/;
269              
270 3388 100       5231 if( m/^\s* package \s* ((?:\w+\:\:)*\w+) (?:\s+ (\S*))? \s* [;{]/x )
271 28         84 { my ($thispkg, $v) = ($1, $2);
272 28         33 my $thisversion;
273 28 50       41 if($v)
274 0         0 { $thisversion = eval {qv($v)};
  0         0  
275 0 0       0 alert __x"illegal version for {pkg}, found '{version}': {err}"
276             , pkg => $thispkg, version => $v, err => $@ if $@;
277             }
278              
279             # second package in file?
280 28 100       53 register $package, $VERSION, $dist
281             if defined $package;
282              
283 28         52 ($package, $VERSION) = ($thispkg, $thisversion);
284 28         105 trace "pkg $package from $fn";
285             }
286              
287 3388 50       8350 if( m/^\s* \$ ${package}::VERSION \s* = \s* ["']?(\w+?)["']? \s* ;/x )
288 0         0 { $VERSION = $1;
289             }
290              
291 3388 100 100     8891 if( !$VERSION && m/^ (?:use\s+version\s*;\s*)?
292             (?:our)? \s* \$ ((?: \w+\:\:)*) VERSION \s* \= (.*)/x )
293 7 50       18 { defined $2 or next;
294 7         19 my ($ns, $vers) = ($1, $2);
295              
296             # some versions of CPAN.pm do contain lines like "$VERSION =~ ..."
297             # which also need to be processed.
298 7         375 eval "\$VERSION =$vers";
299 7 50       26 if(defined $VERSION)
300 7 100       23 { ($package = $ns) =~ s/\:\:$//
301             if length $ns;
302 7         26 trace "pkg $package version $VERSION";
303             }
304             }
305             }
306              
307 20 50       37 $VERSION = $VERSION->numify if ref $VERSION;
308 20 50       51 register $package, $VERSION, $dist
309             if defined $package;
310             }
311              
312             sub update_global_cpan($$)
313 0     0 0   { my ($mycpan, $globalcpan) = @_;
314              
315 0           my $global = catdir $mycpan, 'global';
316             my ($mailrc, $globdetails, $modlist) =
317 0           map { catfile $global, $_ }
  0            
318             qw/01mailrc.txt.gz 02packages.details.txt.gz 03modlist.data.gz/;
319              
320 0 0 0       return $globdetails
      0        
      0        
321             if -f $globdetails && -f $globdetails && -f $modlist
322             && -M $globdetails < $cpan_update;
323              
324 0           info "(re)loading global CPAN files";
325              
326 0           mkdirhier $global;
327 0           load_file "$globalcpan/authors/01mailrc.txt.gz", $mailrc;
328 0           load_file "$globalcpan/modules/02packages.details.txt.gz", $globdetails;
329 0           load_file "$globalcpan/modules/03modlist.data.gz", $modlist;
330 0           $globdetails;
331             }
332              
333             sub load_file($$)
334 0     0 0   { my ($from, $to) = @_;
335 0           my $response = $ua->get($from, ':content_file' => $to);
336 0 0         return if $response->is_success;
337              
338 0           unlink $to;
339 0           error __x"failed to get {uri} for {to}: {err}"
340             , uri => $from, to => $to, err => $response->status_line;
341             }
342              
343             sub merge_global_cpan($$$)
344 0     0 0   { my ($mycpan, $pkgs, $globdetails) = @_;
345              
346 0           trace "merge packages with CPAN core list in $globdetails";
347 0           my $cpan_pkgs = read_details $globdetails;
348              
349 0           while(my ($cpandist, $cpanpkgs) = each %$cpan_pkgs)
350 0           { foreach (@$cpanpkgs)
351 0           { my ($pkg, $version) = @$_;
352 0 0         next if exists $pkgs->{$pkg};
353 0           $pkgs->{$pkg} = [$version, $cpandist];
354             }
355             }
356             }
357              
358             sub create_details($$$$$)
359 0     0 0   { my ($details, $filename, $pkgs, $lazy, $undefs) = @_;
360              
361 0           trace "creating package details in $filename";
362 0 0         my $fh = IO::Zlib->new($filename, 'wb')
363             or fault __x"generating gzipped {fn}", fn => $filename;
364              
365 0           my $lines = keys %$pkgs;
366 0           my $date = time2str time;
367 0 0         my $how = $lazy ? "lazy" : "full";
368              
369 0           info "produced list of $lines packages $how";
370              
371 0           my $program = basename $0;
372 0           my $module = __PACKAGE__;
373 0           $fh->print (<<__HEADER);
374             File: 02packages.details.txt
375             URL: file://$details
376             Description: Packages listed in CPAN and local repository
377             Columns: package name, version, path
378             Intended-For: private CPAN
379             Line-Count: $lines
380             Written-By: $program with $module $CPAN::Site::Index::VERSION ($how)
381             Last-Updated: $date
382              
383             __HEADER
384              
385 0           foreach my $pkg (sort keys %$pkgs)
386 0           { my ($version, $path) = @{$pkgs->{$pkg}};
  0            
387              
388 0 0 0       $version = 'undef'
389             if !defined $version || $version eq '';
390              
391             next
392 0 0 0       if $version eq 'undef' && !$undefs;
393              
394 0           $path =~ s,\\,/,g;
395 0           $fh->printf("%-30s\t%s\t%s\n", $pkg, $version, $path);
396             }
397             }
398              
399             sub calculate_checksums($)
400 0     0 0   { my $dirs = shift;
401 0           trace "updating checksums";
402              
403 0           foreach my $dir (keys %$dirs)
404 0           { trace "summing $dir";
405 0 0         CPAN::Checksums::updatedir($dir)
406             or warning 'failed calculating checksums in {dir}', dir => $dir;
407             }
408             }
409              
410             sub read_details($)
411 0     0 0   { my $fn = shift;
412 0 0         -f $fn or return {};
413 0           trace "collecting all details from $fn";
414              
415 0 0         my $fh = IO::Zlib->new($fn, 'rb')
416             or fault __x"cannot read from {fn}", fn => $fn;
417              
418 0           my $line; # skip header, search first blank
419 0           do { $line = $fh->getline } until $line =~ m/^\s*$/;
  0            
420              
421 0           my $time_last_update = (stat $fn)[9];
422 0           my %dists;
423              
424 0           while(my $line = $fh->getline)
425 0           { chomp $line;
426 0           my ($pkg, $version, $dist) = split ' ', $line, 3;
427              
428 0 0         unless($dist)
429 0           { warning "$fn error line=\n $line";
430 0           next;
431             }
432              
433 0           push @{$dists{$dist}}, [$pkg, $version];
  0            
434             }
435              
436 0           \%dists;
437             }
438              
439             sub remove_expired_details($$$)
440 0     0 0   { my ($mycpan, $dists, $newer) = @_;
441 0           trace "extracting only existing local distributions";
442              
443 0           my $authors = catdir $mycpan, 'authors', 'id';
444 0           foreach my $dist (keys %$dists)
445 0           { my $fn = catfile $authors, $dist;
446 0 0         if(! -f $fn)
    0          
447             { # removed local or a global dist
448 0           delete $dists->{$dist};
449             }
450             elsif(-M $fn < $newer)
451 0           { trace "dist $dist file updated, reindexing";
452 0           delete $dists->{$dist};
453             }
454             }
455             }
456              
457             sub mkdirhier(@)
458 0     0 0   { foreach my $dir (@_)
459 0 0         { next if -d $dir;
460 0           mkdirhier dirname $dir;
461              
462 0 0         mkdir $dir, 0755
463             or fault __x"cannot create directory {dir}", dir => $dir;
464              
465 0           trace "created $dir";
466             }
467 0           1;
468             }
469              
470             sub cpan_mirror($$$@)
471 0     0 0   { my ($mycpan, $globalcpan, $mods, %opts) = @_;
472 0 0         @$mods or return;
473 0           my %need = map { ($_ => 1) } @$mods;
  0            
474 0           my $auth = catdir $mycpan, 'authors', 'id';
475              
476 0 0         unless($ua)
477 0           { $ua = LWP::UserAgent->new;
478 0 0         $ua->env_proxy if $opts{env_proxy};
479             }
480              
481 0           my $globdetails
482             = update_global_cpan $mycpan, $globalcpan;
483              
484 0 0         my $fh = IO::Zlib->new($globdetails, 'rb')
485             or fault __x"cannot read from {fn}", fn => $globdetails;
486              
487 0           while(my $line = $fh->getline) # skip header, search first blank
488 0 0         { last if $line =~ m/^\s*$/;
489             }
490              
491 0   0       $ua ||= LWP::UserAgent->new;
492 0           while(my $line = $fh->getline)
493 0           { my ($pkg, $version, $dist) = split ' ', $line;
494 0 0         delete $need{$pkg} or next;
495              
496 0           my $to = catfile $auth, split m#/#, $dist;
497 0 0         if(-f $to)
498 0           { trace __x"package {pkg} present in distribution {dist}"
499             , pkg => $pkg, dist => $dist;
500 0           next;
501             }
502              
503 0           my $source = "$globalcpan/authors/id/$dist";
504 0           mkdirhier dirname $to;
505 0           my $response = $ua->get($source, ':content_file' => $to);
506 0 0         unless($response->is_success)
507 0           { unlink $to;
508 0           error __x"failed to get {uri} for {to}: {err}"
509             , uri => $source, to => $to, err => $response->status_line;
510             }
511              
512 0           info __x"got {pkg} in {dist}", pkg => $pkg, dist => $dist;
513             }
514              
515             warning __x"package {pkg} does not exist", pkg => $_
516 0           for sort keys %need;
517             }
518              
519             1;