File Coverage

blib/lib/Dist/Surveyor/MakeCpan.pm
Criterion Covered Total %
statement 133 164 81.1
branch 16 48 33.3
condition 5 14 35.7
subroutine 19 19 100.0
pod 2 6 33.3
total 175 251 69.7


line stmt bran cond sub pod time code
1             package Dist::Surveyor::MakeCpan;
2              
3 1     1   362 use strict;
  1         2  
  1         21  
4 1     1   5 use warnings;
  1         1  
  1         20  
5 1     1   4 use Carp; # core
  1         2  
  1         52  
6 1     1   520 use Data::Dumper; # core
  1         6541  
  1         52  
7 1     1   8 use File::Path; # core
  1         3  
  1         42  
8 1     1   352 use CPAN::DistnameInfo;
  1         753  
  1         40  
9 1     1   9 use File::Basename qw{dirname}; # core
  1         3  
  1         90  
10 1     1   582 use HTTP::Tiny;
  1         35877  
  1         41  
11 1     1   395 use Dist::Surveyor::Inquiry;
  1         3  
  1         64  
12 1     1   9 use List::Util qw(max); # core
  1         2  
  1         1618  
13              
14             our $VERSION = '0.019';
15              
16             our $verbose;
17             *verbose = \$::VERBOSE;
18              
19             sub new {
20 1     1 0 606 my ($class, $cpan_dir, $progname, $irregularities) = @_;
21              
22 1         552 require Compress::Zlib;
23 1         43881 mkpath("$cpan_dir/modules");
24              
25             # --- write extra data files that may be useful XXX may change
26             # XXX these don't all (yet?) merge with existing data
27 1         6 my $survey_datadump_dir = "$cpan_dir/$progname";
28 1         107 mkpath($survey_datadump_dir);
29              
30             # Write list of releases, like default stdout
31 1         79 open my $rel_fh, ">", "$survey_datadump_dir/releases.txt";
32              
33             # dump the primary result data for additional info and debugging
34 1 50       8 my $gzwrite = Compress::Zlib::gzopen("$survey_datadump_dir/_data_dump.perl.gz", 'wb')
35             or croak "Cannot open $survey_datadump_dir/_data_dump.perl.gz for writing: " . $Compress::Zlib::gzerrno;
36 1         1965 $gzwrite->gzwrite("[\n");
37              
38              
39 1         141 my $self = {
40             errors => 0,
41             cpan_dir => $cpan_dir,
42             irregularities => $irregularities,
43             pkg_ver_rel => {}, # for 02packages
44             progname => $progname,
45             rel_fh => $rel_fh,
46             gzwrite => $gzwrite,
47             };
48 1         9 return bless $self, $class;
49             }
50              
51             sub close {
52 1     1 1 215 my $self = shift;
53              
54             # --- write 02packages file
55              
56 1         21 my $pkg_lines = _readpkgs($self->{cpan_dir});
57 1         3 my %packages;
58 1         3 for my $line (@$pkg_lines, map { $_->{line} } values %{ $self->{pkg_ver_rel} }) {
  2         8  
  1         5  
59 2         9 my ($pkg) = split(/\s+/, $line, 2);
60 2 50 33     9 if ($packages{$pkg} and $packages{$pkg} ne $line) {
61 0 0       0 warn "Old $packages{$pkg}\nNew $line\n" if $verbose;
62             }
63 2         5 $packages{$pkg} = $line;
64             };
65 1         7 _writepkgs($self->{cpan_dir}, [ sort { lc $a cmp lc $b } values %packages ] );
  1         7  
66              
67              
68              
69             # Write list of token packages - each should match only one release.
70             # This makes it _much_ faster to do installs via cpanm because it
71             # can skip the modules it knows are installed (whereas using a list of
72             # distros it has to reinstall _all_ of them every time).
73             # XXX maybe add as a separate option: "--mainpkgs mainpkgs.lst"
74 1         1276 my %dist_packages;
75 1         11 while ( my ($pkg, $line) = each %packages) {
76 2         15 my $distpath = (split /\s+/, $line)[2];
77 2         12 $dist_packages{$distpath}{$pkg}++;
78             }
79 1         3 my %token_package;
80 1         5 my %token_package_pri = ( # alter install order for some modules
81             'Module::Build' => 100, # should be near first
82             Moose => 50,
83              
84             # install distros that use Module::Install late so their dependencies
85             # have already been resolved (else they try to fetch them directly,
86             # bypassing our cpanm --mirror-only goal)
87             'Olson::Abbreviations' => -90,
88              
89             # distros with special needs
90             'Term::ReadKey' => -100, # tests hang if run in background
91             );
92 1         5 for my $distpath (sort keys %dist_packages) {
93 1         3 my $dp = $dist_packages{$distpath};
94 1         10 my $di = CPAN::DistnameInfo->new($distpath);
95             #warn Dumper([ $distpath, $di->dist, $di]);
96 1         98 (my $token_pkg = $di->dist) =~ s/-/::/g;
97 1 50       8 if (!$dp->{$token_pkg}) {
98 0 0       0 if (my $keypkg = $self->{irregularities}->{$di->dist}) {
99 0         0 $token_pkg = $keypkg;
100             }
101             else {
102             # XXX not good - may pick a dummy test package
103 0   0     0 $token_pkg = (grep { $_ } keys %$dp)[0] || $token_pkg;
104 0         0 warn "Picked $token_pkg as token package for ".$di->distvname."\n";
105             }
106             }
107 1   50     10 $token_package{$token_pkg} = $token_package_pri{$token_pkg} || 0;
108             }
109              
110 1 0       4 my @main_pkgs = sort { $token_package{$b} <=> $token_package{$a} or $a cmp $b } keys %token_package;
  0         0  
111 1         77 open my $key_pkg_fh, ">", join('/', $self->{cpan_dir}, $self->{progname}, "token_packages.txt");
112 1         9 print $key_pkg_fh "$_\n" for @main_pkgs;
113 1         23 close $key_pkg_fh;
114              
115 1         19 close $self->{rel_fh};
116              
117 1         6 $self->{gzwrite}->gzwrite("]\n");
118 1         107 $self->{gzwrite}->gzclose;
119              
120 1         208 warn $self->{cpan_dir}." updated.\n";
121 1         8 return $self->{errors};
122             }
123              
124             sub add_release {
125 1     1 1 746 my ($self, $ri) = @_;
126              
127             # --- get the file
128              
129 1         3 my $main_url = $ri->{download_url};
130 1         4 my $di = distname_info_from_url($main_url);
131 1         6 my $pathfile = "authors/id/".$di->pathname;
132 1         14 my $destfile = $self->{cpan_dir}."/$pathfile";
133 1         445 mkpath(dirname($destfile));
134              
135 1         5 my @urls = ($main_url);
136 1         3 for my $mirror ('http://backpan.perl.org') {
137 1         4 push @urls, "$mirror/$pathfile";
138             }
139              
140 1         3 my $mirror_status;
141 1         10 my $ua = HTTP::Tiny->new(agent => "dist_surveyor/$VERSION");
142 1         91 for my $url (@urls) {
143 1         5 $mirror_status = $ua->mirror($url, $destfile);
144 1 50       41249 last if $mirror_status->{success};
145             }
146 1 50       6 if (!$mirror_status->{success}) {
147 0 0       0 my $err = $mirror_status->{status} == 599 ? $mirror_status->{content} : $mirror_status->{status};
148 0         0 my $msg = "Error $err mirroring $main_url";
149 0 0       0 if (-f $destfile) {
150 0         0 warn "$msg - using existing file\n";
151             }
152             else {
153             # better to keep going and add the packages to the index
154             # than abort at this stage due to network/mirror problems
155             # the user can drop the files in later
156 0         0 warn "$msg - continuing, ADD FILE MANUALLY!\n";
157 0         0 $self->{errors}++;
158             }
159             }
160             else {
161 1 50       6 warn "$mirror_status->{status} $main_url\n" if $verbose;
162             }
163              
164              
165 1         27 my $mods_in_rel = get_module_versions_in_release($ri->{author}, $ri->{name});
166              
167 1 50       18 if (!keys %$mods_in_rel) { # XXX hack for common::sense
168 0         0 (my $dist_as_pkg = $ri->{distribution}) =~ s/-/::/g;
169 0         0 warn "$ri->{author}/$ri->{name} has no modules! Adding fake module $dist_as_pkg ".$di->version."\n";
170 0         0 $mods_in_rel->{$dist_as_pkg} = {
171             name => $dist_as_pkg,
172             version => $di->version,
173             version_obj => version->parse($di->version),
174             };
175             }
176              
177              
178             # --- accumulate package info for 02packages file
179              
180 1         9 for my $pkg (sort keys %$mods_in_rel ) {
181             # pi => { name=>, version=>, version_obj=> }
182 2         4 my $pi = $mods_in_rel->{$pkg};
183              
184             # for selecting which dist a package belongs to
185             # XXX should factor in authorization status
186 2         8 my $p_r_match_score = p_r_match_score($pkg, $ri);
187              
188 2 50       24 if (my $pvr = $self->{pkg_ver_rel}->{$pkg}) {
189             # already seen same package name in different distribution
190 0 0       0 if ($p_r_match_score < $pvr->{p_r_match_score}) {
191 0         0 warn "$pkg seen in $pvr->{ri}{name} so ignoring one in $ri->{name}\n";
192 0         0 next;
193             }
194 0         0 warn "$pkg seen in $pvr->{ri}{name} - now overridden by $ri->{name}\n";
195             }
196              
197 2         10 my $line = _fmtmodule($pkg, $di->pathname, $pi->{version});
198 2         11 $self->{pkg_ver_rel}->{$pkg} = { line => $line, pi => $pi, ri => $ri, p_r_match_score => $p_r_match_score };
199             }
200              
201 1 50       3 printf { $self->{rel_fh} } "%s\n", ( exists $ri->{url} ? $ri->{url} : "?url" );
  1         11  
202              
203 1         8 $self->{gzwrite}->gzwrite(Dumper($ri));
204 1         273 $self->{gzwrite}->gzwrite(",");
205              
206             }
207              
208             sub p_r_match_score {
209 2     2 0 5 my ($pkg_name, $ri) = @_;
210 2         14 my @p = split /\W/, $pkg_name;
211 2         8 my @r = split /\W/, $ri->{name};
212 2         12 for my $i (0..max(scalar @p, scalar @r)) {
213 4 100 33     31 return $i if not defined $p[$i]
      66        
214             or not defined $r[$i]
215             or $p[$i] ne $r[$i]
216             }
217 0         0 die; # unreached
218             }
219              
220             # copied from CPAN::Mini::Inject and hacked
221              
222             sub _readpkgs {
223 1     1   5 my ($cpandir) = @_;
224              
225 1         3 my $packages_file = $cpandir.'/modules/02packages.details.txt.gz';
226 1 50       74 return [] if not -f $packages_file;
227              
228 0 0       0 my $gzread = Compress::Zlib::gzopen($packages_file, 'rb')
229             or croak "Cannot open $packages_file: " . $Compress::Zlib::gzerrno . "\n";
230              
231 0         0 my $inheader = 1;
232 0         0 my @packages;
233             my $package;
234              
235 0         0 while ( $gzread->gzreadline( $package ) ) {
236 0 0       0 if ( $inheader ) {
237 0 0       0 $inheader = 0 unless $package =~ /\S/;
238 0         0 next;
239             }
240 0         0 chomp $package;
241 0         0 push @packages, $package;
242             }
243              
244 0         0 $gzread->gzclose;
245              
246 0         0 return \@packages;
247             }
248              
249             sub _writepkgs {
250 1     1   3 my ($cpandir, $pkgs) = @_;
251              
252 1         3 my $packages_file = $cpandir.'/modules/02packages.details.txt.gz';
253 1 50       4 my $gzwrite = Compress::Zlib::gzopen($packages_file, 'wb')
254             or croak "Cannot open $packages_file for writing: " . $Compress::Zlib::gzerrno;
255            
256 1         1865 $gzwrite->gzwrite( "File: 02packages.details.txt\n" );
257 1         90 $gzwrite->gzwrite(
258             "URL: http://www.perl.com/CPAN/modules/02packages.details.txt\n"
259             );
260 1         88 $gzwrite->gzwrite(
261             'Description: Package names found in directory $CPAN/authors/id/'
262             . "\n" );
263 1         77 $gzwrite->gzwrite( "Columns: package name, version, path\n" );
264 1         76 $gzwrite->gzwrite(
265             "Intended-For: Automated fetch routines, namespace documentation.\n"
266             );
267 1         120 $gzwrite->gzwrite( "Written-By: $0 0.001\n" ); # XXX TODO
268 1         93 $gzwrite->gzwrite( "Line-Count: " . scalar( @$pkgs ) . "\n" );
269             # Last-Updated: Sat, 19 Mar 2005 19:49:10 GMT
270 1         91 my @date = split( /\s+/, scalar( gmtime ) );
271 1         7 $gzwrite->gzwrite( "Last-Updated: $date[0], $date[2] $date[1] $date[4] $date[3] GMT\n\n" );
272            
273 1         82 $gzwrite->gzwrite( "$_\n" ) for ( @$pkgs );
274            
275 1         163 $gzwrite->gzclose;
276             }
277              
278             sub distname_info_from_url {
279 1     1 0 4 my ($url) = @_;
280 1 50       10 $url =~ s{.* \b authors/id/ }{}x
281             or warn "No authors/ in '$url'\n";
282 1         11 my $di = CPAN::DistnameInfo->new($url);
283 1         93 return $di;
284             }
285              
286             sub _fmtmodule {
287 2     2   36 my ( $module, $file, $version ) = @_;
288 2 50       6 $version = "undef" if not defined $version;
289 2         4 my $fw = 38 - length $version;
290 2 50       6 $fw = length $module if $fw < length $module;
291 2         14 return sprintf "%-${fw}s %s %s", $module, $version, $file;
292             }
293              
294             sub errors {
295 1     1 0 6 my $self = shift;
296 1         7 return $self->{errors};
297             }
298              
299             1;
300              
301             =head1 NAME
302              
303             Dist::Surveyor::MakeCpan - Create a Mini-CPAN for the surveyed modules
304              
305             =head1 SYNOPSIS
306              
307             use Dist::Surveyor::MakeCpan;
308             my $cpan = Dist::Surveyor::MakeCpan->new(
309             $cpan_dir, $progname, $irregularities);
310             foreach my $rel (@releases) {
311             $cpan->add_release($rel);
312             }
313             $cpan->close();
314             say "There where ", $cpan->errors(), " errors";
315              
316             =head1 DESCRIPTION
317              
318             Create a mini-CPAN for the surveyed modules, so you will be able to re-install
319             the same setup in a new computer.
320              
321             =head1 CONSTRUCTOR
322              
323             my $cpan = Dist::Surveyor::MakeCpan->new(
324             $cpan_dir, $progname, $irregularities, $verbose);
325              
326             =over
327              
328             =item $cpan_dir
329              
330             The directory where the mini-cpan will be created
331              
332             =item $progname
333              
334             The name of the running program - will be used to create a subdirectory
335             inside $cpan_dir, that will contain debug information.
336              
337             =item $irregularities
338              
339             A hashref with a list of irregular named releases. i.e. 'libwww-perl' => 'LWP'.
340              
341             =back
342              
343             =head1 METHODS
344              
345             =head2 $cpan->add_release($rel)
346              
347             Add one release to the mini-cpan. the $rel should be a hashref,
348             and contain the following fields:
349              
350             $rel = {
351             download_url => 'http://cpan.metacpan.org/authors/id/S/SE/SEMUELF/Dist-Surveyor-0.009.tar.gz',
352             url => 'authors/id/S/SE/SEMUELF/Dist-Surveyor-0.009.tar.gz',
353             author => 'SEMUELF',
354             name => 'Dist-Surveyor-0.009',
355             distribution => 'Dist-Surveyor',
356             }
357              
358             =head2 $cpan->close()
359              
360             Close the mini-CPAN, and close all the debug data dump files.
361              
362             =head1 License, Copyright
363              
364             Please see L for details
365              
366             =cut