File Coverage

blib/lib/MyCPAN/App/DPAN/Reporter/Minimal.pm
Criterion Covered Total %
statement 119 400 29.7
branch 16 128 12.5
condition 1 26 3.8
subroutine 23 44 52.2
pod 14 14 100.0
total 173 612 28.2


line stmt bran cond sub pod time code
1             package MyCPAN::App::DPAN::Reporter::Minimal;
2 3     3   113428 use strict;
  3         6  
  3         103  
3 3     3   16 use warnings;
  3         5  
  3         101  
4              
5 3     3   14 use base qw(MyCPAN::Indexer::Reporter::Base);
  3         6  
  3         2110  
6 3     3   7452 use vars qw($VERSION $reporter_logger $collator_logger);
  3         6  
  3         186  
7             $VERSION = '1.28_11';
8              
9 3     3   17 use Carp;
  3         7  
  3         185  
10 3     3   16 use Cwd;
  3         5  
  3         177  
11 3     3   14 use File::Basename;
  3         7  
  3         171  
12 3     3   15 use File::Path;
  3         5  
  3         681  
13 3     3   15 use File::Spec::Functions qw(catfile rel2abs file_name_is_absolute);
  3         5  
  3         184  
14 3     3   29 use Log::Log4perl;
  3         4  
  3         62  
15              
16             BEGIN {
17 3     3   154 $reporter_logger = Log::Log4perl->get_logger( 'Reporter' );
18 3         355 $collator_logger = Log::Log4perl->get_logger( 'Collator' );
19             }
20              
21             =head1 NAME
22              
23             MyCPAN::App::DPAN::Reporter::Minimal - Save the minimum information that dpan needs
24              
25             =head1 SYNOPSIS
26              
27             Use this in the C config by specifying it as the reporter class:
28              
29             # in dpan.config
30             reporter_class MyCPAN::App::DPAN::Reporter::Minimal
31              
32             =head1 DESCRIPTION
33              
34             This class takes the result of examining a distribution and saves only
35             the information that dpan needs to create the PAUSE index files. It's
36             a very small text file with virtually no processing overhead compared
37             to YAML.
38              
39             =head2 Methods
40              
41             =over 4
42              
43             =item get_reporter
44              
45             C sets the C key in the notes. The value is a
46             code reference that takes the information collected about a
47             distribution and dumps it as a YAML file.
48              
49             See L for details about what
50             C expects and should do.
51              
52             If C is true, the reports removes the base
53             path up to I.
54              
55             =item get_report_file_extension
56              
57             Returns the extension for report files.
58              
59             =cut
60              
61 0     0 1 0 sub get_report_file_extension { 'txt' }
62              
63             sub get_reporter
64             {
65 0     0 1 0 my( $self ) = @_;
66              
67             # why is this here?
68 0         0 my $base_dir = $self->get_config->dpan_dir;
69              
70 0 0       0 if( $self->get_config->organize_dists )
71             {
72 0         0 $base_dir = catfile( $base_dir, qw(authors id) );
73             }
74              
75             my $reporter = sub {
76 0     0   0 my( $info ) = @_;
77              
78 0 0       0 unless( defined $info )
79             {
80 0         0 $reporter_logger->error( "info is undefined!" );
81 0         0 return;
82             }
83              
84 0         0 my( %Found_canonical, %Current_version, @packages_to_write );
85 0 0       0 MODULE: foreach my $module ( @{ $info->{dist_info}{module_info} || [] } )
  0         0  
86             {
87             # skip if we are ignoring those packages?
88 0   0     0 my $version = $module->{version_info}{value} || 'undef';
89 0 0       0 $version = $version->numify if eval { $version->can('numify') };
  0         0  
90              
91 0 0       0 unless( defined $module->{primary_package} )
92             {
93 3     3   822 no warnings 'uninitialized';
  3         5  
  3         265  
94 0         0 $reporter_logger->warn( "No primary package for $module->{name}" );
95 0         0 next MODULE;
96             }
97              
98 0 0       0 next MODULE if $Found_canonical{ $module->{primary_package} };
99             {
100 3     3   15 no warnings qw(uninitialized numeric);
  3         4  
  3         1711  
  0         0  
101 0 0       0 next MODULE if $version < $Current_version{ $module->{primary_package} };
102             }
103              
104 0         0 $Current_version{ $module->{primary_package} } = $version;
105 0 0       0 $Found_canonical{ $module->{primary_package} } = 1 if
106             $module->{primary_package} eq $module->{module_name_from_file_guess};
107              
108             # this should be an absolute path
109 0         0 my $dist_file = $info->{dist_info}{dist_file};
110              
111 0 0       0 if( $self->get_config->relative_paths_in_report )
112             {
113             # XXX: what if there isn't an authors/id?
114 0         0 $dist_file =~ s/^.*authors.id.//;
115 0         0 $dist_file =~ tr|\\|/|; # translate windows \ to Unix /, cheating
116             }
117              
118 0 0       0 $reporter_logger->warn( "No dist file for $module->{name}" )
119             unless defined $dist_file;
120              
121 0         0 push @packages_to_write, [
122             $module->{primary_package},
123             $version,
124             $dist_file,
125             ];
126             }
127              
128 0 0       0 if( $info->{run_info}{completed} )
129             {
130 0         0 $self->_write_success_file( $info, \@packages_to_write );
131             }
132             else
133             {
134 0         0 $self->_write_error_file( $info );
135             }
136 0         0 1;
137 0         0 };
138              
139 0         0 $self->set_note( 'reporter', $reporter );
140             }
141              
142             sub _write_success_file
143             {
144 0     0   0 my( $self, $info, $packages ) = @_;
145              
146 0         0 my $out_path = $self->get_report_path( $info );
147 0 0       0 open my($fh), ">:utf8", $out_path or
148             $reporter_logger->fatal( "Could not open $out_path to record success report: $!" );
149              
150 0         0 print $fh "# Primary package [TAB] version [TAB] dist file [newline]\n";
151              
152 0         0 foreach my $tuple ( @$packages )
153             {
154 0         0 print $fh join "\t", @$tuple;
155 0         0 print $fh "\n";
156             }
157              
158 0         0 close $fh;
159              
160             # check that the file is where it should be
161 0 0       0 $reporter_logger->error( "$out_path is missing!" ) unless -e $out_path;
162              
163 0         0 return 1;
164             }
165              
166             sub _write_error_file
167             {
168 0     0   0 my( $self, $info ) = @_;
169              
170 0         0 my $out_path = $self->get_report_path( $info );
171 0 0       0 open my($fh), ">:utf8", $out_path or
172             $reporter_logger->fatal( "Could not open $out_path to record error report: $!" );
173              
174 0   0     0 print $fh "ERRORS:\n",
175 0         0 map { sprintf "%s: %s\n", $_, $info->{run_info}{$_} || '' }
176             qw( error fatal_error extraction_error );
177              
178 3     3   2291 use Data::Dumper;
  3         23928  
  3         966  
179 0         0 print $fh '-' x 73, "\n";
180 0         0 print $fh Dumper( $info );
181              
182 0         0 close $fh;
183              
184             # check that the file is where it should be
185 0 0       0 $reporter_logger->error( "$out_path is missing!" ) unless -e $out_path;
186              
187 0         0 return 1;
188             }
189              
190             =item get_collator
191              
192             This Reporter class also implements its Collator since the two are
193             coupled by the report format. It's a wrapper around C,
194             which previously did the same thing.
195              
196             =cut
197              
198             sub get_collator
199             {
200             #TRACE( sub { get_caller_info } );
201              
202 0     0 1 0 my( $self ) = @_;
203              
204             my $collator = sub {
205 0     0   0 $self->final_words;
206 0 0       0 unless( eval { $self->create_index_files } )
  0         0  
207             {
208 0         0 $self->set_note( 'epic_fail', $@ );
209 0         0 return;
210             }
211 0         0 return 1;
212 0         0 };
213              
214 0         0 $self->set_note( $_[0]->collator_type, $collator );
215              
216 0         0 1;
217             }
218              
219             =item final_words
220              
221             Runs after all the reporting for all distributions has finished. This
222             creates a C object and stores it as the C
223             notes. It store the list of directories that need fresh F files
224             in the C note.
225              
226             The checksums and index file creation are split across two steps so that
227             C has a chance to do something between the analysis and their creation.
228              
229             =cut
230              
231             sub final_words
232             {
233             # This is where I want to write 02packages and CHECKSUMS
234 0     0 1 0 my( $self ) = @_;
235              
236 0         0 $collator_logger->trace( "Final words from the DPAN Reporter" );
237              
238 0         0 my %dirs_needing_checksums;
239              
240 3     3   2668 use CPAN::PackageDetails 0.22;
  3         21675  
  3         1690  
241 0         0 my $package_details = CPAN::PackageDetails->new(
242             allow_packages_only_once => 0
243             );
244              
245 0         0 $collator_logger->info( "Creating index files" );
246              
247 0         0 $self->_init_skip_package_from_config;
248              
249 0         0 require version;
250 0         0 FILE: foreach my $file ( $self->get_latest_module_reports )
251             {
252 0         0 $collator_logger->debug( "Processing output file $file" );
253              
254 0 0       0 unless( -e $file )
255             {
256 0         0 $collator_logger->debug( "No success report for [$file]" );
257 0         0 next FILE;
258             }
259              
260 0 0       0 open my($fh), '<:utf8', $file or do {
261 0         0 $collator_logger->error( "Could not open [$file]: $!" );
262 0         0 next FILE;
263             };
264              
265 0         0 my @packages;
266 0         0 PACKAGE: while( <$fh> )
267             {
268 0 0       0 next PACKAGE if /^\s*#/;
269              
270 0         0 chomp;
271 0         0 my( $package, $version, $dist_file ) = split /\t/;
272 0 0       0 $version = undef if $version eq 'undef';
273 0 0       0 $collator_logger->warn( "$package has no distribution file: $file" )
274             unless defined $dist_file;
275              
276 0 0 0     0 unless( defined $package && length $package )
277             {
278 0         0 $collator_logger->debug( "File $file line $.: no package! Line is [$_]" );
279 0         0 next PACKAGE;
280             }
281              
282 0         0 my $full_path = $dist_file;
283              
284 0 0       0 unless( file_name_is_absolute( $full_path ) )
285             {
286 0         0 my $dpan_dir = $self->get_config->dpan_dir;
287              
288             # if we're using organize_dists, we created an authors/id
289             # directory under dpan_dir, so we have to put those
290             # three pieces together
291 0 0       0 if( $self->get_config->organize_dists )
    0          
292             {
293 0         0 $full_path = catfile(
294             $dpan_dir,
295             qw(authors id),
296             $dist_file
297             ) ;
298             }
299             # otherwise, every path should be relative to $dpan_dir
300             # I'm not sure that is actually true though if dpan_dir
301             # is the current directory, and there is an authors/id
302             # under it
303             elsif( $self->get_config->relative_paths_in_report )
304             {
305 0         0 my $f1 = catfile(
306             $dpan_dir,
307             $dist_file
308             );
309              
310 0         0 my $f2 = catfile(
311             $dpan_dir,
312             qw(authors id),
313             $dist_file
314             );
315              
316 0         0 ( $full_path ) = grep { -e } ( $f1, $f2 )
  0         0  
317             }
318             }
319              
320             {
321 3     3   33 no warnings 'uninitialized';
  3         7  
  3         686  
  0         0  
322 0         0 $collator_logger->debug( "dist_file is now [$dist_file]" );
323 0         0 $collator_logger->debug( "full_path is now [$full_path]" );
324             }
325              
326 0 0 0     0 next PACKAGE unless defined $full_path && -e $full_path;
327 0         0 my $dist_dir = dirname( $full_path );
328 0         0 $dirs_needing_checksums{ $dist_dir }++;
329              
330             # broken crap that works on Unix and Windows to make cpanp
331             # happy. It assumes that authors/id/ is in front of the path
332             # in 02packages.details.txt
333 0         0 ( my $path = $dist_file ) =~ s/.*authors.id.//g;
334              
335 3     3   19 no warnings 'uninitialized';
  3         7  
  3         377  
336 0         0 $path =~ s|\\+|/|g; # no windows paths.
337              
338 0 0       0 if( $self->skip_package( $package ) )
339             {
340 0         0 $collator_logger->debug( "Skipping $package: excluded by config" );
341 0         0 next PACKAGE;
342             }
343              
344 0 0 0     0 push @packages, [ $package, $version, $path ]
      0        
345             if( $package and $version and $path );
346             }
347              
348             # Some distros declare the same package in multiple files. We
349             # only want the one with the defined or highest version
350 0         0 my %Seen;
351 3     3   17 no warnings;
  3         6  
  3         1305  
352              
353 0         0 my @filtered_packages =
354 0         0 grep { ! $Seen{$_->[0]}++ }
355 0 0       0 map { my $s = $_; $s->[1] = 'undef' unless defined $s->[1]; $s }
  0 0       0  
  0         0  
356             sort {
357 0         0 $a->[0] cmp $b->[0]
358             ||
359             $b->[1] cmp $a->[1] # yes, versions are strings
360             }
361             @packages;
362              
363 0         0 foreach my $tuple ( @filtered_packages )
364             {
365 0         0 my( $package, $version, $path ) = @$tuple;
366              
367 0 0       0 eval { $package_details->add_entry(
  0         0  
368             'package name' => $package,
369             version => $version,
370             path => $path,
371             ) } or warn "Could not add $package $version from $path! $@\n";
372             }
373             }
374              
375 0         0 $self->set_note( 'package_details', $package_details );
376 0         0 $self->set_note( 'dirs_needing_checksums', [ keys %dirs_needing_checksums ] );
377              
378 0         0 1;
379             }
380              
381             =item get_latest_module_reports
382              
383             Return the list of interesting reports for this indexing run. This
384             re-runs the queuer to get the final list of distributions in
385             dpan_dir (some things might have moved around), gets the reports for
386              
387             =cut
388              
389             sub get_latest_module_reports
390             {
391 4     4 1 2225 my( $self ) = @_;
392 4         21 $reporter_logger->info( "In get_latest_module_reports" );
393 4         14 my $report_names_by_dist_names = $self->_get_report_names_by_dist_names;
394              
395 4         15 my $all_reports = $self->_get_all_reports;
396              
397              
398 4         11 my %Seen = ();
399 4         9 my $report_dir = $self->get_success_report_dir;
400              
401 3     3   21 no warnings 'uninitialized';
  3         14  
  3         5647  
402 10         32 my @files =
403 10         25 map { catfile( $report_dir, $_->[-1] ) }
404 10         45 grep { ! $Seen{$_->[0]}++ }
405 4         16 map { [ /^(.*)-(.*)\.txt\z/, $_ ] }
406             reverse
407             sort
408             keys %$report_names_by_dist_names;
409              
410 4   50     15 my $extra_reports = $self->_get_extra_reports || [];
411              
412 4         24 push @files, @$extra_reports;
413 4         23 $reporter_logger->debug( "Adding extra reports [@$extra_reports]" );
414              
415 4         24 @files;
416             }
417              
418             sub _get_all_reports
419             {
420 0     0   0 my( $self ) = @_;
421              
422 0         0 my $report_dir = $self->get_success_report_dir;
423 0         0 $reporter_logger->debug( "Report dir is $report_dir" );
424              
425 0 0       0 opendir my($dh), $report_dir or
426             $reporter_logger->fatal( "Could not open directory [$report_dir]: $!");
427              
428 0         0 my @reports = readdir( $dh );
429              
430 0         0 \@reports;
431             }
432              
433             # this generates a list of report names based on what should
434             # be there according to the dist that we just indexed. There
435             # might be many reports for different versions or modules no
436             # longer in the DPAN, so we don't want those
437             sub _get_report_names_by_dist_names
438             {
439 0     0   0 my( $self ) = @_;
440              
441             # We have to recreate the queue because we might have moved
442             # things around with organize_dists
443 0         0 my $queuer = $self->get_coordinator->get_component( 'queue' );
444              
445             # these are the directories to index
446 0         0 my @dirs = $self->get_config->dpan_dir;
447 0         0 $reporter_logger->debug( "Queue directories are [@dirs]" );
448              
449             # This is the list of distributions in the indexed directories
450 0         0 my $dists = $queuer->_get_file_list( @dirs );
451              
452             # The code in this map is duplicated from MyCPAN::Indexer::Reporter::Base
453             # in get_report_filename. That method assumes it's getting a big data
454             # structure, so I need to refactor out this bit to _dist2report or
455             # something. I'll get it to work here first.
456 0         0 my %dist_reports = map {
457 0         0 ( my $basename = basename( $_ ) ) =~ s/\.(tgz|tar\.gz|zip)$//;
458 0         0 my $report_name = join '.', $basename, $self->get_report_file_extension;
459 0         0 ( $report_name, $_ );
460             } @$dists;
461              
462 0         0 return \%dist_reports;
463             }
464              
465             sub _get_extra_reports
466             {
467 0     0   0 my( $self ) = @_;
468              
469 0 0       0 return [] unless $self->get_config->exists( 'extra_reports_dir' );
470              
471 0         0 my $dir = $self->get_config->extra_reports_dir;
472 0 0       0 return [] unless defined $dir;
473 0         0 $reporter_logger->debug( "Extra reports directory is [$dir]" );
474              
475 0         0 my $cwd = cwd();
476 0 0       0 $reporter_logger->debug( "Extra reports directory does not exist! Cwd is [$cwd]" )
477             unless -d $dir;
478              
479 0         0 my $glob = catfile(
480             $dir,
481             "*." . $self->get_report_file_extension
482             );
483 0         0 $reporter_logger->debug( "glob pattern is [$glob]" );
484              
485 0         0 my @reports = glob( $glob );
486 0         0 $reporter_logger->debug( "Got extra reports [@reports]" );
487              
488 0         0 return \@reports;
489             }
490              
491             =item create_index_files
492              
493             Creates the F<02packages.details.txt.gz> and F<03modlist.txt.gz>
494             files. If there is a problem, it logs a fatal message and returns
495             nothing. If everything works, it returns true.
496              
497             It initially creates the F<02packages.details.txt.gz> as a temporary
498             file. Before it moves it to its final name, it checks the file with
499             C to ensure it is valid. If it
500             isn't, it stops the process.
501              
502             =cut
503              
504             sub create_index_files
505             {
506 4     4 1 12812 my( $self ) = @_;
507 4         7 my $index_dir = do {
508 4         14 my $d = $self->get_config->dpan_dir;
509              
510             # there might be more than one if we pull from multiple sources
511             # so make the index in the first one.
512 4         50 my $abs = rel2abs( $d );
513 4         134 $abs =~ s/authors.id.*//;
514 4         21 catfile( $abs, 'modules' );
515             };
516              
517 4 50       136 mkpath( $index_dir ) unless -d $index_dir; # XXX
518              
519 4         7 my $_02packages_name = '02packages.details.txt.gz';
520 4         22 my $packages_file = catfile( $index_dir, $_02packages_name );
521              
522 4         16 my $package_details = $self->get_note( 'package_details' );
523 4 50       104 if( -e catfile( $index_dir, '.svn' ) )
524             {
525 0         0 $package_details->set_header( 'X-SVN-Id', '$Id$' );
526             }
527              
528             # inside write_file, the module writes to a temp file then renames
529             # it. It doesn't do any other checking. Should some of this be in
530             # there, though?
531              
532             # before we start, ensure that there are some entries. check_files
533             # checks this too, but I want to die earlier with a better message
534 4         13 my $count = $package_details->count;
535              
536 4 100       24 unless( $count > 0 )
537             {
538 1         8 $collator_logger->fatal( "There are no entries to put into $_02packages_name!" );
539 1         194 return;
540             }
541              
542             # now, write the file. Even though write_file writes to a temporary
543             # file first, that doesn't protect us from overwriting a good 02packages
544             # with a bad one at this level.
545             { # scope for $temp_file
546 3         4 my $temp_file = "$packages_file-$$-trial";
  3         13  
547 3         17 $collator_logger->info( "Writing $temp_file" );
548 3         36 $package_details->write_file( $temp_file );
549              
550             # We tell it to start in $index_dir, but that might have authors/id under it
551             # and that prefix won't show up in 02packages. That's a problem when we want
552             # to find packages and compare their paths. CPAN::PackageDetails might consider
553             # stripping authors/id
554             #
555             # Note: CPANPLUS always assumes authors/id, even for full paths.
556 3         229 my $dpan_dir = dirname( $index_dir );
557 3         18 my $dpan_authors_id = catfile( $dpan_dir, qw( authors id ) );
558              
559             # if there is an authors/id under the dpan_dir, let's give that path to
560             # check_file
561 3 50       60 $dpan_dir = $dpan_authors_id if -d $dpan_authors_id;
562 3         19 $collator_logger->debug( "Using dpan_dir => $dpan_dir" );
563              
564              
565             # Check the trial file for errors
566 3 50       28 unless( $self->get_config->i_ignore_errors_at_my_peril )
567             {
568 3         29 $collator_logger->info( "Checking validity of $temp_file" );
569 3         19 my $at;
570 3 100       5 my $result = eval { $package_details->check_file( $temp_file, $dpan_dir ) }
  3         10  
571             or $at = $@;
572              
573 3 100       316 if( defined $at )
574             {
575             # _interpret_check_file_error can nerf an error based
576             # on configuration. Maybe you don't care about a
577             # particular error.
578 1         64 my $error = $self->_interpret_check_file_error( $at );
579              
580 1 50       6 if( defined $error )
581             {
582 1 50       6 unlink $temp_file unless $collator_logger->is_debug;
583 1 50       75 $collator_logger->logdie( "$temp_file has a problem and I have to abort:\n".
584             "Deleting file (unless you're debugging)\n" .
585             "$error"
586             ) if defined $error;
587             }
588             }
589             }
590              
591             # if we are this far, 02packages must be okay
592 2 100       7 unless( rename( $temp_file => $packages_file ) )
593             {
594 1         12 $collator_logger->fatal( "Could not rename $temp_file => $packages_file" );
595 1         142 return;
596             }
597             }
598              
599             # there are no worries about 03modlist because it is just a stub.
600             # there are no real data in it.
601 1         9 $collator_logger->info( 'Writing 03modlist.txt.gz' );
602 1         10 $self->create_modlist( $index_dir );
603              
604 1         6 $collator_logger->info( 'Creating CHECKSUMS files' );
605 1         7 $self->create_checksums( $self->get_note( 'dirs_needing_checksums' ) );
606              
607 1         19 $collator_logger->info( 'Updating mailrc and whois files' );
608 1         9 $self->update_whois;
609              
610 1         5 1;
611             }
612              
613             sub _interpret_check_file_error
614             {
615 1     1   2 my( $self, $at ) = @_;
616              
617 1         2 my $error_message = do {
618 1 50       5 if( not ref $at )
    0          
    0          
619             {
620 1         4 $at;
621             }
622             # eventually this will filter the missing files and still
623             # complain for the left over ones
624             elsif( exists $at->{missing_in_file} )
625             {
626 0 0         if( $self->get_config->ignore_missing_dists ) {
627 0           undef;
628             }
629             else {
630 0           "Some distributions in the repository do not show up in the file\n\t" .
631 0           join( "\n\t", @{ $at->{missing_in_file} } )
632             }
633             }
634             # eventually this will filter the missing dists and still
635             # complain for the left over ones
636             elsif( exists $at->{missing_in_repo} )
637             {
638 0 0         if( $self->get_config->ignore_extra_dists ) {
639 0           undef;
640             }
641             else {
642 0           "The file has distributions that do not appear in the repository\n\t" .
643 0           join( "\n\t", @{ $at->{missing_in_repo} } )
644             }
645             }
646 0           else { 'Unknown error!' }
647             };
648              
649             }
650              
651             =item skip_package( PACKAGE )
652              
653             Returns true if the indexer should ignore PACKAGE.
654              
655             By default, this skips the Perl special packages specified by the
656             ignore_packages configuration. By default, ignore packages is:
657              
658             main
659             MY
660             MM
661             DB
662             bytes
663             DynaLoader
664              
665             To set a different list, configure ignore_packages with a space
666             separated list of packages to ignore:
667              
668             ignore_packages main Foo Bar::Baz Test
669              
670             Note that this only ignores those exact packages. You can't configure
671             this with regex or wildcards (yet).
672              
673             =cut
674              
675             BEGIN {
676 3     3   8 my $initialized = 0;
677 3         5090 my %skip_packages;
678              
679 0     0     sub _skip_package_initialized { $initialized }
680              
681             sub _init_skip_package_from_config
682             {
683 0     0     my( $self, $Notes ) = @_;
684              
685 0           %skip_packages =
686 0           map { $_, 1 }
687 0   0       grep { defined }
688             split /\s+/,
689             $self->get_config->ignore_packages || '';
690              
691 0           $initialized = 1;
692             }
693              
694             sub skip_package
695             {
696 0     0 1   my( $self, $package ) = @_;
697              
698 0           exists $skip_packages{ $package }
699             }
700             }
701              
702             =item create_package_details
703              
704             Not yet implemented. Otehr code needs to be refactored and show up
705             here.
706              
707             =cut
708              
709             sub create_package_details
710             {
711 0     0 1   my( $self, $index_dir ) = @_;
712              
713              
714 0           1;
715             }
716              
717             =item create_modlist
718              
719             If a modules/03modlist.data.gz does not already exist, this creates a
720             placeholder which defines the CPAN::Modulelist package and the method
721             C in that package. The C method returns an empty hash
722             reference.
723              
724             =cut
725              
726             sub create_modlist
727             {
728 0     0 1   my( $self, $index_dir ) = @_;
729              
730 0           my $module_list_file = catfile( $index_dir, '03modlist.data.gz' );
731 0           $collator_logger->debug( "modules list file is [$module_list_file]");
732              
733 0 0         if( -e $module_list_file )
734             {
735 0           $collator_logger->debug( "File [$module_list_file] already exists!" );
736 0           return 1;
737             }
738              
739 0           my $fh = IO::Compress::Gzip->new( $module_list_file );
740 0           print $fh <<"HERE";
741             File: 03modlist.data
742             Description: This a placeholder for CPAN.pm
743             Modcount: 0
744 0           Written-By: Id: $0
745             Date: @{ [ scalar localtime ] }
746              
747             package CPAN::Modulelist;
748              
749             sub data { {} }
750              
751             1;
752             HERE
753              
754 0           close $fh;
755             }
756              
757             =item update_whois
758              
759              
760             00whois.xml 01mailrc.txt.gz
761              
762             =cut
763              
764             sub update_whois
765             {
766 0     0 1   my( $self, $index_dir ) = @_;
767 0           require MyCPAN::App::DPAN::CPANUtils;
768              
769 0           my $success = 0;
770              
771             # no matter the situation, start over. I don't like this situation
772             # so much, but it's more expedient then parsing the xml file to look
773             # for missing users
774 0           unlink map { my $f = catfile(
  0            
775             $self->get_config->dpan_dir,
776             'authors',
777             MyCPAN::App::DPAN::CPANUtils->$_()
778             );
779              
780 0           $f;
781             } qw( mailrc_filename whois_filename );
782              
783 0 0         if( $self->get_config->use_real_whois )
784             {
785 0           my $result = MyCPAN::App::DPAN::CPANUtils->pull_latest_whois(
786             $self->get_config->dpan_dir, $collator_logger
787             );
788 0 0         if( $result == 2 )
789             {
790 0           $success = 1;
791             }
792             else
793             {
794 0           warn "Could not pull whois files from CPAN\n";
795 0           $success = 0;
796             }
797              
798             }
799              
800 0 0         unless( $success )
801             {
802 0           MyCPAN::App::DPAN::CPANUtils->make_fake_whois(
803             $self->get_config->dpan_dir, $collator_logger
804             );
805             }
806              
807 0           my %authors = $self->get_all_authors;
808              
809 0           $self->update_01mailrc( \%authors );
810              
811 0           $self->update_00whois( \%authors );
812              
813 0           return 1;
814             }
815              
816             =item get_all_authors
817              
818             Walk the repository and extract all of the actual authors in the repo.
819              
820             =cut
821              
822             sub get_all_authors
823             {
824 0     0 1   my( $self ) = @_;
825              
826 0           my $author_map = do {
827 0           my $file = $self->get_config->author_map;
828 0 0         if( defined $file )
829             {
830 0           my $hash;
831 0 0         unless( -e $file )
    0          
832             {
833 0           $collator_logger->error( "Author map file [$file] does not exist" );
834 0           {};
835             }
836             elsif( open my($fh), '<:utf8', $file )
837             {
838 0           while( <$fh> )
839             {
840 0           chomp;
841 0           my( $pause_id, $full_name ) = split /\s+/, $_, 2;
842 0   0       $hash->{uc $pause_id} = $full_name || $self->get_config->pause_full_name;
843             }
844 0           $hash;
845             }
846             else
847             {
848 0           $collator_logger->error( "Could not open author map file [$file]: $!" );
849 0           {};
850             }
851             }
852 0           else { {} }
853             };
854              
855 0           my $old_cwd = cwd();
856 0           my $id_dir = catfile( $self->get_config->dpan_dir, 'authors', 'id' );
857 0           chdir $id_dir;
858              
859 0           my @authors_in_repo = map { basename( $_ ) } glob( "*/*/*" );
  0            
860 0           chdir $old_cwd;
861              
862 0   0       my %authors = map {
863 0           $_,
864             $author_map->{$_} || $self->get_config->pause_full_name
865             } @authors_in_repo;
866              
867 0           %authors;
868             }
869              
870             =item update_01mailrc
871              
872             Ensure that every PAUSE ID that's in the repository shows up in the
873             F file. Any new IDs show up with the name
874             from the C configuration.
875              
876             TO DO: offer a way to configure multiple new IDs
877              
878             =cut
879              
880             sub update_01mailrc
881             {
882 0     0 1   my( $self, $authors ) = @_;
883              
884 0           require IO::Uncompress::Gunzip;
885 0           require IO::Compress::Gzip;
886              
887 0           my $d = $self->get_config->dpan_dir;
888 0           my $mailrc_fh = do {
889 0           my $file = catfile( $d, 'authors', '01mailrc.txt.gz' );
890 0 0         IO::Uncompress::Gunzip->new( $file ) or do {
891 0           carp "Could not open $file: $IO::Uncompress::Gunzip::GunzipError\n";
892 0           undef;
893             };
894             };
895              
896 0           my $new_mailrc_fh = do {
897 0           my $file = catfile( $d, 'authors', 'new-01mailrc.txt.gz' );
898 0 0         my $z = IO::Compress::Gzip->new( $file )
899             or carp "gzip failed: $IO::Compress::Gzip::GzipError\n";
900             };
901              
902 0           while( <$mailrc_fh> )
903             {
904 0           my( $pause_id, $name, $email ) = m/^
905             alias \s+
906             (\S+) \s+
907             "
908             (.*) \s+
909             <
910             (.*?)
911             >
912             "/x;
913              
914 0           delete $authors->{$pause_id};
915 0           print { $new_mailrc_fh } $_;
  0            
916             }
917              
918 0           foreach my $author ( keys %$authors )
919             {
920 0           print { $new_mailrc_fh } qq|alias $author "$authors->{$author}"\n|;
  0            
921             }
922              
923 0           close $new_mailrc_fh;
924              
925 0           rename
926             catfile( $d, 'authors', 'new-01mailrc.txt.gz' ),
927             catfile( $d, 'authors', '01mailrc.txt.gz' );
928             }
929              
930             =item update_00whois
931              
932             Ensure that every PAUSE ID that's in the repository shows up in the
933             F file. Any new IDs show up with the name
934             from the C configuration.
935              
936             =cut
937              
938             sub update_00whois
939             {
940 0     0 1   my( $self, $authors ) = @_;
941              
942 0           my $d = $self->get_config->dpan_dir;
943              
944 0           my $file = catfile( $d, 'authors', '00whois.xml' );
945             open my( $whois_fh ), "+<:utf8", $file
946 0 0         or do {
947 0           carp "Could not open $file: $!\n";
948 0           return;
949             };
950              
951 0           my $file_end = "\n";
952 0           seek $whois_fh, - length( $file_end ), 2;
953              
954 0           foreach my $author ( keys %$authors )
955             {
956 0           my( $name, $email ) = # XXX need to encode
957 0           map { my $x = $_;
958 0           $x =~ s/&/&/g;
959 0           $x =~ s/
960 0           $x =~ s/>/>/g;
961 0           $x =~ s/"/"/g;
962 0           $x;
963             } $authors->{$author} =~ m/\s*(.+)\s+<(.+?)>/;
964              
965 0           print { $whois_fh } <<"AUTHOR";
  0            
966            
967             $author
968             author
969             $name
970             $email
971            
972             AUTHOR
973             }
974              
975 0           print { $whois_fh } $file_end;
  0            
976              
977 0           close $whois_fh;
978              
979 0           1;
980             }
981              
982             =item create_checksums
983              
984             Creates the CHECKSUMS file that goes in each author directory in CPAN.
985             This is mostly a wrapper around CPAN::Checksums since that already handles
986             updating an entire tree. We just do a little logging.
987              
988             =cut
989              
990             sub create_checksums
991             {
992 0     0 1   my( $self, $dirs ) = @_;
993              
994 0           require CPAN::Checksums;
995 0           foreach my $dir ( @$dirs )
996             {
997 0           my $rc = eval{ CPAN::Checksums::updatedir( $dir ) };
  0            
998 0 0         $reporter_logger->error( "Couldn't create CHECKSUMS for $dir: $@" ) if $@;
999             $reporter_logger->info(
1000 0           do {
1001 0 0         if( $rc == 1 ) { "Valid CHECKSUMS file is already present" }
  0 0          
1002 0           elsif( $rc == 2 ) { "Wrote new CHECKSUMS file in $dir" }
1003 0           else { "updatedir unexpectedly returned an error" }
1004             } );
1005             }
1006             }
1007              
1008             =back
1009              
1010             =head1 TO DO
1011              
1012             How much time do you have?
1013              
1014             =head1 SOURCE AVAILABILITY
1015              
1016             This code is in Github:
1017              
1018             git://github.com/briandfoy/mycpan--app--dpan.git
1019              
1020             =head1 AUTHOR
1021              
1022             brian d foy, C<< >>
1023              
1024             =head1 COPYRIGHT AND LICENSE
1025              
1026             Copyright (c) 2009-2010, brian d foy, All Rights Reserved.
1027              
1028             You may redistribute this under the same terms as Perl itself.
1029              
1030             =cut
1031              
1032             1;