File Coverage

blib/lib/MyCPAN/App/DPAN/Reporter/Minimal.pm
Criterion Covered Total %
statement 100 248 40.3
branch 16 86 18.6
condition 1 11 9.0
subroutine 18 31 58.0
pod 9 9 100.0
total 144 385 37.4


line stmt bran cond sub pod time code
1             package MyCPAN::App::DPAN::Reporter::Minimal;
2 3     3   281905 use strict;
  3         6  
  3         136  
3 3     3   20 use warnings;
  3         7  
  3         112  
4              
5 3     3   19 use base qw(MyCPAN::Indexer::Reporter::Base);
  3         6  
  3         3056  
6 3     3   14774 use vars qw($VERSION $logger);
  3         7  
  3         174  
7             $VERSION = '1.28';
8              
9 3     3   17 use Carp;
  3         13  
  3         193  
10 3     3   18 use Cwd;
  3         7  
  3         204  
11 3     3   18 use File::Basename;
  3         6  
  3         311  
12 3     3   19 use File::Path;
  3         6  
  3         770  
13 3     3   18 use File::Spec::Functions qw(catfile rel2abs);
  3         6  
  3         176  
14 3     3   31 use Log::Log4perl;
  3         7  
  3         62  
15              
16             BEGIN {
17 3     3   135 $logger = Log::Log4perl->get_logger( 'Reporter' );
18             }
19              
20             =head1 NAME
21              
22             MyCPAN::App::DPAN::Reporter::Minimal - Save the minimum information that dpan needs
23              
24             =head1 SYNOPSIS
25              
26             Use this in the C config by specifying it as the reporter class:
27              
28             # in dpan.config
29             reporter_class MyCPAN::App::DPAN::Reporter::Minimal
30              
31             =head1 DESCRIPTION
32              
33             This class takes the result of examining a distribution and saves only
34             the information that dpan needs to create the PAUSE index files. It's
35             a very small text file with virtually no processing overhead compared
36             to YAML.
37              
38             =head2 Methods
39              
40             =over 4
41              
42             =item get_reporter
43              
44             C sets the C key in the notes. The value is a
45             code reference that takes the information collected about a
46             distribution and dumps it as a YAML file.
47              
48             See L for details about what
49             C expects and should do.
50              
51             =cut
52              
53 0     0 1 0 sub get_report_file_extension { 'txt' }
54              
55             sub get_reporter
56             {
57             #TRACE( sub { get_caller_info } );
58              
59 0     0 1 0 my( $self ) = @_;
60              
61 0         0 my $base_dir = $self->get_config->backpan_dir;
62            
63 0 0       0 if( $self->get_config->organize_dists )
64             {
65 0         0 $base_dir = catfile( $base_dir, qw(authors id) );
66             }
67            
68             my $reporter = sub {
69 0     0   0 my( $info ) = @_;
70              
71 0 0       0 unless( defined $info )
72             {
73 0         0 $logger->error( "info is undefined!" );
74 0         0 return;
75             }
76              
77 0         0 my $out_path = $self->get_report_path( $info );
78              
79 0 0       0 open my($fh), ">", $out_path or
80             $logger->fatal( "Could not open $out_path to record report: $!" );
81              
82 0         0 print $fh "# Primary package [TAB] version [TAB] dist file [newline]\n";
83            
84 0 0       0 MODULE: foreach my $module ( @{ $info->{dist_info}{module_info} || [] } )
  0         0  
85             {
86             # skip if we are ignoring those packages?
87 0   0     0 my $version = $module->{version_info}{value} || 'undef';
88 0 0       0 $version = $version->numify if eval { $version->can('numify') };
  0         0  
89              
90 0 0       0 unless( defined $module->{primary_package} )
91             {
92 0         0 $logger->warn( "No primary package for $module->{name}" );
93 0         0 next MODULE;
94             }
95              
96             # this should be an absolute path
97 0         0 my $dist_file = $info->{dist_info}{dist_file};
98              
99 0 0       0 $dist_file =~ s/^.*authors.id.// if $self->get_config->organize_dists;
100            
101 0 0       0 $logger->warn( "No dist file for $module->{name}" )
102             unless defined $dist_file;
103              
104 0         0 print $fh join "\t",
105             $module->{primary_package},
106             $version,
107             $dist_file;
108              
109 0         0 print $fh "\n";
110             }
111 0         0 close $fh;
112              
113 0 0       0 $logger->error( "$out_path is missing!" ) unless -e $out_path;
114              
115 0         0 1;
116 0         0 };
117              
118 0         0 $self->set_note( 'reporter', $reporter );
119             }
120            
121             =item final_words
122              
123             Runs after all the reporting for all distributions has finished. This
124             creates a C object and stores it as the C
125             notes. It store the list of directories that need fresh F files
126             in the C note.
127              
128             The checksums and index file creation are split across two steps so that
129             C has a chance to do something between the analysis and their creation.
130              
131             =cut
132              
133             sub final_words
134             {
135             # This is where I want to write 02packages and CHECKSUMS
136 0     0 1 0 my( $self ) = @_;
137              
138 0         0 $logger->trace( "Final words from the DPAN Reporter" );
139              
140 0         0 my %dirs_needing_checksums;
141              
142 3     3   5805 use CPAN::PackageDetails 0.22;
  3         43336  
  3         1664  
143 0         0 my $package_details = CPAN::PackageDetails->new(
144             allow_packages_only_once => 0
145             );
146              
147 0         0 $logger->info( "Creating index files" );
148              
149 0         0 $self->_init_skip_package_from_config;
150            
151 0         0 require version;
152 0         0 FILE: foreach my $file ( $self->get_latest_module_reports )
153             {
154 0         0 $logger->debug( "Processing output file $file" );
155            
156 0 0       0 open my($fh), '<', $file or do {
157 0         0 $logger->error( "Could not open [$file]: $!" );
158 0         0 next FILE;
159             };
160            
161 0         0 my @packages;
162 0         0 PACKAGE: while( <$fh> )
163             {
164 0 0       0 next PACKAGE if /^\s*#/;
165            
166 0         0 chomp;
167 0         0 my( $package, $version, $dist_file ) = split /\t/;
168 0 0       0 $version = undef if $version eq 'undef';
169            
170 0 0 0     0 unless( defined $package && length $package )
171             {
172 0         0 $logger->debug( "File $file line $.: no package! Line is [$_]" );
173 0         0 next PACKAGE;
174             }
175              
176 0 0       0 if( $self->get_config->organize_dists )
177             {
178 0         0 my $backpan_dir = ($self->get_config->backpan_dir)[0];
179 0         0 $dist_file = catfile(
180             $backpan_dir,
181             qw(authors id),
182             $dist_file
183             );
184             }
185            
186 0         0 $logger->debug( "dist_file is now [$dist_file]" );
187 0 0       0 next PACKAGE unless -e $dist_file; # && $dist_file =~ m/^\Q$backpan_dir/;
188 0         0 my $dist_dir = dirname( $dist_file );
189 0         0 $dirs_needing_checksums{ $dist_dir }++;
190              
191             # broken crap that works on Unix and Windows to make cpanp
192             # happy. It assumes that authors/id/ is in front of the path
193             # in 02packages.details.txt
194 0         0 ( my $path = $dist_file ) =~ s/.*authors.id.//g;
195              
196 0         0 $path =~ s|\\+|/|g; # no windows paths.
197              
198 0 0       0 if( $self->skip_package( $package ) )
199             {
200 0         0 $logger->debug( "Skipping $package: excluded by config" );
201 0         0 next PACKAGE;
202             }
203            
204 0         0 push @packages, [ $package, $version, $path ];
205             }
206            
207             # Some distros declare the same package in multiple files. We
208             # only want the one with the defined or highest version
209 0         0 my %Seen;
210 3     3   42 no warnings;
  3         6  
  3         1312  
211 0         0 my @filtered_packages =
212 0         0 grep { ! $Seen{$_->[0]}++ }
213 0 0       0 map { my $s = $_; $s->[1] = 'undef' unless defined $s->[1]; $s }
  0 0       0  
  0         0  
214             sort {
215 0         0 $a->[0] cmp $b->[0]
216             ||
217             $b->[1] cmp $a->[1] # yes, versions are strings
218             }
219             @packages;
220              
221 0         0 foreach my $tuple ( @filtered_packages )
222             {
223 0         0 my( $package, $version, $path ) = @$tuple;
224            
225 0 0       0 eval { $package_details->add_entry(
  0         0  
226             'package name' => $package,
227             version => $version,
228             path => $path,
229             ) } or warn "Could not add $package $version from $path! $@\n";
230             }
231             }
232              
233 0         0 $self->set_note( 'package_details', $package_details );
234 0         0 $self->set_note( 'dirs_needing_checksums', [ keys %dirs_needing_checksums ] );
235            
236 0         0 1;
237             }
238              
239             =item get_latest_module_reports
240              
241             Return the list of interesting reports for this indexing run. This
242             re-runs the queuer to get the final list of distributions in
243             backpan_dir (some things might have moved around), gets the reports for
244              
245             =cut
246              
247             sub get_latest_module_reports
248             {
249 4     4 1 2599 my( $self ) = @_;
250 4         30 $logger->info( "In get_latest_module_reports" );
251 4         17 my $report_names_by_dist_names = $self->_get_report_names_by_dist_names;
252            
253 4         21 my $all_reports = $self->_get_all_reports;
254            
255              
256 4         14 my %Seen = ();
257 4         14 my $report_dir = $self->get_success_report_dir;
258            
259 3     3   20 no warnings 'uninitialized';
  3         8  
  3         5473  
260 10         47 my @files =
261 10         39 map { catfile( $report_dir, $_->[-1] ) }
262 10         59 grep { ! $Seen{$_->[0]}++ }
263 4         21 map { [ /^(.*)-(.*)\.txt\z/, $_ ] }
264             reverse
265             sort
266             keys %$report_names_by_dist_names;
267            
268 4   50     20 my $extra_reports = $self->_get_extra_reports || [];
269            
270 4         32 push @files, @$extra_reports;
271 4         48 $logger->debug( "Adding extra reports [@$extra_reports]" );
272              
273 4         32 @files;
274             }
275              
276             sub _get_all_reports
277             {
278 0     0   0 my( $self ) = @_;
279            
280 0         0 my $report_dir = $self->get_success_report_dir;
281 0         0 $logger->debug( "Report dir is $report_dir" );
282              
283 0 0       0 opendir my($dh), $report_dir or
284             $logger->fatal( "Could not open directory [$report_dir]: $!");
285            
286 0         0 my @reports = readdir( $dh );
287              
288 0         0 \@reports;
289             }
290              
291             # this generates a list of report names based on what should
292             # be there according to the dist that we just indexed. There
293             # might be many reports for different versions or modules no
294             # longer in the DPAN, so we don't want those
295             sub _get_report_names_by_dist_names
296             {
297 0     0   0 my( $self ) = @_;
298            
299             # We have to recreate the queue because we might have moved
300             # things around with organize_dists
301 0         0 my $queuer = $self->get_coordinator->get_component( 'queue' );
302              
303             # these are the directories to index
304 0         0 my @dirs = do {
305 0   0     0 my $item = $self->get_config->backpan_dir || '';
306 0         0 split /\s+/, $item;
307             };
308 0         0 $logger->debug( "Queue directories are [@dirs]" );
309            
310             # This is the list of distributions in the indexed directories
311 0         0 my $dists = $queuer->_get_file_list( @dirs );
312              
313             # The code in this map is duplicated from MyCPAN::Indexer::Reporter::Base
314             # in get_report_filename. That method assumes it's getting a big data
315             # structure, so I need to refactor out this bit to _dist2report or
316             # something. I'll get it to work here first.
317 0         0 my %dist_reports = map {
318 0         0 ( my $basename = basename( $_ ) ) =~ s/\.(tgz|tar\.gz|zip)$//;
319 0         0 my $report_name = join '.', $basename, $self->get_report_file_extension;
320 0         0 ( $report_name, $_ );
321             } @$dists;
322            
323 0         0 return \%dist_reports;
324             }
325              
326             sub _get_extra_reports
327             {
328 0     0   0 my( $self ) = @_;
329              
330 0 0       0 return [] unless $self->get_config->exists( 'extra_reports_dir' );
331            
332 0         0 my $dir = $self->get_config->extra_reports_dir;
333 0 0       0 return [] unless defined $dir;
334 0         0 $logger->debug( "Extra reports directory is [$dir]" );
335              
336 0         0 my $cwd = cwd();
337 0 0       0 $logger->debug( "Extra reports directory does not exist! Cwd is [$cwd]" )
338             unless -d $dir;
339            
340 0         0 my $glob = catfile(
341             $dir,
342             "*." . $self->get_report_file_extension
343             );
344 0         0 $logger->debug( "glob pattern is [$glob]" );
345            
346 0         0 my @reports = glob( $glob );
347 0         0 $logger->debug( "Got extra reports [@reports]" );
348            
349 0         0 return \@reports;
350             }
351            
352             =item create_index_files
353              
354             Creates the 02packages.details.txt.gz and 03modlist.txt.gz files. If there
355             is a problem, it logs a fatal message and returns nothing. If everything works,
356             it returns true.
357              
358             It initially creates the 02packages.details.txt.gz as a temporary file. Before
359             it moves it to its final name, it checks the file with CPAN::PackageDetails::check_file
360             to ensure it is valid. If it isn't, it stops the process.
361              
362             =cut
363              
364             sub create_index_files
365             {
366 4     4 1 11137 my( $self ) = @_;
367 4         9 my $index_dir = do {
368 4         18 my $d = $self->get_config->backpan_dir;
369            
370             # there might be more than one if we pull from multiple sources
371             # so make the index in the first one.
372 4 50       70 my $abs = rel2abs( ref $d ? $d->[0] : $d );
373 4         334 $abs =~ s/authors.id.*//;
374 4         21 catfile( $abs, 'modules' );
375             };
376            
377 4 50       441 mkpath( $index_dir ) unless -d $index_dir; # XXX
378              
379 4         8 my $_02packages_name = '02packages.details.txt.gz';
380 4         25 my $packages_file = catfile( $index_dir, $_02packages_name );
381              
382 4         16 my $package_details = $self->get_note( 'package_details' );
383            
384             # inside write_file, the module writes to a temp file then renames
385             # it. It doesn't do any other checking. Should some of this be in
386             # there, though?
387            
388             # before we start, ensure that there are some entries. check_files
389             # checks this too, but I want to die earlier with a better message
390 4         35 my $count = $package_details->count;
391            
392 4 100       22 unless( $count > 0 )
393             {
394 1         8 $logger->fatal( "There are no entries to put into $_02packages_name!" );
395 1         193 return;
396             }
397            
398             # now, write the file. Even though write_file writes to a temporary
399             # file first, that doesn't protect us from overwriting a good 02packages
400             # with a bad one at this level.
401             { # scope for $temp_file
402 3         4 my $temp_file = "$packages_file-$$-trial";
  3         14  
403 3         40 $logger->info( "Writing $temp_file" );
404 3         44 $package_details->write_file( $temp_file );
405              
406             # We tell it to start in $index_dir, but that might have authors/id under it
407             # and that prefix won't show up in 02packages. That's a problem when we want
408             # to find packages and compare their paths. CPAN::PackageDetails might consider
409             # stripping authors/id
410             #
411             # Note: CPANPLUS always assumes authors/id, even for full paths.
412 3         174 my $dpan_dir = dirname( $index_dir );
413 3         18 my $dpan_authors_id = catfile( $dpan_dir, qw( authors id ) );
414            
415             # if there is an authors/id under the dpan_dir, let's give that path to
416             # check_file
417 3 50       60 $dpan_dir = $dpan_authors_id if -d $dpan_authors_id;
418 3         19 $logger->debug( "Using dpan_dir => $dpan_dir" );
419              
420              
421             # Check the trial file for errors
422 3 50       194 unless( $self->get_config->i_ignore_errors_at_my_peril )
423             {
424 3         32 $logger->info( "Checking validity of $temp_file" );
425 3         20 my $at;
426 3 100       5 my $result = eval { $package_details->check_file( $temp_file, $dpan_dir ) }
  3         10  
427             or $at = $@;
428            
429 3 100       508 if( defined $at )
430             {
431             # _interpret_check_file_error can nerf an error based
432             # on configuration. Maybe you don't care about a
433             # particular error.
434 1         11 my $error = $self->_interpret_check_file_error( $at );
435            
436 1 50       4 if( defined $error )
437             {
438 1 50       6 unlink $temp_file unless $logger->is_debug;
439 1 50       82 $logger->logdie( "$temp_file has a problem and I have to abort:\n".
440             "Deleting file (unless you're debugging)\n" .
441             "$error"
442             ) if defined $error;
443             }
444             }
445             }
446              
447             # if we are this far, 02packages must be okay
448 2 100       10 unless( rename( $temp_file => $packages_file ) )
449             {
450 1         11 $logger->fatal( "Could not rename $temp_file => $packages_file" );
451 1         158 return;
452             }
453             }
454            
455             # there are no worries about 03modlist because it is just a stub.
456             # there are no real data in it.
457 1         8 $logger->info( 'Writing 03modlist.txt.gz' );
458 1         16 $self->create_modlist( $index_dir );
459              
460 1         4 $logger->info( 'Creating CHECKSUMS files' );
461 1         7 $self->create_checksums( $self->get_note( 'dirs_needing_checksums' ) );
462            
463 1         9 1;
464             }
465            
466             sub _interpret_check_file_error
467             {
468 1     1   4 my( $self, $at ) = @_;
469            
470 1         2 my $error_message = do {
471 1 50       6 if( not ref $at )
    0          
    0          
472             {
473 1         4 $at;
474             }
475             # eventually this will filter the missing files and still
476             # complain for the left over ones
477             elsif( exists $at->{missing_in_file} )
478             {
479 0 0         if( $self->get_config->ignore_missing_dists ) {
480 0           undef;
481             }
482             else {
483 0           "Some distributions in the repository do not show up in the file\n\t" .
484 0           join( "\n\t", @{ $at->{missing_in_file} } )
485             }
486             }
487             # eventually this will filter the missing dists and still
488             # complain for the left over ones
489             elsif( exists $at->{missing_in_repo} )
490             {
491 0 0         if( $self->get_config->ignore_extra_dists ) {
492 0           undef;
493             }
494             else {
495 0           "The file has distributions that do not appear in the repository\n\t" .
496 0           join( "\n\t", @{ $at->{missing_in_repo} } )
497             }
498             }
499 0           else { 'Unknown error!' }
500             };
501            
502             }
503            
504             =item skip_package( PACKAGE )
505              
506             Returns true if the indexer should ignore PACKAGE.
507              
508             By default, this skips the Perl special packages specified by the
509             ignore_packages configuration. By default, ignore packages is:
510              
511             main
512             MY
513             MM
514             DB
515             bytes
516             DynaLoader
517              
518             To set a different list, configure ignore_packages with a space
519             separated list of packages to ignore:
520              
521             ignore_packages main Foo Bar::Baz Test
522              
523             Note that this only ignores those exact packages. You can't configure
524             this with regex or wildcards (yet).
525              
526             =cut
527              
528             BEGIN {
529 3     3   7 my $initialized = 0;
530 3         1218 my %skip_packages;
531              
532 0     0     sub _skip_package_initialized { $initialized }
533            
534             sub _init_skip_package_from_config
535             {
536 0     0     my( $self, $Notes ) = @_;
537            
538 0           %skip_packages =
539 0           map { $_, 1 }
540 0   0       grep { defined }
541             split /\s+/,
542             $self->get_config->ignore_packages || '';
543            
544 0           $initialized = 1;
545             }
546            
547             sub skip_package
548             {
549 0     0 1   my( $self, $package ) = @_;
550            
551 0           exists $skip_packages{ $package }
552             }
553             }
554              
555             =item create_package_details
556              
557             Not yet implemented. Otehr code needs to be refactored and show up
558             here.
559              
560             =cut
561              
562             sub create_package_details
563             {
564 0     0 1   my( $self, $index_dir ) = @_;
565              
566              
567 0           1;
568             }
569              
570             =item create_modlist
571              
572             If a modules/03modlist.data.gz does not already exist, this creates a
573             placeholder which defines the CPAN::Modulelist package and the method
574             C in that package. The C method returns an empty hash
575             reference.
576              
577             =cut
578              
579             sub create_modlist
580             {
581 0     0 1   my( $self, $index_dir ) = @_;
582              
583 0           my $module_list_file = catfile( $index_dir, '03modlist.data.gz' );
584 0           $logger->debug( "modules list file is [$module_list_file]");
585              
586 0 0         if( -e $module_list_file )
587             {
588 0           $logger->debug( "File [$module_list_file] already exists!" );
589 0           return 1;
590             }
591              
592 0           my $fh = IO::Compress::Gzip->new( $module_list_file );
593 0           print $fh <<"HERE";
594             File: 03modlist.data
595             Description: This a placeholder for CPAN.pm
596             Modcount: 0
597 0           Written-By: Id: $0
598             Date: @{ [ scalar localtime ] }
599              
600             package CPAN::Modulelist;
601              
602             sub data { {} }
603              
604             1;
605             HERE
606              
607 0           close $fh;
608             }
609              
610             =item create_checksums
611              
612             Creates the CHECKSUMS file that goes in each author directory in CPAN.
613             This is mostly a wrapper around CPAN::Checksums since that already handles
614             updating an entire tree. We just do a little logging.
615              
616             =cut
617              
618             sub create_checksums
619             {
620 0     0 1   my( $self, $dirs ) = @_;
621              
622 0           require CPAN::Checksums;
623 0           foreach my $dir ( @$dirs )
624             {
625 0           my $rc = eval{ CPAN::Checksums::updatedir( $dir ) };
  0            
626 0 0         $logger->error( "Couldn't create CHECKSUMS for $dir: $@" ) if $@;
627             $logger->info(
628 0           do {
629 0 0         if( $rc == 1 ) { "Valid CHECKSUMS file is already present" }
  0 0          
630 0           elsif( $rc == 2 ) { "Wrote new CHECKSUMS file in $dir" }
631 0           else { "updatedir unexpectedly returned an error" }
632             } );
633             }
634             }
635            
636             =back
637              
638             =head1 TO DO
639              
640             =head1 SOURCE AVAILABILITY
641              
642             This code is in Github:
643              
644             git://github.com/briandfoy/mycpan--app--dpan.git
645              
646             =head1 AUTHOR
647              
648             brian d foy, C<< >>
649              
650             =head1 COPYRIGHT AND LICENSE
651              
652             Copyright (c) 2009, brian d foy, All Rights Reserved.
653              
654             You may redistribute this under the same terms as Perl itself.
655              
656             =cut
657              
658             1;