File Coverage

blib/lib/CPAN/Mini/Visit.pm
Criterion Covered Total %
statement 110 123 89.4
branch 41 62 66.1
condition 5 12 41.6
subroutine 17 17 100.0
pod 2 2 100.0
total 175 216 81.0


line stmt bran cond sub pod time code
1             package CPAN::Mini::Visit;
2              
3             =pod
4              
5             =head1 NAME
6              
7             CPAN::Mini::Visit - A generalised API version of David Golden's visitcpan
8              
9             =head1 SYNOPSIS
10              
11             CPAN::Mini::Visit->new(
12             minicpan => '/minicpan',
13             acme => 0,
14             author => 'ADAMK',
15             warnings => 1,
16             random => 1,
17             callback => sub {
18             print "# counter: $_[0]->{counter}\n";
19             print "# archive: $_[0]->{archive}\n";
20             print "# tempdir: $_[0]->{tempdir}\n";
21             print "# dist: $_[0]->{dist}\n";
22             print "# author: $_[0]->{author}\n";
23             }
24             )->run;
25            
26             # counter: 1234
27             # archive: /minicpan/authors/id/A/AD/ADAMK/Config-Tiny-1.00.tar.gz
28             # tempdir: /tmp/1a4YRmFAJ3/Config-Tiny-1.00
29             # dist: ADAMK/Config-Tiny-1.00.tar.gz
30             # author: ADAMK
31              
32             =head1 DESCRIPTION
33              
34             L has been relatively successful at allowing processes
35             to run across the contents (or a subset of the contents) of an entire
36             L checkout.
37              
38             However it has become evident that while it is useful (and theoretically
39             optimal from a processing point of view) to maintain an expanded minicpan
40             checkout the sheer size of an expanded minicpan is such that it becomes
41             an undo burdon to manage, move, copy or even delete a directory tree with
42             hundreds of thousands of file totalling in the high single gigabytes in size.
43              
44             Annoyed by this, David Golden created L which takes an alternative
45             approach of sequentially expanding the tarball of each distribution into a
46             temporary directory, do the processing on that distribution, and then delete
47             the temporary directory before moving on to the next directory.
48              
49             This method results in a longer computation time, but with the benefit of
50             dramatically reduced system overhead, greater adaptability, and allow for
51             easy ad-hoc computations.
52              
53             This improvement in flexibility turns out to be worth the extra computation
54             time in almost all cases.
55              
56             B is a simplified and generalised API-based version of
57             David Golden's L script.
58              
59             It implements only the process of discovering, iterating and expanding
60             archives, before handing off control to an arbitrary callback function
61             provided to the constructor.
62              
63             =cut
64              
65 4     4   105463 use 5.008;
  4         16  
  4         145  
66 4     4   22 use strict;
  4         8  
  4         113  
67 4     4   29 use warnings;
  4         11  
  4         118  
68 4     4   21 use Carp ();
  4         7  
  4         78  
69 4     4   21 use File::Spec 0.80 ();
  4         90  
  4         89  
70 4     4   5009 use File::Temp 0.21 ();
  4         106618  
  4         130  
71 4     4   3617 use File::pushd 1.00 ();
  4         8661  
  4         103  
72 4     4   3463 use File::chmod 0.31 ();
  4         11119  
  4         115  
73 4     4   3874 use File::Find::Rule 0.27 ();
  4         33930  
  4         130  
74 4     4   4559 use Archive::Extract 0.32 ();
  4         1298256  
  4         209  
75 4     4   4684 use CPAN::Mini 0.576 ();
  4         665550  
  4         134  
76 4     4   5617 use Params::Util 1.00 ();
  4         16546  
  4         241  
77              
78             our $VERSION = '1.15';
79             # $VERSION = eval $VERSION;
80              
81 4         35 use Object::Tiny 1.06 qw{
82             minicpan
83             authors
84             callback
85             acme
86             author
87             ignore
88             random
89             warnings
90             prefer_bin
91 4     4   9263 };
  4         2416  
92              
93             =pod
94              
95             =head2 new
96              
97             Takes a variety of parameters and creates a new visitor object.
98              
99             The C param should be the root directory of a L
100             download.
101              
102             The C param should be a C reference that will be called
103             for each visit. The first parameter passed to the callback will be a C
104             reference containing the tarball location in the C key, the location
105             of the temporary directory in the C key, the canonical CPAN
106             distribution name in the C key, and the author id in the C key.
107              
108             The C param (true by default) can be set to false to exclude any
109             distributions that contain the string "Acme", allowing the visit to ignore
110             any of the joke modules.
111              
112             The C param can be provided to limit the visit to only the modules
113             owned by a specific author.
114              
115             The C param will cause the archives to be processed in random order
116             if enabled. If not, the archives will be processed in alphabetical order.
117              
118             The C param will turn on L warnings if enabled,
119             or disable warnings otherwise.
120              
121             The C param will tell L to use binary extract
122             instead of CPAN module extract wherever possible. By default, it will use
123             module-based extract.
124              
125             Returns a B object, or throws an exception on error.
126              
127             =cut
128              
129             sub new {
130 7     7 1 3143 my $class = shift;
131 7         108 my $self = bless { @_ }, $class;
132              
133             # Normalise
134 7 100       310 $self->{random} = $self->random ? 1 : 0;
135 7 50       262 $self->{prefer_bin} = $self->prefer_bin ? 1 : 0;
136 7 50       92 $self->{warnings} = 0 unless $self->{warnings};
137              
138             # Check params
139 7 50 33     208 unless (
      66        
140             Params::Util::_HASH($self->minicpan)
141             or (
142             defined Params::Util::_STRING($self->minicpan)
143             and
144             -d $self->minicpan
145             )
146             ) {
147 0         0 Carp::croak("Missing or invalid 'minicpan' param");
148             }
149 7 50       463 unless ( Params::Util::_CODELIKE($self->callback) ) {
150 0         0 Carp::croak("Missing or invalid 'callback' param");
151             }
152 7 100       369 if ( defined $self->ignore ) {
153 3 50       87 unless ( Params::Util::_ARRAYLIKE($self->ignore) ) {
154 0         0 Carp::croak("Invalid 'ignore' param");
155             }
156             # Clone the array so we can prepend more things
157 3         25 $self->{ignore} = [ @{ $self->ignore } ];
  3         75  
158             } else {
159 4         43 $self->{ignore} = [];
160             }
161              
162             # Apply the optional author setting
163 7         180 my $author = Params::Util::_STRING($self->author);
164 7 100       54 if ( defined $author ) {
165 2         255 unshift @{$self->ignore}, sub {
166 8     8   66 $_[0]->{author} ne $author;
167 2         9 };
168             }
169              
170             # Clean and apply the acme setting
171 7 100       65 $self->{acme} = 1 unless defined $self->{acme};
172 7         23 $self->{acme} = !! $self->{acme};
173 7 100       29 unless ( $self->{acme} ) {
174 3         9 unshift @{$self->ignore}, qr/\bAcme\b/;
  3         106  
175             }
176              
177             # Derive the authors directory
178 7         48 $self->{authors} = File::Spec->catdir( $self->_minicpan, 'authors', 'id' );
179 7 50       498 unless ( -d $self->authors ) {
180 0         0 Carp::croak("Authors directory '$self->{authors}' does not exist");
181             }
182              
183 7         185 return $self;
184             }
185              
186             =pod
187              
188             =head2 run
189              
190             The C method executes the visit process, taking no parameters and
191             returning true.
192              
193             Because the object contains no state information, you may call the C
194             method multiple times for a single visit object with no ill effects.
195              
196             =cut
197              
198             sub run {
199 7     7 1 4091 my $self = shift;
200              
201             # If we've been passed a HASH minicpan param,
202             # do an update_mirror first, before the regular run.
203 7 100       465 if ( Params::Util::_HASH($self->minicpan) ) {
204 4         30 CPAN::Mini->update_mirror(%{$self->minicpan});
  4         95  
205             }
206              
207             # Search for the files
208 7         64999 my $find = File::Find::Rule->name('*.tar.gz', '*.tgz', '*.zip', '*.bz2')->file->relative;
209 7         5478 my @files = sort $find->in( $self->authors );
210              
211             # Randomise if applicable
212 7 100       19855 if ( $self->random ) {
213 1         26 @files = sort { rand() <=> rand() } @files;
  4         9  
214             }
215              
216             # Extract the archive
217 7         58 my $counter = 0;
218 7         21 foreach my $path ( @files ) {
219             # Derive the main file properties
220 28         58572 my $archive = File::Spec->catfile( $self->authors, $path );
221 28         466 my $dist = $path;
222 28 50       340 $dist =~ s|^[A-Z]/[A-Z][A-Z]/|| or die "Bad distpath for $path";
223 28 50       239 unless ( $dist =~ /^([A-Z0-9-]+)/ ) {
224 0         0 die "Bad author for $path";
225             }
226 28         87 my $author = "$1";
227              
228             # Apply the ignore filters
229 28         58 my $skip = 0;
230 28         56 foreach my $filter ( @{$self->ignore} ) {
  28         735  
231 32 50       6171 if ( defined Params::Util::_STRING($filter) ) {
232 0         0 $filter = quotemeta $filter;
233 0         0 $filter = qr/$filter/;
234             }
235 32 100       160 if ( Params::Util::_REGEX($filter) ) {
    50          
236 20 100       667 $skip = 1 if $dist =~ $filter;
237             } elsif ( Params::Util::_CODELIKE($filter) ) {
238 12 100       94 $skip = 1 if $filter->( {
239             counter => $counter,
240             archive => $archive,
241             dist => $dist,
242             author => $author,
243             } );
244             } else {
245 0         0 Carp::croak("Missing or invalid filter");
246             }
247             }
248 28 100       181 next if $skip;
249              
250             # Explicitly ignore some damaging distributions
251             # if we are using Perl extraction
252 20 50       686 unless ( $self->prefer_bin ) {
253 20 50       168 next if $dist =~ /\bHarvey-\d/;
254 20 50       93 next if $dist =~ /\bText-SenseClusters\b/;
255 20 50       83 next if $dist =~ /\bBio-Affymetrix\b/;
256 20 50       84 next if $dist =~ /\bAlien-MeCab\b/;
257             }
258              
259             # Extract the archive
260 20         936 local $Archive::Extract::WARN = !! ($self->warnings > 1);
261 20         537 local $Archive::Extract::PREFER_BIN = $self->prefer_bin;
262 20         306 my $extract = Archive::Extract->new( archive => $archive );
263 20         7512 my $tmpdir = File::Temp->newdir;
264 20         13116 my $ok = 0;
265 20         183 SCOPE: {
266 20         42 my $pushd1 = File::pushd::pushd( File::Spec->curdir );
267 20         1841 $ok = eval {
268 20         125 $extract->extract( to => $tmpdir );
269             };
270             }
271 20 50 33     6405363 if ( $@ or not $ok ) {
272 0 0       0 if ( $self->warnings > 1 ) {
    0          
273 0         0 warn("Failed to extract '$archive': $@");
274             } elsif ( $self->warnings ) {
275 0         0 print " Failed: $dist\n";
276             }
277 0         0 next;
278             }
279              
280             # If using bin tools, do an additional check for
281             # damaged tarballs with non-executable directories (on unix)
282 20         122 my $extracted = $extract->extract_path;
283 20 50 33     1120 unless ( -r $extracted and -x $extracted ) {
284             # Handle special case where we have screwed up
285             # permissions on the extract directory.
286             # Just assume we have permissions for that.
287 0         0 File::chmod::chmod( 0755, $extracted );
288             }
289              
290             # Change into the directory
291 20         238 my $pushd2 = File::pushd::pushd( $extracted );
292              
293             # Invoke the callback
294 20         6384 $self->callback->( {
295             counter => ++$counter,
296             archive => $archive,
297             dist => $dist,
298             author => $author,
299             tempdir => $extracted,
300             } );
301             }
302              
303 7         27767 return 1;
304             }
305              
306              
307              
308              
309              
310             ######################################################################
311             # Support Methods
312              
313             sub _minicpan {
314 7     7   62 my $self = shift;
315 7 100       221 return Params::Util::_HASH($self->minicpan)
316             ? $self->minicpan->{local}
317             : $self->minicpan;
318             }
319              
320             1;
321              
322             =pod
323              
324             =head1 SUPPORT
325              
326             Bugs should be reported via the CPAN bug tracker at
327              
328             L
329              
330             For other issues, contact the author.
331              
332             =head1 AUTHOR
333              
334             Adam Kennedy Eadamk@cpan.orgE
335              
336             =head1 COPYRIGHT
337              
338             Copyright 2009 - 2012 Adam Kennedy.
339              
340             This program is free software; you can redistribute
341             it and/or modify it under the same terms as Perl itself.
342              
343             The full text of the license can be found in the
344             LICENSE file included with this module.
345              
346             =cut