File Coverage

blib/lib/CPAN/Mini/Visit.pm
Criterion Covered Total %
statement 109 122 89.3
branch 41 62 66.1
condition 5 12 41.6
subroutine 17 17 100.0
pod 2 2 100.0
total 174 215 80.9


line stmt bran cond sub pod time code
1             package CPAN::Mini::Visit;
2              
3             =pod
4              
5             =head1 NAME
6              
7             CPAN::Mini::Visit - A generalised API version of David Golden's visitcpan
8              
9             =head1 SYNOPSIS
10              
11             CPAN::Mini::Visit->new(
12             minicpan => '/minicpan',
13             acme => 0,
14             author => 'ADAMK',
15             warnings => 1,
16             random => 1,
17             callback => sub {
18             print "# counter: $_[0]->{counter}\n";
19             print "# archive: $_[0]->{archive}\n";
20             print "# tempdir: $_[0]->{tempdir}\n";
21             print "# dist: $_[0]->{dist}\n";
22             print "# author: $_[0]->{author}\n";
23             }
24             )->run;
25            
26             # counter: 1234
27             # archive: /minicpan/authors/id/A/AD/ADAMK/Config-Tiny-1.00.tar.gz
28             # tempdir: /tmp/1a4YRmFAJ3/Config-Tiny-1.00
29             # dist: ADAMK/Config-Tiny-1.00.tar.gz
30             # author: ADAMK
31              
32             =head1 DESCRIPTION
33              
34             L has been relatively successful at allowing processes
35             to run across the contents (or a subset of the contents) of an entire
36             L checkout.
37              
38             However it has become evident that while it is useful (and theoretically
39             optimal from a processing point of view) to maintain an expanded minicpan
40             checkout the sheer size of an expanded minicpan is such that it becomes
41             an undo burdon to manage, move, copy or even delete a directory tree with
42             hundreds of thousands of file totalling in the high single gigabytes in size.
43              
44             Annoyed by this, David Golden created L which takes an alternative
45             approach of sequentially expanding the tarball of each distribution into a
46             temporary directory, do the processing on that distribution, and then delete
47             the temporary directory before moving on to the next directory.
48              
49             This method results in a longer computation time, but with the benefit of
50             dramatically reduced system overhead, greater adaptability, and allow for
51             easy ad-hoc computations.
52              
53             This improvement in flexibility turns out to be worth the extra computation
54             time in almost all cases.
55              
56             B is a simplified and generalised API-based version of
57             David Golden's L script.
58              
59             It implements only the process of discovering, iterating and expanding
60             archives, before handing off control to an arbitrary callback function
61             provided to the constructor.
62              
63             =cut
64              
65 4     4   51395 use 5.008;
  4         10  
66 4     4   15 use strict;
  4         4  
  4         68  
67 4     4   18 use warnings;
  4         3  
  4         95  
68 4     4   11 use Carp ();
  4         4  
  4         62  
69 4     4   11 use File::Spec 0.80 ();
  4         60  
  4         71  
70 4     4   2260 use File::Temp 0.21 ();
  4         60813  
  4         102  
71 4     4   1462 use File::pushd 1.00 ();
  4         3105  
  4         87  
72 4     4   1513 use File::chmod 0.31 ();
  4         8147  
  4         95  
73 4     4   1731 use File::Find::Rule 0.27 ();
  4         22982  
  4         100  
74 4     4   2081 use Archive::Extract 0.32 ();
  4         507912  
  4         136  
75 4     4   2097 use CPAN::Mini 0.576 ();
  4         328943  
  4         126  
76 4     4   1703 use Params::Util 1.00 ();
  4         7916  
  4         244  
77              
78             our $VERSION = '0.12_01';
79             $VERSION = eval $VERSION;
80              
81 4         19 use Object::Tiny 1.06 qw{
82             minicpan
83             authors
84             callback
85             skip
86             acme
87             author
88             ignore
89             random
90             warnings
91             prefer_bin
92 4     4   1556 };
  4         1022  
93              
94             =pod
95              
96             =head2 new
97              
98             Takes a variety of parameters and creates a new visitor object.
99              
100             The C param should be the root directory of a L
101             download.
102              
103             The C param should be a C reference that will be called
104             for each visit. The first parameter passed to the callback will be a C
105             reference containing the tarball location in the C key, the location
106             of the temporary directory in the C key, the canonical CPAN
107             distribution name in the C key, and the author id in the C key.
108              
109             The optional C param should be a C reference
110             that will be called for each visit before extracting dist. The first
111             parameter passed to the callback will be a C reference with C,
112             C and C keys. Callback should return 1 if dist should be skipped
113             and 0 otherwise.
114              
115             The C param (true by default) can be set to false to exclude any
116             distributions that contain the string "Acme", allowing the visit to ignore
117             any of the joke modules.
118              
119             The C param can be provided to limit the visit to only the modules
120             owned by a specific author.
121              
122             The C param will cause the archives to be processed in random order
123             if enabled. If not, the archives will be processed in alphabetical order.
124              
125             The C param will turn on L warnings if enabled,
126             or disable warnings otherwise.
127              
128             The C param will tell L to use binary extract
129             instead of CPAN module extract wherever possible. By default, it will use
130             module-based extract.
131              
132             Returns a B object, or throws an exception on error.
133              
134             =cut
135              
136             sub new {
137 7     7 1 1418 my $class = shift;
138 7         41 my $self = bless { @_ }, $class;
139              
140             # Normalise
141 7 100       154 $self->{random} = $self->random ? 1 : 0;
142 7 50       191 $self->{prefer_bin} = $self->prefer_bin ? 1 : 0;
143 7 50       59 $self->{warnings} = 0 unless $self->{warnings};
144              
145             # Check params
146 7 50 33     109 unless (
      66        
147             Params::Util::_HASH($self->minicpan)
148             or (
149             defined Params::Util::_STRING($self->minicpan)
150             and
151             -d $self->minicpan
152             )
153             ) {
154 0         0 Carp::croak("Missing or invalid 'minicpan' param");
155             }
156 7 50       265 unless ( Params::Util::_CODELIKE($self->callback) ) {
157 0         0 Carp::croak("Missing or invalid 'callback' param");
158             }
159 7 100       125 if ( defined $self->ignore ) {
160 3 50       46 unless ( Params::Util::_ARRAYLIKE($self->ignore) ) {
161 0         0 Carp::croak("Invalid 'ignore' param");
162             }
163             # Clone the array so we can prepend more things
164 3         13 $self->{ignore} = [ @{ $self->ignore } ];
  3         41  
165             } else {
166 4         22 $self->{ignore} = [];
167             }
168              
169             # Apply the optional author setting
170 7         109 my $author = Params::Util::_STRING($self->author);
171 7 100       33 if ( defined $author ) {
172 2         28 unshift @{$self->ignore}, sub {
173 8     8   39 $_[0]->{author} ne $author;
174 2         6 };
175             }
176              
177             # Clean and apply the acme setting
178 7 100       37 $self->{acme} = 1 unless defined $self->{acme};
179 7         18 $self->{acme} = !! $self->{acme};
180 7 100       56 unless ( $self->{acme} ) {
181 3         5 unshift @{$self->ignore}, qr/\bAcme\b/;
  3         42  
182             }
183              
184             # Derive the authors directory
185 7         36 $self->{authors} = File::Spec->catdir( $self->_minicpan, 'authors', 'id' );
186 7 50       268 unless ( -d $self->authors ) {
187 0         0 Carp::croak("Authors directory '$self->{authors}' does not exist");
188             }
189              
190 7         125 return $self;
191             }
192              
193             =pod
194              
195             =head2 run
196              
197             The C method executes the visit process, taking no parameters and
198             returning true.
199              
200             Because the object contains no state information, you may call the C
201             method multiple times for a single visit object with no ill effects.
202              
203             =cut
204              
205             sub run {
206 7     7 1 2026 my $self = shift;
207              
208             # If we've been passed a HASH minicpan param,
209             # do an update_mirror first, before the regular run.
210 7 100       139 if ( Params::Util::_HASH($self->minicpan) ) {
211 4         20 CPAN::Mini->update_mirror(%{$self->minicpan});
  4         50  
212             }
213              
214             # Search for the files
215 7         10918 my $find = File::Find::Rule->name('*.tar.gz', '*.tgz', '*.zip', '*.bz2')->file->relative;
216 7         2784 my @files = sort $find->in( $self->authors );
217              
218             # Randomise if applicable
219 7 100       7730 if ( $self->random ) {
220 1         15 @files = sort { rand() <=> rand() } @files;
  4         9  
221             }
222              
223             # Extract the archive
224 7         29 my $counter = 0;
225 7         15 foreach my $path ( @files ) {
226             # Derive the main file properties
227 28         22499 my $archive = File::Spec->catfile( $self->authors, $path );
228 28         283 my $dist = $path;
229 28 50       178 $dist =~ s|^[A-Z]/[A-Z][A-Z]/|| or die "Bad distpath for $path";
230 28 50       116 unless ( $dist =~ /^([A-Z]+)/ ) {
231 0         0 die "Bad author for $path";
232             }
233 28         63 my $author = "$1";
234              
235             # Apply the ignore filters
236 28         30 my $skip = 0;
237 28         33 foreach my $filter ( @{$self->ignore} ) {
  28         395  
238 32 50       155 if ( defined Params::Util::_STRING($filter) ) {
239 0         0 $filter = quotemeta $filter;
240 0         0 $filter = qr/$filter/;
241             }
242 32 100       96 if ( Params::Util::_REGEX($filter) ) {
    50          
243 20 100       98 $skip = 1 if $dist =~ $filter;
244             } elsif ( Params::Util::_CODELIKE($filter) ) {
245 12 100       56 $skip = 1 if $filter->( {
246             counter => $counter,
247             archive => $archive,
248             dist => $dist,
249             author => $author,
250             } );
251             } else {
252 0         0 Carp::croak("Missing or invalid filter");
253             }
254             }
255 28 100       80 next if $skip;
256              
257             # Explicitly ignore some damaging distributions
258             # if we are using Perl extraction
259 20 50       302 unless ( $self->prefer_bin ) {
260 20 50       117 next if $dist =~ /\bHarvey-\d/;
261 20 50       52 next if $dist =~ /\bText-SenseClusters\b/;
262 20 50       56 next if $dist =~ /\bBio-Affymetrix\b/;
263 20 50       52 next if $dist =~ /\bAlien-MeCab\b/;
264             }
265              
266             # Extract the archive
267 20         266 local $Archive::Extract::WARN = !! ($self->warnings > 1);
268 20         288 local $Archive::Extract::PREFER_BIN = $self->prefer_bin;
269 20         157 my $extract = Archive::Extract->new( archive => $archive );
270 20         3794 my $tmpdir = File::Temp->newdir;
271 20         6817 my $ok = 0;
272             SCOPE: {
273 20         26 my $pushd1 = File::pushd::pushd( File::Spec->curdir );
  20         108  
274 20         777 $ok = eval {
275 20         77 $extract->extract( to => $tmpdir );
276             };
277             }
278 20 50 33     1371198 if ( $@ or not $ok ) {
279 0 0       0 if ( $self->warnings > 1 ) {
    0          
280 0         0 warn("Failed to extract '$archive': $@");
281             } elsif ( $self->warnings ) {
282 0         0 print " Failed: $dist\n";
283             }
284 0         0 next;
285             }
286              
287             # If using bin tools, do an additional check for
288             # damaged tarballs with non-executable directories (on unix)
289 20         74 my $extracted = $extract->extract_path;
290 20 50 33     460 unless ( -r $extracted and -x $extracted ) {
291             # Handle special case where we have screwed up
292             # permissions on the extract directory.
293             # Just assume we have permissions for that.
294 0         0 File::chmod::chmod( 0755, $extracted );
295             }
296              
297             # Change into the directory
298 20         101 my $pushd2 = File::pushd::pushd( $extracted );
299              
300             # Invoke the callback
301 20         2484 $self->callback->( {
302             counter => ++$counter,
303             archive => $archive,
304             dist => $dist,
305             author => $author,
306             tempdir => $extracted,
307             } );
308             }
309              
310 7         9751 return 1;
311             }
312              
313              
314              
315              
316              
317             ######################################################################
318             # Support Methods
319              
320             sub _minicpan {
321 7     7   11 my $self = shift;
322             return Params::Util::_HASH($self->minicpan)
323             ? $self->minicpan->{local}
324 7 100       124 : $self->minicpan;
325             }
326              
327             1;
328              
329             =pod
330              
331             =head1 SUPPORT
332              
333             Bugs should be reported via the CPAN bug tracker at
334              
335             L
336              
337             For other issues, contact the author.
338              
339             =head1 AUTHOR
340              
341             Adam Kennedy Eadamk@cpan.orgE
342              
343             =head1 COPYRIGHT
344              
345             Copyright 2009 - 2011 Adam Kennedy.
346              
347             This program is free software; you can redistribute
348             it and/or modify it under the same terms as Perl itself.
349              
350             The full text of the license can be found in the
351             LICENSE file included with this module.
352              
353             =cut