File Coverage

blib/lib/CPAN/ParseDistribution.pm
Criterion Covered Total %
statement 166 175 94.8
branch 65 88 73.8
condition 16 24 66.6
subroutine 24 24 100.0
pod 5 5 100.0
total 276 316 87.3


line stmt bran cond sub pod time code
1             package CPAN::ParseDistribution;
2              
3 4004     4004   2709022 use strict;
  4004         7558  
  4004         69200  
4 89     89   801 use warnings;
  89         178  
  89         4272  
5              
6 89     89   445 use vars qw($VERSION);
  89         534  
  89         5073  
7              
8             $VERSION = '1.54';
9              
10 89     89   267 use Cwd qw(getcwd abs_path);
  89         178  
  89         5518  
11 89     89   61321 use File::Temp qw(tempdir);
  89         1522256  
  89         4806  
12 89     89   124511 use File::Find::Rule;
  89         555449  
  89         801  
13 89     89   4183 use File::Path;
  89         89  
  89         4806  
14 89     89   64436 use Data::Dumper;
  89         507033  
  89         5963  
15 89     89   63457 use Archive::Tar;
  89         7329595  
  89         8099  
16 89     89   71645 use Archive::Zip;
  89         4805288  
  89         9790  
17 89     89   58384 use YAML qw(LoadFile);
  89         692242  
  89         6408  
18 89     89   65237 use Safe;
  89         2742001  
  89         4628  
19 89     89   47615 use Parallel::ForkManager;
  89         1276972  
  89         3115  
20 89     89   1691 use Devel::CheckOS qw(os_is);
  89         178  
  89         60876  
21              
22             $Archive::Tar::DO_NOT_USE_PREFIX = 1;
23             $Archive::Tar::CHMOD = 0;
24              
25             =head1 NAME
26              
27             CPAN::ParseDistribution - index a file from the BackPAN
28              
29             =head1 DESCRIPTION
30              
31             Given a file from the BackPAN, this will let you find out what versions
32             of what modules it contains, the distribution name and version
33              
34             =head1 SYNOPSIS
35              
36             my $dist = CPAN::ParseDistribution->new(
37             'A/AU/AUTHORID/subdirectory/Some-Distribution-1.23.tar.gz',
38             use_tar => '/bin/tar',
39             ...
40             );
41             my $modules = $dist->modules(); # hashref of modname => version
42             my $distname = $dist->dist();
43             my $distversion = $dist->distversion();
44              
45             =head1 METHODS
46              
47             =head2 new
48              
49             Constructor, takes a single mandatory argument, which should be a tarball
50             or zip file from the CPAN or BackPAN, and some optional named arguments:
51              
52             =over
53              
54             =item use_tar
55              
56             The full path to 'tar'. This is assumed to be GNU tar, and to be
57             sufficiently well-endowed as to be able to support bzip2 files.
58             Maybe I'll fix that at some point. If this isn't specified, then
59             Archive::Tar is used instead.
60              
61             You might want to use this if dealing with very large files, as
62             Archive::Tar is rather profligate with memory.
63              
64             =back
65              
66             =cut
67              
68             sub new {
69 2580     2580 1 1413919 my($class, $file, %extra_params) = @_;
70 2580 50       10756 die("file parameter is mandatory\n") unless($file);
71 2580 100       49081 die("$file doesn't exist\n") if(!-e $file);
72 2534 100       13182 die("$file looks like a ppm\n")
73             if($file =~ /\.ppm\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
74 2488 100       28213 die("$file isn't the right type\n")
75             if($file !~ /\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
76 2442         97216 $file = abs_path($file);
77              
78             # dist name and version
79 2442         33937 (my $dist = $file) =~ s{(^.*/|\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$)}{}gi;
80 2442         12535 $dist =~ /^(.*)-(\d.*)$/;
81 2442         18098 ($dist, my $distversion) = ($1, $2);
82 2442 100       15218 die("Can't index perl itself ($dist-$distversion)\n")
83             if($dist =~ /^(perl|ponie|kurila|parrot|Perl6-Pugs|v6-pugs)$/);
84              
85 2212         28445 bless {
86             file => $file,
87             modules => {},
88             dist => $dist,
89             distversion => $distversion,
90             extra_params => \%extra_params,
91             }, $class;
92             }
93              
94             # takes a filename, unarchives it, returns the directory it's been
95             # unarchived into
96             sub _unarchive {
97 1996     1996   5339 my($file, %extra_params) = @_;
98 1996         9535 my $olddir = getcwd();
99 1996         17933 my $tempdir = tempdir(TMPDIR => 1);
100 1996         790931 chdir($tempdir);
101 1996 100       30788 if($file =~ /\.zip$/i) {
    100          
102 122         2782 my $zip = Archive::Zip->new($file);
103 122 50       564572 $zip->extractTree() if($zip);
104             } elsif($file =~ /\.(tar(\.gz)?|tgz)$/i) {
105 1622 100       7051 if($extra_params{use_tar}) {
106             system(
107             $extra_params{use_tar},
108 393 100       4117861 (($file =~ /gz$/) ? 'xzf' : 'xf'),
109             $file
110             );
111 393         1767522 system("chmod -R u+r *"); # tar might preserve unreadable perms
112             } else {
113 1229         19784 my $tar = Archive::Tar->new($file, 1);
114 1229 50       21546940 $tar->extract() if($tar);
115             }
116             } else {
117 252 100       593 if($extra_params{use_tar}) {
118 82         843258 system( $extra_params{use_tar}, 'xjf', $file);
119 82         411598 system("chmod -R u+r *");
120             } else {
121 170 50       502784 open(my $fh, '-|', qw(bzip2 -dc), $file) || die("Can't unbzip2\n");
122 170         7406 my $tar = Archive::Tar->new($fh);
123 170 50       1196123 $tar->extract() if($tar);
124             }
125             }
126 1996         17144695 chdir($olddir);
127 1996         23929 return $tempdir;
128             }
129              
130             # adapted from PAUSE::pmfile::parse_version_safely in mldistwatch.pm
131             sub _parse_version_safely {
132 4098     4098   11437 my($parsefile) = @_;
133 4098         9982 my $result;
134             my $eval;
135 4098         16645 local $/ = "\n";
136 4098 50       151307 open(my $fh, $parsefile) or die "Could not open '$parsefile': $!";
137 4098         13282 my $inpod = 0;
138 4098         62691 while (<$fh>) {
139 89158 100       165993 $inpod = /^=(?!cut)/ ? 1 : /^=cut/ ? 0 : $inpod;
    100          
140 89158 100 100     260598 next if $inpod || /^\s*#/;
141 55194         52339 chop;
142 55194 100       174448 next unless /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/;
143 4004         23578 my($sigil, $var) = ($1, $2);
144 4004         10068 my $current_parsed_line = $_;
145             {
146 4004         7393 local $^W = 0;
  4004         21193  
147 89     89   445 no strict;
  89         89  
  89         95586  
148 4004         50100 my $c = Safe->new();
149 4004 50       3634182 $c->deny(qw(
150             tie untie tied chdir flock ioctl socket getpeername
151             ssockopt bind connect listen accept shutdown gsockopt
152             getsockname sleep alarm entereval reset dbstate
153             readline rcatline getc read formline enterwrite
154             leavewrite print sysread syswrite send recv eof
155             tell seek sysseek readdir telldir seekdir rewinddir
156             lock stat lstat readlink ftatime ftblk ftchr ftctime
157             ftdir fteexec fteowned fteread ftewrite ftfile ftis
158             ftlink ftmtime ftpipe ftrexec ftrowned ftrread ftsgid
159             ftsize ftsock ftsuid fttty ftzero ftrwrite ftsvtx
160             fttext ftbinary fileno ghbyname ghbyaddr ghostent
161             shostent ehostent gnbyname gnbyaddr gnetent snetent
162             enetent gpbyname gpbynumber gprotoent sprotoent
163             eprotoent gsbyname gsbyport gservent sservent
164             eservent gpwnam gpwuid gpwent spwent epwent
165             getlogin ggrnam ggrgid ggrent sgrent egrent msgctl
166             msgget msgrcv msgsnd semctl semget semop shmctl
167             shmget shmread shmwrite require dofile caller
168             syscall dump chroot link unlink rename symlink
169             truncate backtick system fork wait waitpid glob
170             exec exit kill time tms mkdir rmdir utime chmod
171             chown fcntl sysopen open close umask binmode
172             open_dir closedir
173             ), ($] >= 5.010 ? qw(say) : ()));
174 4004         290008 $c->share_from(__PACKAGE__, [qw(qv)]);
175 4004         172298 s/\buse\s+version\b.*?;//gs;
176             # qv broke some time between version.pm 0.74 and 0.82
177             # so just extract it and hope for the best
178 4004         11954 s/\bqv\s*\(\s*(["']?)([\d\.]+)\1\s*\)\s*/"$2"/;
179 4004         8771 s/\buse\s+vars\b//g;
180 4004         23743 $eval = qq{
181             local ${sigil}${var};
182             \$$var = undef; do {
183             $_
184             }; \$$var
185             };
186              
187 4004         10730 $result = _run_safely($c, $eval);
188             };
189             # stuff that's my fault because of the Safe compartment
190 3916 100 66     3030006 if($result->{error} && $result->{error} =~ /trapped by operation mask|safe compartment timed out/i) {
    50          
191 34         4097 warn("Unsafe code in \$VERSION\n".$result->{error}."\n$parsefile\n$eval");
192 34         174 $result = undef;
193             } elsif($result->{error}) {
194             warn "_parse_version_safely: ".Dumper({
195             eval => $eval,
196             line => $current_parsed_line,
197             file => $parsefile,
198             err => $result->{error},
199 0         0 });
200             }
201 3916         18105 last;
202             }
203 4010         58999 close $fh;
204              
205 4010 100       79570 return exists($result->{result}) ? $result->{result} : undef;
206             }
207              
208             sub _run_safely {
209 4004 50   4004   33857 if(os_is('Unix')) {
    0          
210 89     89   41207 eval 'use CPAN::ParseDistribution::Unix';
  89         356  
  89         1424  
  4004         11083528  
211 4004         29449 return CPAN::ParseDistribution::Unix->_run(@_);
212             } elsif(os_is('MicrosoftWindows')) {
213             # FIXME once someone supplies CPAN::ParseDistribution::Windows
214 0         0 warn("Windows is not fully supported by CPAN::ParseDistribution\n");
215 0         0 warn("See the LIMITATIONS section in the documentation\n");
216 0         0 eval 'use CPAN::ParseDistribution::Unix';
217 0         0 return CPAN::ParseDistribution::Unix->_run(@_);
218             }
219             }
220              
221             =head2 isdevversion
222              
223             Returns true or false depending on whether this is a developer-only
224             or trial release of a distribution. This is determined by looking for
225             an underscore in the distribution version or the string '-TRIAL' at the
226             end of the distribution version.
227              
228             =cut
229              
230             sub isdevversion {
231 326     326 1 2658 my $self = shift;
232 326 100       1228 return 1 if($self->distversion() =~ /(_|-TRIAL$)/);
233 110         528 return 0;
234             }
235              
236             =head2 modules
237              
238             Returns a hashref whose keys are module names, and their values are
239             the versions of the modules. The version number is retrieved by
240             eval()ing what looks like a $VERSION line in the code. This is done
241             in a C compartment, but may be a security risk if you do this
242             with untrusted code. Caveat user!
243              
244             =cut
245              
246             sub modules {
247 2104     2104 1 17917 my $self = shift;
248 2104 100       2872 if(!(keys %{$self->{modules}})) {
  2104         10183  
249 1996         6199 $self->{_modules_runs}++;
250 1996         3385 my $tempdir = _unarchive($self->{file}, %{$self->{extra_params}});
  1996         8565  
251              
252 1996         152093 my $meta = (File::Find::Rule->file()->name('META.yml')->in($tempdir))[0];
253 1996         3789412 my $ignore = join('|', qw(t inc xt));
254 1996         6088 my %ignorefiles;
255             my %ignorepackages;
256 0         0 my %ignorenamespaces;
257 1996 100 66     38120 if($meta && -e $meta) {
258 1540         3769 my $yaml = eval { LoadFile($meta); };
  1540         12939  
259 1540 50 33     13147736 if(!$@ &&
      33        
      33        
260             # can we hash-deref this thing?
261             ref($yaml) eq 'HASH' &&
262             exists($yaml->{no_index}) &&
263             ref($yaml->{no_index}) eq 'HASH'
264             ) {
265 1540 100       6667 if(exists($yaml->{no_index}->{directory})) {
266 1348 100       8997 if(ref($yaml->{no_index}->{directory}) eq 'ARRAY') {
    50          
267             $ignore = join('|', $ignore,
268 1242         3102 map { "$_/" } @{$yaml->{no_index}->{directory}}
  2718         9738  
  1242         4973  
269             );
270             } elsif(!ref($yaml->{no_index}->{directory})) {
271 106         711 $ignore .= '|'.$yaml->{no_index}->{directory}.'/'
272             }
273             }
274 1540 100       5942 if(exists($yaml->{no_index}->{file})) {
275 242 50       1763 if(ref($yaml->{no_index}->{file}) eq 'ARRAY') {
    0          
276 242         1704 %ignorefiles = map { $_, 1 }
277 242         512 @{$yaml->{no_index}->{file}};
  242         912  
278             } elsif(!ref($yaml->{no_index}->{file})) {
279 0         0 $ignorefiles{$yaml->{no_index}->{file}} = 1;
280             }
281             }
282 1540 100       5421 if(exists($yaml->{no_index}->{package})) {
283 288 50       1463 if(ref($yaml->{no_index}->{package}) eq 'ARRAY') {
    0          
284 384         1456 %ignorepackages = map { $_, 1 }
285 288         551 @{$yaml->{no_index}->{package}};
  288         1246  
286             } elsif(!ref($yaml->{no_index}->{package})) {
287 0         0 $ignorepackages{$yaml->{no_index}->{package}} = 1;
288             }
289             }
290 1540 100       12754 if(exists($yaml->{no_index}->{namespace})) {
291 94 50       608 if(ref($yaml->{no_index}->{namespace}) eq 'ARRAY') {
    0          
292 94         1272 %ignorenamespaces = map { $_, 1 }
293 94         257 @{$yaml->{no_index}->{namespace}};
  94         307  
294             } elsif(!ref($yaml->{no_index}->{namespace})) {
295 0         0 $ignorenamespaces{$yaml->{no_index}->{namespace}} = 1;
296             }
297             }
298             }
299             }
300             # find modules
301             my @PMs = grep {
302 1996         80777 my $pm = $_;
  7168         3099632  
303             $pm !~ m{^\Q$tempdir\E/[^/]+/($ignore)} &&
304 7168   100     154638 !grep { $pm =~ m{^\Q$tempdir\E/[^/]+/$_$} } (keys %ignorefiles)
305             } File::Find::Rule->file()->name('*.pm', '*.pm.PL')->in($tempdir);
306 1996         14651 foreach my $PM (@PMs) {
307 4098         19227 local $/ = undef;
308 4098         20490 my $version = _parse_version_safely($PM);
309 4010 50       199824 open(my $fh, $PM) || die("Can't read $PM\n");
310 4010         115091 $PM = <$fh>;
311 4010         24829 close($fh);
312              
313             # from PAUSE::pmfile::packages_per_pmfile in mldistwatch.pm
314 4010 100       72394 if($PM =~ /\bpackage[ \t]+([\w\:\']+)\s*($|[};])/) {
315 3952         14722 my $module = $1;
316             $self->{modules}->{$module} = $version unless(
317             exists($ignorepackages{$module}) ||
318 3952 100 100     98511 (grep { $module =~ /${_}::/ } keys %ignorenamespaces)
  594         16478  
319             );
320             }
321             }
322 1908         14044024 rmtree($tempdir);
323             }
324 2016         38604 return $self->{modules};
325             }
326              
327             =head2 dist
328              
329             Return the name of the distribution. eg, in the synopsis above, it would
330             return 'Some-Distribution'.
331              
332             =cut
333              
334             sub dist {
335 110     110 1 891 my $self = shift;
336 110         1001 return $self->{dist};
337             }
338              
339             =head2 distversion
340              
341             Return the version of the distribution. eg, in the synopsis above, it would
342             return 1.23.
343              
344             Strictly speaking, the CPAN doesn't have distribution versions -
345             Foo-Bar-1.23.tar.gz is not considered to have any relationship to
346             Foo-Bar-1.24.tar.gz, they just happen to coincidentally have rather
347             similar contents. But other tools, such as those used by the CPAN testers,
348             do treat distributions as being versioned.
349              
350             =cut
351              
352             sub distversion{
353 652     652 1 7215 my $self = shift;
354 652         5199 return $self->{distversion};
355             }
356              
357             =head1 SECURITY
358              
359             This module executes a very small amount of code from each module that
360             it finds in a distribution. While every effort has been made to do
361             this safely, there are no guarantees that it won't let the distributions
362             you're examining do horrible things to your machine, such as email your
363             password file to strangers. You are strongly advised to read the source
364             code and to run it in a very heavily restricted user account.
365              
366             =head1 LIMITATIONS, BUGS and FEEDBACK
367              
368             I welcome feedback about my code, including constructive criticism.
369             Bug reports should be made using L
370             and should include the smallest possible chunk of code, along with
371             any necessary data, which demonstrates the bug. Ideally, this
372             will be in the form of files which I can drop in to the module's
373             test suite.
374              
375             There is a known problem with parsing some pathological distributions
376             on Windows, where CPAN::ParseDistribution may either hang or crash. This
377             is because Windows doesn't properly support fork()ing and signals. I can
378             not fix this, but welcome patches with tests.
379              
380             =cut
381              
382             =head1 SEE ALSO
383              
384             L
385              
386             L
387              
388             =head1 AUTHOR, COPYRIGHT and LICENCE
389              
390             Copyright 2009-2011 David Cantrell Edavid@cantrell.org.ukE
391              
392             Contains code originally from the PAUSE by Andreas Koenig.
393              
394             This software is free-as-in-speech software, and may be used,
395             distributed, and modified under the terms of either the GNU
396             General Public Licence version 2 or the Artistic Licence. It's
397             up to you which one you use. The full text of the licences can
398             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
399              
400             =head1 CONSPIRACY
401              
402             This module is also free-as-in-mason software.
403              
404             =cut
405              
406             1;