File Coverage

blib/lib/CPAN/ParseDistribution.pm
Criterion Covered Total %
statement 173 182 95.0
branch 65 88 73.8
condition 16 24 66.6
subroutine 24 24 100.0
pod 5 5 100.0
total 283 323 87.6


line stmt bran cond sub pod time code
1             package CPAN::ParseDistribution;
2              
3 2210     2210   4476663 use strict;
  2210         10371  
  2210         68466  
4 261     261   4105 use warnings;
  261         933  
  261         20103  
5              
6 155     155   2436 use vars qw($VERSION);
  155         3012  
  155         9212  
7              
8             $VERSION = '1.52';
9              
10 66     66   396 use Cwd qw(getcwd abs_path);
  66         132  
  66         42042  
11 66     66   198858 use File::Temp qw(tempdir);
  66         2045406  
  66         6798  
12 66     66   787644 use File::Find::Rule;
  66         924066  
  66         792  
13 66     66   5016 use File::Path;
  66         132  
  66         6006  
14 66     66   91542 use Data::Dumper;
  66         628914  
  66         5610  
15 66     66   394218 use Archive::Tar;
  66         24566520  
  66         5082  
16 66     66   688578 use Archive::Zip;
  66         10895082  
  66         3894  
17 66     66   282942 use YAML qw(LoadFile);
  66         1537932  
  66         8712  
18 66     66   240372 use Safe;
  66         3250104  
  66         4752  
19 66     66   303930 use Parallel::ForkManager;
  66         1269510  
  66         3102  
20 66     66   1650 use Devel::CheckOS qw(os_is);
  66         66  
  66         83886  
21              
22             $Archive::Tar::DO_NOT_USE_PREFIX = 1;
23             $Archive::Tar::CHMOD = 0;
24              
25             =head1 NAME
26              
27             CPAN::ParseDistribution - index a file from the BackPAN
28              
29             =head1 DESCRIPTION
30              
31             Given a file from the BackPAN, this will let you find out what versions
32             of what modules it contains, the distribution name and version
33              
34             =head1 SYNOPSIS
35              
36             my $dist = CPAN::ParseDistribution->new(
37             'A/AU/AUTHORID/subdirectory/Some-Distribution-1.23.tar.gz',
38             use_tar => '/bin/tar',
39             ...
40             );
41             my $modules = $dist->modules(); # hashref of modname => version
42             my $distname = $dist->dist();
43             my $distversion = $dist->distversion();
44              
45             =head1 METHODS
46              
47             =head2 new
48              
49             Constructor, takes a single mandatory argument, which should be a tarball
50             or zip file from the CPAN or BackPAN, and some optional named arguments:
51              
52             =over
53              
54             =item use_tar
55              
56             The full path to 'tar'. This is assumed to be GNU tar, and to be
57             sufficiently well-endowed as to be able to support bzip2 files.
58             Maybe I'll fix that at some point. If this isn't specified, then
59             Archive::Tar is used instead.
60              
61             You might want to use this if dealing with very large files, as
62             Archive::Tar is rather profligate with memory.
63              
64             =back
65              
66             =cut
67              
68             sub new {
69 1836     1836 1 2559492 my($class, $file, %extra_params) = @_;
70 1836 50       15826 die("file parameter is mandatory\n") unless($file);
71 1836 100       10913023 die("$file doesn't exist\n") if(!-e $file);
72 1800 100       14789 die("$file looks like a ppm\n")
73             if($file =~ /\.ppm\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
74 1764 100       43911 die("$file isn't the right type\n")
75             if($file !~ /\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$/i);
76 1728         167814 $file = abs_path($file);
77              
78             # dist name and version
79 1728         36947 (my $dist = $file) =~ s{(^.*/|\.(tar(\.gz|\.bz2)?|tbz|tgz|zip)$)}{}gi;
80 1728         17624 $dist =~ /^(.*)-(\d.*)$/;
81 1728         19833 ($dist, my $distversion) = ($1, $2);
82 1728 100       17759 die("Can't index perl itself ($dist-$distversion)\n")
83             if($dist =~ /^(perl|ponie|kurila|parrot|Perl6-Pugs|v6-pugs)$/);
84              
85 1548         30857 bless {
86             file => $file,
87             modules => {},
88             dist => $dist,
89             distversion => $distversion,
90             extra_params => \%extra_params,
91             }, $class;
92             }
93              
94             # takes a filename, unarchives it, returns the directory it's been
95             # unarchived into
96             sub _unarchive {
97 1346     1346   6069 my($file, %extra_params) = @_;
98 1346         12718 my $olddir = getcwd();
99 1346         23823 my $tempdir = tempdir(TMPDIR => 1);
100 1346         1035033 chdir($tempdir);
101 1346 100       59255 if($file =~ /\.zip$/i) {
    100          
102 90         2970 my $zip = Archive::Zip->new($file);
103 90 50       1648950 $zip->extractTree() if($zip);
104             } elsif($file =~ /\.(tar(\.gz)?|tgz)$/i) {
105 1069 100       11031 if($extra_params{use_tar}) {
106 274 100       7515305 system(
107             $extra_params{use_tar},
108             (($file =~ /gz$/) ? 'xzf' : 'xf'),
109             $file
110             );
111 274         5515133 system("chmod -R u+r *"); # tar might preserve unreadable perms
112             } else {
113 795         22269 my $tar = Archive::Tar->new($file, 1);
114 795 50       165491953 $tar->extract() if($tar);
115             }
116             } else {
117 187 100       748 if($extra_params{use_tar}) {
118 63         1869259 system( $extra_params{use_tar}, 'xjf', $file);
119 63         839958 system("chmod -R u+r *");
120             } else {
121 124 50       670046 open(my $fh, '-|', qw(bzip2 -dc), $file) || die("Can't unbzip2\n");
122 124         12952 my $tar = Archive::Tar->new($fh);
123 124 50       12139822 $tar->extract() if($tar);
124             }
125             }
126 1346         47621555 chdir($olddir);
127 1346         66071 return $tempdir;
128             }
129              
130             # adapted from PAUSE::pmfile::parse_version_safely in mldistwatch.pm
131             sub _parse_version_safely {
132 2556     2556   14251 my($parsefile) = @_;
133 2556         17299 my $result;
134             my $eval;
135 2556         14468 local $/ = "\n";
136 2556 50       268353 open(my $fh, $parsefile) or die "Could not open '$parsefile': $!";
137 2556         6917 my $inpod = 0;
138 2556         63346 while (<$fh>) {
139 60484 100       326341 $inpod = /^=(?!cut)/ ? 1 : /^=cut/ ? 0 : $inpod;
    100          
140 60484 100 100     433455 next if $inpod || /^\s*#/;
141 37148         164675 chop;
142 37148 100       376075 next unless /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/;
143 2494         25623 my($sigil, $var) = ($1, $2);
144 2494         6081 my $current_parsed_line = $_;
145             {
146 2494         5469 local $^W = 0;
  2494         15964  
147 66     66   528 no strict;
  66         132  
  66         118404  
148 2494         63295 my $c = Safe->new();
149 2494 50       4184310 $c->deny(qw(
150             tie untie tied chdir flock ioctl socket getpeername
151             ssockopt bind connect listen accept shutdown gsockopt
152             getsockname sleep alarm entereval reset dbstate
153             readline rcatline getc read formline enterwrite
154             leavewrite print sysread syswrite send recv eof
155             tell seek sysseek readdir telldir seekdir rewinddir
156             lock stat lstat readlink ftatime ftblk ftchr ftctime
157             ftdir fteexec fteowned fteread ftewrite ftfile ftis
158             ftlink ftmtime ftpipe ftrexec ftrowned ftrread ftsgid
159             ftsize ftsock ftsuid fttty ftzero ftrwrite ftsvtx
160             fttext ftbinary fileno ghbyname ghbyaddr ghostent
161             shostent ehostent gnbyname gnbyaddr gnetent snetent
162             enetent gpbyname gpbynumber gprotoent sprotoent
163             eprotoent gsbyname gsbyport gservent sservent
164             eservent gpwnam gpwuid gpwent spwent epwent
165             getlogin ggrnam ggrgid ggrent sgrent egrent msgctl
166             msgget msgrcv msgsnd semctl semget semop shmctl
167             shmget shmread shmwrite require dofile caller
168             syscall dump chroot link unlink rename symlink
169             truncate backtick system fork wait waitpid glob
170             exec exit kill time tms mkdir rmdir utime chmod
171             chown fcntl sysopen open close umask binmode
172             open_dir closedir
173             ), ($] >= 5.010 ? qw(say) : ()));
174 2494         260195 $c->share_from(__PACKAGE__, [qw(qv)]);
175 2494         179038 s/\buse\s+version\b.*?;//gs;
176             # qv broke some time between version.pm 0.74 and 0.82
177             # so just extract it and hope for the best
178 2494         14587 s/\bqv\s*\(\s*(["']?)([\d\.]+)\1\s*\)\s*/"$2"/;
179 2494         6767 s/\buse\s+vars\b//g;
180 2494         25965 $eval = qq{
181             local ${sigil}${var};
182             \$$var = undef; do {
183             $_
184             }; \$$var
185             };
186              
187 2494         16241 $result = _run_safely($c, $eval);
188             };
189             # stuff that's my fault because of the Safe compartment
190 2429 100 66     13195583 if($result->{error} && $result->{error} =~ /trapped by operation mask|safe compartment timed out/i) {
    50          
191 292         53070 warn("Unsafe code in \$VERSION\n".$result->{error}."\n$parsefile\n$eval");
192 292         2126 $result = undef;
193             } elsif($result->{error}) {
194 0         0 warn "_parse_version_safely: ".Dumper({
195             eval => $eval,
196             line => $current_parsed_line,
197             file => $parsefile,
198             err => $result->{error},
199             });
200             }
201 2429         21592 last;
202             }
203 2491         74422 close $fh;
204              
205 2491 100       101290 return exists($result->{result}) ? $result->{result} : undef;
206             }
207              
208             sub _run_safely {
209 2494 50   2494   61688 if(os_is('Unix')) {
    0          
210 66     66   153318 eval 'use CPAN::ParseDistribution::Unix';
  66         198  
  66         1716  
  2494         16721011  
211 2494         47592 return CPAN::ParseDistribution::Unix->_run(@_);
212             } elsif(os_is('MicrosoftWindows')) {
213             # FIXME once someone supplies CPAN::ParseDistribution::Windows
214 0         0 warn("Windows is not fully supported by CPAN::ParseDistribution\n");
215 0         0 warn("See the LIMITATIONS section in the documentation\n");
216 0         0 eval 'use CPAN::ParseDistribution::Unix';
217 0         0 return CPAN::ParseDistribution::Unix->_run(@_);
218             }
219             }
220              
221             =head2 isdevversion
222              
223             Returns true or false depending on whether this is a developer-only
224             or trial release of a distribution. This is determined by looking for
225             an underscore in the distribution version or the string '-TRIAL' at the
226             end of the distribution version.
227              
228             =cut
229              
230             sub isdevversion {
231 230     230 1 3009 my $self = shift;
232 230 100       1060 return 1 if($self->distversion() =~ /(_|-TRIAL$)/);
233 78         546 return 0;
234             }
235              
236             =head2 modules
237              
238             Returns a hashref whose keys are module names, and their values are
239             the versions of the modules. The version number is retrieved by
240             eval()ing what looks like a $VERSION line in the code. This is done
241             in a C compartment, but may be a security risk if you do this
242             with untrusted code. Caveat user!
243              
244             =cut
245              
246             sub modules {
247 1422     1422 1 16189 my $self = shift;
248 1422 100       3142 if(!(keys %{$self->{modules}})) {
  1422         8188  
249 1346         4735 $self->{_modules_runs}++;
250 1346         3547 my $tempdir = _unarchive($self->{file}, %{$self->{extra_params}});
  1346         7633  
251              
252 1346         233643 my $meta = (File::Find::Rule->file()->name('META.yml')->in($tempdir))[0];
253 1346         5083888 my $ignore = join('|', qw(t inc xt));
254 1346         4495 my %ignorefiles;
255             my %ignorepackages;
256 0         0 my %ignorenamespaces;
257 1346 100 66     70378 if($meta && -e $meta) {
258 1000         2921 my $yaml = eval { LoadFile($meta); };
  1000         15429  
259 1000 50 33     23027038 if(!$@ &&
      33        
      33        
260             UNIVERSAL::isa($yaml, 'HASH') &&
261             exists($yaml->{no_index}) &&
262             UNIVERSAL::isa($yaml->{no_index}, 'HASH')
263             ) {
264 1000 100       5557 if(exists($yaml->{no_index}->{directory})) {
265 872 100       2605 if(eval { @{$yaml->{no_index}->{directory}} }) {
  872 50       1564  
  872         8180  
266 798         6802 $ignore = join('|', $ignore,
267 798         1781 @{$yaml->{no_index}->{directory}}
268             );
269             } elsif(!ref($yaml->{no_index}->{directory})) {
270 74         636 $ignore .= '|'.$yaml->{no_index}->{directory}
271             }
272             }
273 1000 100       7245 if(exists($yaml->{no_index}->{file})) {
274 66 50       438 if(eval { @{$yaml->{no_index}->{file}} }) {
  66 0       312  
  66         750  
275 66         720 %ignorefiles = map { $_, 1 }
  66         336  
276 66         378 @{$yaml->{no_index}->{file}};
277             } elsif(!ref($yaml->{no_index}->{file})) {
278 0         0 $ignorefiles{$yaml->{no_index}->{file}} = 1;
279             }
280             }
281 1000 100       4461 if(exists($yaml->{no_index}->{package})) {
282 192 50       609 if(eval { @{$yaml->{no_index}->{package}} }) {
  192 0       925  
  192         2208  
283 256         2023 %ignorepackages = map { $_, 1 }
  192         1775  
284 192         545 @{$yaml->{no_index}->{package}};
285             } elsif(!ref($yaml->{no_index}->{package})) {
286 0         0 $ignorepackages{$yaml->{no_index}->{package}} = 1;
287             }
288             }
289 1000 100       13116 if(exists($yaml->{no_index}->{namespace})) {
290 62 50       448 if(eval { @{$yaml->{no_index}->{namespace}} }) {
  62 0       124  
  62         666  
291 62         2218 %ignorenamespaces = map { $_, 1 }
  62         232  
292 62         324 @{$yaml->{no_index}->{namespace}};
293             } elsif(!ref($yaml->{no_index}->{namespace})) {
294 0         0 $ignorenamespaces{$yaml->{no_index}->{namespace}} = 1;
295             }
296             }
297             }
298             }
299             # find modules
300 4475         3997732 my @PMs = grep {
301 1346         88505 my $pm = $_;
302             $pm !~ m{^\Q$tempdir\E/[^/]+/($ignore)} &&
303 4475   100     160465 !grep { $pm =~ m{^\Q$tempdir\E/[^/]+/$_$} } (keys %ignorefiles)
304             } File::Find::Rule->file()->name('*.pm', '*.pm.PL')->in($tempdir);
305 1346         16566 foreach my $PM (@PMs) {
306 2556         22142 local $/ = undef;
307 2556         21360 my $version = _parse_version_safely($PM);
308 2491 50       264758 open(my $fh, $PM) || die("Can't read $PM\n");
309 2491         209277 $PM = <$fh>;
310 2491         38845 close($fh);
311              
312             # from PAUSE::pmfile::packages_per_pmfile in mldistwatch.pm
313 2491 100       60757 if($PM =~ /\bpackage[ \t]+([\w\:\']+)\s*($|[};])/) {
314 2446         15477 my $module = $1;
315 380         23297 $self->{modules}->{$module} = $version unless(
316             exists($ignorepackages{$module}) ||
317 2446 100 100     365626 (grep { $module =~ /${_}::/ } keys %ignorenamespaces)
318             );
319             }
320             }
321 1281         18043694 rmtree($tempdir);
322             }
323 1357         51836 return $self->{modules};
324             }
325              
326             =head2 dist
327              
328             Return the name of the distribution. eg, in the synopsis above, it would
329             return 'Some-Distribution'.
330              
331             =cut
332              
333             sub dist {
334 78     78 1 642 my $self = shift;
335 78         912 return $self->{dist};
336             }
337              
338             =head2 distversion
339              
340             Return the version of the distribution. eg, in the synopsis above, it would
341             return 1.23.
342              
343             Strictly speaking, the CPAN doesn't have distribution versions -
344             Foo-Bar-1.23.tar.gz is not considered to have any relationship to
345             Foo-Bar-1.24.tar.gz, they just happen to coincidentally have rather
346             similar contents. But other tools, such as those used by the CPAN testers,
347             do treat distributions as being versioned.
348              
349             =cut
350              
351             sub distversion{
352 460     460 1 945 my $self = shift;
353 460         11643 return $self->{distversion};
354             }
355              
356             =head1 SECURITY
357              
358             This module executes a very small amount of code from each module that
359             it finds in a distribution. While every effort has been made to do
360             this safely, there are no guarantees that it won't let the distributions
361             you're examining do horrible things to your machine, such as email your
362             password file to strangers. You are strongly advised to read the source
363             code and to run it in a very heavily restricted user account.
364              
365             =head1 LIMITATIONS, BUGS and FEEDBACK
366              
367             I welcome feedback about my code, including constructive criticism.
368             Bug reports should be made using L
369             and should include the smallest possible chunk of code, along with
370             any necessary data, which demonstrates the bug. Ideally, this
371             will be in the form of files which I can drop in to the module's
372             test suite.
373              
374             There is a known problem with parsing some pathological distributions
375             on Windows, where CPAN::ParseDistribution may either hang or crash. This
376             is because Windows doesn't properly support fork()ing and signals. I can
377             not fix this, but welcome patches with tests.
378              
379             =cut
380              
381             =head1 SEE ALSO
382              
383             L
384              
385             L
386              
387             =head1 AUTHOR, COPYRIGHT and LICENCE
388              
389             Copyright 2009-2011 David Cantrell Edavid@cantrell.org.ukE
390              
391             Contains code originally from the PAUSE by Andreas Koenig.
392              
393             This software is free-as-in-speech software, and may be used,
394             distributed, and modified under the terms of either the GNU
395             General Public Licence version 2 or the Artistic Licence. It's
396             up to you which one you use. The full text of the licences can
397             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
398              
399             =head1 CONSPIRACY
400              
401             This module is also free-as-in-mason software.
402              
403             =cut
404              
405             1;