File Coverage

blib/lib/CPAN/SQLite/Info.pm
Criterion Covered Total %
statement 118 137 86.1
branch 26 44 59.0
condition 10 21 47.6
subroutine 17 18 94.4
pod 0 9 0.0
total 171 229 74.6


line stmt bran cond sub pod time code
1             # $Id: Info.pm 84 2020-05-31 06:29:34Z stro $
2              
3             package CPAN::SQLite::Info;
4 5     5   170936 use strict;
  5         33  
  5         167  
5 5     5   30 use warnings;
  5         10  
  5         212  
6              
7             our $VERSION = '0.219';
8              
9 5     5   1105 use English qw/-no_match_vars/;
  5         7113  
  5         25  
10              
11 5     5   2778 use CPAN::DistnameInfo;
  5         2134  
  5         162  
12 5     5   484 use File::Spec::Functions qw(catfile);
  5         858  
  5         394  
13 5     5   3189 use Compress::Zlib;
  5         318992  
  5         1223  
14 5     5   53 use File::Basename;
  5         13  
  5         389  
15 5     5   2300 use Safe;
  5         151659  
  5         291  
16 5     5   2374 use CPAN::SQLite::Util qw(vcmp print_debug);
  5         13  
  5         6229  
17              
18             my $ext = qr/\.(tar\.gz|tar\.Z|tgz|zip)$/;
19              
20             sub new {
21 1     1 0 1313 my ($class, %args) = @_;
22 1         9 my $self = { dists => {}, auths => {}, mods => {}, info => {}, %args };
23 1         4 return bless $self, $class;
24             }
25              
26             sub fetch_info {
27 1     1 0 413 my $self = shift;
28 1 50       6 $self->mailrc() or return;
29 1 50       5 $self->dists_and_mods() or return;
30 1         7 return 1;
31             }
32              
33             sub dists_and_mods {
34 1     1 0 2 my $self = shift;
35 1         5 my ($packages, $cpan_files) = $self->packages();
36              
37 1         3 my ($dists, $mods);
38 1         4 my $ignore = $self->{ignore};
39 1         2 my $pat;
40 1 50 33     4 if ($ignore and ref($ignore) eq 'ARRAY') {
41 0         0 $pat = join '|', @$ignore;
42             }
43 1         30 foreach my $cpan_file (keys %$cpan_files) {
44 105 50 33     205 if ($pat and ($cpan_file =~ /^($pat)/)) {
45 0         0 delete $cpan_files->{$cpan_file};
46 0         0 print_debug("Ignoring $cpan_file\n");
47 0         0 next;
48             }
49 105         233 my $d = CPAN::DistnameInfo->new($cpan_file);
50 105 50       6141 next unless ($d->maturity eq 'released');
51 105         463 my $dist_name = $d->dist;
52 105         357 my $dist_vers = $d->version;
53 105         362 my $cpanid = $d->cpanid;
54 105         366 my $dist_file = $d->filename;
55 105 50 33     551 unless ($dist_name and $dist_vers and $cpanid) {
      33        
56 0         0 print_debug("No dist_name/version/cpanid for $cpan_file: skipping\n");
57 0         0 delete $cpan_files->{$cpan_file};
58 0         0 next;
59             }
60              
61             # ignore specified dists
62 105 50 33     211 if ($pat and ($dist_name =~ /^($pat)$/)) {
63 0         0 delete $cpan_files->{$cpan_file};
64 0         0 print_debug("Ignoring $dist_name\n");
65 0         0 next;
66             }
67 105 100 100     246 if (not $dists->{$dist_name} or vcmp($dist_vers, $dists->{$dist_name}->{dist_vers}) > 0) {
68 94         266 $dists->{$dist_name}->{dist_vers} = $dist_vers;
69 94         146 $dists->{$dist_name}->{dist_file} = $dist_file;
70 94         331 $dists->{$dist_name}->{cpanid} = $cpanid;
71             }
72             }
73              
74 1         13 my $wanted;
75 1         14 foreach my $dist_name (keys %$dists) {
76 92         1746 $wanted->{ basename($dists->{$dist_name}->{dist_file}) } = $dist_name;
77             }
78 1         103 foreach my $mod_name (keys %$packages) {
79 606         11539 my $file = basename($packages->{$mod_name}->{dist_file});
80 606         1370 my $dist_name = $wanted->{$file};
81 606 100 66     1769 unless ($dist_name and $dists->{$dist_name}) {
82 62         116 delete $packages->{$mod_name};
83 62         110 next;
84             }
85 544         1391 $mods->{$mod_name}->{dist_name} = $dist_name;
86 544         1095 $dists->{$dist_name}->{modules}->{$mod_name}++;
87 544         1162 $mods->{$mod_name}->{mod_vers} = $packages->{$mod_name}->{mod_vers};
88             }
89 1         30 $self->{dists} = $dists;
90 1         167 return $self->{mods} = $mods;
91             }
92              
93             sub modlist {
94 0     0 0 0 my $self = shift;
95 0         0 warn 'Modlist does not contain any useful info anymore';
96 0         0 return;
97             }
98              
99             sub packages {
100 1     1 0 3 my $self = shift;
101 1         2 my $index = 'modules/02packages.details.txt.gz';
102             my $packages =
103             $self->{keep_source_where}
104             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
105 1 50       9 : catfile($self->{CPAN}, $index);
106 1 50       4 return unless check_file('modules/02packages.details.txt.gz', $packages);
107 1         12 print_debug("Reading information from $packages\n");
108 1         4 my $lines = zcat($packages);
109 1         58 while (@$lines) {
110 9         16 my $shift = shift(@$lines);
111 9 100       38 last if $shift =~ /^\s*$/;
112             }
113 1         4 my ($mods, $cpan_files);
114 1         4 foreach (@$lines) {
115 606         1643 my ($mod_name, $mod_vers, $dist_file) = split(" ", $_, 4);
116 606 100       1161 $mod_vers = undef if $mod_vers eq 'undef';
117 606         1863 $mods->{$mod_name} = { mod_vers => $mod_vers, dist_file => $dist_file };
118 606         1128 $cpan_files->{$dist_file}++;
119             }
120 1         29 return ($mods, $cpan_files);
121             }
122              
123             sub mailrc {
124 1     1 0 2 my $self = shift;
125 1         2 my $index = 'authors/01mailrc.txt.gz';
126             my $mailrc =
127             $self->{keep_source_where}
128             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
129 1 50       14 : catfile($self->{CPAN}, $index);
130 1 50       6 return unless check_file('authors/01mailrc.txt.gz', $mailrc);
131 1         10 print_debug("Reading information from $mailrc\n");
132 1         4 my $lines = zcat($mailrc);
133 1         55 my $auths;
134 1         5 foreach (@$lines) {
135              
136             #my($cpanid,$fullname,$email) =
137             #m/alias\s+(\S+)\s+\"([^\"\<]+)\s+\<([^\>]+)\>\"/;
138 4         24 my ($cpanid, $authinfo) = m/alias\s+(\S+)\s+\"([^\"]+)\"/;
139 4 50       10 next unless $cpanid;
140 4         8 my ($fullname, $email);
141 4 50       17 if ($authinfo =~ m/([^<]+)\<(.*)\>/) {
142 4         9 $fullname = $1;
143 4         7 $email = $2;
144             } else {
145 0         0 $fullname = '';
146 0         0 $email = lc($cpanid) . '@cpan.org';
147             }
148 4         8 $auths->{$cpanid} = {
149             fullname => trim($fullname),
150             email => trim($email) };
151             }
152 1         7 return $self->{auths} = $auths;
153             }
154              
155             sub check_file {
156 2     2 0 6 my ($index, $file) = @_;
157 2 50       6 unless ($file) {
158 0         0 warn qq{index file '$index' not defined};
159 0         0 return;
160             }
161 2 50       50 unless (-f $file) {
162 0         0 warn qq{index file '$file' not found};
163 0         0 return;
164             }
165 2         13 return 1;
166             }
167              
168             sub zcat {
169 2     2 0 5 my $file = shift;
170 2         5 my ($buffer, $lines);
171 2 50       8 my $gz = gzopen($file, 'rb')
172             or die "Cannot open $file: $gzerrno";
173 2         5682 while ($gz->gzreadline($buffer) > 0) {
174 619         60831 push @$lines, $buffer;
175             }
176 2 50       300 die "Error reading from $file: $gzerrno" . ($gzerrno + 0)
177             if $gzerrno != Z_STREAM_END;
178 2         20 $gz->gzclose();
179 2         246 return $lines;
180             }
181              
182             sub trim {
183 8     8 0 11 my $string = shift;
184 8 50       16 return '' unless $string;
185 8         17 $string =~ s/^\s+//;
186 8         21 $string =~ s/\s+$//;
187 8         19 $string =~ s/\s+/ /g;
188 8         28 return $string;
189             }
190              
191             1;
192              
193             =head1 NAME
194              
195             CPAN::SQLite::Info - extract information from CPAN indices
196              
197             =head1 VERSION
198              
199             version 0.219
200              
201             =head1 DESCRIPTION
202              
203             This module extracts information from the CPAN indices
204             F<$CPAN/modules/02packages.details.txt.gz> and
205             F<$CPAN/authors/01mailrc.txt.gz>.
206              
207             A C object is created with
208              
209             my $info = CPAN::SQLite::Info->new(CPAN => $cpan);
210              
211             where C<$cpan> specifies the top-level CPAN directory
212             underneath which the index files are found. Calling
213              
214             $info->fetch_info();
215              
216             will result in the object being populated with 3 hash references:
217              
218             =over 3
219              
220             =item * C<$info-E{dists}>
221              
222             This contains information on distributions. Keys of this hash
223             reference are the distribution names, with the associated value being a
224             hash reference with keys of
225              
226             =over 3
227              
228             =item C - the version of the CPAN file
229              
230             =item C - the CPAN filename
231              
232             =item C - the CPAN author id
233              
234             =item C - a description, if available
235              
236             =item C - specifies the modules present in the distribution:
237              
238             for my $module (keys %{$info->{$distname}->{modules}}) {
239             print "Module: $module\n";
240             }
241              
242             =back
243              
244             =item * C<$info-E{mods}>
245              
246             This contains information on modules. Keys of this hash
247             reference are the module names, with the associated values being a
248             hash reference with keys of
249              
250             =over 3
251              
252             =item C - the distribution name containing the module
253              
254             =item C - the version
255              
256             =item C - a description, if available
257              
258             =back
259              
260             =item * C<$info-E{auths}>
261              
262             This contains information on CPAN authors. Keys of this hash
263             reference are the CPAN ids, with the associated value being a
264             hash reference with keys of
265              
266             =over 3
267              
268             =item C - the author's full name
269              
270             =item C - the author's email address
271              
272             =back
273              
274             =back
275              
276             =head1 SEE ALSO
277              
278             L
279              
280             =cut