File Coverage

blib/lib/CPAN/SQLite/Info.pm
Criterion Covered Total %
statement 118 137 86.1
branch 26 44 59.0
condition 10 21 47.6
subroutine 17 18 94.4
pod 0 9 0.0
total 171 229 74.6


line stmt bran cond sub pod time code
1             # $Id: Info.pm 82 2020-05-30 06:14:27Z stro $
2              
3             package CPAN::SQLite::Info;
4 5     5   173829 use strict;
  5         34  
  5         149  
5 5     5   30 use warnings;
  5         9  
  5         298  
6              
7             our $VERSION = '0.218';
8              
9 5     5   1070 use English qw/-no_match_vars/;
  5         7278  
  5         25  
10              
11 5     5   2984 use CPAN::DistnameInfo;
  5         2152  
  5         174  
12 5     5   497 use File::Spec::Functions qw(catfile);
  5         838  
  5         400  
13 5     5   3396 use Compress::Zlib;
  5         322933  
  5         1134  
14 5     5   51 use File::Basename;
  5         10  
  5         382  
15 5     5   2252 use Safe;
  5         151935  
  5         288  
16 5     5   2377 use CPAN::SQLite::Util qw(vcmp print_debug);
  5         12  
  5         6240  
17              
18             my $ext = qr/\.(tar\.gz|tar\.Z|tgz|zip)$/;
19              
20             sub new {
21 1     1 0 1342 my ($class, %args) = @_;
22 1         8 my $self = { dists => {}, auths => {}, mods => {}, info => {}, %args };
23 1         6 return bless $self, $class;
24             }
25              
26             sub fetch_info {
27 1     1 0 455 my $self = shift;
28 1 50       5 $self->mailrc() or return;
29 1 50       6 $self->dists_and_mods() or return;
30 1         6 return 1;
31             }
32              
33             sub dists_and_mods {
34 1     1 0 2 my $self = shift;
35 1         4 my ($packages, $cpan_files) = $self->packages();
36              
37 1         3 my ($dists, $mods);
38 1         5 my $ignore = $self->{ignore};
39 1         3 my $pat;
40 1 50 33     6 if ($ignore and ref($ignore) eq 'ARRAY') {
41 0         0 $pat = join '|', @$ignore;
42             }
43 1         29 foreach my $cpan_file (keys %$cpan_files) {
44 105 50 33     207 if ($pat and ($cpan_file =~ /^($pat)/)) {
45 0         0 delete $cpan_files->{$cpan_file};
46 0         0 print_debug("Ignoring $cpan_file\n");
47 0         0 next;
48             }
49 105         213 my $d = CPAN::DistnameInfo->new($cpan_file);
50 105 50       6284 next unless ($d->maturity eq 'released');
51 105         453 my $dist_name = $d->dist;
52 105         351 my $dist_vers = $d->version;
53 105         356 my $cpanid = $d->cpanid;
54 105         374 my $dist_file = $d->filename;
55 105 50 33     547 unless ($dist_name and $dist_vers and $cpanid) {
      33        
56 0         0 print_debug("No dist_name/version/cpanid for $cpan_file: skipping\n");
57 0         0 delete $cpan_files->{$cpan_file};
58 0         0 next;
59             }
60              
61             # ignore specified dists
62 105 50 33     197 if ($pat and ($dist_name =~ /^($pat)$/)) {
63 0         0 delete $cpan_files->{$cpan_file};
64 0         0 print_debug("Ignoring $dist_name\n");
65 0         0 next;
66             }
67 105 100 100     255 if (not $dists->{$dist_name} or vcmp($dist_vers, $dists->{$dist_name}->{dist_vers}) > 0) {
68 97         266 $dists->{$dist_name}->{dist_vers} = $dist_vers;
69 97         159 $dists->{$dist_name}->{dist_file} = $dist_file;
70 97         341 $dists->{$dist_name}->{cpanid} = $cpanid;
71             }
72             }
73              
74 1         7 my $wanted;
75 1         14 foreach my $dist_name (keys %$dists) {
76 92         1700 $wanted->{ basename($dists->{$dist_name}->{dist_file}) } = $dist_name;
77             }
78 1         108 foreach my $mod_name (keys %$packages) {
79 606         11125 my $file = basename($packages->{$mod_name}->{dist_file});
80 606         1259 my $dist_name = $wanted->{$file};
81 606 100 66     1725 unless ($dist_name and $dists->{$dist_name}) {
82 62         131 delete $packages->{$mod_name};
83 62         109 next;
84             }
85 544         1362 $mods->{$mod_name}->{dist_name} = $dist_name;
86 544         1090 $dists->{$dist_name}->{modules}->{$mod_name}++;
87 544         1141 $mods->{$mod_name}->{mod_vers} = $packages->{$mod_name}->{mod_vers};
88             }
89 1         29 $self->{dists} = $dists;
90 1         174 return $self->{mods} = $mods;
91             }
92              
93             sub modlist {
94 0     0 0 0 my $self = shift;
95 0         0 warn 'Modlist does not contain any useful info anymore';
96 0         0 return;
97             }
98              
99             sub packages {
100 1     1 0 2 my $self = shift;
101 1         2 my $index = 'modules/02packages.details.txt.gz';
102             my $packages =
103             $self->{keep_source_where}
104             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
105 1 50       10 : catfile($self->{CPAN}, $index);
106 1 50       7 return unless check_file('modules/02packages.details.txt.gz', $packages);
107 1         10 print_debug("Reading information from $packages\n");
108 1         5 my $lines = zcat($packages);
109 1         56 while (@$lines) {
110 9         13 my $shift = shift(@$lines);
111 9 100       39 last if $shift =~ /^\s*$/;
112             }
113 1         3 my ($mods, $cpan_files);
114 1         4 foreach (@$lines) {
115 606         1546 my ($mod_name, $mod_vers, $dist_file) = split(" ", $_, 4);
116 606 100       1155 $mod_vers = undef if $mod_vers eq 'undef';
117 606         2088 $mods->{$mod_name} = { mod_vers => $mod_vers, dist_file => $dist_file };
118 606         1198 $cpan_files->{$dist_file}++;
119             }
120 1         28 return ($mods, $cpan_files);
121             }
122              
123             sub mailrc {
124 1     1 0 2 my $self = shift;
125 1         3 my $index = 'authors/01mailrc.txt.gz';
126             my $mailrc =
127             $self->{keep_source_where}
128             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
129 1 50       14 : catfile($self->{CPAN}, $index);
130 1 50       6 return unless check_file('authors/01mailrc.txt.gz', $mailrc);
131 1         9 print_debug("Reading information from $mailrc\n");
132 1         3 my $lines = zcat($mailrc);
133 1         49 my $auths;
134 1         5 foreach (@$lines) {
135              
136             #my($cpanid,$fullname,$email) =
137             #m/alias\s+(\S+)\s+\"([^\"\<]+)\s+\<([^\>]+)\>\"/;
138 4         24 my ($cpanid, $authinfo) = m/alias\s+(\S+)\s+\"([^\"]+)\"/;
139 4 50       10 next unless $cpanid;
140 4         8 my ($fullname, $email);
141 4 50       18 if ($authinfo =~ m/([^<]+)\<(.*)\>/) {
142 4         10 $fullname = $1;
143 4         8 $email = $2;
144             } else {
145 0         0 $fullname = '';
146 0         0 $email = lc($cpanid) . '@cpan.org';
147             }
148 4         10 $auths->{$cpanid} = {
149             fullname => trim($fullname),
150             email => trim($email) };
151             }
152 1         7 return $self->{auths} = $auths;
153             }
154              
155             sub check_file {
156 2     2 0 5 my ($index, $file) = @_;
157 2 50       6 unless ($file) {
158 0         0 warn qq{index file '$index' not defined};
159 0         0 return;
160             }
161 2 50       51 unless (-f $file) {
162 0         0 warn qq{index file '$file' not found};
163 0         0 return;
164             }
165 2         12 return 1;
166             }
167              
168             sub zcat {
169 2     2 0 5 my $file = shift;
170 2         4 my ($buffer, $lines);
171 2 50       9 my $gz = gzopen($file, 'rb')
172             or die "Cannot open $file: $gzerrno";
173 2         5588 while ($gz->gzreadline($buffer) > 0) {
174 619         60688 push @$lines, $buffer;
175             }
176 2 50       310 die "Error reading from $file: $gzerrno" . ($gzerrno + 0)
177             if $gzerrno != Z_STREAM_END;
178 2         17 $gz->gzclose();
179 2         236 return $lines;
180             }
181              
182             sub trim {
183 8     8 0 10 my $string = shift;
184 8 50       18 return '' unless $string;
185 8         14 $string =~ s/^\s+//;
186 8         22 $string =~ s/\s+$//;
187 8         19 $string =~ s/\s+/ /g;
188 8         28 return $string;
189             }
190              
191             1;
192              
193             =head1 NAME
194              
195             CPAN::SQLite::Info - extract information from CPAN indices
196              
197             =head1 VERSION
198              
199             version 0.218
200              
201             =head1 DESCRIPTION
202              
203             This module extracts information from the CPAN indices
204             F<$CPAN/modules/02packages.details.txt.gz> and
205             F<$CPAN/authors/01mailrc.txt.gz>.
206              
207             A C object is created with
208              
209             my $info = CPAN::SQLite::Info->new(CPAN => $cpan);
210              
211             where C<$cpan> specifies the top-level CPAN directory
212             underneath which the index files are found. Calling
213              
214             $info->fetch_info();
215              
216             will result in the object being populated with 3 hash references:
217              
218             =over 3
219              
220             =item * C<$info-E{dists}>
221              
222             This contains information on distributions. Keys of this hash
223             reference are the distribution names, with the associated value being a
224             hash reference with keys of
225              
226             =over 3
227              
228             =item C - the version of the CPAN file
229              
230             =item C - the CPAN filename
231              
232             =item C - the CPAN author id
233              
234             =item C - a description, if available
235              
236             =item C - specifies the modules present in the distribution:
237              
238             for my $module (keys %{$info->{$distname}->{modules}}) {
239             print "Module: $module\n";
240             }
241              
242             =back
243              
244             =item * C<$info-E{mods}>
245              
246             This contains information on modules. Keys of this hash
247             reference are the module names, with the associated values being a
248             hash reference with keys of
249              
250             =over 3
251              
252             =item C - the distribution name containing the module
253              
254             =item C - the version
255              
256             =item C - a description, if available
257              
258             =back
259              
260             =item * C<$info-E{auths}>
261              
262             This contains information on CPAN authors. Keys of this hash
263             reference are the CPAN ids, with the associated value being a
264             hash reference with keys of
265              
266             =over 3
267              
268             =item C - the author's full name
269              
270             =item C - the author's email address
271              
272             =back
273              
274             =back
275              
276             =head1 SEE ALSO
277              
278             L
279              
280             =cut