File Coverage

blib/lib/CPAN/SQLite/Index.pm
Criterion Covered Total %
statement 76 166 45.7
branch 17 76 22.3
condition 4 15 26.6
subroutine 16 21 76.1
pod 3 8 37.5
total 116 286 40.5


line stmt bran cond sub pod time code
1             # $Id: Index.pm 84 2020-05-31 06:29:34Z stro $
2              
3             package CPAN::SQLite::Index;
4 3     3   450523 use strict;
  3         61  
  3         125  
5 3     3   25 use warnings;
  3         6  
  3         196  
6              
7             our $VERSION = '0.219';
8              
9 3     3   1632 use English qw/-no_match_vars/;
  3         7890  
  3         18  
10              
11 3     3   2554 use CPAN::SQLite::Info;
  3         9  
  3         120  
12 3     3   1369 use CPAN::SQLite::State;
  3         11  
  3         111  
13 3     3   1538 use CPAN::SQLite::Populate;
  3         8  
  3         95  
14 3     3   22 use CPAN::SQLite::DBI qw($tables);
  3         6  
  3         249  
15 3     3   22 use File::Spec::Functions qw(catfile);
  3         7  
  3         119  
16 3     3   16 use File::Basename;
  3         12  
  3         140  
17 3     3   17 use File::Path;
  3         7  
  3         131  
18 3     3   2277 use HTTP::Tiny;
  3         128619  
  3         155  
19              
20 3     3   30 use Scalar::Util 'weaken';
  3         7  
  3         5291  
21              
22             unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
23             $ENV{CPAN_SQLITE_DEBUG} = 1;
24             }
25              
26             our ($oldout);
27             my $log_file = 'cpan_sqlite_log.' . time;
28              
29             # This is usually already defined in real life, but tests need it to be set
30             $CPAN::FrontEnd ||= "CPAN::Shell";
31              
32             sub new {
33 2     2 0 4741 my ($class, %args) = @_;
34 2 0 33     14 if ($args{setup} and $args{reindex}) {
35 0         0 die "Reindexing must be done on an exisiting database";
36             }
37              
38 2         13 my $self = { index => undef, state => undef, %args };
39              
40 2         12 return bless $self, $class;
41             }
42              
43             sub download_index {
44 1     1 0 590 my $self = shift;
45              
46 1 50       5 if ($ENV{'CPAN_SQLITE_DOWNLOAD'}) {
47 1 50       5 $ENV{'CPAN_SQLITE_DOWNLOAD_URL'} = 'http://cpansqlite.trouchelle.com/' unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
48             }
49              
50 1 50       4 return 0 unless $ENV{'CPAN_SQLITE_DOWNLOAD_URL'};
51              
52 1         10 $CPAN::FrontEnd->myprint("Downloading the compiled index db ... ");
53              
54 1 50       12 if (my $response =
55             HTTP::Tiny->new->mirror($ENV{'CPAN_SQLITE_DOWNLOAD_URL'} => catfile($self->{'db_dir'}, $self->{'db_name'})))
56             {
57 1 50 33     2956263 if ($response->{'success'} and $response->{'status'} and $response->{'status'} eq '200') {
      33        
58 1 50       7 if (my $type = $response->{'headers'}->{'content-type'}) {
59 1 50       5 if ($type eq 'application/x-sqlite3') {
60 1         20 return 1;
61             }
62             }
63             }
64             }
65              
66 0         0 $CPAN::FrontEnd->mywarn('Cannot download the compiled index db');
67 0         0 return 0;
68             }
69              
70             sub index {
71 0     0 1 0 my $self = shift;
72 0         0 my $setup = $self->{'setup'};
73              
74 0 0       0 if ($setup) {
75 0         0 my $db_name = catfile($self->{'db_dir'}, $self->{db_name});
76 0 0       0 if (-f $db_name) {
77 0         0 $CPAN::FrontEnd->myprint("Removing existing $db_name ... ");
78 0 0       0 if (unlink $db_name) {
79 0         0 $CPAN::FrontEnd->myprint("Done.\n");
80             } else {
81 0         0 $CPAN::FrontEnd->mywarn("Failed: $!\n");
82             }
83             }
84             }
85              
86 0         0 my $log = catfile($self->{'log_dir'}, $log_file);
87              
88 0 0       0 unless ($ENV{'CPAN_SQLITE_NO_LOG_FILES'}) {
89 0         0 $oldout = error_fh($log);
90             }
91              
92 0         0 my $log_cleanup = $ENV{'CPAN_SQLITE_LOG_FILES_CLEANUP'};
93 0 0       0 $log_cleanup = 30 unless defined $log_cleanup;
94 0 0 0     0 if ($log_cleanup and $log_cleanup =~ /^\d+$/) {
95 0 0       0 if (opendir(my $DIR, $self->{'log_dir'})) {
96 0         0 my @files = grep { /cpan_sqlite_log\./ } readdir $DIR;
  0         0  
97 0         0 closedir $DIR;
98              
99 0         0 @files = grep { -C $_ > $log_cleanup } map { catfile($self->{'log_dir'}, $_) } @files;
  0         0  
  0         0  
100              
101 0 0       0 if (@files) {
102 0         0 $CPAN::FrontEnd->myprint('Cleaning old log files ... ');
103 0         0 unlink @files;
104 0         0 $CPAN::FrontEnd->myprint("Done.\n");
105             }
106             }
107             }
108              
109 0 0       0 if ($self->download_index()) {
110 0         0 return 1;
111             }
112              
113 0 0       0 if ($self->{'update_indices'}) {
114 0         0 $CPAN::FrontEnd->myprint('Fetching index files ... ');
115 0 0       0 if ($self->fetch_cpan_indices()) {
116 0         0 $CPAN::FrontEnd->myprint("Done.\n");
117             } else {
118 0         0 $CPAN::FrontEnd->mywarn("Failed\n");
119 0         0 return;
120             }
121             }
122              
123 0         0 $CPAN::FrontEnd->myprint('Gathering information from index files ... ');
124 0 0       0 if ($self->fetch_info()) {
125 0         0 $CPAN::FrontEnd->myprint("Done.\n");
126             } else {
127 0         0 $CPAN::FrontEnd->mywarn("Failed\n");
128 0         0 return;
129             }
130              
131 0 0       0 unless ($setup) {
132 0         0 $CPAN::FrontEnd->myprint('Obtaining current state of database ... ');
133 0 0       0 if ($self->state()) {
134 0         0 $CPAN::FrontEnd->myprint("Done.\n");
135             } else {
136 0         0 $CPAN::FrontEnd->mywarn("Failed\n");
137 0         0 return;
138             }
139             }
140              
141 0         0 $CPAN::FrontEnd->myprint('Populating database tables ... ');
142 0 0       0 if ($self->populate()) {
143 0         0 $CPAN::FrontEnd->myprint("Done.\n");
144             } else {
145 0         0 $CPAN::FrontEnd->mywarn("Failed\n");
146 0         0 return;
147             }
148              
149 0         0 return 1;
150             }
151              
152             sub fetch_cpan_indices {
153 1     1 0 377 my $self = shift;
154              
155 1         5 my $CPAN = $self->{CPAN};
156 1         12 my $indices = {
157             '01mailrc.txt.gz' => 'authors',
158             '02packages.details.txt.gz' => 'modules',
159             };
160              
161 1         14 foreach my $index (keys %$indices) {
162 2         28 my $file = catfile($CPAN, $indices->{$index}, $index);
163 2 50 33     53 next if (-e $file and -M $file < 1);
164 2         178 my $dir = dirname($file);
165 2 50       32 unless (-d $dir) {
166 2 50       320 mkpath($dir, 0, oct(755)) or die "Cannot mkpath $dir: $!";
167             }
168 2         9 my @urllist = @{ $self->{urllist} };
  2         11  
169 2         6 foreach my $cpan (@urllist) {
170 2         10 my $from = join '/', ($cpan, $indices->{$index}, $index);
171 2 50       26 if (my $response = HTTP::Tiny->new->get($from)) {
172 2 50       1493925 if ($response->{'success'}) {
173 2 50       390 if (open(my $FILE, '>', $file)) {
174 2         18 binmode $FILE;
175 2         4728 print $FILE $response->{'content'};
176 2 50       77 if (close($FILE)) {
177 2         63 next;
178             }
179             }
180             }
181             }
182             }
183 2 50       1295 unless (-f $file) {
184 0         0 $CPAN::FrontEnd->mywarn("Cannot retrieve '$file'");
185 0         0 return;
186             }
187             }
188 1         12 return 1;
189             }
190              
191             sub fetch_info {
192 0     0 0 0 my $self = shift;
193 0         0 my %wanted = map { $_ => $self->{$_} } qw(CPAN ignore keep_source_where);
  0         0  
194 0         0 my $info = CPAN::SQLite::Info->new(%wanted);
195 0 0       0 $info->fetch_info() or return;
196 0         0 my @tables = qw(dists mods auths info);
197 0         0 my $index;
198 0         0 foreach my $table (@tables) {
199 0         0 my $class = __PACKAGE__ . '::' . $table;
200 0         0 my $this = { info => $info->{$table} };
201 0         0 $index->{$table} = bless $this, $class;
202             }
203 0         0 $self->{index} = $index;
204 0         0 return 1;
205             }
206              
207             sub state {
208 0     0 1 0 my $self = shift;
209              
210 0         0 my %wanted = map { $_ => $self->{$_} } qw(db_name index setup reindex db_dir);
  0         0  
211 0         0 my $state = CPAN::SQLite::State->new(%wanted);
212 0 0       0 $state->state() or return;
213 0         0 $self->{state} = $state;
214 0         0 return 1;
215             }
216              
217             sub populate {
218 0     0 1 0 my $self = shift;
219 0         0 my %wanted = map { $_ => $self->{$_} } qw(db_name index setup state db_dir);
  0         0  
220 0         0 my $db = CPAN::SQLite::Populate->new(%wanted);
221 0 0       0 $db->populate() or return;
222 0         0 return 1;
223             }
224              
225             sub error_fh {
226 0     0 0 0 my $file = shift;
227 0 0       0 open(my $tmp, '>', $file) or die "Cannot open $file: $!";
228 0         0 close $tmp;
229              
230             # Should be open(my $oldout, '>&', \*STDOUT); but it fails on 5.6.2
231 0         0 open(my $oldout, '>&STDOUT');
232 0 0       0 open(STDOUT, '>', $file) or die "Cannot tie STDOUT to $file: $!";
233 0         0 select STDOUT;
234 0         0 $| = 1;
235 0         0 return $oldout;
236             }
237              
238             sub DESTROY {
239 2 50   2   1591 unless ($ENV{CPAN_SQLITE_NO_LOG_FILES}) {
240 2         20 close STDOUT;
241 2 50       11 open(STDOUT, '>&', $oldout) if $oldout;
242             }
243 2         319 return;
244             }
245              
246             1;
247              
248             =head1 NAME
249              
250             CPAN::SQLite::Index - set up or update database tables.
251              
252             =head1 VERSION
253              
254             version 0.219
255              
256             =head1 SYNOPSIS
257              
258             my $index = CPAN::SQLite::Index->new(setup => 1);
259             $index->index();
260              
261             =head1 DESCRIPTION
262              
263             This is the main module used to set up or update the
264             database tables used to store information from the
265             CPAN and ppm indices. The creation of the object
266              
267             my $index = CPAN::SQLite::Index->new(%args);
268              
269             accepts two possible arguments:
270              
271             =over 3
272              
273             =item * setup =E 1
274              
275             This (optional) argument specifies that the database is being set up.
276             Any existing tables will be dropped.
277              
278             =item * reindex =E value
279              
280             This (optional) argument specifies distribution names that
281             one would like to reindex in an existing database. These may
282             be specified as either a scalar, for a single distribution,
283             or as an array reference for a list of distributions.
284              
285             =back
286              
287             =head1 DETAILS
288              
289             Calling
290              
291             $index->index();
292              
293             will start the indexing procedure. Various messages
294             detailing the progress will written to I,
295             which by default will be captured into a file
296             F, where the extension
297             is the C
298             are not captured, and will appear in I.
299              
300             The steps of the indexing procedure are as follows.
301              
302             =over 4
303              
304             =item * download existing pre-compiled index (optional)
305              
306             If CPAN_SQLITE_DOWNLOAD or CPAN_SQLITE_DOWNLOAD_URL variables are set, an
307             already existing and up-to-date cpandb.sql file will be downloaded from
308             either specified URL or http://cpansqlite.trouchelle.com/ where it's
309             updated every hour. This greatly increases performance and decreases CPU
310             and memory consumption during the indexing process but if your CPAN
311             mirror is out-of-sync or you're using DarkPAN, it obviously wouldn't
312             work. It also wouldn't work without an internet connection.
313              
314             See L if you want to setup your own service for
315             pre-compiling the database.
316              
317             If neither variable is set, this step is skipped.
318              
319             =item * fetch index data
320              
321             The necessary CPAN index files
322             F<$CPAN/authors/01mailrc.txt.gz> and
323             F<$CPAN/modules/02packages.details.txt.gz> will be fetched
324             from the CPAN mirror specified by the C<$cpan> variable
325             at the beginning of L. If you are
326             using this option, it is recommended to use the
327             same CPAN mirror with subsequent updates, to ensure consistency
328             of the database. As well, the information on the locations
329             of the CPAN mirrors used for Template-Toolkit and GeoIP
330             is written.
331              
332             =item * get index information
333              
334             Information from the CPAN indices is extracted through
335             L.
336              
337             =item * get state information
338              
339             Unless the C argument within the C
340             method of L is specified,
341             this will get information on the state of the database
342             through L.
343             A comparison is then made between this information
344             and that gathered from the CPAN indices, and if there's
345             a discrepancy in some items, those items are marked
346             for either insertion, updating, or deletion, as appropriate.
347              
348             =item * populate the database
349              
350             At this stage the gathered information is used to populate
351             the database, through L,
352             either inserting new items, updating
353             existing ones, or deleting obsolete items.
354              
355             =back
356              
357             =head1 SEE ALSO
358              
359             L, L,
360             L,
361             and L.
362             Development takes place on the CPAN-SQLite project
363             at L.
364              
365             =head1 AUTHORS
366              
367             Randy Kobes (passed away on September 18, 2010)
368              
369             Serguei Trouchelle Estro@cpan.orgE
370              
371             =head1 COPYRIGHT
372              
373             Copyright 2006 by Randy Kobes Er.kobes@uwinnipeg.caE.
374              
375             Copyright 2011 by Serguei Trouchelle Estro@cpan.orgE.
376              
377             Use and redistribution are under the same terms as Perl itself.
378              
379             =cut