File Coverage

blib/lib/BackPAN/Version/Discover/_BackPAN/Index.pm
Criterion Covered Total %
statement 33 190 17.3
branch 0 50 0.0
condition 0 26 0.0
subroutine 11 33 33.3
pod n/a
total 44 299 14.7


line stmt bran cond sub pod time code
1             package BackPAN::Index;
2              
3 2     2   13 use strict;
  2         4  
  2         151  
4 2     2   14 use warnings;
  2         4  
  2         171  
5              
6             our $VERSION = '0.39';
7              
8 2     2   4320 use autodie;
  2         55239  
  2         17  
9 2     2   21413 use App::Cache 0.37;
  2         268017  
  2         28  
10 2     2   3181 use CPAN::DistnameInfo 0.09;
  2         3411  
  2         73  
11 2     2   3196 use LWP::Simple qw(getstore head is_success);
  2         72491  
  2         21  
12 2     2   4409 use Archive::Extract;
  2         217883  
  2         110  
13 2     2   24 use Path::Class ();
  2         5  
  2         39  
14 2     2   13 use File::stat;
  2         4  
  2         20  
15 2     2   4196 use BackPAN::Index::Schema;
  2         573539  
  2         98  
16              
17 2     2   22 use parent qw( Class::Accessor::Fast );
  2         5  
  2         31  
18              
19             __PACKAGE__->mk_accessors(qw(
20             update
21             cache_ttl
22             debug
23             releases_only_from_authors
24             cache_dir
25             backpan_index_url
26              
27             backpan_index schema cache
28             ));
29              
30             my %Defaults = (
31             backpan_index_url => "http://www.astray.com/tmp/backpan.txt.gz",
32             releases_only_from_authors => 1,
33             debug => 0,
34             cache_ttl => 60 * 60,
35             );
36              
37              
38             sub new {
39 0     0     my $class = shift;
40 0           my $options = shift;
41              
42 0   0       $options ||= {};
43              
44             # Apply defaults
45 0           %$options = ( %Defaults, %$options );
46              
47 0           my $self = $class->SUPER::new($options);
48              
49 0           my %cache_opts;
50 0           $cache_opts{ttl} = $self->cache_ttl;
51 0 0         $cache_opts{directory} = $self->cache_dir if $self->cache_dir;
52 0           $cache_opts{enabled} = !$self->update;
53              
54 0           my $cache = App::Cache->new( \%cache_opts );
55 0           $self->cache($cache);
56              
57 0           $self->_update_database();
58              
59 0           return $self;
60             }
61              
62             sub _dbh {
63 0     0     my $self = shift;
64 0           return $self->schema->storage->dbh;
65             }
66              
67             sub _log {
68 0     0     my $self = shift;
69 0 0         return unless $self->debug;
70 0           print STDERR @_, "\n";
71             }
72              
73             sub _update_database {
74 0     0     my $self = shift;
75              
76             # Delay loading it into memory until we need it
77 0           $self->_log("Fetching BackPAN index...");
78 0           $self->_get_backpan_index;
79 0           $self->_log("Done.");
80              
81 0           my $cache = $self->cache;
82 0           my $db_file = Path::Class::file($cache->directory, "backpan.sqlite");
83              
84 0           my $should_update_db;
85 0 0         if( ! -e $db_file ) {
    0          
86 0           $should_update_db = 1;
87             }
88             elsif( defined $self->update ) {
89 0           $should_update_db = $self->update;
90             }
91             else {
92             # Check the database file before we connect to it. Connecting will create
93             # the file.
94             # XXX Should probably just put a timestamp in the DB
95 0           my $db_mtime = $db_file->stat->mtime;
96 0           my $db_age = time - $db_mtime;
97 0           $should_update_db = ($db_age > $cache->ttl);
98              
99             # No matter what, update the DB if we got a new index file.
100 0 0         my $archive_mtime = -e $self->_backpan_index_archive ? $self->_backpan_index_archive->stat->mtime : 0;
101 0 0         $should_update_db = 1 if $db_mtime < $archive_mtime;
102             }
103              
104 0 0 0       unlink $db_file if -e $db_file and $should_update_db;
105              
106 0           $self->schema( BackPAN::Index::Schema->connect("dbi:SQLite:dbname=$db_file") );
107 0           $self->_setup_database;
108              
109 0 0         $should_update_db = 1 if $self->_database_is_empty;
110              
111 0 0         return unless $should_update_db;
112              
113 0           my $dbh = $self->_dbh;
114              
115 0           $self->_log("Populating database...");
116 0           $dbh->begin_work;
117              
118             # Get it out of the hot loop.
119 0           my $only_authors = $self->releases_only_from_authors;
120              
121 0           my $insert_file_sth = $dbh->prepare(q[
122             INSERT INTO files
123             (path, date, size)
124             VALUES (?, ?, ? )
125             ]);
126              
127 0           my $insert_release_sth = $dbh->prepare(q[
128             INSERT INTO releases
129             (path, dist, version, normal_version, date, size, maturity, cpanid, distvname)
130             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ? )
131             ]);
132              
133 0           my $insert_dist_sth = $dbh->prepare(q[
134             INSERT INTO dists
135             (name, num_releases,
136             first_release, first_date, first_author,
137             latest_release, latest_date, latest_author)
138             VALUES (?, ?,
139             ?, ?, ?,
140             ?, ?, ?)
141             ]);
142              
143 0           my %dists;
144             my %files;
145 0           open my $fh, $self->_backpan_index_file;
146 0           while( my $line = <$fh> ) {
147 0           chomp $line;
148 0           my ( $path, $date, $size, @junk ) = split ' ', $line;
149              
150 0 0         if( $files{$path}++ ) {
151 0           $self->_log("Duplicate file $path in index, ignoring");
152 0           next;
153             }
154              
155 0 0 0       if( !defined $path or !defined $date or !defined $size or @junk ) {
      0        
156 0           $self->_log("Bad data read at line $.: $line");
157 0           next;
158             }
159              
160 0 0         next unless $size;
161 0 0 0       next if $only_authors and $path !~ m{^authors/};
162              
163 0           $insert_file_sth->execute($path, $date, $size);
164              
165 0 0         next if $path =~ /\.(readme|meta)$/;
166              
167 0           my $i = CPAN::DistnameInfo->new( $path );
168              
169 0           my $dist = $i->dist;
170 0 0         next unless $i->dist;
171              
172             # make searching by version *much* easier.
173 0           my $normal_version = eval {
174 0     0     local $SIG{__WARN__} = sub {}; # shut up
  0            
175 0           version->parse($i->version)->normal;
176             };
177              
178 0   0       $insert_release_sth->execute(
      0        
179             $path,
180             $dist,
181             $i->version || '',
182             $normal_version || '',
183             $date,
184             $size,
185             $i->maturity,
186             $i->cpanid,
187             $i->distvname,
188             );
189              
190              
191             # Update aggregate data about dists
192 0   0       my $distdata = ($dists{$dist} ||= { name => $dist });
193              
194 0 0 0       if( !defined $distdata->{first_release} ||
195             $date < $distdata->{first_date} )
196             {
197 0           $distdata->{first_release} = $path;
198 0           $distdata->{first_author} = $i->cpanid;
199 0           $distdata->{first_date} = $date;
200             }
201              
202 0 0 0       if( !defined $distdata->{latest_release} ||
203             $date > $distdata->{latest_date} )
204             {
205 0           $distdata->{latest_release} = $path;
206 0           $distdata->{latest_author} = $i->cpanid;
207 0           $distdata->{latest_date} = $date;
208             }
209              
210 0           $distdata->{num_releases}++;
211             }
212              
213 0           for my $dist (values %dists) {
214 0           $insert_dist_sth->execute(
215 0           @{$dist}
216             {qw(name num_releases
217             first_release first_date first_author
218             latest_release latest_date latest_author
219             )}
220             );
221             }
222              
223             # Add indexes after inserting so as not to slow down the inserts
224 0           $self->_add_indexes;
225              
226 0           $dbh->commit;
227              
228 0           $self->_log("Done.");
229              
230 0           return;
231             }
232              
233              
234             sub _database_is_empty {
235 0     0     my $self = shift;
236              
237 0 0         return 1 unless $self->files->count;
238 0 0         return 1 unless $self->releases->count;
239 0           return 0;
240             }
241              
242              
243             # This is denormalized for performance, its read-only anyway
244             sub _setup_database {
245 0     0     my $self = shift;
246              
247 0           my %create_for = (
248             files => <<'SQL',
249             CREATE TABLE IF NOT EXISTS files (
250             path TEXT NOT NULL PRIMARY KEY,
251             date INTEGER NOT NULL,
252             size INTEGER NOT NULL CHECK ( size >= 0 )
253             )
254             SQL
255             releases => <<'SQL',
256             CREATE TABLE IF NOT EXISTS releases (
257             path TEXT NOT NULL PRIMARY KEY REFERENCES files,
258             dist TEXT NOT NULL REFERENCES dists,
259             date INTEGER NOT NULL,
260             size TEXT NOT NULL,
261             version TEXT NOT NULL,
262             normal_version TEXT NOT NULL,
263             maturity TEXT NOT NULL,
264             distvname TEXT NOT NULL,
265             cpanid TEXT NOT NULL
266             )
267             SQL
268              
269             dists => <<'SQL',
270             CREATE TABLE IF NOT EXISTS dists (
271             name TEXT NOT NULL PRIMARY KEY,
272             first_release TEXT NOT NULL REFERENCES releases,
273             latest_release TEXT NOT NULL REFERENCES releases,
274             first_date INTEGER NOT NULL,
275             latest_date INTEGER NOT NULL,
276             first_author TEXT NOT NULL,
277             latest_author TEXT NOT NULL,
278             num_releases INTEGER NOT NULL
279             )
280             SQL
281             );
282              
283 0           my $dbh = $self->_dbh;
284 0           for my $sql (values %create_for) {
285 0           $dbh->do($sql);
286             }
287              
288 0           $self->schema->rescan;
289              
290 0           return;
291             }
292              
293              
294             sub _add_indexes {
295 0     0     my $self = shift;
296              
297 0           my @indexes = (
298             # Speed up dists_by several orders of magnitude
299             "CREATE INDEX IF NOT EXISTS dists_by ON releases (cpanid, dist)",
300              
301             # Speed up files_by a lot
302             "CREATE INDEX IF NOT EXISTS files_by ON releases (cpanid, path)",
303              
304             # Let us order releases by date quickly
305             "CREATE INDEX IF NOT EXISTS releases_by_date ON releases (date, dist)",
306             );
307 0           my $dbh = $self->_dbh;
308 0           for my $sql (@indexes) {
309 0           $dbh->do($sql);
310             }
311             }
312              
313              
314             sub _get_backpan_index {
315 0     0     my $self = shift;
316            
317 0           my $url = $self->backpan_index_url;
318              
319 0 0         return if !$self->_backpan_index_has_changed;
320              
321 0           my $status = getstore($url, $self->_backpan_index_archive.'');
322 0 0         die "Error fetching $url: $status" unless is_success($status);
323              
324             # Faster
325 0           local $Archive::Extract::PREFER_BIN = 1;
326              
327             # Archive::Extract is vulnerable to the ORS.
328 0           local $\;
329              
330 0           my $ae = Archive::Extract->new( archive => $self->_backpan_index_archive );
331 0 0         $ae->extract( to => $self->_backpan_index_file )
332 0           or die "Problem extracting @{[ $self->_backpan_index_archive ]}: @{[ $ae->error ]}";
  0            
333              
334             # If the backpan index age is older than the TTL this prevents us
335             # from immediately looking again.
336             # XXX Should probably use a "last checked" semaphore file
337 0           $self->_backpan_index_file->touch;
338              
339 0           return;
340             }
341              
342              
343             sub _backpan_index_archive {
344 0     0     my $self = shift;
345              
346 0           my $file = URI->new($self->backpan_index_url)->path;
347 0           $file = Path::Class::file($file)->basename;
348 0           return Path::Class::file($file)->absolute($self->cache->directory);
349             }
350              
351              
352             sub _backpan_index_file {
353 0     0     my $self = shift;
354              
355 0           my $file = $self->_backpan_index_archive;
356 0           $file =~ s{\.[^.]+$}{};
357              
358 0           return Path::Class::file($file);
359             }
360              
361              
362             sub _backpan_index_has_changed {
363 0     0     my $self = shift;
364              
365 0           my $file = $self->_backpan_index_file;
366 0 0         return 1 unless -e $file;
367              
368 0           my $local_mod_time = stat($file)->mtime;
369 0           my $local_age = time - $local_mod_time;
370 0 0         return 0 unless $local_age > $self->cache->ttl;
371              
372             # We looked, don't have to look again until the ttl is up.
373 0           $self->_backpan_index_file->touch;
374              
375 0           my(undef, undef, $remote_mod_time) = head($self->backpan_index_url);
376 0           return $remote_mod_time > $local_mod_time;
377             }
378              
379              
380             sub files {
381 0     0     my $self = shift;
382 0           return $self->schema->resultset('File');
383             }
384              
385              
386             sub dist {
387 0     0     my($self, $dist) = @_;
388              
389 0           return $self->dists->single({ name => $dist });
390             }
391              
392              
393             sub releases {
394 0     0     my($self, $dist) = @_;
395              
396 0 0         return $self->schema->resultset("Release") unless defined $dist;
397 0           return $self->schema->resultset("Release")->search({ dist => $dist });
398             }
399              
400              
401             sub release {
402 0     0     my($self, $dist, $version) = @_;
403              
404 0           return $self->releases($dist)->search(
405             -or => [
406             version => $version,
407             normal_version => $version,
408             ],
409             )->single;
410             }
411              
412              
413             sub dists {
414 0     0     my $self = shift;
415              
416 0           return $self->schema->resultset("Dist");
417             }
418              
419              
420             =head1 NAME
421              
422             BackPAN::Index - An interface to the BackPAN index
423              
424             =head1 SYNOPSIS
425              
426             use BackPAN::Index;
427             my $backpan = BackPAN::Index->new;
428              
429             # These are all DBIx::Class::ResultSet's
430             my $files = $backpan->files;
431             my $dists = $backpan->dists;
432             my $releases = $backpan->releases("Acme-Pony");
433              
434             # Use DBIx::Class::ResultSet methods on them
435             my $release = $releases->single({ version => '1.23' });
436              
437             my $dist = $backpan->dist("Test-Simple");
438             my $releases = $dist->releases;
439              
440             =head1 DESCRIPTION
441              
442             This downloads, caches and parses the BackPAN index into a local
443             database for efficient querying.
444              
445             Its a pretty thin wrapper around DBIx::Class returning
446             L objects which makes it efficient and
447             flexible.
448              
449             The Comprehensive Perl Archive Network (CPAN) is a very useful
450             collection of Perl code. However, in order to keep CPAN relatively
451             small, authors of modules can delete older versions of modules to only
452             let CPAN have the latest version of a module. BackPAN is where these
453             deleted modules are backed up. It's more like a full CPAN mirror, only
454             without the deletions. This module provides an index of BackPAN and
455             some handy methods.
456              
457             =head1 METHODS
458              
459             =head2 new
460              
461             my $backpan = BackPAN::Index->new(\%options);
462              
463             Create a new object representing the BackPAN index.
464              
465             It will, if necessary, download the BackPAN index and compile it into
466             a database for efficient storage. Initial creation is slow, but it
467             will be cached.
468              
469             new() takes some options
470              
471             =head3 update
472              
473             Because it is rather large, BackPAN::Index caches a copy of the
474             BackPAN index and builds a local database to speed access. This flag
475             controls if the local index is updated.
476              
477             If true, forces an update of the BACKPAN index.
478              
479             If false, the index will never be updated even if the cache is
480             expired. It will always create a new index if one does not exist.
481              
482             By default the index is cached and checked for updates according to
483             C<<$backpan->cache_ttl>>.
484              
485             =head3 cache_ttl
486              
487             How many seconds before checking for an updated index.
488              
489             Defaults to an hour.
490              
491             =head3 debug
492              
493             If true, debug messages will be printed.
494              
495             Defaults to false.
496              
497             =head3 releases_only_from_authors
498              
499             If true, only files in the C directory will be considered as
500             releases. If false any file in the index may be considered for a
501             release.
502              
503             Defaults to true.
504              
505             =head3 cache_dir
506              
507             Location of the cache directory.
508              
509             Defaults to whatever L does.
510              
511             =head3 backpan_index_url
512              
513             URL to the BackPAN index.
514              
515             Defaults to a sensible location.
516              
517              
518             =head2 files
519              
520             my $files = $backpan->files;
521              
522             Returns a ResultSet representing all the files on BackPAN.
523              
524             =head2 files_by
525              
526             my $files = $backpan->files_by($cpanid);
527             my @files = $backpan->files_by($cpanid);
528              
529             Returns all the files by a given $cpanid.
530              
531             Returns either a list of BackPAN::Index::Files or a ResultSet.
532              
533             =cut
534              
535             sub files_by {
536 0     0     my $self = shift;
537 0           my $cpanid = shift;
538              
539 0           return $self->files->search({ "releases.cpanid" => $cpanid }, { join => "releases" });
540             }
541              
542             =head2 dists
543              
544             my $dists = $backpan->dists;
545              
546             Returns a ResultSet representing all the distributions on BackPAN.
547              
548             =head2 dist
549              
550             my $dists = $backpan->dist($dist_name);
551              
552             Returns a single BackPAN::Index::Dist object for $dist_name.
553              
554             =head2 dists_by
555              
556             my $dists = $backpan->dists_by($cpanid);
557             my @dists = $backpan->dists_by($cpanid);
558              
559             Returns the dists which contain at least one release by the given
560             $cpanid.
561              
562             Returns either a ResultSet or a list of the Dists.
563              
564             =cut
565              
566             sub dists_by {
567 0     0     my $self = shift;
568 0           my $cpanid = shift;
569              
570 0           return $self->dists->search({ "releases.cpanid" => $cpanid }, { join => "releases", distinct => 1 });
571             }
572              
573              
574             =head2 dists_changed_since
575              
576             my $dists = $backpan->dists_changed_since($time);
577              
578             Returns a ResultSet of distributions which have had releases at or after after $time.
579              
580             =cut
581              
582             sub dists_changed_since {
583 0     0     my $self = shift;
584 0           my $time = shift;
585              
586 0           return $self->dists->search( latest_date => \">= $time" );
587             }
588              
589             =head2 releases
590              
591             my $all_releases = $backpan->releases();
592             my $dist_releases = $backpan->releases($dist_name);
593              
594             Returns a ResultSet representing all the releases on BackPAN. If a
595             $dist_name is given it returns the releases of just one distribution.
596              
597             =head2 release
598              
599             my $release = $backpan->release($dist_name, $version);
600              
601             Returns a single BackPAN::Index::Release object for the given
602             $dist_name and $version.
603              
604             =head2 releases_by
605              
606             my $releases = $backpan->releases_by($cpanid);
607             my @releases = $backpan->releases_by($cpanid);
608              
609             Returns all the releases of a single author.
610              
611             Returns either a list of Releases or a ResultSet representing those releases.
612              
613             =cut
614              
615             sub releases_by {
616 0     0     my $self = shift;
617 0           my $cpanid = shift;
618              
619 0           return $self->releases->search({ cpanid => $cpanid });
620             }
621              
622              
623             =head2 releases_since
624              
625             my $releases = $backpan->releases_since($time);
626              
627             Returns a ResultSet of releases which were released at or after $time.
628              
629             =cut
630              
631             sub releases_since {
632 0     0     my $self = shift;
633 0           my $time = shift;
634              
635 0           return $self->releases->search( date => \">= $time" );
636             }
637              
638              
639             =head1 EXAMPLES
640              
641             The real power of BackPAN::Index comes from L.
642             Its very flexible and very powerful but not always obvious how to get
643             it to do things. Here's some examples.
644              
645             # How many files are on BackPAN?
646             my $count = $backpan->files->count;
647              
648             # How big is BackPAN?
649             my $size = $backpan->files->get_column("size")->sum;
650              
651             # What are the names of all the distributions?
652             my @names = $backpan->dists->get_column("name")->all;
653              
654             # What path contains this release?
655             my $path = $backpan->release("Acme-Pony", 1.01)->path;
656              
657             # Get all the releases of Moose ordered by version
658             my @releases = $backpan->dist("Moose")->releases
659             ->search(undef, { order_by => "version" });
660              
661             =head1 AUTHOR
662              
663             Michael G Schwern
664              
665             =head1 COPYRIGHT
666              
667             Copyright 2009, Michael G Schwern
668              
669             =head1 LICENSE
670              
671             This module is free software; you can redistribute it or modify it under
672             the same terms as Perl itself.
673              
674             =head1 SEE ALSO
675              
676             L, L,
677             L, L
678              
679             Repository: L
680             Bugs: L
681              
682             =cut
683              
684             1;