File Coverage

blib/lib/BackPAN/Index.pm
Criterion Covered Total %
statement 94 111 84.6
branch 27 30 90.0
condition 22 28 78.5
subroutine 18 24 75.0
pod 11 13 84.6
total 172 206 83.5


line stmt bran cond sub pod time code
1             package BackPAN::Index;
2              
3 11     11   499995 use strict;
  11         49  
  11         510  
4 11     11   62 use warnings;
  11         41  
  11         848  
5              
6             our $VERSION = '0.42';
7              
8 11     11   14587 use autodie;
  11         523399  
  11         72  
9 11     11   109618 use CPAN::DistnameInfo 0.09;
  11         12930  
  11         600  
10 11     11   7506 use BackPAN::Index::Schema;
  11         50  
  11         530  
11 11     11   8494 use BackPAN::Index::Types;
  11         47  
  11         365  
12              
13 11     11   93 use Mouse;
  11         22  
  11         55  
14             with 'BackPAN::Index::Role::Log', 'BackPAN::Index::Role::HasCache';
15              
16             has update =>
17             is => 'ro',
18             isa => 'Bool',
19             default => 0;
20              
21             has cache_ttl =>
22             is => 'ro',
23             isa => 'Int',
24             default => 60 * 60;
25              
26             has releases_only_from_authors =>
27             is => 'ro',
28             isa => 'Bool',
29             default => 1;
30              
31             has backpan_index_url =>
32             is => 'ro',
33             isa => 'URI',
34             coerce => 1,
35             builder => 'default_backpan_index_url';
36              
37             sub default_backpan_index_url {
38 10     10 0 150111 return "http://gitpan.integra.net/backpan-index.gz";
39             }
40              
41             has backpan_index =>
42             is => 'ro',
43             isa => 'BackPAN::Index::IndexFile',
44             lazy => 1,
45             default => sub {
46             my $self = shift;
47              
48             require BackPAN::Index::IndexFile;
49             return BackPAN::Index::IndexFile->new(
50             cache => $self->cache,
51             index_url => $self->backpan_index_url
52             );
53             };
54              
55             has cache_dir =>
56             is => 'ro',
57             isa => 'Str'
58             ;
59              
60             has '+cache' =>
61             is => 'rw',
62             required => 0,
63             lazy => 1,
64             default => sub {
65             my $self = shift;
66              
67             my %cache_opts;
68             $cache_opts{ttl} = $self->cache_ttl;
69             $cache_opts{directory} = $self->cache_dir if $self->cache_dir;
70             $cache_opts{enabled} = !$self->update;
71              
72             require App::Cache;
73             return App::Cache->new( \%cache_opts );
74             }
75             ;
76              
77             has db =>
78             is => 'ro',
79             isa => 'BackPAN::Index::Database',
80             handles => [qw(schema)],
81             lazy => 1,
82             default => sub {
83             my $self = shift;
84              
85             require BackPAN::Index::Database;
86             return BackPAN::Index::Database->new(
87             cache => $self->cache
88             );
89             };
90              
91              
92             sub BUILD {
93 10     10 1 1172 my $self = shift;
94              
95 10         53 $self->_update_database();
96              
97 10         181 return $self;
98             }
99              
100             sub _update_database {
101 10     10   22 my $self = shift;
102              
103             # Delay loading it into memory until we need it
104 10 100       184 $self->backpan_index->get_index if $self->backpan_index->should_index_be_updated;
105              
106 10   66     305 my $should_update_db =
107             $self->update ||
108             $self->db->should_update_db ||
109             $self->_index_archive_newer_than_db;
110              
111 10         2430 my $db_file = $self->db->db_file;
112 10 50 66     140 unlink $db_file if -e $db_file and $should_update_db;
113              
114 10 100       640 $self->db->create_tables if $should_update_db;
115              
116 10 100 66     103 $self->_populate_database if $should_update_db || $self->_database_is_empty;
117              
118 10         51 return;
119             }
120              
121             sub _index_archive_newer_than_db {
122 9     9   20 my $self = shift;
123              
124 9         57 return $self->db->db_mtime < $self->backpan_index->index_archive_mtime;
125             }
126              
127             sub _populate_database {
128 1     1   3 my $self = shift;
129              
130 1         15 my $dbh = $self->db->dbh;
131              
132 1         16 $self->_log("Populating database...");
133 1         17 $dbh->begin_work;
134              
135             # Get it out of the hot loop.
136 1         33 my $only_authors = $self->releases_only_from_authors;
137              
138 1         9 my $insert_file_sth = $dbh->prepare(q[
139             INSERT INTO files
140             (path, date, size)
141             VALUES (?, ?, ? )
142             ]);
143              
144 1         102 my $insert_release_sth = $dbh->prepare(q[
145             INSERT INTO releases
146             (path, dist, version, date, size, maturity, cpanid, distvname)
147             VALUES (?, ?, ?, ?, ?, ?, ?, ? )
148             ]);
149              
150 1         76 my $insert_dist_sth = $dbh->prepare(q[
151             INSERT INTO dists
152             (name, num_releases,
153             first_release, first_date, first_author,
154             latest_release, latest_date, latest_author)
155             VALUES (?, ?,
156             ?, ?, ?,
157             ?, ?, ?)
158             ]);
159              
160 1         66 my %dists;
161             my %files;
162 1         25 open my $fh, $self->backpan_index->index_file;
163 1         2715 while( my $line = <$fh> ) {
164 1161019         1509216 chomp $line;
165 1161019         3841968 my ( $path, $date, $size, @junk ) = split ' ', $line;
166              
167 1161019 100       5858629 if( $files{$path}++ ) {
168 2         24 $self->_log("Duplicate file $path in index, ignoring");
169 2         8 next;
170             }
171              
172 1161017 100 66     7997370 if( !defined $path or !defined $date or !defined $size or @junk ) {
      66        
173 1         15 $self->_log("Bad data read at line $.: $line");
174 1         5 next;
175             }
176              
177 1161016 100       2328473 next unless $size;
178 1159438 100 66     8387944 next if $only_authors and $path !~ m{^authors/};
179              
180 627909         16941094 $insert_file_sth->execute($path, $date, $size);
181              
182 627909 100       4100245 next if $path =~ /\.(readme|meta)$/;
183              
184 249523         812978 my $i = CPAN::DistnameInfo->new( $path );
185              
186 249523         15925921 my $dist = $i->dist;
187 249523 100       1145556 next unless $i->dist;
188              
189 235934   100     1170784 $insert_release_sth->execute(
190             $path,
191             $dist,
192             $i->version || '',
193             $date,
194             $size,
195             $i->maturity,
196             $i->cpanid,
197             $i->distvname,
198             );
199              
200              
201             # Update aggregate data about dists
202 235934   100     9855636 my $distdata = ($dists{$dist} ||= { name => $dist });
203              
204 235934 100 100     1232358 if( !defined $distdata->{first_release} ||
205             $date < $distdata->{first_date} )
206             {
207 39512         75106 $distdata->{first_release} = $path;
208 39512         101114 $distdata->{first_author} = $i->cpanid;
209 39512         181469 $distdata->{first_date} = $date;
210             }
211              
212 235934 100 100     933231 if( !defined $distdata->{latest_release} ||
213             $date > $distdata->{latest_date} )
214             {
215 206259         304494 $distdata->{latest_release} = $path;
216 206259         538565 $distdata->{latest_author} = $i->cpanid;
217 206259         903583 $distdata->{latest_date} = $date;
218             }
219              
220 235934         1712211 $distdata->{num_releases}++;
221             }
222              
223 1         8935 for my $dist (values %dists) {
224 35776         1012780 $insert_dist_sth->execute(
225 35776         53108 @{$dist}
226             {qw(name num_releases
227             first_release first_date first_author
228             latest_release latest_date latest_author
229             )}
230             );
231             }
232              
233             # Add indexes after inserting so as not to slow down the inserts
234 1         22 $self->db->create_indexes;
235              
236 1         1531612 $dbh->commit;
237              
238 1         25 $self->_log("Done.");
239              
240 1         1115066 return;
241             }
242              
243              
244             sub _database_is_empty {
245 9     9   24 my $self = shift;
246              
247 9 50       45 return 1 unless $self->files->count;
248 9 50       2726862 return 1 unless $self->releases->count;
249 9         848478 return 0;
250             }
251              
252              
253             sub file {
254 1     1 0 54 my($self, $path) = @_;
255 1         7 return $self->files->single({ path => $path });
256             }
257              
258             sub files {
259 14     14 1 1120659 my $self = shift;
260 14         103 return $self->schema->resultset('File');
261             }
262              
263              
264             sub dist {
265 2     2 1 10602 my($self, $dist) = @_;
266              
267 2         16 return $self->dists->single({ name => $dist });
268             }
269              
270              
271             sub releases {
272 11     11 1 95 my($self, $dist) = @_;
273              
274 11 100       149 return $self->schema->resultset("Release") unless defined $dist;
275 2         20 return $self->schema->resultset("Release")->search({ dist => $dist });
276             }
277              
278              
279             sub release {
280 0     0 1 0 my($self, $dist, $version) = @_;
281              
282 0         0 return $self->releases($dist)->single({ version => $version });
283             }
284              
285              
286             sub dists {
287 5     5 1 179 my $self = shift;
288              
289 5         46 return $self->schema->resultset("Dist");
290             }
291              
292              
293             =head1 NAME
294              
295             BackPAN::Index - An interface to the BackPAN index
296              
297             =head1 SYNOPSIS
298              
299             use BackPAN::Index;
300             my $backpan = BackPAN::Index->new;
301              
302             # These are all DBIx::Class::ResultSet's
303             my $files = $backpan->files;
304             my $dists = $backpan->dists;
305             my $releases = $backpan->releases("Acme-Pony");
306              
307             # Use DBIx::Class::ResultSet methods on them
308             my $release = $releases->single({ version => '1.23' });
309              
310             my $dist = $backpan->dist("Test-Simple");
311             my $releases = $dist->releases;
312              
313             =head1 DESCRIPTION
314              
315             This downloads, caches and parses the BackPAN index into a local
316             database for efficient querying.
317              
318             Its a pretty thin wrapper around DBIx::Class returning
319             L objects which makes it efficient and
320             flexible.
321              
322             The Comprehensive Perl Archive Network (CPAN) is a very useful
323             collection of Perl code. However, in order to keep CPAN relatively
324             small, authors of modules can delete older versions of modules to only
325             let CPAN have the latest version of a module. BackPAN is where these
326             deleted modules are backed up. It's more like a full CPAN mirror, only
327             without the deletions. This module provides an index of BackPAN and
328             some handy methods.
329              
330             =head1 METHODS
331              
332             =head2 new
333              
334             my $backpan = BackPAN::Index->new(\%options);
335              
336             Create a new object representing the BackPAN index.
337              
338             It will, if necessary, download the BackPAN index and compile it into
339             a database for efficient storage. Initial creation is slow, but it
340             will be cached.
341              
342             new() takes some options
343              
344             =head3 update
345              
346             Because it is rather large, BackPAN::Index caches a copy of the
347             BackPAN index and builds a local database to speed access. This flag
348             controls if the local index is updated.
349              
350             If true, forces an update of the BACKPAN index.
351              
352             If false, the index will never be updated even if the cache is
353             expired. It will always create a new index if one does not exist.
354              
355             By default the index is cached and checked for updates according to
356             C<<$backpan->cache_ttl>>.
357              
358             =head3 cache_ttl
359              
360             How many seconds before checking for an updated index.
361              
362             Defaults to an hour.
363              
364             =head3 debug
365              
366             If true, debug messages will be printed.
367              
368             Defaults to false.
369              
370             =head3 releases_only_from_authors
371              
372             If true, only files in the C directory will be considered as
373             releases. If false any file in the index may be considered for a
374             release.
375              
376             Defaults to true.
377              
378             =head3 cache_dir
379              
380             Location of the cache directory.
381              
382             Defaults to whatever L does.
383              
384             =head3 backpan_index_url
385              
386             URL to the BackPAN index.
387              
388             Defaults to a sensible location.
389              
390              
391             =head2 files
392              
393             my $files = $backpan->files;
394              
395             Returns a ResultSet representing all the files on BackPAN.
396              
397             =head2 files_by
398              
399             my $files = $backpan->files_by($cpanid);
400             my @files = $backpan->files_by($cpanid);
401              
402             Returns all the files by a given $cpanid.
403              
404             Returns either a list of BackPAN::Index::Files or a ResultSet.
405              
406             =cut
407              
408             sub files_by {
409 0     0 1   my $self = shift;
410 0           my $cpanid = shift;
411              
412 0           return $self->files->search({ "releases.cpanid" => $cpanid }, { join => "releases" });
413             }
414              
415             =head2 dists
416              
417             my $dists = $backpan->dists;
418              
419             Returns a ResultSet representing all the distributions on BackPAN.
420              
421             =head2 dist
422              
423             my $dists = $backpan->dist($dist_name);
424              
425             Returns a single BackPAN::Index::Dist object for $dist_name.
426              
427             =head2 dists_by
428              
429             my $dists = $backpan->dists_by($cpanid);
430             my @dists = $backpan->dists_by($cpanid);
431              
432             Returns the dists which contain at least one release by the given
433             $cpanid.
434              
435             Returns either a ResultSet or a list of the Dists.
436              
437             =cut
438              
439             sub dists_by {
440 0     0 1   my $self = shift;
441 0           my $cpanid = shift;
442              
443 0           return $self->dists->search({ "releases.cpanid" => $cpanid }, { join => "releases", distinct => 1 });
444             }
445              
446              
447             =head2 dists_changed_since
448              
449             my $dists = $backpan->dists_changed_since($time);
450              
451             Returns a ResultSet of distributions which have had releases at or after after $time.
452              
453             =cut
454              
455             sub dists_changed_since {
456 0     0 1   my $self = shift;
457 0           my $time = shift;
458              
459 0           return $self->dists->search( latest_date => \">= $time" );
460             }
461              
462             =head2 releases
463              
464             my $all_releases = $backpan->releases();
465             my $dist_releases = $backpan->releases($dist_name);
466              
467             Returns a ResultSet representing all the releases on BackPAN. If a
468             $dist_name is given it returns the releases of just one distribution.
469              
470             =head2 release
471              
472             my $release = $backpan->release($dist_name, $version);
473              
474             Returns a single BackPAN::Index::Release object for the given
475             $dist_name and $version.
476              
477             =head2 releases_by
478              
479             my $releases = $backpan->releases_by($cpanid);
480             my @releases = $backpan->releases_by($cpanid);
481              
482             Returns all the releases of a single author.
483              
484             Returns either a list of Releases or a ResultSet representing those releases.
485              
486             =cut
487              
488             sub releases_by {
489 0     0 1   my $self = shift;
490 0           my $cpanid = shift;
491              
492 0           return $self->releases->search({ cpanid => $cpanid });
493             }
494              
495              
496             =head2 releases_since
497              
498             my $releases = $backpan->releases_since($time);
499              
500             Returns a ResultSet of releases which were released at or after $time.
501              
502             =cut
503              
504             sub releases_since {
505 0     0 1   my $self = shift;
506 0           my $time = shift;
507              
508 0           return $self->releases->search( date => \">= $time" );
509             }
510              
511              
512             =head1 EXAMPLES
513              
514             The real power of BackPAN::Index comes from L.
515             Its very flexible and very powerful but not always obvious how to get
516             it to do things. Here's some examples.
517              
518             # How many files are on BackPAN?
519             my $count = $backpan->files->count;
520              
521             # How big is BackPAN?
522             my $size = $backpan->files->get_column("size")->sum;
523              
524             # What are the names of all the distributions?
525             my @names = $backpan->dists->get_column("name")->all;
526              
527             # What path contains this release?
528             my $path = $backpan->release("Acme-Pony", 1.01)->path;
529              
530             # Get all the releases of Moose ordered by version
531             my @releases = $backpan->dist("Moose")->releases
532             ->search(undef, { order_by => "version" });
533              
534             =head1 AUTHOR
535              
536             Michael G Schwern
537              
538             =head1 COPYRIGHT
539              
540             Copyright 2009, Michael G Schwern
541              
542             =head1 LICENSE
543              
544             This module is free software; you can redistribute it or modify it under
545             the same terms as Perl itself.
546              
547             =head1 SEE ALSO
548              
549             L, L,
550             L, L
551              
552             Repository: L
553             Bugs: L
554              
555             =cut
556              
557             1;