File Coverage

blib/lib/Perl/Metrics2.pm

Criterion	Covered	Total	%
statement	52	54	96.3
branch			n/a
condition			n/a
subroutine	18	18	100.0
pod			n/a
total	70	72	97.2

line	stmt	sub	time	code
1				package Perl::Metrics2;
2
3				=pod
4
5				=head1 NAME
6
7				Perl::Metrics2 - Perl metrics storage and processing engine
8
9				=head1 DESCRIPTION
10
11				B
12
13				B is a 2nd-generation implementation of the Perl Code
14				Metrics System.
15
16				The Perl Code Metrics System is a module which provides a Perl document
17				metrics processing engine, and a database in which to store the
18				resulting metrics data.
19
20				The intent is to be able to take a large collection of Perl documents,
21				and relatively easily parse the files and run a series of processes on
22				the documents.
23
24				The resulting data can then be stored, and later used to generate useful
25				information about the documents.
26
27				=head2 General Structure
28
29				Perl::Metrics2 consists of two primary elements. Firstly, an
30				L database that stores the metrics informationg.
31
32				See L for the data class stored in the
33				database.
34
35				The second element is a plugin structure for creating metrics packages,
36				so that the metrics capture can be done independant of the underlying
37				mechanisms used for parsing, storage and analysis.
38
39				See L for more information.
40
41				=head2 Getting Started
42
43				C comes with on default plugin,
44				L, which provides a sampling of metrics.
45
46				To get started load the module, providing the database location as a
47				param (it will create it if needed). Then call the C
48				method, providing it with an absolute path to a directory of Perl code
49				on the local filesystem.
50
51				C will work on the files in the directory, and when it
52				finishes you will have a nice database full of metrics data about your
53				files.
54
55				Of course, how you actually USE that data is up to you, but you can
56				query L just like any other L
57				database once you have collected it all.
58
59				=head1 METHODS
60
61				=cut
62
63	2	2	30829	use 5.008005;
	2		6
	2		77
64	2	2	10	use strict;
	2		3
	2		56
65	2	2	10	use Carp ();
	2		13
	2		25
66	2	2	11926	use DBI ();
	2		40511
	2		68
67	2	2	2089	use Time::HiRes ();
	2		3461
	2		48
68	2	2	1604	use Time::Elapsed ();
	2		10282
	2		57
69	2	2	21	use File::Spec ();
	2		4
	2		36
70	2	2	2270	use File::Next ();
	2		4782
	2		52
71	2	2	1998	use File::HomeDir ();
	2		14229
	2		53
72	2	2	1903	use File::ShareDir ();
	2		14515
	2		56
73	2	2	5183	use File::Find::Rule ();
	2		21003
	2		54
74	2	2	2096	use File::Find::Rule::VCS ();
	2		2399
	2		44
75	2	2	1863	use File::Find::Rule::Perl ();
	2		20070
	2		51
76	2	2	20	use Params::Util ();
	2		4
	2		48
77	2	2	2373	use Process ();
	2		484
	2		38
78	2	2	1830	use Process::Storable ();
	2		44892
	2		50
79	2	2	1819	use Process::Delegatable ();
	2		96849
	2		114
80	2	2	2722	use PPI::Util ();
	0
	0
81				use PPI::Document ();
82				use PPI::Cache ();
83				use Module::Pluggable;
84
85				our $VERSION = '0.06';
86
87				use constant ORLITE_FILE => File::Spec->catfile(
88				File::HomeDir->my_data,
89				($^O eq 'MSWin32' ? 'Perl' : '.perl'),
90				'Perl-Metrics2',
91				'Perl-Metrics2.sqlite',
92				);
93
94				use constant ORLITE_TIMELINE => File::Spec->catdir(
95				File::ShareDir::dist_dir('Perl-Metrics2'),
96				'timeline',
97				);
98
99				use ORLite 1.21 ();
100				use ORLite::Migrate 0.03 {
101				file => ORLITE_FILE,
102				create => 1,
103				timeline => ORLITE_TIMELINE,
104				user_version => 3,
105				};
106
107				use Perl::Metrics2::CpanFile ();
108
109
110
111
112
113				#####################################################################
114				# Constructor
115
116				sub new {
117				my $class = shift;
118				my $self = bless { @_,
119				plugins => {},
120				}, $class;
121
122				# Load the plugins
123				foreach my $plugin ( $class->plugins ) {
124				eval "require $plugin";
125				die $@ if $@;
126				$self->{plugins}->{$plugin} = $plugin->new;
127				}
128
129				# Study if needed.
130				if ( $self->study ) {
131				# If a document is present in all of the plugins
132				# use a more efficient single scalar.
133				my $all = scalar keys %{$self->{plugins}};
134				my $sql = 'SELECT md5, package FROM file_metric';
135				my $sth = $self->prepare($sql) or die("prepare: $DBI::errstr");
136				$sth->execute or die("execute: $DBI::errstr");
137				my %seen = ();
138				while ( my $row = $sth->fetchrow_arrayref ) {
139				my $md5 = $row->[0];
140				my $pkg = $row->[1];
141				unless ( $seen{$md5} ) {
142				$seen{$md5} \|\|= {};
143				}
144				unless ( ref $seen{$md5} ) {
145				# All registered already
146				next;
147				}
148				$seen{$md5}->{$pkg} = 1;
149				if ( scalar keys %{$seen{$md5}} == $all ) {
150				$seen{$md5} = 1;
151				}
152				}
153				$sth->finish or die("finish: $DBI::errstr");
154				$self->{seen} = \%seen;
155				}
156
157				# Initialise the PPI cache if available
158				if ( $self->cache ) {
159				PPI::Cache->import( path => $self->cache );
160				}
161
162				return $self;
163				}
164
165				sub study {
166				$_[0]->{study};
167				}
168
169				sub cache {
170				$_[0]->{cache};
171				}
172
173				sub seen {
174				my $self = shift;
175				my $md5 = shift;
176				my $seen = $self->{seen}->{$md5};
177
178				# Document was seen by none
179				return 0 unless $seen;
180
181				# Document was seen by all
182				return 1 if not ref $seen;
183
184				# Seen by a specific plugin?
185				if ( @_ ) {
186				return 1 if $seen->{$_[0]};
187				}
188
189				return 0;
190				}
191
192
193
194
195
196				#####################################################################
197				# Main Methods
198
199				sub process_cache {
200				my $self = shift;
201				unless ( $self->cache ) {
202				Carp::croak("No cache provided, cannot process_cache");
203				}
204				unless ( $self->study ) {
205				Carp::croak("Must have study true to process_cache");
206				}
207
208				$\| = 1;
209
210				# Remove indexes to speed up inserts
211				$self->trace("Removing indexes for faster inserts...");
212				foreach my $col ( qw{ md5 name package value version } ) {
213				my $sql = "DROP INDEX IF EXISTS file_metric__$col";
214				$self->trace($sql);
215				Perl::Metrics2->do($sql);
216				}
217
218				# Find all the files in the cache
219				$self->trace("Processing cache directory " . $self->cache . "...");
220				$self->begin;
221				my $count = 0;
222				my $files = 0;
223				my $cache = PPI::Document->get_cache;
224				my $search = File::Next::files( {
225				sort_files => 1,
226				}, $self->cache );
227				while ( my @file = $search->() ) {
228				$file[1] =~ /([a-f0-9]+)\.ppi\z/ or next;
229				(++$files % 100) or print '.';
230
231				# Filter out things we've done already
232				my $md5 = $1;
233				$self->seen($md5) and next;
234				print "$1\n";
235
236				# Fetch the document from the cache
237				my $document = $cache->get_document($md5);
238				unless ( $document ) {
239				warn("Failed to retrieve $md5 from the cache");
240				next;
241				}
242
243				# Process the document
244				$self->process_document(
245				document => $document,
246				md5 => $md5,
247				hintsafe => 1,
248				);
249				next if ++$count % 100;
250				$self->commit_begin;
251				}
252				$self->commit;
253				print "\n";
254
255				# Add the indexes back to the database
256				$self->trace("Restoring indexes...");
257				foreach my $col ( qw{ md5 name package value version } ) {
258				my $sql = "CREATE INDEX IF NOT EXISTS file_metric__$col ON file_metric ( $col )";
259				$self->trace($sql);
260				Perl::Metrics2->do($sql);
261				}
262
263				return 1;
264				}
265
266				sub process_distribution {
267				my $self = shift;
268
269				# Get and check the directory name
270				my $path = File::Spec->canonpath(shift);
271				unless ( defined Params::Util::_STRING($path) ) {
272				Carp::croak("Did not pass a file name to index_file");
273				}
274				unless ( File::Spec->file_name_is_absolute($path) ) {
275				Carp::croak("Cannot index relative path '$path'. Must be absolute");
276				}
277				Carp::croak("Cannot index '$path'. File does not exist") unless -d $path;
278				Carp::croak("Cannot index '$path'. No read permissions") unless -r _;
279
280				# Find the documents
281				my $files = $self->find_files($path);
282				my @files = File::Find::Rule->ignore_svn->no_index->perl_module->in($path);
283				$self->trace("$path: Found " . scalar(@files) . " files");
284				foreach my $file ( @files ) {
285				$self->trace($file);
286				$self->process_file($file);
287				}
288				return 1;
289				}
290
291				sub process_file {
292				my $self = shift;
293
294				# Get and check the filename
295				my $path = File::Spec->canonpath(shift);
296				unless ( defined Params::Util::_STRING($path) ) {
297				Carp::croak("Did not pass a file name to index_file");
298				}
299				unless ( File::Spec->file_name_is_absolute($path) ) {
300				Carp::croak("Cannot index relative path '$path'. Must be absolute");
301				}
302				Carp::croak("Cannot index '$path'. File does not exist") unless -f $path;
303				Carp::croak("Cannot index '$path'. No read permissions") unless -r _;
304
305				if ( $self->study ) {
306				# If and only if every plugin has seen the document
307				# we can shortcut and don't need to load it.
308				my $md5 = PPI::Util::md5hex_file($path);
309				return 1 if $self->seen($md5);
310				}
311
312				# Load the document
313				my $document = PPI::Document->new( $path,
314				readonly => 1,
315				);
316				unless ( $document ) {
317				warn("Failed to parse '$path'");
318				next;
319				}
320				$self->process_document(
321				document => $document,
322				);
323				}
324
325				# Forcefully process a docucment
326				sub process_document {
327				my $self = shift;
328				my %params = (@_ > 1) ? @_ : ( document => $_[0] );
329				my $document = $params{document};
330				my $md5 = $params{md5} \|\| $document->hex_id;
331				my $hintsafe = $params{hintsafe};
332
333				# Filter out plugins we don't need to rerun
334				# and sort plugins with destructive last
335				my $plugins = $self->{plugins};
336				my @names = sort {
337				$plugins->{$a}->destructive <=> $plugins->{$b}->destructive
338				or
339				$a cmp $b
340				} grep {
341				not $self->seen($md5, $_)
342				} keys %$plugins;
343
344				# Create the plugin objects
345				foreach my $name ( @names ) {
346				# Clone the document if the plugin is destructive, UNLESS it is the
347				# last destructive plugin. If so, let it destroy the document anyway
348				# since we won't be needing it any more.
349				if ( $plugins->{$name}->destructive and $name ne $names[-1] ) {
350				# Run the plugin on a copy
351				my $copy = $document->clone;
352				$plugins->{$name}->process_document(
353				document => $document,
354				md5 => $md5,
355				hintsafe => $hintsafe,
356				);
357				} else {
358				$plugins->{$name}->process_document(
359				document => $document,
360				md5 => $md5,
361				hintsafe => $hintsafe,
362				);
363				}
364				}
365
366				return 1;
367				}
368
369				sub index_distribution {
370				my $self = shift;
371				my $release = shift;
372				my $path = shift;
373				my $hintsafe = !! shift;
374
375				# Find the documents
376				my $files = $self->perl_files($path);
377
378				# Generate the md5 checksums for the files
379				my %md5 = map {
380				$_ => PPI::Util::md5hex_file(
381				File::Spec->catfile($path, $_)
382				)
383				} sort keys %$files;
384
385				# Flush and push the files into the database
386				unless ( $hintsafe ) {
387				Perl::Metrics2::CpanFile->delete(
388				'where release = ?', $release,
389				);
390				}
391				foreach my $file ( sort keys %$files ) {
392				Perl::Metrics2::CpanFile->create(
393				release => $release,
394				file => $file,
395				md5 => $md5{$file},
396				indexable => $files->{$file},
397				);
398				}
399
400				return 1;
401				}
402
403
404
405
406
407				#####################################################################
408				# Index Optimisation Methods
409
410				my @INDEX = (
411				[ 'file_metric', 'md5' ],
412				[ 'file_metric', 'name' ],
413				[ 'file_metric', 'package' ],
414				[ 'file_metric', 'value' ],
415				[ 'file_metric', 'version' ],
416				[ 'cpan_file', 'release' ],
417				[ 'cpan_file', 'file' ],
418				[ 'cpan_file', 'md5' ],
419				[ 'cpan_file', 'indexable' ],
420				);
421
422				sub index_remove {
423				my $self = shift;
424
425				$self->trace("Removing indexes...");
426				foreach ( @INDEX ) {
427				my $sql = "DROP INDEX IF EXISTS $_->[0]__$_->[1]";
428				$self->trace($sql);
429				Perl::Metrics2->do($sql);
430				}
431
432				return 1;
433				}
434
435				sub index_restore {
436				my $self = shift;
437
438				$self->trace("Restoring indexes...");
439				foreach ( @INDEX ) {
440				my $sql = "CREATE INDEX IF NOT EXISTS $_->[0]__$_->[1] ON $_->[0] ( $_->[1] )";
441				$self->trace($sql);
442				Perl::Metrics2->do($sql);
443				}
444
445				return 1;
446				}
447
448
449
450
451
452				######################################################################
453				# File Search
454
455				sub perl_files {
456				my $class = shift;
457				my $path = shift;
458
459				# Find the basic file list
460				my @basic = File::Find::Rule->ignore_svn->perl_file->relative->in($path);
461				my %files = map { $_ => 0 } @basic;
462
463				# Find the subset that will be indexed
464				# If parsing the META.yml failes, don't ignore anything
465				eval {
466				my @index = File::Find::Rule->ignore_svn->no_index->perl_module->relative->in($path);
467				foreach ( @index ) {
468				$files{$_} = 1;
469				}
470				};
471
472				return \%files;
473				}
474
475				sub perl_modules {
476				my $class = shift;
477				my $path = shift;
478
479				# Find the basic file list
480				my @basic = File::Find::Rule->ignore_svn->perl_module->relative->in($path);
481				my %files = map { $_ => 0 } @basic;
482
483				# Find the subset that will be indexed
484				# If parsing the META.yml failes, don't ignore anything
485				eval {
486				my @index = File::Find::Rule->ignore_svn->no_index->perl_module->relative->in($path);
487				foreach ( @index ) {
488				$files{$_} = 1;
489				}
490				};
491
492				return \%files;
493				}
494
495
496
497
498
499				#####################################################################
500				# Support Methods
501
502				sub selectcol_index {
503				my ($dbh, $stmt, $attr, @bind) = @_;
504				my $sth = (ref $stmt) ? $stmt : $dbh->prepare($stmt, $attr);
505				return unless $sth;
506				$sth->execute(@bind) \|\| return;
507				my $column = $attr->{Columns} ? $attr->{Columns}->[0] : 1;
508				my $value = undef;
509				$sth->bind_col($column, \$value) \|\| return;
510				my $row = 0;
511				my %hash = ();
512				if ( my $max = $attr->{MaxRows} ) {
513				while ( $sth->fetch ) {
514				last if ++$row > $max;
515				$hash{$value} = 1;
516				}
517				} else {
518				while ( $sth->fetch ) {
519				$hash{$value} = 1;
520				}
521				}
522				return \%hash;
523				}
524
525				sub in {
526				my $self = shift;
527				my $sql = '( ' . join( ', ', map { '?' } @_ ) . ' )';
528				return ( $sql, @_ );
529				}
530
531				sub trace {
532				print STDERR map { "# $_\n" } @_[1..$#_];
533				}
534
535				1;
536
537				=pod
538
539				=head1 SUPPORT
540
541				Bugs should be reported via the CPAN bug tracker at
542
543				L
544
545				For other issues, contact the author.
546
547				=head1 AUTHOR
548
549				Adam Kennedy Eadamk@cpan.orgE
550
551				=head1 COPYRIGHT
552
553				Copyright 2009 Adam Kennedy.
554
555				This program is free software; you can redistribute
556				it and/or modify it under the same terms as Perl itself.
557
558				The full text of the license can be found in the
559				LICENSE file included with this module.
560
561				=cut