File Coverage

blib/lib/DBI/ProfileData.pm

Criterion	Covered	Total	%
statement	151	204	74.0
branch	42	84	50.0
condition	0	3	0.0
subroutine	17	21	80.9
pod	11	12	91.6
total	221	324	68.2

line	stmt	bran	cond	sub	pod	time	code
1							package DBI::ProfileData;
2	2			2		2995	use strict;
	2					3
	2					115
3
4							=head1 NAME
5
6							DBI::ProfileData - manipulate DBI::ProfileDumper data dumps
7
8							=head1 SYNOPSIS
9
10							The easiest way to use this module is through the dbiprof frontend
11							(see L for details):
12
13							dbiprof --number 15 --sort count
14
15							This module can also be used to roll your own profile analysis:
16
17							# load data from dbi.prof
18							$prof = DBI::ProfileData->new(File => "dbi.prof");
19
20							# get a count of the records (unique paths) in the data set
21							$count = $prof->count();
22
23							# sort by longest overall time
24							$prof->sort(field => "longest");
25
26							# sort by longest overall time, least to greatest
27							$prof->sort(field => "longest", reverse => 1);
28
29							# exclude records with key2 eq 'disconnect'
30							$prof->exclude(key2 => 'disconnect');
31
32							# exclude records with key1 matching /^UPDATE/i
33							$prof->exclude(key1 => qr/^UPDATE/i);
34
35							# remove all records except those where key1 matches /^SELECT/i
36							$prof->match(key1 => qr/^SELECT/i);
37
38							# produce a formatted report with the given number of items
39							$report = $prof->report(number => 10);
40
41							# clone the profile data set
42							$clone = $prof->clone();
43
44							# get access to hash of header values
45							$header = $prof->header();
46
47							# get access to sorted array of nodes
48							$nodes = $prof->nodes();
49
50							# format a single node in the same style as report()
51							$text = $prof->format($nodes->[0]);
52
53							# get access to Data hash in DBI::Profile format
54							$Data = $prof->Data();
55
56							=head1 DESCRIPTION
57
58							This module offers the ability to read, manipulate and format
59							L profile data.
60
61							Conceptually, a profile consists of a series of records, or nodes,
62							each of each has a set of statistics and set of keys. Each record
63							must have a unique set of keys, but there is no requirement that every
64							record have the same number of keys.
65
66							=head1 METHODS
67
68							The following methods are supported by DBI::ProfileData objects.
69
70							=cut
71
72							our $VERSION = "2.010008";
73
74	2			2		14	use Carp qw(croak);
	2					4
	2					88
75	2			2		12	use Symbol;
	2					5
	2					119
76	2			2		14	use Fcntl qw(:flock);
	2					4
	2					209
77
78	2			2		14	use DBI::Profile qw(dbi_profile_merge);
	2					4
	2					6385
79
80							# some constants for use with node data arrays
81							sub COUNT () { 0 };
82							sub TOTAL () { 1 };
83							sub FIRST () { 2 };
84							sub SHORTEST () { 3 };
85							sub LONGEST () { 4 };
86							sub FIRST_AT () { 5 };
87							sub LAST_AT () { 6 };
88							sub PATH () { 7 };
89
90
91							my $HAS_FLOCK = (defined $ENV{DBI_PROFILE_FLOCK})
92							? $ENV{DBI_PROFILE_FLOCK}
93							: do { local $@; eval { flock STDOUT, 0; 1 } };
94
95
96							=head2 $prof = DBI::ProfileData->new(File => "dbi.prof")
97
98							=head2 $prof = DBI::ProfileData->new(File => "dbi.prof", Filter => sub { ... })
99
100							=head2 $prof = DBI::ProfileData->new(Files => [ "dbi.prof.1", "dbi.prof.2" ])
101
102							Creates a new DBI::ProfileData object. Takes either a single file
103							through the File option or a list of Files in an array ref. If
104							multiple files are specified then the header data from the first file
105							is used.
106
107							=head3 Files
108
109							Reference to an array of file names to read.
110
111							=head3 File
112
113							Name of file to read. Takes precedence over C.
114
115							=head3 DeleteFiles
116
117							If true, the files are deleted after being read.
118
119							Actually the files are renamed with a C suffix before being read,
120							and then, after reading all the files, they're all deleted together.
121
122							The files are locked while being read which, combined with the rename, makes it
123							safe to 'consume' files that are still being generated by L.
124
125							=head3 Filter
126
127							The C parameter can be used to supply a code reference that can
128							manipulate the profile data as it is being read. This is most useful for
129							editing SQL statements so that slightly different statements in the raw data
130							will be merged and aggregated in the loaded data. For example:
131
132							Filter => sub {
133							my ($path_ref, $data_ref) = @_;
134							s/foo = '.*?'/foo = '...'/ for @$path_ref;
135							}
136
137							Here's an example that performs some normalization on the SQL. It converts all
138							numbers to C and all quoted strings to C~~. It can also convert digits to~~
139							N within names. Finally, it summarizes long "IN (...)" clauses.
140
141							It's aggressive and simplistic, but it's often sufficient, and serves as an
142							example that you can tailor to suit your own needs:
143
144							Filter => sub {
145							my ($path_ref, $data_ref) = @_;
146							local $_ = $path_ref->[0]; # whichever element contains the SQL Statement
147							s/\b\d+\b/N/g; # 42 -> N
148							s/\b0x[0-9A-Fa-f]+\b/N/g; # 0xFE -> N
149							s/'.*?'/'S'/g; # single quoted strings (doesn't handle escapes)
150							s/".*?"/"S"/g; # double quoted strings (doesn't handle escapes)
151							# convert names like log_20001231 into log_NNNNNNNN, controlled by $opt{n}
152							s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n};
153							# abbreviate massive "in (...)" statements and similar
154							s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg;
155							}
156
157							It's often better to perform this kinds of normalization in the DBI while the
158							data is being collected, to avoid too much memory being used by storing profile
159							data for many different SQL statement. See L.
160
161							=cut
162
163							sub new {
164	4			4	1	2691	my $pkg = shift;
165	4					43	my $self = {
166							Files => [ "dbi.prof" ],
167							Filter => undef,
168							DeleteFiles => 0,
169							LockFile => $HAS_FLOCK,
170							_header => {},
171							_nodes => [],
172							_node_lookup => {},
173							_sort => 'none',
174							@_
175							};
176	4					12	bless $self, $pkg;
177
178							# File (singular) overrides Files (plural)
179	4	50				27	$self->{Files} = [ $self->{File} ] if exists $self->{File};
180
181	4					17	$self->_read_files();
182	4					23	return $self;
183							}
184
185							# read files into _header and _nodes
186							sub _read_files {
187	4			4		8	my $self = shift;
188	4					10	my $files = $self->{Files};
189	4					7	my $read_header = 0;
190	4					7	my @files_to_delete;
191
192	4					15	my $fh = gensym;
193	4					59	foreach (@$files) {
194	4					11	my $filename = $_;
195
196	4	100				31	if ($self->{DeleteFiles}) {
197	2					10	my $newfilename = $filename . ".deleteme";
198	2	50				15	if ($^O eq 'VMS') {
199							# VMS default filesystem can only have one period
200	0					0	$newfilename = $filename . 'deleteme';
201							}
202							# will clobber an existing $newfilename
203	2	50				83	rename($filename, $newfilename)
204							or croak "Can't rename($filename, $newfilename): $!";
205							# On a versioned filesystem we want old versions to be removed
206	2					33	1 while (unlink $filename);
207	2					6	$filename = $newfilename;
208							}
209
210	4	50				156	open($fh, "<", $filename)
211							or croak("Unable to read profile file '$filename': $!");
212
213							# lock the file in case it's still being written to
214							# (we'll be forced to wait till the write is complete)
215	4	50				59	flock($fh, LOCK_SH) if $self->{LockFile};
216
217	4	50				51	if (-s $fh) { # not empty
218	4	50				31	$self->_read_header($fh, $filename, $read_header ? 0 : 1);
219	4					10	$read_header = 1;
220	4					20	$self->_read_body($fh, $filename);
221							}
222	4					53	close($fh); # and release lock
223
224							push @files_to_delete, $filename
225	4	100				87	if $self->{DeleteFiles};
226							}
227	4					18	for (@files_to_delete){
228							# for versioned file systems
229	2					175	1 while (unlink $_);
230	2	50				32	if(-e $_){
231	0					0	warn "Can't delete '$_': $!";
232							}
233							}
234
235							# discard node_lookup now that all files are read
236	4					34	delete $self->{_node_lookup};
237							}
238
239							# read the header from the given $fh named $filename. Discards the
240							# data unless $keep.
241							sub _read_header {
242	4			4		16	my ($self, $fh, $filename, $keep) = @_;
243
244							# get profiler module id
245	4					83	my $first = <$fh>;
246	4					14	chomp $first;
247	4	50				21	$self->{_profiler} = $first if $keep;
248
249							# collect variables from the header
250	4					9	local $_;
251	4					23	while (<$fh>) {
252	12					20	chomp;
253	12	100				32	last unless length $_;
254	8	50				58	/^(\S+)\s=\s(.*)/
255							or croak("Syntax error in header in $filename line $.: $_");
256							# XXX should compare new with existing (from previous file)
257							# and warn if they differ (different program or path)
258	8	50				31	$self->{_header}{$1} = unescape_key($2) if $keep;
259							}
260							}
261
262
263							sub unescape_key { # inverse of escape_key() in DBI::ProfileDumper
264	374			374	0	565	local $_ = shift;
265	374					560	s/(?
266	374					454	s/(?
267	374					453	s/\\\\/\\/g; # \\ to \
268	374					1417	return $_;
269							}
270
271
272							# reads the body of the profile data
273							sub _read_body {
274	4			4		15	my ($self, $fh, $filename) = @_;
275	4					13	my $nodes = $self->{_nodes};
276	4					10	my $lookup = $self->{_node_lookup};
277	4					9	my $filter = $self->{Filter};
278
279							# build up node array
280	4					14	my @path = ("");
281	4					10	my (@data, $path_key);
282	4					9	local $_;
283	4					21	while (<$fh>) {
284	640					925	chomp;
285	640	100				2350	if (/^\+\s+(\d+)\s?(.*)/) {
		50
286							# it's a key
287	366					1041	my ($key, $index) = ($2, $1 - 1);
288
289	366					583	$#path = $index; # truncate path to new length
290	366					541	$path[$index] = unescape_key($key); # place new key at end
291
292							}
293							elsif (s/^=\s+//) {
294							# it's data - file in the node array with the path in index 0
295							# (the optional minus is to make it more robust against systems
296							# with unstable high-res clocks - typically due to poor NTP config
297							# of kernel SMP behaviour, i.e. min time may be -0.000008))
298
299	274					906	@data = split / /, $_;
300
301							# corrupt data?
302	274	50				550	croak("Invalid number of fields in $filename line $.: $_")
303							unless @data == 7;
304	274	50				781	croak("Invalid leaf node characters $filename line $.: $_")
305							unless m/^[-+ 0-9eE\.]+$/;
306
307							# hook to enable pre-processing of the data - such as mangling SQL
308							# so that slightly different statements get treated as the same
309							# and so merged in the results
310	274	100				796	$filter->(\@path, \@data) if $filter;
311
312							# elements of @path can't have NULLs in them, so this
313							# forms a unique string per @path. If there's some way I
314							# can get this without arbitrarily stripping out a
315							# character I'd be happy to hear it!
316	274					1342	$path_key = join("\0",@path);
317
318							# look for previous entry
319	274	100				496	if (exists $lookup->{$path_key}) {
320							# merge in the new data
321	208					1707	dbi_profile_merge($nodes->[$lookup->{$path_key}], \@data);
322							} else {
323							# insert a new node - nodes are arrays with data in 0-6
324							# and path data after that
325	66					212	push(@$nodes, [ @data, @path ]);
326
327							# record node in %seen
328	66					306	$lookup->{$path_key} = $#$nodes;
329							}
330							}
331							else {
332	0					0	croak("Invalid line type syntax error in $filename line $.: $_");
333							}
334							}
335							}
336
337
338
339							=head2 $copy = $prof->clone();
340
341							Clone a profile data set creating a new object.
342
343							=cut
344
345							sub clone {
346	4			4	1	1154	my $self = shift;
347
348							# start with a simple copy
349	4					41	my $clone = bless { %$self }, ref($self);
350
351							# deep copy nodes
352	4					34	$clone->{_nodes} = [ map { [ @$_ ] } @{$self->{_nodes}} ];
	78					227
	4					14
353
354							# deep copy header
355	4					9	$clone->{_header} = { %{$self->{_header}} };
	4					16
356
357	4					39	return $clone;
358							}
359
360							=head2 $header = $prof->header();
361
362							Returns a reference to a hash of header values. These are the key
363							value pairs included in the header section of the L
364							data format. For example:
365
366							$header = {
367							Path => [ '!Statement', '!MethodName' ],
368							Program => 't/42profile_data.t',
369							};
370
371							Note that modifying this hash will modify the header data stored
372							inside the profile object.
373
374							=cut
375
376	0			0	1	0	sub header { shift->{_header} }
377
378
379							=head2 $nodes = $prof->nodes()
380
381							Returns a reference the sorted nodes array. Each element in the array
382							is a single record in the data set. The first seven elements are the
383							same as the elements provided by L. After that each key is
384							in a separate element. For example:
385
386							$nodes = [
387							[
388							2, # 0, count
389							0.0312958955764771, # 1, total duration
390							0.000490069389343262, # 2, first duration
391							0.000176072120666504, # 3, shortest duration
392							0.00140702724456787, # 4, longest duration
393							1023115819.83019, # 5, time of first event
394							1023115819.86576, # 6, time of last event
395							'SELECT foo FROM bar' # 7, key1
396							'execute' # 8, key2
397							# 6+N, keyN
398							],
399							# ...
400							];
401
402							Note that modifying this array will modify the node data stored inside
403							the profile object.
404
405							=cut
406
407	10			10	1	913	sub nodes { shift->{_nodes} }
408
409
410							=head2 $count = $prof->count()
411
412							Returns the number of items in the profile data set.
413
414							=cut
415
416	4			4	1	1230	sub count { scalar @{shift->{_nodes}} }
	4					26
417
418
419							=head2 $prof->sort(field => "field")
420
421							=head2 $prof->sort(field => "field", reverse => 1)
422
423							Sorts data by the given field. Available fields are:
424
425							longest
426							total
427							count
428							shortest
429
430							The default sort is greatest to smallest, which is the opposite of the
431							normal Perl meaning. This, however, matches the expected behavior of
432							the dbiprof frontend.
433
434							=cut
435
436
437							# sorts data by one of the available fields
438							{
439							my %FIELDS = (
440							longest => LONGEST,
441							total => TOTAL,
442							count => COUNT,
443							shortest => SHORTEST,
444							key1 => PATH+0,
445							key2 => PATH+1,
446							key3 => PATH+2,
447							);
448							sub sort {
449	10			10	1	3793	my $self = shift;
450	10					20	my $nodes = $self->{_nodes};
451	10					33	my %opt = @_;
452
453	10	50				29	croak("Missing required field option.") unless $opt{field};
454
455	10					25	my $index = $FIELDS{$opt{field}};
456
457	10	50				20	croak("Unrecognized sort field '$opt{field}'.")
458							unless defined $index;
459
460							# sort over index
461	10	100				25	if ($opt{reverse}) {
462							@$nodes = sort {
463	4					16	$a->[$index] <=> $b->[$index]
	128					155
464							} @$nodes;
465							} else {
466							@$nodes = sort {
467	6					26	$b->[$index] <=> $a->[$index]
	337					457
468							} @$nodes;
469							}
470
471							# remember how we're sorted
472	10					26	$self->{_sort} = $opt{field};
473
474	10					29	return $self;
475							}
476							}
477
478
479							=head2 $count = $prof->exclude(key2 => "disconnect")
480
481							=head2 $count = $prof->exclude(key2 => "disconnect", case_sensitive => 1)
482
483							=head2 $count = $prof->exclude(key1 => qr/^SELECT/i)
484
485							Removes records from the data set that match the given string or
486							regular expression. This method modifies the data in a permanent
487							fashion - use clone() first to maintain the original data after
488							exclude(). Returns the number of nodes left in the profile data set.
489
490							=cut
491
492							sub exclude {
493	2			2	1	16	my $self = shift;
494	2					6	my $nodes = $self->{_nodes};
495	2					10	my %opt = @_;
496
497							# find key index number
498	2					6	my ($index, $val);
499	2					11	foreach (keys %opt) {
500	2	50				19	if (/^key(\d+)$/) {
501	2					12	$index = PATH + $1 - 1;
502	2					5	$val = $opt{$_};
503	2					5	last;
504							}
505							}
506	2	50				8	croak("Missing required keyN option.") unless $index;
507
508	2	50				19	if (UNIVERSAL::isa($val,"Regexp")) {
509							# regex match
510							@$nodes = grep {
511	0	0				0	$#$_ < $index or $_->[$index] !~ /$val/
	0					0
512							} @$nodes;
513							} else {
514	2	50				8	if ($opt{case_sensitive}) {
515							@$nodes = grep {
516	0	0				0	$#$_ < $index or $_->[$index] ne $val;
	0					0
517							} @$nodes;
518							} else {
519	2					7	$val = lc $val;
520							@$nodes = grep {
521	2	50				6	$#$_ < $index or lc($_->[$index]) ne $val;
	39					123
522							} @$nodes;
523							}
524							}
525
526	2					11	return scalar @$nodes;
527							}
528
529
530							=head2 $count = $prof->match(key2 => "disconnect")
531
532							=head2 $count = $prof->match(key2 => "disconnect", case_sensitive => 1)
533
534							=head2 $count = $prof->match(key1 => qr/^SELECT/i)
535
536							Removes records from the data set that do not match the given string
537							or regular expression. This method modifies the data in a permanent
538							fashion - use clone() first to maintain the original data after
539							match(). Returns the number of nodes left in the profile data set.
540
541							=cut
542
543							sub match {
544	4			4	1	9	my $self = shift;
545	4					8	my $nodes = $self->{_nodes};
546	4					13	my %opt = @_;
547
548							# find key index number
549	4					9	my ($index, $val);
550	4					13	foreach (keys %opt) {
551	4	50				27	if (/^key(\d+)$/) {
552	4					13	$index = PATH + $1 - 1;
553	4					8	$val = $opt{$_};
554	4					9	last;
555							}
556							}
557	4	50				13	croak("Missing required keyN option.") unless $index;
558
559	4	50				19	if (UNIVERSAL::isa($val,"Regexp")) {
560							# regex match
561							@$nodes = grep {
562	0	0				0	$#$_ >= $index and $_->[$index] =~ /$val/
	0					0
563							} @$nodes;
564							} else {
565	4	50				10	if ($opt{case_sensitive}) {
566							@$nodes = grep {
567	0	0				0	$#$_ >= $index and $_->[$index] eq $val;
	0					0
568							} @$nodes;
569							} else {
570	4					10	$val = lc $val;
571							@$nodes = grep {
572	4	50				8	$#$_ >= $index and lc($_->[$index]) eq $val;
	46					164
573							} @$nodes;
574							}
575							}
576
577	4					21	return scalar @$nodes;
578							}
579
580
581							=head2 $Data = $prof->Data()
582
583							Returns the same Data hash structure as seen in L. This
584							structure is not sorted. The nodes() structure probably makes more
585							sense for most analysis.
586
587							=cut
588
589							sub Data {
590	4			4	1	1868	my $self = shift;
591	4					9	my (%Data, @data, $ptr);
592
593	4					6	foreach my $node (@{$self->{_nodes}}) {
	4					13
594							# traverse to key location
595	66					85	$ptr = \%Data;
596	66					98	foreach my $key (@{$node}[PATH .. $#$node - 1]) {
	66					94
597	66	100				139	$ptr->{$key} = {} unless exists $ptr->{$key};
598	66					99	$ptr = $ptr->{$key};
599							}
600
601							# slice out node data
602	66					81	$ptr->{$node->[-1]} = [ @{$node}[0 .. 6] ];
	66					197
603							}
604
605	4					53	return \%Data;
606							}
607
608
609							=head2 $text = $prof->format($nodes->[0])
610
611							Formats a single node into a human-readable block of text.
612
613							=cut
614
615							sub format {
616	0			0	1		my ($self, $node) = @_;
617	0						my $format;
618
619							# setup keys
620	0						my $keys = "";
621	0						for (my $i = PATH; $i <= $#$node; $i++) {
622	0						my $key = $node->[$i];
623
624							# remove leading and trailing space
625	0						$key =~ s/^\s+//;
626	0						$key =~ s/\s+$//;
627
628							# if key has newlines or is long take special precautions
629	0	0	0				if (length($key) > 72 or $key =~ /\n/) {
630	0						$keys .= " Key " . ($i - PATH + 1) . " :\n\n$key\n\n";
631							} else {
632	0						$keys .= " Key " . ($i - PATH + 1) . " : $key\n";
633							}
634							}
635
636							# nodes with multiple runs get the long entry format, nodes with
637							# just one run get a single count.
638	0	0					if ($node->[COUNT] > 1) {
639	0						$format = <
640							Count : %d
641							Total Time : %3.6f seconds
642							Longest Time : %3.6f seconds
643							Shortest Time : %3.6f seconds
644							Average Time : %3.6f seconds
645							END
646	0						return sprintf($format, @{$node}[COUNT,TOTAL,LONGEST,SHORTEST],
	0
647							$node->[TOTAL] / $node->[COUNT]) . $keys;
648							} else {
649	0						$format = <
650							Count : %d
651							Time : %3.6f seconds
652							END
653
654	0						return sprintf($format, @{$node}[COUNT,TOTAL]) . $keys;
	0
655
656							}
657							}
658
659
660							=head2 $text = $prof->report(number => 10)
661
662							Produces a report with the given number of items.
663
664							=cut
665
666							sub report {
667	0			0	1		my $self = shift;
668	0						my $nodes = $self->{_nodes};
669	0						my %opt = @_;
670
671	0	0					croak("Missing required number option") unless exists $opt{number};
672
673	0	0					$opt{number} = @$nodes if @$nodes < $opt{number};
674
675	0						my $report = $self->_report_header($opt{number});
676	0						for (0 .. $opt{number} - 1) {
677	0						$report .= sprintf("#" x 5 . "[ %d ]". "#" x 59 . "\n",
678							$_ + 1);
679	0						$report .= $self->format($nodes->[$_]);
680	0						$report .= "\n";
681							}
682	0						return $report;
683							}
684
685							# format the header for report()
686							sub _report_header {
687	0			0			my ($self, $number) = @_;
688	0						my $nodes = $self->{_nodes};
689	0						my $node_count = @$nodes;
690
691							# find total runtime and method count
692	0						my ($time, $count) = (0,0);
693	0						foreach my $node (@$nodes) {
694	0						$time += $node->[TOTAL];
695	0						$count += $node->[COUNT];
696							}
697
698	0						my $header = <
699
700							DBI Profile Data ($self->{_profiler})
701
702							END
703
704							# output header fields
705	0						while (my ($key, $value) = each %{$self->{_header}}) {
	0
706	0						$header .= sprintf(" %-13s : %s\n", $key, $value);
707							}
708
709							# output summary data fields
710	0						$header .= sprintf(<{_sort}, $count, $time);
711							Total Records : %d (showing %d, sorted by %s)
712							Total Count : %d
713							Total Runtime : %3.6f seconds
714
715							END
716
717	0						return $header;
718							}
719
720
721							1;
722
723							__END__