File Coverage

blib/lib/DBI/ProfileData.pm
Criterion Covered Total %
statement 151 204 74.0
branch 42 84 50.0
condition 0 3 0.0
subroutine 17 21 80.9
pod 11 12 91.6
total 221 324 68.2


line stmt bran cond sub pod time code
1             package DBI::ProfileData;
2 2     2   2995 use strict;
  2         3  
  2         115  
3              
4             =head1 NAME
5              
6             DBI::ProfileData - manipulate DBI::ProfileDumper data dumps
7              
8             =head1 SYNOPSIS
9              
10             The easiest way to use this module is through the dbiprof frontend
11             (see L for details):
12              
13             dbiprof --number 15 --sort count
14              
15             This module can also be used to roll your own profile analysis:
16              
17             # load data from dbi.prof
18             $prof = DBI::ProfileData->new(File => "dbi.prof");
19              
20             # get a count of the records (unique paths) in the data set
21             $count = $prof->count();
22              
23             # sort by longest overall time
24             $prof->sort(field => "longest");
25              
26             # sort by longest overall time, least to greatest
27             $prof->sort(field => "longest", reverse => 1);
28              
29             # exclude records with key2 eq 'disconnect'
30             $prof->exclude(key2 => 'disconnect');
31              
32             # exclude records with key1 matching /^UPDATE/i
33             $prof->exclude(key1 => qr/^UPDATE/i);
34              
35             # remove all records except those where key1 matches /^SELECT/i
36             $prof->match(key1 => qr/^SELECT/i);
37              
38             # produce a formatted report with the given number of items
39             $report = $prof->report(number => 10);
40              
41             # clone the profile data set
42             $clone = $prof->clone();
43              
44             # get access to hash of header values
45             $header = $prof->header();
46              
47             # get access to sorted array of nodes
48             $nodes = $prof->nodes();
49              
50             # format a single node in the same style as report()
51             $text = $prof->format($nodes->[0]);
52              
53             # get access to Data hash in DBI::Profile format
54             $Data = $prof->Data();
55              
56             =head1 DESCRIPTION
57              
58             This module offers the ability to read, manipulate and format
59             L profile data.
60              
61             Conceptually, a profile consists of a series of records, or nodes,
62             each of each has a set of statistics and set of keys. Each record
63             must have a unique set of keys, but there is no requirement that every
64             record have the same number of keys.
65              
66             =head1 METHODS
67              
68             The following methods are supported by DBI::ProfileData objects.
69              
70             =cut
71              
72             our $VERSION = "2.010008";
73              
74 2     2   14 use Carp qw(croak);
  2         4  
  2         88  
75 2     2   12 use Symbol;
  2         5  
  2         119  
76 2     2   14 use Fcntl qw(:flock);
  2         4  
  2         209  
77              
78 2     2   14 use DBI::Profile qw(dbi_profile_merge);
  2         4  
  2         6385  
79              
80             # some constants for use with node data arrays
81             sub COUNT () { 0 };
82             sub TOTAL () { 1 };
83             sub FIRST () { 2 };
84             sub SHORTEST () { 3 };
85             sub LONGEST () { 4 };
86             sub FIRST_AT () { 5 };
87             sub LAST_AT () { 6 };
88             sub PATH () { 7 };
89              
90              
91             my $HAS_FLOCK = (defined $ENV{DBI_PROFILE_FLOCK})
92             ? $ENV{DBI_PROFILE_FLOCK}
93             : do { local $@; eval { flock STDOUT, 0; 1 } };
94              
95              
96             =head2 $prof = DBI::ProfileData->new(File => "dbi.prof")
97              
98             =head2 $prof = DBI::ProfileData->new(File => "dbi.prof", Filter => sub { ... })
99              
100             =head2 $prof = DBI::ProfileData->new(Files => [ "dbi.prof.1", "dbi.prof.2" ])
101              
102             Creates a new DBI::ProfileData object. Takes either a single file
103             through the File option or a list of Files in an array ref. If
104             multiple files are specified then the header data from the first file
105             is used.
106              
107             =head3 Files
108              
109             Reference to an array of file names to read.
110              
111             =head3 File
112              
113             Name of file to read. Takes precedence over C.
114              
115             =head3 DeleteFiles
116              
117             If true, the files are deleted after being read.
118              
119             Actually the files are renamed with a C suffix before being read,
120             and then, after reading all the files, they're all deleted together.
121              
122             The files are locked while being read which, combined with the rename, makes it
123             safe to 'consume' files that are still being generated by L.
124              
125             =head3 Filter
126              
127             The C parameter can be used to supply a code reference that can
128             manipulate the profile data as it is being read. This is most useful for
129             editing SQL statements so that slightly different statements in the raw data
130             will be merged and aggregated in the loaded data. For example:
131              
132             Filter => sub {
133             my ($path_ref, $data_ref) = @_;
134             s/foo = '.*?'/foo = '...'/ for @$path_ref;
135             }
136              
137             Here's an example that performs some normalization on the SQL. It converts all
138             numbers to C and all quoted strings to C. It can also convert digits to
139             N within names. Finally, it summarizes long "IN (...)" clauses.
140              
141             It's aggressive and simplistic, but it's often sufficient, and serves as an
142             example that you can tailor to suit your own needs:
143              
144             Filter => sub {
145             my ($path_ref, $data_ref) = @_;
146             local $_ = $path_ref->[0]; # whichever element contains the SQL Statement
147             s/\b\d+\b/N/g; # 42 -> N
148             s/\b0x[0-9A-Fa-f]+\b/N/g; # 0xFE -> N
149             s/'.*?'/'S'/g; # single quoted strings (doesn't handle escapes)
150             s/".*?"/"S"/g; # double quoted strings (doesn't handle escapes)
151             # convert names like log_20001231 into log_NNNNNNNN, controlled by $opt{n}
152             s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n};
153             # abbreviate massive "in (...)" statements and similar
154             s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg;
155             }
156              
157             It's often better to perform this kinds of normalization in the DBI while the
158             data is being collected, to avoid too much memory being used by storing profile
159             data for many different SQL statement. See L.
160              
161             =cut
162              
163             sub new {
164 4     4 1 2691 my $pkg = shift;
165 4         43 my $self = {
166             Files => [ "dbi.prof" ],
167             Filter => undef,
168             DeleteFiles => 0,
169             LockFile => $HAS_FLOCK,
170             _header => {},
171             _nodes => [],
172             _node_lookup => {},
173             _sort => 'none',
174             @_
175             };
176 4         12 bless $self, $pkg;
177              
178             # File (singular) overrides Files (plural)
179 4 50       27 $self->{Files} = [ $self->{File} ] if exists $self->{File};
180              
181 4         17 $self->_read_files();
182 4         23 return $self;
183             }
184              
185             # read files into _header and _nodes
186             sub _read_files {
187 4     4   8 my $self = shift;
188 4         10 my $files = $self->{Files};
189 4         7 my $read_header = 0;
190 4         7 my @files_to_delete;
191              
192 4         15 my $fh = gensym;
193 4         59 foreach (@$files) {
194 4         11 my $filename = $_;
195              
196 4 100       31 if ($self->{DeleteFiles}) {
197 2         10 my $newfilename = $filename . ".deleteme";
198 2 50       15 if ($^O eq 'VMS') {
199             # VMS default filesystem can only have one period
200 0         0 $newfilename = $filename . 'deleteme';
201             }
202             # will clobber an existing $newfilename
203 2 50       83 rename($filename, $newfilename)
204             or croak "Can't rename($filename, $newfilename): $!";
205             # On a versioned filesystem we want old versions to be removed
206 2         33 1 while (unlink $filename);
207 2         6 $filename = $newfilename;
208             }
209              
210 4 50       156 open($fh, "<", $filename)
211             or croak("Unable to read profile file '$filename': $!");
212              
213             # lock the file in case it's still being written to
214             # (we'll be forced to wait till the write is complete)
215 4 50       59 flock($fh, LOCK_SH) if $self->{LockFile};
216              
217 4 50       51 if (-s $fh) { # not empty
218 4 50       31 $self->_read_header($fh, $filename, $read_header ? 0 : 1);
219 4         10 $read_header = 1;
220 4         20 $self->_read_body($fh, $filename);
221             }
222 4         53 close($fh); # and release lock
223              
224             push @files_to_delete, $filename
225 4 100       87 if $self->{DeleteFiles};
226             }
227 4         18 for (@files_to_delete){
228             # for versioned file systems
229 2         175 1 while (unlink $_);
230 2 50       32 if(-e $_){
231 0         0 warn "Can't delete '$_': $!";
232             }
233             }
234              
235             # discard node_lookup now that all files are read
236 4         34 delete $self->{_node_lookup};
237             }
238              
239             # read the header from the given $fh named $filename. Discards the
240             # data unless $keep.
241             sub _read_header {
242 4     4   16 my ($self, $fh, $filename, $keep) = @_;
243              
244             # get profiler module id
245 4         83 my $first = <$fh>;
246 4         14 chomp $first;
247 4 50       21 $self->{_profiler} = $first if $keep;
248              
249             # collect variables from the header
250 4         9 local $_;
251 4         23 while (<$fh>) {
252 12         20 chomp;
253 12 100       32 last unless length $_;
254 8 50       58 /^(\S+)\s*=\s*(.*)/
255             or croak("Syntax error in header in $filename line $.: $_");
256             # XXX should compare new with existing (from previous file)
257             # and warn if they differ (different program or path)
258 8 50       31 $self->{_header}{$1} = unescape_key($2) if $keep;
259             }
260             }
261              
262              
263             sub unescape_key { # inverse of escape_key() in DBI::ProfileDumper
264 374     374 0 565 local $_ = shift;
265 374         560 s/(?
266 374         454 s/(?
267 374         453 s/\\\\/\\/g; # \\ to \
268 374         1417 return $_;
269             }
270              
271              
272             # reads the body of the profile data
273             sub _read_body {
274 4     4   15 my ($self, $fh, $filename) = @_;
275 4         13 my $nodes = $self->{_nodes};
276 4         10 my $lookup = $self->{_node_lookup};
277 4         9 my $filter = $self->{Filter};
278              
279             # build up node array
280 4         14 my @path = ("");
281 4         10 my (@data, $path_key);
282 4         9 local $_;
283 4         21 while (<$fh>) {
284 640         925 chomp;
285 640 100       2350 if (/^\+\s+(\d+)\s?(.*)/) {
    50          
286             # it's a key
287 366         1041 my ($key, $index) = ($2, $1 - 1);
288              
289 366         583 $#path = $index; # truncate path to new length
290 366         541 $path[$index] = unescape_key($key); # place new key at end
291              
292             }
293             elsif (s/^=\s+//) {
294             # it's data - file in the node array with the path in index 0
295             # (the optional minus is to make it more robust against systems
296             # with unstable high-res clocks - typically due to poor NTP config
297             # of kernel SMP behaviour, i.e. min time may be -0.000008))
298              
299 274         906 @data = split / /, $_;
300              
301             # corrupt data?
302 274 50       550 croak("Invalid number of fields in $filename line $.: $_")
303             unless @data == 7;
304 274 50       781 croak("Invalid leaf node characters $filename line $.: $_")
305             unless m/^[-+ 0-9eE\.]+$/;
306              
307             # hook to enable pre-processing of the data - such as mangling SQL
308             # so that slightly different statements get treated as the same
309             # and so merged in the results
310 274 100       796 $filter->(\@path, \@data) if $filter;
311              
312             # elements of @path can't have NULLs in them, so this
313             # forms a unique string per @path. If there's some way I
314             # can get this without arbitrarily stripping out a
315             # character I'd be happy to hear it!
316 274         1342 $path_key = join("\0",@path);
317              
318             # look for previous entry
319 274 100       496 if (exists $lookup->{$path_key}) {
320             # merge in the new data
321 208         1707 dbi_profile_merge($nodes->[$lookup->{$path_key}], \@data);
322             } else {
323             # insert a new node - nodes are arrays with data in 0-6
324             # and path data after that
325 66         212 push(@$nodes, [ @data, @path ]);
326              
327             # record node in %seen
328 66         306 $lookup->{$path_key} = $#$nodes;
329             }
330             }
331             else {
332 0         0 croak("Invalid line type syntax error in $filename line $.: $_");
333             }
334             }
335             }
336              
337              
338              
339             =head2 $copy = $prof->clone();
340              
341             Clone a profile data set creating a new object.
342              
343             =cut
344              
345             sub clone {
346 4     4 1 1154 my $self = shift;
347              
348             # start with a simple copy
349 4         41 my $clone = bless { %$self }, ref($self);
350              
351             # deep copy nodes
352 4         34 $clone->{_nodes} = [ map { [ @$_ ] } @{$self->{_nodes}} ];
  78         227  
  4         14  
353              
354             # deep copy header
355 4         9 $clone->{_header} = { %{$self->{_header}} };
  4         16  
356              
357 4         39 return $clone;
358             }
359              
360             =head2 $header = $prof->header();
361              
362             Returns a reference to a hash of header values. These are the key
363             value pairs included in the header section of the L
364             data format. For example:
365              
366             $header = {
367             Path => [ '!Statement', '!MethodName' ],
368             Program => 't/42profile_data.t',
369             };
370              
371             Note that modifying this hash will modify the header data stored
372             inside the profile object.
373              
374             =cut
375              
376 0     0 1 0 sub header { shift->{_header} }
377              
378              
379             =head2 $nodes = $prof->nodes()
380              
381             Returns a reference the sorted nodes array. Each element in the array
382             is a single record in the data set. The first seven elements are the
383             same as the elements provided by L. After that each key is
384             in a separate element. For example:
385              
386             $nodes = [
387             [
388             2, # 0, count
389             0.0312958955764771, # 1, total duration
390             0.000490069389343262, # 2, first duration
391             0.000176072120666504, # 3, shortest duration
392             0.00140702724456787, # 4, longest duration
393             1023115819.83019, # 5, time of first event
394             1023115819.86576, # 6, time of last event
395             'SELECT foo FROM bar' # 7, key1
396             'execute' # 8, key2
397             # 6+N, keyN
398             ],
399             # ...
400             ];
401              
402             Note that modifying this array will modify the node data stored inside
403             the profile object.
404              
405             =cut
406              
407 10     10 1 913 sub nodes { shift->{_nodes} }
408              
409              
410             =head2 $count = $prof->count()
411              
412             Returns the number of items in the profile data set.
413              
414             =cut
415              
416 4     4 1 1230 sub count { scalar @{shift->{_nodes}} }
  4         26  
417              
418              
419             =head2 $prof->sort(field => "field")
420              
421             =head2 $prof->sort(field => "field", reverse => 1)
422              
423             Sorts data by the given field. Available fields are:
424              
425             longest
426             total
427             count
428             shortest
429              
430             The default sort is greatest to smallest, which is the opposite of the
431             normal Perl meaning. This, however, matches the expected behavior of
432             the dbiprof frontend.
433              
434             =cut
435              
436              
437             # sorts data by one of the available fields
438             {
439             my %FIELDS = (
440             longest => LONGEST,
441             total => TOTAL,
442             count => COUNT,
443             shortest => SHORTEST,
444             key1 => PATH+0,
445             key2 => PATH+1,
446             key3 => PATH+2,
447             );
448             sub sort {
449 10     10 1 3793 my $self = shift;
450 10         20 my $nodes = $self->{_nodes};
451 10         33 my %opt = @_;
452              
453 10 50       29 croak("Missing required field option.") unless $opt{field};
454              
455 10         25 my $index = $FIELDS{$opt{field}};
456              
457 10 50       20 croak("Unrecognized sort field '$opt{field}'.")
458             unless defined $index;
459              
460             # sort over index
461 10 100       25 if ($opt{reverse}) {
462             @$nodes = sort {
463 4         16 $a->[$index] <=> $b->[$index]
  128         155  
464             } @$nodes;
465             } else {
466             @$nodes = sort {
467 6         26 $b->[$index] <=> $a->[$index]
  337         457  
468             } @$nodes;
469             }
470              
471             # remember how we're sorted
472 10         26 $self->{_sort} = $opt{field};
473              
474 10         29 return $self;
475             }
476             }
477              
478              
479             =head2 $count = $prof->exclude(key2 => "disconnect")
480              
481             =head2 $count = $prof->exclude(key2 => "disconnect", case_sensitive => 1)
482              
483             =head2 $count = $prof->exclude(key1 => qr/^SELECT/i)
484              
485             Removes records from the data set that match the given string or
486             regular expression. This method modifies the data in a permanent
487             fashion - use clone() first to maintain the original data after
488             exclude(). Returns the number of nodes left in the profile data set.
489              
490             =cut
491              
492             sub exclude {
493 2     2 1 16 my $self = shift;
494 2         6 my $nodes = $self->{_nodes};
495 2         10 my %opt = @_;
496              
497             # find key index number
498 2         6 my ($index, $val);
499 2         11 foreach (keys %opt) {
500 2 50       19 if (/^key(\d+)$/) {
501 2         12 $index = PATH + $1 - 1;
502 2         5 $val = $opt{$_};
503 2         5 last;
504             }
505             }
506 2 50       8 croak("Missing required keyN option.") unless $index;
507              
508 2 50       19 if (UNIVERSAL::isa($val,"Regexp")) {
509             # regex match
510             @$nodes = grep {
511 0 0       0 $#$_ < $index or $_->[$index] !~ /$val/
  0         0  
512             } @$nodes;
513             } else {
514 2 50       8 if ($opt{case_sensitive}) {
515             @$nodes = grep {
516 0 0       0 $#$_ < $index or $_->[$index] ne $val;
  0         0  
517             } @$nodes;
518             } else {
519 2         7 $val = lc $val;
520             @$nodes = grep {
521 2 50       6 $#$_ < $index or lc($_->[$index]) ne $val;
  39         123  
522             } @$nodes;
523             }
524             }
525              
526 2         11 return scalar @$nodes;
527             }
528              
529              
530             =head2 $count = $prof->match(key2 => "disconnect")
531              
532             =head2 $count = $prof->match(key2 => "disconnect", case_sensitive => 1)
533              
534             =head2 $count = $prof->match(key1 => qr/^SELECT/i)
535              
536             Removes records from the data set that do not match the given string
537             or regular expression. This method modifies the data in a permanent
538             fashion - use clone() first to maintain the original data after
539             match(). Returns the number of nodes left in the profile data set.
540              
541             =cut
542              
543             sub match {
544 4     4 1 9 my $self = shift;
545 4         8 my $nodes = $self->{_nodes};
546 4         13 my %opt = @_;
547              
548             # find key index number
549 4         9 my ($index, $val);
550 4         13 foreach (keys %opt) {
551 4 50       27 if (/^key(\d+)$/) {
552 4         13 $index = PATH + $1 - 1;
553 4         8 $val = $opt{$_};
554 4         9 last;
555             }
556             }
557 4 50       13 croak("Missing required keyN option.") unless $index;
558              
559 4 50       19 if (UNIVERSAL::isa($val,"Regexp")) {
560             # regex match
561             @$nodes = grep {
562 0 0       0 $#$_ >= $index and $_->[$index] =~ /$val/
  0         0  
563             } @$nodes;
564             } else {
565 4 50       10 if ($opt{case_sensitive}) {
566             @$nodes = grep {
567 0 0       0 $#$_ >= $index and $_->[$index] eq $val;
  0         0  
568             } @$nodes;
569             } else {
570 4         10 $val = lc $val;
571             @$nodes = grep {
572 4 50       8 $#$_ >= $index and lc($_->[$index]) eq $val;
  46         164  
573             } @$nodes;
574             }
575             }
576              
577 4         21 return scalar @$nodes;
578             }
579              
580              
581             =head2 $Data = $prof->Data()
582              
583             Returns the same Data hash structure as seen in L. This
584             structure is not sorted. The nodes() structure probably makes more
585             sense for most analysis.
586              
587             =cut
588              
589             sub Data {
590 4     4 1 1868 my $self = shift;
591 4         9 my (%Data, @data, $ptr);
592              
593 4         6 foreach my $node (@{$self->{_nodes}}) {
  4         13  
594             # traverse to key location
595 66         85 $ptr = \%Data;
596 66         98 foreach my $key (@{$node}[PATH .. $#$node - 1]) {
  66         94  
597 66 100       139 $ptr->{$key} = {} unless exists $ptr->{$key};
598 66         99 $ptr = $ptr->{$key};
599             }
600              
601             # slice out node data
602 66         81 $ptr->{$node->[-1]} = [ @{$node}[0 .. 6] ];
  66         197  
603             }
604              
605 4         53 return \%Data;
606             }
607              
608              
609             =head2 $text = $prof->format($nodes->[0])
610              
611             Formats a single node into a human-readable block of text.
612              
613             =cut
614              
615             sub format {
616 0     0 1   my ($self, $node) = @_;
617 0           my $format;
618              
619             # setup keys
620 0           my $keys = "";
621 0           for (my $i = PATH; $i <= $#$node; $i++) {
622 0           my $key = $node->[$i];
623              
624             # remove leading and trailing space
625 0           $key =~ s/^\s+//;
626 0           $key =~ s/\s+$//;
627              
628             # if key has newlines or is long take special precautions
629 0 0 0       if (length($key) > 72 or $key =~ /\n/) {
630 0           $keys .= " Key " . ($i - PATH + 1) . " :\n\n$key\n\n";
631             } else {
632 0           $keys .= " Key " . ($i - PATH + 1) . " : $key\n";
633             }
634             }
635              
636             # nodes with multiple runs get the long entry format, nodes with
637             # just one run get a single count.
638 0 0         if ($node->[COUNT] > 1) {
639 0           $format = <
640             Count : %d
641             Total Time : %3.6f seconds
642             Longest Time : %3.6f seconds
643             Shortest Time : %3.6f seconds
644             Average Time : %3.6f seconds
645             END
646 0           return sprintf($format, @{$node}[COUNT,TOTAL,LONGEST,SHORTEST],
  0            
647             $node->[TOTAL] / $node->[COUNT]) . $keys;
648             } else {
649 0           $format = <
650             Count : %d
651             Time : %3.6f seconds
652             END
653              
654 0           return sprintf($format, @{$node}[COUNT,TOTAL]) . $keys;
  0            
655              
656             }
657             }
658              
659              
660             =head2 $text = $prof->report(number => 10)
661              
662             Produces a report with the given number of items.
663              
664             =cut
665              
666             sub report {
667 0     0 1   my $self = shift;
668 0           my $nodes = $self->{_nodes};
669 0           my %opt = @_;
670              
671 0 0         croak("Missing required number option") unless exists $opt{number};
672              
673 0 0         $opt{number} = @$nodes if @$nodes < $opt{number};
674              
675 0           my $report = $self->_report_header($opt{number});
676 0           for (0 .. $opt{number} - 1) {
677 0           $report .= sprintf("#" x 5 . "[ %d ]". "#" x 59 . "\n",
678             $_ + 1);
679 0           $report .= $self->format($nodes->[$_]);
680 0           $report .= "\n";
681             }
682 0           return $report;
683             }
684              
685             # format the header for report()
686             sub _report_header {
687 0     0     my ($self, $number) = @_;
688 0           my $nodes = $self->{_nodes};
689 0           my $node_count = @$nodes;
690              
691             # find total runtime and method count
692 0           my ($time, $count) = (0,0);
693 0           foreach my $node (@$nodes) {
694 0           $time += $node->[TOTAL];
695 0           $count += $node->[COUNT];
696             }
697              
698 0           my $header = <
699              
700             DBI Profile Data ($self->{_profiler})
701              
702             END
703              
704             # output header fields
705 0           while (my ($key, $value) = each %{$self->{_header}}) {
  0            
706 0           $header .= sprintf(" %-13s : %s\n", $key, $value);
707             }
708              
709             # output summary data fields
710 0           $header .= sprintf(<{_sort}, $count, $time);
711             Total Records : %d (showing %d, sorted by %s)
712             Total Count : %d
713             Total Runtime : %3.6f seconds
714              
715             END
716              
717 0           return $header;
718             }
719              
720              
721             1;
722              
723             __END__