| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package DBIx::DataAudit; |
|
2
|
3
|
|
|
3
|
|
4215
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
334
|
|
|
3
|
3
|
|
|
3
|
|
48
|
use Carp qw(croak carp); |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
598
|
|
|
4
|
3
|
|
|
3
|
|
7472
|
use DBI; |
|
|
3
|
|
|
|
|
77995
|
|
|
|
3
|
|
|
|
|
397
|
|
|
5
|
3
|
|
|
3
|
|
4381
|
use parent 'Class::Accessor'; |
|
|
3
|
|
|
|
|
890
|
|
|
|
3
|
|
|
|
|
15
|
|
|
6
|
3
|
|
|
3
|
|
9842
|
use vars '$VERSION'; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
222
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '0.13'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
DBIx::DataAudit - summarize column data for a table |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use DBIx::DataAudit; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
warn "Running audit for table $table"; |
|
18
|
|
|
|
|
|
|
my $audit = DBIx::DataAudit->audit( dsn => 'dbi:SQLite:dbname=test.sqlite', table => 'test' ); |
|
19
|
|
|
|
|
|
|
print $audit->as_text; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# or |
|
22
|
|
|
|
|
|
|
print $audit->as_html; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This module provides a summary about the data contained in a table. It provides |
|
25
|
|
|
|
|
|
|
the descriptive statistics for every column. It's surprising |
|
26
|
|
|
|
|
|
|
how much bad data you find by looking at the minimum and maximum |
|
27
|
|
|
|
|
|
|
values of a column alone. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
It tries to get the information in one table scan. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 HOW IT WORKS |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
The module works by constructing an SQL statement that collects the information |
|
34
|
|
|
|
|
|
|
about the columns in a single full table scan. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 COLUMN TRAITS |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
You can specify which information is collected about every column by specifying the traits. |
|
39
|
|
|
|
|
|
|
The hierarchy of traits is as follows: |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
any < ordered < numeric |
|
42
|
|
|
|
|
|
|
< string |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The following traits are collected for every column by default: |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=over 4 |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item * C |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Number of rows in the column |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item * C |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Number of distinct values in the column |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item * C |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Number of C values for the column |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=back |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
For columns that are recognized as ordered, the following additional traits are collected: |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=over 4 |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item * C |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Minimum value for the column |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item * C |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Maximum value for the column |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
For columns that are recognized as numeric, the following additional traits are collected: |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=over 4 |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item * C |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Average value for the column |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=back |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
For columns that are recognized as string, the following additional traits are collected: |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=over 4 |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item * C |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Number of values that consist only of blanks (C) |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item * C |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Number of values that consist only of the empty string (C<''>) |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=item * C |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Number of values that consist only of the empty string (C<''>), |
|
101
|
|
|
|
|
|
|
are blank (C) or are C |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=back |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 GLOBAL VARIABLES |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
To customize some default behaviour, the some global variables |
|
110
|
|
|
|
|
|
|
are defined. Read the source to find their names. |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
|
113
|
|
|
|
|
|
|
|
|
114
|
3
|
|
|
3
|
|
13
|
use vars qw'@default_traits %trait_type %trait_hierarchy $trait_inapplicable %sql_type_map'; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
6181
|
|
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
@default_traits = qw[min max count values null avg blank empty missing ]; |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
%trait_type = ( |
|
119
|
|
|
|
|
|
|
count => ['any','count(%s)'], |
|
120
|
|
|
|
|
|
|
values => ['any','count(distinct %s)'], |
|
121
|
|
|
|
|
|
|
null => ['any','sum(case when %s is null then 1 else 0 end)'], |
|
122
|
|
|
|
|
|
|
min => ['ordered','min(%s)'], |
|
123
|
|
|
|
|
|
|
max => ['ordered','max(%s)'], |
|
124
|
|
|
|
|
|
|
avg => ['numeric','avg(%s)'], |
|
125
|
|
|
|
|
|
|
#modus => ['any','sum(1)group by %s'], # find the element that occurs the most |
|
126
|
|
|
|
|
|
|
# Possibly with only a single table scan |
|
127
|
|
|
|
|
|
|
blank => ['string',"sum(case when trim(%s)='' then 1 else 0 end)"], |
|
128
|
|
|
|
|
|
|
empty => ['string',"sum(case when %s='' then 1 else 0 end)"], |
|
129
|
|
|
|
|
|
|
missing => ['string',"sum(case when trim(%s)='' then 1 when %s is null then 1 else 0 end)"], |
|
130
|
|
|
|
|
|
|
); |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
%trait_hierarchy = ( |
|
133
|
|
|
|
|
|
|
any => [], |
|
134
|
|
|
|
|
|
|
ordered => ['any'], |
|
135
|
|
|
|
|
|
|
numeric => ['ordered','any'], |
|
136
|
|
|
|
|
|
|
string => ['ordered','any'], |
|
137
|
|
|
|
|
|
|
); |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
$trait_inapplicable = 'NULL'; |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
%sql_type_map = ( |
|
142
|
|
|
|
|
|
|
BIGINT => 'numeric', |
|
143
|
|
|
|
|
|
|
BOOLEAN => 'any', |
|
144
|
|
|
|
|
|
|
CHAR => 'string', |
|
145
|
|
|
|
|
|
|
'CHARACTER VARYING' => 'string', |
|
146
|
|
|
|
|
|
|
DATETIME => 'ordered', |
|
147
|
|
|
|
|
|
|
DATE => 'ordered', |
|
148
|
|
|
|
|
|
|
DECIMAL => 'numeric', |
|
149
|
|
|
|
|
|
|
ENUM => 'ordered', |
|
150
|
|
|
|
|
|
|
INET => 'any', |
|
151
|
|
|
|
|
|
|
INTEGER => 'numeric', |
|
152
|
|
|
|
|
|
|
INT => 'numeric', |
|
153
|
|
|
|
|
|
|
NUMERIC => 'numeric', |
|
154
|
|
|
|
|
|
|
SMALLINT => 'numeric', |
|
155
|
|
|
|
|
|
|
TEXT => 'string', |
|
156
|
|
|
|
|
|
|
TIME => 'ordered', |
|
157
|
|
|
|
|
|
|
'TIMESTAMP WITHOUT TIME ZONE' => 'ordered', |
|
158
|
|
|
|
|
|
|
TIMESTAMP => 'ordered', |
|
159
|
|
|
|
|
|
|
TINYINT => 'numeric', |
|
160
|
|
|
|
|
|
|
'UNSIGNED BIGINT' => 'numeric', |
|
161
|
|
|
|
|
|
|
VARCHAR => 'string', |
|
162
|
|
|
|
|
|
|
); |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 METHODS |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
The class implements the following methods: |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors(qw(table dbh dsn columns traits results where)); |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 C<< __PACKAGE__->audit ARGS >> |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Performs the data audit. Valid arguments are: |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=over 4 |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item * C
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Name of the table to audit. No default. |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=item * C |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Array reference to the traits. Default traits are |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
min max count null avg blank empty missing |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item * C |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Names of the columns to audit. Default are all columns of the table. |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=item * C |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Database handle. If missing, hopefully you have specified the C. |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=item * C |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
DSN to use. Can be omitted if you pass in a valid C instead. |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=item * C |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Column information, in the same format as the DBI returns it. |
|
203
|
|
|
|
|
|
|
By default, this will be read in via DBI. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=back |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub audit { |
|
210
|
0
|
|
|
0
|
1
|
0
|
my ($class, %args) = @_; |
|
211
|
|
|
|
|
|
|
|
|
212
|
0
|
|
0
|
|
|
0
|
$args{traits} ||= [ @default_traits ]; |
|
213
|
0
|
0
|
|
|
|
0
|
if (! @{$args{traits}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
214
|
0
|
|
|
|
|
0
|
$args{traits} = [ @default_traits ]; |
|
215
|
|
|
|
|
|
|
}; |
|
216
|
0
|
|
0
|
|
|
0
|
$args{dbh} ||= DBI->connect( $args{dsn}, undef, undef, {RaiseError => 1}); |
|
217
|
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
my $self = \%args; |
|
219
|
0
|
|
|
|
|
0
|
bless $self => $class; |
|
220
|
0
|
|
0
|
|
|
0
|
$self->{columns} ||= [$self->get_columns]; |
|
221
|
0
|
0
|
|
|
|
0
|
if (! @{ $self->{columns}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
222
|
0
|
|
|
|
|
0
|
croak "Couldn't retrieve column information for table '$args{table}'. Does your DBD implement ->column_info?"; |
|
223
|
|
|
|
|
|
|
}; |
|
224
|
0
|
|
0
|
|
|
0
|
$self->{column_info} ||= $self->collect_column_info; |
|
225
|
|
|
|
|
|
|
|
|
226
|
0
|
|
|
|
|
0
|
$self |
|
227
|
|
|
|
|
|
|
}; |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=head2 C<< $audit->as_text RESULTS >> |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Returns a table drawn as text with the results. |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub as_text { |
|
236
|
0
|
|
|
0
|
1
|
0
|
my ($self,$results) = @_; |
|
237
|
|
|
|
|
|
|
|
|
238
|
0
|
|
|
|
|
0
|
require Text::Table; |
|
239
|
0
|
|
|
|
|
0
|
my $data = $self->template_data($results); |
|
240
|
0
|
|
|
|
|
0
|
my $table = Text::Table->new( @{$data->{headings}} ); |
|
|
0
|
|
|
|
|
0
|
|
|
241
|
0
|
|
|
|
|
0
|
$table->load( @{$data->{rows}} ); |
|
|
0
|
|
|
|
|
0
|
|
|
242
|
|
|
|
|
|
|
|
|
243
|
0
|
|
|
|
|
0
|
"Data analysis for $data->{table}:\n\n" . $table->table; |
|
244
|
|
|
|
|
|
|
}; |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 C<< $audit->as_html RESULTS, TEMPLATE >> |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Returns a HTML page with the results. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
You can pass in a custom resultset or C if you want |
|
251
|
|
|
|
|
|
|
the module to determine the results. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
You can pass in a custom (L) template |
|
254
|
|
|
|
|
|
|
if you want fancier rendering. |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub as_html { |
|
259
|
0
|
|
|
0
|
1
|
0
|
my ($self,$results,$template) = @_; |
|
260
|
0
|
|
|
|
|
0
|
require Template; |
|
261
|
0
|
|
0
|
|
|
0
|
$template ||= <
|
|
262
|
|
|
|
|
|
|
Data audit of table '[% table %]' |
|
263
|
|
|
|
|
|
|
Data audit of table '[% table %]' |
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
| [% FOR h IN headings %]| [%h%] | [%END%]
|
267
|
|
|
|
|
|
|
| |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
[% FOR r IN rows %] |
|
270
|
|
|
|
|
|
|
| [% FOR v IN r %]| [%v FILTER html_entity%] | [%END%]
|
271
|
|
|
|
|
|
|
[% END %] |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
| |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
TEMPLATE |
|
276
|
|
|
|
|
|
|
|
|
277
|
0
|
|
|
|
|
0
|
my $t = Template->new(); |
|
278
|
0
|
|
|
|
|
0
|
my $data = $self->template_data($results); |
|
279
|
|
|
|
|
|
|
|
|
280
|
0
|
0
|
|
|
|
0
|
$t->process(\$template,$data,\my $result) |
|
281
|
|
|
|
|
|
|
|| croak $t->error; |
|
282
|
0
|
|
|
|
|
0
|
$result |
|
283
|
|
|
|
|
|
|
}; |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head2 C<< $audit->template_data >> |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Returns a hash with the following three keys, suitable |
|
288
|
|
|
|
|
|
|
for using with whatever templating system you have: |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=over 4 |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=item * |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
C - the name of the table
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=item * |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
C - the headings of the columns |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=item * |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
C - the values of the traits of every column |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=back |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub template_data { |
|
309
|
0
|
|
|
0
|
1
|
0
|
my ($self,$results) = @_; |
|
310
|
0
|
|
0
|
|
|
0
|
$results ||= $self->{results} || $self->run_audit; |
|
|
|
|
0
|
|
|
|
|
|
311
|
0
|
|
|
|
|
0
|
my @results = @{ $results->[0] }; |
|
|
0
|
|
|
|
|
0
|
|
|
312
|
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
0
|
my @headings = (@{ $self->traits }); |
|
|
0
|
|
|
|
|
0
|
|
|
314
|
0
|
|
|
|
|
0
|
my @rows; |
|
315
|
0
|
|
|
|
|
0
|
for my $column (@{ $self->columns }) { |
|
|
0
|
|
|
|
|
0
|
|
|
316
|
0
|
|
|
|
|
0
|
my @row = $column; |
|
317
|
0
|
|
|
|
|
0
|
for my $trait (@headings) { |
|
318
|
0
|
|
|
|
|
0
|
my $val = shift @results; |
|
319
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
|
320
|
0
|
0
|
|
|
|
0
|
if (length($val) > 20) { |
|
321
|
0
|
|
|
|
|
0
|
$val = substr($val,0,20); |
|
322
|
|
|
|
|
|
|
}; |
|
323
|
0
|
|
|
|
|
0
|
$val =~ s/[\x00-\x1f]/./g; |
|
324
|
|
|
|
|
|
|
}; |
|
325
|
0
|
0
|
|
|
|
0
|
push @row, defined $val ? $val : 'n/a'; |
|
326
|
|
|
|
|
|
|
}; |
|
327
|
0
|
|
|
|
|
0
|
push @rows, \@row; |
|
328
|
|
|
|
|
|
|
}; |
|
329
|
|
|
|
|
|
|
|
|
330
|
0
|
|
|
|
|
0
|
my $res = { |
|
331
|
|
|
|
|
|
|
table => $self->table, |
|
332
|
|
|
|
|
|
|
headings => ['column',@headings], |
|
333
|
|
|
|
|
|
|
rows => \@rows, |
|
334
|
|
|
|
|
|
|
}; |
|
335
|
|
|
|
|
|
|
}; |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head2 C<< $audit->run_audit >> |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Actually runs the SQL in the database. |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub run_audit { |
|
344
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
345
|
|
|
|
|
|
|
|
|
346
|
0
|
|
|
|
|
0
|
my $sql = $self->get_sql; |
|
347
|
0
|
|
|
|
|
0
|
$self->{results} = $self->dbh->selectall_arrayref($sql,{}); |
|
348
|
|
|
|
|
|
|
}; |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head2 C<< $audit->column_type COLUMN >> |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Returns the type for the column. The four valid types are C, C, C and C. |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=cut |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub column_type { |
|
357
|
0
|
|
|
0
|
1
|
0
|
my ($self,$column) = @_; |
|
358
|
0
|
0
|
|
|
|
0
|
if (! $self->{column_info}) { |
|
359
|
0
|
|
|
|
|
0
|
$self->{column_info} = $self->collect_column_info; |
|
360
|
|
|
|
|
|
|
}; |
|
361
|
0
|
|
|
|
|
0
|
my $info = $self->{column_info}; |
|
362
|
0
|
|
|
|
|
0
|
map { |
|
363
|
0
|
|
|
|
|
0
|
$_->{trait_type}; |
|
364
|
0
|
|
|
|
|
0
|
} grep { $_->{COLUMN_NAME} eq $column } @$info; |
|
365
|
|
|
|
|
|
|
}; |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=head2 C<< $audit->get_columns TABLE >> |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Returns the names of the columns for the table C.
|
370
|
|
|
|
|
|
|
By default, the value of C will be taken from the value
|
371
|
|
|
|
|
|
|
passed to the constructor C. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub get_columns { |
|
376
|
0
|
|
|
0
|
1
|
0
|
my ($self,$table) = @_; |
|
377
|
0
|
|
0
|
|
|
0
|
$table ||= $self->table; |
|
378
|
0
|
0
|
|
|
|
0
|
if (! $self->{column_info}) { |
|
379
|
0
|
|
|
|
|
0
|
$self->{column_info} = $self->collect_column_info; |
|
380
|
|
|
|
|
|
|
}; |
|
381
|
0
|
|
|
|
|
0
|
my $info = $self->{column_info}; |
|
382
|
0
|
|
|
|
|
0
|
my @sorted = @$info; |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Order the columns in the "right" order, if possible |
|
385
|
0
|
0
|
0
|
|
|
0
|
if (exists $sorted[0]->{ORDINAL_POSITION} && defined $sorted[0]->{ORDINAL_POSITION}) { |
|
386
|
0
|
|
|
|
|
0
|
@sorted = sort { $a->{ORDINAL_POSITION} <=> $b->{ORDINAL_POSITION} } @sorted; |
|
|
0
|
|
|
|
|
0
|
|
|
387
|
|
|
|
|
|
|
}; |
|
388
|
0
|
|
|
|
|
0
|
map { |
|
389
|
0
|
|
|
|
|
0
|
$_->{COLUMN_NAME}; |
|
390
|
|
|
|
|
|
|
} @sorted; |
|
391
|
|
|
|
|
|
|
}; |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 C<< $audit->collect_column_info TABLE >> |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Collects the information about the columns for the table C
|
396
|
|
|
|
|
|
|
from the DBI. By default, C will be taken from the
|
397
|
|
|
|
|
|
|
value passed to the constructor C. |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
If your database driver does not implement the C<< ->column_info >> |
|
400
|
|
|
|
|
|
|
method you are out of luck. A fatal error is raised by this method |
|
401
|
|
|
|
|
|
|
if C<< ->column_info >> does not return anything. |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
This method will raise warnings if it encounters a data type that |
|
404
|
|
|
|
|
|
|
it doesn't know yet. You can either patch the |
|
405
|
|
|
|
|
|
|
global variable C<%sql_type_map> to add the type or submit a patch |
|
406
|
|
|
|
|
|
|
to me to add the type and its interpretation. |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=cut |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub collect_column_info { |
|
411
|
0
|
|
|
0
|
1
|
0
|
my ($self,$table) = @_; |
|
412
|
0
|
|
0
|
|
|
0
|
$table ||= $self->table; |
|
413
|
0
|
|
|
|
|
0
|
my $schema; |
|
414
|
0
|
0
|
|
|
|
0
|
if ($table =~ s/^(.*)\.//) { |
|
415
|
0
|
|
|
|
|
0
|
$schema = $1; |
|
416
|
|
|
|
|
|
|
}; |
|
417
|
0
|
|
|
|
|
0
|
my $sth = $self->dbh->column_info(undef,$schema,$table,undef); |
|
418
|
0
|
0
|
|
|
|
0
|
if (! $sth) { |
|
419
|
0
|
0
|
|
|
|
0
|
if( $schema ) { |
|
420
|
0
|
|
|
|
|
0
|
$schema= "$schema."; |
|
421
|
|
|
|
|
|
|
} else { |
|
422
|
0
|
|
|
|
|
0
|
$schema= ''; |
|
423
|
|
|
|
|
|
|
}; |
|
424
|
0
|
|
|
|
|
0
|
croak "Couldn't collect column information for table '$schema$table'. Does your DBD implement ->column_info?"; |
|
425
|
|
|
|
|
|
|
}; |
|
426
|
0
|
|
|
|
|
0
|
my $info = $sth->fetchall_arrayref({}); |
|
427
|
|
|
|
|
|
|
|
|
428
|
0
|
0
|
|
|
|
0
|
if( !@$info ) { |
|
429
|
0
|
|
|
|
|
0
|
croak "'$schema$table' seems to have no columns. Does your DBD implement ->column_info?"; |
|
430
|
|
|
|
|
|
|
}; |
|
431
|
|
|
|
|
|
|
|
|
432
|
0
|
|
|
|
|
0
|
for my $i (@$info) { |
|
433
|
0
|
|
|
|
|
0
|
my $sqltype = uc $i->{TYPE_NAME}; |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# Fix for Pg - convert enum types to "ENUM": |
|
436
|
0
|
0
|
0
|
|
|
0
|
if (exists $i->{pg_enum_values} && defined $i->{pg_enum_values}) { |
|
437
|
0
|
|
|
|
|
0
|
$sqltype = 'ENUM'; |
|
438
|
|
|
|
|
|
|
}; |
|
439
|
|
|
|
|
|
|
|
|
440
|
0
|
0
|
|
|
|
0
|
if (not exists $sql_type_map{ $sqltype }) { |
|
441
|
0
|
|
|
|
|
0
|
warn sprintf q{Unknown SQL data type '%s' for column "%s.%s"; some traits will be unavailable\n}, |
|
442
|
|
|
|
|
|
|
$sqltype, $table, $i->{COLUMN_NAME}; |
|
443
|
|
|
|
|
|
|
}; |
|
444
|
0
|
|
0
|
|
|
0
|
$i->{trait_type} = $sql_type_map{ $sqltype } || 'any'; |
|
445
|
|
|
|
|
|
|
}; |
|
446
|
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
0
|
$info |
|
448
|
|
|
|
|
|
|
}; |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 C<< $audit->get_sql TABLE >> |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Creates the SQL statement to collect the information. |
|
453
|
|
|
|
|
|
|
The default value for C will be the table passed
|
454
|
|
|
|
|
|
|
to the constructor C. |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
If you encounter errors from your SQL engine, you may want |
|
457
|
|
|
|
|
|
|
to print the result of this method out. |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub get_sql { |
|
462
|
0
|
|
|
0
|
1
|
0
|
my ($self,$table) = @_; |
|
463
|
0
|
|
0
|
|
|
0
|
$table ||= $self->table; |
|
464
|
0
|
|
|
|
|
0
|
my @columns = @{ $self->columns }; |
|
|
0
|
|
|
|
|
0
|
|
|
465
|
0
|
|
|
|
|
0
|
my @traits = @{$self->traits}; |
|
|
0
|
|
|
|
|
0
|
|
|
466
|
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
my @resultset; |
|
468
|
0
|
|
|
|
|
0
|
for my $column (@columns) { |
|
469
|
0
|
|
|
|
|
0
|
for my $trait (@traits) { |
|
470
|
0
|
|
|
|
|
0
|
my $name = "${column}_${trait}"; |
|
471
|
0
|
|
|
|
|
0
|
$name =~ s/"//g; # unquote quoted columns |
|
472
|
0
|
0
|
|
|
|
0
|
if ($self->trait_applies( $trait, $column )) { |
|
473
|
0
|
|
|
|
|
0
|
my $tmpl = $trait_type{$trait}->[1]; |
|
474
|
0
|
|
|
|
|
0
|
$tmpl =~ s/%s/$column/g; |
|
475
|
0
|
|
|
|
|
0
|
push @resultset, "$tmpl as $name"; |
|
476
|
|
|
|
|
|
|
} else { |
|
477
|
0
|
|
|
|
|
0
|
push @resultset, "NULL as $name"; |
|
478
|
|
|
|
|
|
|
}; |
|
479
|
|
|
|
|
|
|
}; |
|
480
|
|
|
|
|
|
|
}; |
|
481
|
0
|
0
|
|
|
|
0
|
my $where = $self->where ? "WHERE " . $self->where : ''; |
|
482
|
0
|
|
|
|
|
0
|
my $statement = sprintf "SELECT %s FROM %s\n%s", join("\n ,", @resultset), $table, $where; |
|
483
|
0
|
|
|
|
|
0
|
return $statement |
|
484
|
|
|
|
|
|
|
}; |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=head2 C<< $audit->trait_applies TRAIT, COLUMN >> |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Checks whether a trait applies to a column. |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
A trait applies to a column if the trait type is C |
|
491
|
|
|
|
|
|
|
or if it is the same type as the column type as returned |
|
492
|
|
|
|
|
|
|
by C. |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
The method will raise an error if it is passed an unknown |
|
495
|
|
|
|
|
|
|
trait name. See the source code for how to add custom |
|
496
|
|
|
|
|
|
|
traits. |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=cut |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub trait_applies { |
|
501
|
36
|
|
|
36
|
1
|
9370
|
my ($self, $trait, $column) = @_; |
|
502
|
36
|
50
|
|
|
|
94
|
if (not exists $trait_type{$trait}) { |
|
503
|
0
|
|
|
|
|
0
|
carp "Unknown trait '$trait'"; |
|
504
|
|
|
|
|
|
|
}; |
|
505
|
36
|
|
50
|
|
|
114
|
my $trait_type = $trait_type{$trait}->[0] || ''; |
|
506
|
|
|
|
|
|
|
|
|
507
|
36
|
100
|
|
|
|
81
|
return 1 if ($trait_type eq 'any'); |
|
508
|
|
|
|
|
|
|
|
|
509
|
24
|
|
|
|
|
68
|
(my $type) = $self->column_type($column); |
|
510
|
24
|
|
|
|
|
69
|
my @subtypes = @{ $trait_hierarchy{ $type } }; |
|
|
24
|
|
|
|
|
56
|
|
|
511
|
|
|
|
|
|
|
|
|
512
|
24
|
|
|
|
|
34
|
return scalar grep { $trait_type eq $_ } ($type,@subtypes); |
|
|
54
|
|
|
|
|
132
|
|
|
513
|
|
|
|
|
|
|
}; |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head1 COMMAND LINE USAGE |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
You can use this mail from the command line if you need a quick check of data: |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
perl -MDBIx::DataAudit=dbi:SQLite:dbname=some/db.sqlite my_table [traits] |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
This could also incredibly useful if you want a breakdown of a csv-file: |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
perl -MDBIx::DataAudit=dbi:AnyData:dbname=some/db.sqlite my_table [traits] |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
Unfortunately, that does not work yet, as I haven't found a convenient |
|
526
|
|
|
|
|
|
|
oneliner way to make a CSV file appear as database. |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
sub import { |
|
531
|
2
|
|
|
2
|
|
20
|
my ($class, $dsn) = @_; |
|
532
|
2
|
|
|
|
|
5
|
(my $target) = caller; |
|
533
|
2
|
50
|
33
|
|
|
53
|
if ($target eq 'main' and $dsn) { |
|
534
|
0
|
|
|
|
|
|
my ($table,@traits) = @ARGV; |
|
535
|
0
|
|
|
|
|
|
my @tables = split /,/,$table; |
|
536
|
0
|
0
|
|
|
|
|
if (! @traits) { |
|
537
|
0
|
|
|
|
|
|
@traits = @default_traits; |
|
538
|
|
|
|
|
|
|
}; |
|
539
|
0
|
|
|
|
|
|
for my $table (@tables) { |
|
540
|
0
|
|
|
|
|
|
my $self = $class->audit(dsn => $dsn, table => $table, traits => \@traits); |
|
541
|
0
|
|
|
|
|
|
print "Data audit for table '$table'\n\n"; |
|
542
|
0
|
|
|
|
|
|
print $self->as_text; |
|
543
|
|
|
|
|
|
|
}; |
|
544
|
|
|
|
|
|
|
}; |
|
545
|
|
|
|
|
|
|
}; |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
1; |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
__END__ |
| | | | | | |