line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package SQL::Abstract::Tree; |
2
|
|
|
|
|
|
|
|
3
|
20
|
|
|
20
|
|
363040
|
use Moo; |
|
20
|
|
|
|
|
203899
|
|
|
20
|
|
|
|
|
100
|
|
4
|
20
|
|
|
20
|
|
26905
|
no warnings 'qw'; |
|
20
|
|
|
|
|
43
|
|
|
20
|
|
|
|
|
538
|
|
5
|
|
|
|
|
|
|
|
6
|
20
|
|
|
20
|
|
94
|
use Carp; |
|
20
|
|
|
|
|
37
|
|
|
20
|
|
|
|
|
1150
|
|
7
|
20
|
|
|
20
|
|
9316
|
use Sub::Quote 'quote_sub'; |
|
20
|
|
|
|
|
87231
|
|
|
20
|
|
|
|
|
8370
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $op_look_ahead = '(?: (?= [\s\)\(\;] ) | \z)'; |
10
|
|
|
|
|
|
|
my $op_look_behind = '(?: (?<= [\,\s\)\(] ) | \A )'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $quote_left = qr/[\`\'\"\[]/; |
13
|
|
|
|
|
|
|
my $quote_right = qr/[\`\'\"\]]/; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my $placeholder_re = qr/(?: \? | \$\d+ )/x; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# These SQL keywords always signal end of the current expression (except inside |
18
|
|
|
|
|
|
|
# of a parenthesized subexpression). |
19
|
|
|
|
|
|
|
# Format: A list of strings that will be compiled to extended syntax ie. |
20
|
|
|
|
|
|
|
# /.../x) regexes, without capturing parentheses. They will be automatically |
21
|
|
|
|
|
|
|
# anchored to op boundaries (excluding quotes) to match the whole token. |
22
|
|
|
|
|
|
|
my @expression_start_keywords = ( |
23
|
|
|
|
|
|
|
'SELECT', |
24
|
|
|
|
|
|
|
'UPDATE', |
25
|
|
|
|
|
|
|
'SET', |
26
|
|
|
|
|
|
|
'INSERT \s+ INTO', |
27
|
|
|
|
|
|
|
'DELETE \s+ FROM', |
28
|
|
|
|
|
|
|
'FROM', |
29
|
|
|
|
|
|
|
'(?: |
30
|
|
|
|
|
|
|
(?: |
31
|
|
|
|
|
|
|
(?: (?: LEFT | RIGHT | FULL ) \s+ )? |
32
|
|
|
|
|
|
|
(?: (?: CROSS | INNER | OUTER ) \s+ )? |
33
|
|
|
|
|
|
|
)? |
34
|
|
|
|
|
|
|
JOIN |
35
|
|
|
|
|
|
|
)', |
36
|
|
|
|
|
|
|
'ON', |
37
|
|
|
|
|
|
|
'WHERE', |
38
|
|
|
|
|
|
|
'(?: DEFAULT \s+ )? VALUES', |
39
|
|
|
|
|
|
|
'GROUP \s+ BY', |
40
|
|
|
|
|
|
|
'HAVING', |
41
|
|
|
|
|
|
|
'ORDER \s+ BY', |
42
|
|
|
|
|
|
|
'SKIP', |
43
|
|
|
|
|
|
|
'FETCH', |
44
|
|
|
|
|
|
|
'FIRST', |
45
|
|
|
|
|
|
|
'LIMIT', |
46
|
|
|
|
|
|
|
'OFFSET', |
47
|
|
|
|
|
|
|
'FOR', |
48
|
|
|
|
|
|
|
'UNION', |
49
|
|
|
|
|
|
|
'INTERSECT', |
50
|
|
|
|
|
|
|
'EXCEPT', |
51
|
|
|
|
|
|
|
'BEGIN \s+ WORK', |
52
|
|
|
|
|
|
|
'COMMIT', |
53
|
|
|
|
|
|
|
'ROLLBACK \s+ TO \s+ SAVEPOINT', |
54
|
|
|
|
|
|
|
'ROLLBACK', |
55
|
|
|
|
|
|
|
'SAVEPOINT', |
56
|
|
|
|
|
|
|
'RELEASE \s+ SAVEPOINT', |
57
|
|
|
|
|
|
|
'RETURNING', |
58
|
|
|
|
|
|
|
); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
my $expr_start_re = join ("\n\t|\n", @expression_start_keywords ); |
61
|
|
|
|
|
|
|
$expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# These are binary operator keywords always a single LHS and RHS |
64
|
|
|
|
|
|
|
# * AND/OR are handled separately as they are N-ary |
65
|
|
|
|
|
|
|
# * so is NOT as being unary |
66
|
|
|
|
|
|
|
# * BETWEEN without parentheses around the ANDed arguments (which |
67
|
|
|
|
|
|
|
# makes it a non-binary op) is detected and accommodated in |
68
|
|
|
|
|
|
|
# _recurse_parse() |
69
|
|
|
|
|
|
|
# * AS is not really an operator but is handled here as it's also LHS/RHS |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# this will be included in the $binary_op_re, the distinction is interesting during |
72
|
|
|
|
|
|
|
# testing as one is tighter than the other, plus alphanum cmp ops have different |
73
|
|
|
|
|
|
|
# look ahead/behind (e.g. "x"="y" ) |
74
|
|
|
|
|
|
|
my @alphanum_cmp_op_keywords = (qw/< > != <> = <= >= /); |
75
|
|
|
|
|
|
|
my $alphanum_cmp_op_re = join ("\n\t|\n", map |
76
|
|
|
|
|
|
|
{ "(?: (?<= [\\w\\s] | $quote_right ) | \\A )" . quotemeta ($_) . "(?: (?= [\\w\\s] | $quote_left ) | \\z )" } |
77
|
|
|
|
|
|
|
@alphanum_cmp_op_keywords |
78
|
|
|
|
|
|
|
); |
79
|
|
|
|
|
|
|
$alphanum_cmp_op_re = qr/$alphanum_cmp_op_re/x; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN [RI]?LIKE REGEXP/) . ')'; |
82
|
|
|
|
|
|
|
$binary_op_re = join "\n\t|\n", |
83
|
|
|
|
|
|
|
"$op_look_behind (?i: $binary_op_re | AS ) $op_look_ahead", |
84
|
|
|
|
|
|
|
$alphanum_cmp_op_re, |
85
|
|
|
|
|
|
|
$op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", |
86
|
|
|
|
|
|
|
; |
87
|
|
|
|
|
|
|
$binary_op_re = qr/$binary_op_re/x; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
my $rno_re = qr/ROW_NUMBER \s* \( \s* \) \s+ OVER/ix; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $unary_op_re = 'NOT \s+ EXISTS | NOT | ' . $rno_re; |
92
|
|
|
|
|
|
|
$unary_op_re = join "\n\t|\n", |
93
|
|
|
|
|
|
|
"$op_look_behind (?i: $unary_op_re ) $op_look_ahead", |
94
|
|
|
|
|
|
|
; |
95
|
|
|
|
|
|
|
$unary_op_re = qr/$unary_op_re/x; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
my $asc_desc_re = qr/$op_look_behind (?i: ASC | DESC ) $op_look_ahead /x; |
98
|
|
|
|
|
|
|
my $and_or_re = qr/$op_look_behind (?i: AND | OR ) $op_look_ahead /x; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
my $tokenizer_re = join("\n\t|\n", |
101
|
|
|
|
|
|
|
$expr_start_re, |
102
|
|
|
|
|
|
|
$binary_op_re, |
103
|
|
|
|
|
|
|
$unary_op_re, |
104
|
|
|
|
|
|
|
$asc_desc_re, |
105
|
|
|
|
|
|
|
$and_or_re, |
106
|
|
|
|
|
|
|
$op_look_behind . ' \* ' . $op_look_ahead, |
107
|
|
|
|
|
|
|
(map { quotemeta $_ } qw/, ( )/), |
108
|
|
|
|
|
|
|
$placeholder_re, |
109
|
|
|
|
|
|
|
); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# this one *is* capturing for the split below |
112
|
|
|
|
|
|
|
# splits on whitespace if all else fails |
113
|
|
|
|
|
|
|
# has to happen before the composing qr's are anchored (below) |
114
|
|
|
|
|
|
|
$tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* | \s+ /x; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Parser states for _recurse_parse() |
117
|
20
|
|
|
20
|
|
168
|
use constant PARSE_TOP_LEVEL => 0; |
|
20
|
|
|
|
|
40
|
|
|
20
|
|
|
|
|
1704
|
|
118
|
20
|
|
|
20
|
|
137
|
use constant PARSE_IN_EXPR => 1; |
|
20
|
|
|
|
|
53
|
|
|
20
|
|
|
|
|
1060
|
|
119
|
20
|
|
|
20
|
|
132
|
use constant PARSE_IN_PARENS => 2; |
|
20
|
|
|
|
|
37
|
|
|
20
|
|
|
|
|
993
|
|
120
|
20
|
|
|
20
|
|
113
|
use constant PARSE_IN_FUNC => 3; |
|
20
|
|
|
|
|
39
|
|
|
20
|
|
|
|
|
988
|
|
121
|
20
|
|
|
20
|
|
113
|
use constant PARSE_RHS => 4; |
|
20
|
|
|
|
|
54
|
|
|
20
|
|
|
|
|
873
|
|
122
|
20
|
|
|
20
|
|
109
|
use constant PARSE_LIST_ELT => 5; |
|
20
|
|
|
|
|
35
|
|
|
20
|
|
|
|
|
70522
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
my $expr_term_re = qr/$expr_start_re | \)/x; |
125
|
|
|
|
|
|
|
my $rhs_term_re = qr/ $expr_term_re | $binary_op_re | $unary_op_re | $asc_desc_re | $and_or_re | \, /x; |
126
|
|
|
|
|
|
|
my $all_std_keywords_re = qr/ $rhs_term_re | \( | $placeholder_re /x; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# anchor everything - even though keywords are separated by the tokenizer, leakage may occur |
129
|
|
|
|
|
|
|
for ( |
130
|
|
|
|
|
|
|
$quote_left, |
131
|
|
|
|
|
|
|
$quote_right, |
132
|
|
|
|
|
|
|
$placeholder_re, |
133
|
|
|
|
|
|
|
$expr_start_re, |
134
|
|
|
|
|
|
|
$alphanum_cmp_op_re, |
135
|
|
|
|
|
|
|
$binary_op_re, |
136
|
|
|
|
|
|
|
$unary_op_re, |
137
|
|
|
|
|
|
|
$asc_desc_re, |
138
|
|
|
|
|
|
|
$and_or_re, |
139
|
|
|
|
|
|
|
$expr_term_re, |
140
|
|
|
|
|
|
|
$rhs_term_re, |
141
|
|
|
|
|
|
|
$all_std_keywords_re, |
142
|
|
|
|
|
|
|
) { |
143
|
|
|
|
|
|
|
$_ = qr/ \A $_ \z /x; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# what can be bunched together under one MISC in an AST |
147
|
|
|
|
|
|
|
my $compressable_node_re = qr/^ \- (?: MISC | LITERAL | PLACEHOLDER ) $/x; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
my %indents = ( |
150
|
|
|
|
|
|
|
select => 0, |
151
|
|
|
|
|
|
|
update => 0, |
152
|
|
|
|
|
|
|
'insert into' => 0, |
153
|
|
|
|
|
|
|
'delete from' => 0, |
154
|
|
|
|
|
|
|
from => 1, |
155
|
|
|
|
|
|
|
where => 0, |
156
|
|
|
|
|
|
|
join => 1, |
157
|
|
|
|
|
|
|
'left join' => 1, |
158
|
|
|
|
|
|
|
on => 2, |
159
|
|
|
|
|
|
|
having => 0, |
160
|
|
|
|
|
|
|
'group by' => 0, |
161
|
|
|
|
|
|
|
'order by' => 0, |
162
|
|
|
|
|
|
|
set => 1, |
163
|
|
|
|
|
|
|
into => 1, |
164
|
|
|
|
|
|
|
values => 1, |
165
|
|
|
|
|
|
|
limit => 1, |
166
|
|
|
|
|
|
|
offset => 1, |
167
|
|
|
|
|
|
|
skip => 1, |
168
|
|
|
|
|
|
|
first => 1, |
169
|
|
|
|
|
|
|
); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
has [qw( |
173
|
|
|
|
|
|
|
newline indent_string indent_amount fill_in_placeholders placeholder_surround |
174
|
|
|
|
|
|
|
)] => (is => 'ro'); |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
has [qw( indentmap colormap )] => ( is => 'ro', default => quote_sub('{}') ); |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# class global is in fact desired |
179
|
|
|
|
|
|
|
my $merger; |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub BUILDARGS { |
182
|
23
|
|
|
23
|
0
|
36619
|
my $class = shift; |
183
|
23
|
100
|
|
|
|
137
|
my $args = ref $_[0] eq 'HASH' ? $_[0] : {@_}; |
184
|
|
|
|
|
|
|
|
185
|
23
|
100
|
|
|
|
548
|
if (my $p = delete $args->{profile}) { |
186
|
2
|
|
|
|
|
5
|
my %extra_args; |
187
|
2
|
100
|
|
|
|
12
|
if ($p eq 'console') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
188
|
|
|
|
|
|
|
%extra_args = ( |
189
|
|
|
|
|
|
|
fill_in_placeholders => 1, |
190
|
|
|
|
|
|
|
placeholder_surround => ['?/', ''], |
191
|
|
|
|
|
|
|
indent_string => ' ', |
192
|
|
|
|
|
|
|
indent_amount => 2, |
193
|
|
|
|
|
|
|
newline => "\n", |
194
|
|
|
|
|
|
|
colormap => {}, |
195
|
|
|
|
|
|
|
indentmap => \%indents, |
196
|
|
|
|
|
|
|
|
197
|
1
|
50
|
|
|
|
4
|
! ( eval { require Term::ANSIColor } ) ? () : do { |
|
1
|
|
|
|
|
571
|
|
198
|
1
|
|
|
|
|
7787
|
my $c = \&Term::ANSIColor::color; |
199
|
|
|
|
|
|
|
|
200
|
1
|
|
|
|
|
4
|
my $red = [$c->('red') , $c->('reset')]; |
201
|
1
|
|
|
|
|
36
|
my $cyan = [$c->('cyan') , $c->('reset')]; |
202
|
1
|
|
|
|
|
27
|
my $green = [$c->('green') , $c->('reset')]; |
203
|
1
|
|
|
|
|
26
|
my $yellow = [$c->('yellow') , $c->('reset')]; |
204
|
1
|
|
|
|
|
26
|
my $blue = [$c->('blue') , $c->('reset')]; |
205
|
1
|
|
|
|
|
31
|
my $magenta = [$c->('magenta'), $c->('reset')]; |
206
|
1
|
|
|
|
|
45
|
my $b_o_w = [$c->('black on_white'), $c->('reset')]; |
207
|
|
|
|
|
|
|
( |
208
|
1
|
|
|
|
|
32
|
placeholder_surround => [$c->('black on_magenta'), $c->('reset')], |
209
|
|
|
|
|
|
|
colormap => { |
210
|
|
|
|
|
|
|
'begin work' => $b_o_w, |
211
|
|
|
|
|
|
|
commit => $b_o_w, |
212
|
|
|
|
|
|
|
rollback => $b_o_w, |
213
|
|
|
|
|
|
|
savepoint => $b_o_w, |
214
|
|
|
|
|
|
|
'rollback to savepoint' => $b_o_w, |
215
|
|
|
|
|
|
|
'release savepoint' => $b_o_w, |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
select => $red, |
218
|
|
|
|
|
|
|
'insert into' => $red, |
219
|
|
|
|
|
|
|
update => $red, |
220
|
|
|
|
|
|
|
'delete from' => $red, |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
set => $cyan, |
223
|
|
|
|
|
|
|
from => $cyan, |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
where => $green, |
226
|
|
|
|
|
|
|
values => $yellow, |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
join => $magenta, |
229
|
|
|
|
|
|
|
'left join' => $magenta, |
230
|
|
|
|
|
|
|
on => $blue, |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
'group by' => $yellow, |
233
|
|
|
|
|
|
|
having => $yellow, |
234
|
|
|
|
|
|
|
'order by' => $yellow, |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
skip => $green, |
237
|
|
|
|
|
|
|
first => $green, |
238
|
|
|
|
|
|
|
limit => $green, |
239
|
|
|
|
|
|
|
offset => $green, |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
); |
242
|
|
|
|
|
|
|
}, |
243
|
|
|
|
|
|
|
); |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
elsif ($p eq 'console_monochrome') { |
246
|
0
|
|
|
|
|
0
|
%extra_args = ( |
247
|
|
|
|
|
|
|
fill_in_placeholders => 1, |
248
|
|
|
|
|
|
|
placeholder_surround => ['?/', ''], |
249
|
|
|
|
|
|
|
indent_string => ' ', |
250
|
|
|
|
|
|
|
indent_amount => 2, |
251
|
|
|
|
|
|
|
newline => "\n", |
252
|
|
|
|
|
|
|
indentmap => \%indents, |
253
|
|
|
|
|
|
|
); |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
elsif ($p eq 'html') { |
256
|
|
|
|
|
|
|
%extra_args = ( |
257
|
|
|
|
|
|
|
fill_in_placeholders => 1, |
258
|
|
|
|
|
|
|
placeholder_surround => ['', ''], |
259
|
|
|
|
|
|
|
indent_string => ' ', |
260
|
|
|
|
|
|
|
indent_amount => 2, |
261
|
|
|
|
|
|
|
newline => " \n", |
262
|
|
|
|
|
|
|
colormap => { map { |
263
|
1
|
|
|
|
|
12
|
(my $class = $_) =~ s/\s+/-/g; |
|
25
|
|
|
|
|
47
|
|
264
|
25
|
|
|
|
|
67
|
( $_ => [ qq||, '' ] ) |
265
|
|
|
|
|
|
|
} ( |
266
|
|
|
|
|
|
|
keys %indents, |
267
|
|
|
|
|
|
|
qw(commit rollback savepoint), |
268
|
|
|
|
|
|
|
'begin work', 'rollback to savepoint', 'release savepoint', |
269
|
|
|
|
|
|
|
) }, |
270
|
|
|
|
|
|
|
indentmap => \%indents, |
271
|
|
|
|
|
|
|
); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
elsif ($p eq 'none') { |
274
|
|
|
|
|
|
|
# nada |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
else { |
277
|
0
|
|
|
|
|
0
|
croak "No such profile '$p'"; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# see if we got any duplicates and merge if needed |
281
|
2
|
100
|
|
|
|
55
|
if (scalar grep { exists $args->{$_} } keys %extra_args) { |
|
14
|
|
|
|
|
26
|
|
282
|
|
|
|
|
|
|
# heavy-duty merge |
283
|
1
|
|
33
|
|
|
7
|
$args = ($merger ||= do { |
284
|
1
|
|
|
|
|
964
|
require Hash::Merge; |
285
|
1
|
|
|
|
|
7637
|
my $m = Hash::Merge->new; |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
$m->specify_behavior({ |
288
|
|
|
|
|
|
|
SCALAR => { |
289
|
0
|
|
|
0
|
|
0
|
SCALAR => sub { $_[1] }, |
290
|
0
|
|
|
0
|
|
0
|
ARRAY => sub { [ $_[0], @{$_[1]} ] }, |
|
0
|
|
|
|
|
0
|
|
291
|
0
|
|
|
0
|
|
0
|
HASH => sub { $_[1] }, |
292
|
|
|
|
|
|
|
}, |
293
|
|
|
|
|
|
|
ARRAY => { |
294
|
1
|
|
|
1
|
|
62
|
SCALAR => sub { $_[1] }, |
295
|
1
|
|
|
1
|
|
66
|
ARRAY => sub { $_[1] }, |
296
|
0
|
|
|
0
|
|
0
|
HASH => sub { $_[1] }, |
297
|
|
|
|
|
|
|
}, |
298
|
|
|
|
|
|
|
HASH => { |
299
|
0
|
|
|
0
|
|
0
|
SCALAR => sub { $_[1] }, |
300
|
0
|
|
|
0
|
|
0
|
ARRAY => sub { [ values %{$_[0]}, @{$_[1]} ] }, |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
301
|
2
|
|
|
2
|
|
240
|
HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, |
302
|
|
|
|
|
|
|
}, |
303
|
1
|
|
|
|
|
96
|
}, 'SQLA::Tree Behavior' ); |
304
|
|
|
|
|
|
|
|
305
|
1
|
|
|
|
|
45
|
$m; |
306
|
|
|
|
|
|
|
})->merge(\%extra_args, $args ); |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
else { |
310
|
1
|
|
|
|
|
5
|
$args = { %extra_args, %$args }; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
23
|
|
|
|
|
432
|
$args; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub parse { |
318
|
4475
|
|
|
4475
|
1
|
11748
|
my ($self, $s) = @_; |
319
|
|
|
|
|
|
|
|
320
|
4475
|
50
|
|
|
|
9548
|
return [] unless defined $s; |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# tokenize string, and remove all optional whitespace |
323
|
4475
|
|
|
|
|
6952
|
my $tokens = []; |
324
|
4475
|
|
|
|
|
405616
|
foreach my $token (split $tokenizer_re, $s) { |
325
|
104574
|
100
|
100
|
|
|
421865
|
push @$tokens, $token if ( |
|
|
|
66
|
|
|
|
|
326
|
|
|
|
|
|
|
defined $token |
327
|
|
|
|
|
|
|
and |
328
|
|
|
|
|
|
|
length $token |
329
|
|
|
|
|
|
|
and |
330
|
|
|
|
|
|
|
$token =~ /\S/ |
331
|
|
|
|
|
|
|
); |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
4475
|
|
|
|
|
17242
|
return [ $self->_recurse_parse($tokens, PARSE_TOP_LEVEL) ]; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub _recurse_parse { |
338
|
44752
|
|
|
44752
|
|
67628
|
my ($self, $tokens, $state) = @_; |
339
|
|
|
|
|
|
|
|
340
|
44752
|
|
|
|
|
50957
|
my @left; |
341
|
44752
|
|
|
|
|
50611
|
while (1) { # left-associative parsing |
342
|
|
|
|
|
|
|
|
343
|
107870
|
100
|
100
|
|
|
819357
|
if (! @$tokens |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
344
|
|
|
|
|
|
|
or |
345
|
|
|
|
|
|
|
($state == PARSE_IN_PARENS && $tokens->[0] eq ')') |
346
|
|
|
|
|
|
|
or |
347
|
|
|
|
|
|
|
($state == PARSE_IN_EXPR && $tokens->[0] =~ $expr_term_re ) |
348
|
|
|
|
|
|
|
or |
349
|
|
|
|
|
|
|
($state == PARSE_RHS && $tokens->[0] =~ $rhs_term_re ) |
350
|
|
|
|
|
|
|
or |
351
|
|
|
|
|
|
|
($state == PARSE_LIST_ELT && ( $tokens->[0] eq ',' or $tokens->[0] =~ $expr_term_re ) ) |
352
|
|
|
|
|
|
|
) { |
353
|
35268
|
|
|
|
|
91489
|
return @left; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
72602
|
|
|
|
|
115416
|
my $token = shift @$tokens; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# nested expression in () |
359
|
72602
|
100
|
100
|
|
|
705217
|
if ($token eq '(' ) { |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
360
|
7026
|
|
|
|
|
14023
|
my @right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
361
|
7026
|
50
|
|
|
|
14595
|
$token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse(\@right); |
362
|
7026
|
50
|
|
|
|
12838
|
$token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse(\@right); |
363
|
|
|
|
|
|
|
|
364
|
7026
|
|
|
|
|
15367
|
push @left, [ '-PAREN' => \@right ]; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# AND/OR |
368
|
|
|
|
|
|
|
elsif ($token =~ $and_or_re) { |
369
|
3513
|
|
|
|
|
7515
|
my $op = uc $token; |
370
|
|
|
|
|
|
|
|
371
|
3513
|
|
|
|
|
7457
|
my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
# Merge chunks if "logic" matches |
374
|
|
|
|
|
|
|
@left = [ $op => [ @left, (@right and $op eq $right[0][0]) |
375
|
3513
|
100
|
100
|
|
|
19016
|
? @{ $right[0][1] } |
|
420
|
|
|
|
|
1504
|
|
376
|
|
|
|
|
|
|
: @right |
377
|
|
|
|
|
|
|
] ]; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# LIST (,) |
381
|
|
|
|
|
|
|
elsif ($token eq ',') { |
382
|
|
|
|
|
|
|
|
383
|
4013
|
|
|
|
|
8727
|
my @right = $self->_recurse_parse($tokens, PARSE_LIST_ELT); |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# deal with malformed lists ( foo, bar, , baz ) |
386
|
4013
|
100
|
|
|
|
6530
|
@right = [] unless @right; |
387
|
|
|
|
|
|
|
|
388
|
4013
|
50
|
|
|
|
6177
|
@right = [ -MISC => [ @right ] ] if @right > 1; |
389
|
|
|
|
|
|
|
|
390
|
4013
|
100
|
|
|
|
8119
|
if (!@left) { |
|
|
100
|
|
|
|
|
|
391
|
9
|
|
|
|
|
58
|
@left = [ -LIST => [ [], @right ] ]; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
elsif ($left[0][0] eq '-LIST') { |
394
|
3286
|
|
|
|
|
4805
|
push @{$left[0][1]}, (@{$right[0]} and $right[0][0] eq '-LIST') |
395
|
3286
|
50
|
66
|
|
|
3475
|
? @{$right[0][1]} |
|
0
|
|
|
|
|
0
|
|
396
|
|
|
|
|
|
|
: @right |
397
|
|
|
|
|
|
|
; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
else { |
400
|
718
|
|
|
|
|
2193
|
@left = [ -LIST => [ @left, @right ] ]; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# binary operator keywords |
405
|
|
|
|
|
|
|
elsif ($token =~ $binary_op_re) { |
406
|
9075
|
|
|
|
|
16422
|
my $op = uc $token; |
407
|
|
|
|
|
|
|
|
408
|
9075
|
|
|
|
|
19993
|
my @right = $self->_recurse_parse($tokens, PARSE_RHS); |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# A between with a simple LITERAL for a 1st RHS argument needs a |
411
|
|
|
|
|
|
|
# rerun of the search to (hopefully) find the proper AND construct |
412
|
9075
|
50
|
66
|
|
|
18337
|
if ($op eq 'BETWEEN' and $right[0] eq '-LITERAL') { |
413
|
0
|
|
|
|
|
0
|
unshift @$tokens, $right[1][0]; |
414
|
0
|
|
|
|
|
0
|
@right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
9075
|
100
|
|
|
|
30403
|
push @left, [$op => [ (@left ? pop @left : ''), @right ]]; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# unary op keywords |
421
|
|
|
|
|
|
|
elsif ($token =~ $unary_op_re) { |
422
|
83
|
|
|
|
|
153
|
my $op = uc $token; |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# normalize RNO explicitly |
425
|
83
|
100
|
|
|
|
375
|
$op = 'ROW_NUMBER() OVER' if $op =~ /^$rno_re$/; |
426
|
|
|
|
|
|
|
|
427
|
83
|
|
|
|
|
207
|
my @right = $self->_recurse_parse($tokens, PARSE_RHS); |
428
|
|
|
|
|
|
|
|
429
|
83
|
|
|
|
|
206
|
push @left, [ $op => \@right ]; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# expression terminator keywords |
433
|
|
|
|
|
|
|
elsif ($token =~ $expr_start_re) { |
434
|
16188
|
|
|
|
|
31518
|
my $op = uc $token; |
435
|
16188
|
|
|
|
|
33042
|
my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
436
|
|
|
|
|
|
|
|
437
|
16188
|
|
|
|
|
42523
|
push @left, [ $op => \@right ]; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# a '?' |
441
|
|
|
|
|
|
|
elsif ($token =~ $placeholder_re) { |
442
|
2177
|
|
|
|
|
6284
|
push @left, [ -PLACEHOLDER => [ $token ] ]; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# check if the current token is an unknown op-start |
446
|
|
|
|
|
|
|
elsif (@$tokens and ($tokens->[0] eq '(' or $tokens->[0] =~ $placeholder_re ) ) { |
447
|
379
|
|
|
|
|
1063
|
push @left, [ $token => [ $self->_recurse_parse($tokens, PARSE_RHS) ] ]; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# we're now in "unknown token" land - start eating tokens until |
451
|
|
|
|
|
|
|
# we see something familiar, OR in the case of RHS (binop) stop |
452
|
|
|
|
|
|
|
# after the first token |
453
|
|
|
|
|
|
|
# Also stop processing when we could end up with an unknown func |
454
|
|
|
|
|
|
|
else { |
455
|
30148
|
|
|
|
|
91534
|
my @lits = [ -LITERAL => [$token] ]; |
456
|
|
|
|
|
|
|
|
457
|
30148
|
100
|
|
|
|
57598
|
unshift @lits, pop @left if @left == 1; |
458
|
|
|
|
|
|
|
|
459
|
30148
|
100
|
|
|
|
54449
|
unless ( $state == PARSE_RHS ) { |
460
|
22777
|
|
100
|
|
|
157077
|
while ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
461
|
|
|
|
|
|
|
@$tokens |
462
|
|
|
|
|
|
|
and |
463
|
|
|
|
|
|
|
$tokens->[0] !~ $all_std_keywords_re |
464
|
|
|
|
|
|
|
and |
465
|
|
|
|
|
|
|
! (@$tokens > 1 and $tokens->[1] eq '(') |
466
|
|
|
|
|
|
|
) { |
467
|
292
|
|
|
|
|
2466
|
push @lits, [ -LITERAL => [ shift @$tokens ] ]; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
30148
|
100
|
|
|
|
52415
|
@lits = [ -MISC => [ @lits ] ] if @lits > 1; |
472
|
|
|
|
|
|
|
|
473
|
30148
|
|
|
|
|
46165
|
push @left, @lits; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# compress -LITERAL -MISC and -PLACEHOLDER pieces into a single |
477
|
|
|
|
|
|
|
# -MISC container |
478
|
72602
|
100
|
|
|
|
116742
|
if (@left > 1) { |
479
|
11251
|
|
|
|
|
13412
|
my $i = 0; |
480
|
11251
|
|
|
|
|
19359
|
while ($#left > $i) { |
481
|
22407
|
100
|
100
|
|
|
64274
|
if ($left[$i][0] =~ $compressable_node_re and $left[$i+1][0] =~ $compressable_node_re) { |
482
|
|
|
|
|
|
|
splice @left, $i, 2, [ -MISC => [ |
483
|
33
|
100
|
|
|
|
88
|
map { $_->[0] eq '-MISC' ? @{$_->[1]} : $_ } (@left[$i, $i+1]) |
|
66
|
|
|
|
|
241
|
|
|
33
|
|
|
|
|
70
|
|
484
|
|
|
|
|
|
|
]]; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
else { |
487
|
22374
|
|
|
|
|
40205
|
$i++; |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
72602
|
100
|
|
|
|
119970
|
return @left if $state == PARSE_RHS; |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
# deal with post-fix operators |
495
|
63118
|
100
|
|
|
|
95289
|
if (@$tokens) { |
496
|
|
|
|
|
|
|
# asc/desc |
497
|
53054
|
100
|
|
|
|
158466
|
if ($tokens->[0] =~ $asc_desc_re) { |
498
|
176
|
|
|
|
|
564
|
@left = [ ('-' . uc (shift @$tokens)) => [ @left ] ]; |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
sub format_keyword { |
505
|
696
|
|
|
696
|
1
|
5933
|
my ($self, $keyword) = @_; |
506
|
|
|
|
|
|
|
|
507
|
696
|
100
|
|
|
|
2073
|
if (my $around = $self->colormap->{lc $keyword}) { |
508
|
10
|
|
|
|
|
34
|
$keyword = "$around->[0]$keyword$around->[1]"; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
|
511
|
696
|
|
|
|
|
5325
|
return $keyword |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
my %starters = ( |
515
|
|
|
|
|
|
|
select => 1, |
516
|
|
|
|
|
|
|
update => 1, |
517
|
|
|
|
|
|
|
'insert into' => 1, |
518
|
|
|
|
|
|
|
'delete from' => 1, |
519
|
|
|
|
|
|
|
); |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
sub pad_keyword { |
522
|
697
|
|
|
697
|
1
|
7533
|
my ($self, $keyword, $depth) = @_; |
523
|
|
|
|
|
|
|
|
524
|
697
|
|
|
|
|
1019
|
my $before = ''; |
525
|
697
|
100
|
|
|
|
2421
|
if (defined $self->indentmap->{lc $keyword}) { |
526
|
11
|
|
|
|
|
44
|
$before = $self->newline . $self->indent($depth + $self->indentmap->{lc $keyword}); |
527
|
|
|
|
|
|
|
} |
528
|
697
|
100
|
100
|
|
|
2757
|
$before = '' if $depth == 0 and defined $starters{lc $keyword}; |
529
|
697
|
|
|
|
|
2196
|
return [$before, '']; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
11
|
|
50
|
11
|
1
|
56
|
sub indent { ($_[0]->indent_string||'') x ( ( $_[0]->indent_amount || 0 ) * $_[1] ) } |
|
|
|
50
|
|
|
|
|
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub _is_key { |
535
|
1485
|
|
|
1485
|
|
2351
|
my ($self, $tree) = @_; |
536
|
1485
|
|
|
|
|
4217
|
$tree = $tree->[0] while ref $tree; |
537
|
|
|
|
|
|
|
|
538
|
1485
|
100
|
|
|
|
10477
|
defined $tree && defined $self->indentmap->{lc $tree}; |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub fill_in_placeholder { |
542
|
70
|
|
|
70
|
1
|
155
|
my ($self, $bindargs) = @_; |
543
|
|
|
|
|
|
|
|
544
|
70
|
100
|
|
|
|
181
|
if ($self->fill_in_placeholders) { |
545
|
9
|
|
50
|
|
|
33
|
my $val = shift @{$bindargs} || ''; |
546
|
9
|
|
|
|
|
26
|
my $quoted = $val =~ s/^(['"])(.*)\1$/$2/; |
547
|
9
|
|
|
|
|
15
|
my ($left, $right) = @{$self->placeholder_surround}; |
|
9
|
|
|
|
|
24
|
|
548
|
9
|
|
|
|
|
17
|
$val =~ s/\\/\\\\/g; |
549
|
9
|
|
|
|
|
16
|
$val =~ s/'/\\'/g; |
550
|
9
|
50
|
|
|
|
17
|
$val = qq('$val') if $quoted; |
551
|
9
|
|
|
|
|
54
|
return qq($left$val$right) |
552
|
|
|
|
|
|
|
} |
553
|
61
|
|
|
|
|
253
|
return '?' |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# FIXME - terrible name for a user facing API |
557
|
|
|
|
|
|
|
sub unparse { |
558
|
2605
|
|
|
2605
|
1
|
1069671
|
my ($self, $tree, $bindargs) = @_; |
559
|
2605
|
100
|
|
|
|
3633
|
$self->_unparse($tree, [@{$bindargs||[]}], 0); |
|
2605
|
|
|
|
|
10760
|
|
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
sub _unparse { |
563
|
13964
|
|
|
13964
|
|
22941
|
my ($self, $tree, $bindargs, $depth) = @_; |
564
|
|
|
|
|
|
|
|
565
|
13964
|
100
|
100
|
|
|
36434
|
if (not $tree or not @$tree) { |
566
|
17
|
|
|
|
|
47
|
return ''; |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# FIXME - needs a config switch to disable |
570
|
13947
|
|
|
|
|
27089
|
$self->_parenthesis_unroll($tree); |
571
|
|
|
|
|
|
|
|
572
|
13947
|
|
|
|
|
17877
|
my ($op, $args) = @{$tree}[0,1]; |
|
13947
|
|
|
|
|
25018
|
|
573
|
|
|
|
|
|
|
|
574
|
13947
|
50
|
66
|
|
|
45528
|
if (! defined $op or (! ref $op and ! defined $args) ) { |
|
|
|
33
|
|
|
|
|
575
|
0
|
|
|
|
|
0
|
require Data::Dumper; |
576
|
0
|
|
|
|
|
0
|
Carp::confess( sprintf ( "Internal error - malformed branch at depth $depth:\n%s", |
577
|
|
|
|
|
|
|
Data::Dumper::Dumper($tree) |
578
|
|
|
|
|
|
|
) ); |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
13947
|
100
|
100
|
|
|
43447
|
if (ref $op) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
582
|
2502
|
|
|
|
|
6087
|
return join (' ', map $self->_unparse($_, $bindargs, $depth), @$tree); |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
elsif ($op eq '-LITERAL') { # literal has different sig |
585
|
8108
|
|
|
|
|
33723
|
return $args->[0]; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
elsif ($op eq '-PLACEHOLDER') { |
588
|
68
|
|
|
|
|
128
|
return $self->fill_in_placeholder($bindargs); |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
elsif ($op eq '-PAREN') { |
591
|
|
|
|
|
|
|
return sprintf ('( %s )', |
592
|
1485
|
50
|
0
|
|
|
1849
|
join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$args} ) |
|
1505
|
|
|
|
|
2789
|
|
|
1485
|
|
|
|
|
2225
|
|
593
|
|
|
|
|
|
|
. |
594
|
|
|
|
|
|
|
($self->_is_key($args) |
595
|
|
|
|
|
|
|
? ( $self->newline||'' ) . $self->indent($depth + 1) |
596
|
|
|
|
|
|
|
: '' |
597
|
|
|
|
|
|
|
) |
598
|
|
|
|
|
|
|
); |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
elsif ($op eq 'AND' or $op eq 'OR' or $op =~ $binary_op_re ) { |
601
|
908
|
|
|
|
|
1900
|
return join (" $op ", map $self->_unparse($_, $bindargs, $depth), @{$args}); |
|
908
|
|
|
|
|
2483
|
|
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
elsif ($op eq '-LIST' ) { |
604
|
37
|
|
|
|
|
60
|
return join (', ', map $self->_unparse($_, $bindargs, $depth), @{$args}); |
|
37
|
|
|
|
|
180
|
|
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
elsif ($op eq '-MISC' ) { |
607
|
132
|
|
|
|
|
212
|
return join (' ', map $self->_unparse($_, $bindargs, $depth), @{$args}); |
|
132
|
|
|
|
|
377
|
|
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
elsif ($op =~ qr/^-(ASC|DESC)$/ ) { |
610
|
18
|
|
|
|
|
44
|
my $dir = $1; |
611
|
18
|
|
|
|
|
26
|
return join (' ', (map $self->_unparse($_, $bindargs, $depth), @{$args}), $dir); |
|
18
|
|
|
|
|
41
|
|
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
else { |
614
|
689
|
|
|
|
|
1172
|
my ($l, $r) = @{$self->pad_keyword($op, $depth)}; |
|
689
|
|
|
|
|
1426
|
|
615
|
|
|
|
|
|
|
|
616
|
689
|
|
|
|
|
1682
|
my $rhs = $self->_unparse($args, $bindargs, $depth); |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
return sprintf "$l%s$r", join( |
619
|
689
|
100
|
100
|
|
|
2155
|
( ref $args eq 'ARRAY' and @{$args} == 1 and $args->[0][0] eq '-PAREN' ) |
|
|
100
|
|
|
|
|
|
620
|
|
|
|
|
|
|
? '' # mysql-- |
621
|
|
|
|
|
|
|
: ' ' |
622
|
|
|
|
|
|
|
, |
623
|
|
|
|
|
|
|
$self->format_keyword($op), |
624
|
|
|
|
|
|
|
(length $rhs ? $rhs : () ), |
625
|
|
|
|
|
|
|
); |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics |
630
|
|
|
|
|
|
|
my @unrollable_ops = ( |
631
|
|
|
|
|
|
|
'ON', |
632
|
|
|
|
|
|
|
'WHERE', |
633
|
|
|
|
|
|
|
'GROUP \s+ BY', |
634
|
|
|
|
|
|
|
'HAVING', |
635
|
|
|
|
|
|
|
'ORDER \s+ BY', |
636
|
|
|
|
|
|
|
'I?LIKE', |
637
|
|
|
|
|
|
|
); |
638
|
|
|
|
|
|
|
my $unrollable_ops_re = join ' | ', @unrollable_ops; |
639
|
|
|
|
|
|
|
$unrollable_ops_re = qr/$unrollable_ops_re/xi; |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
sub _parenthesis_unroll { |
642
|
61064
|
|
|
61064
|
|
79200
|
my $self = shift; |
643
|
61064
|
|
|
|
|
66658
|
my $ast = shift; |
644
|
|
|
|
|
|
|
|
645
|
61064
|
100
|
66
|
|
|
163587
|
return unless (ref $ast and ref $ast->[1]); |
646
|
|
|
|
|
|
|
|
647
|
58126
|
|
|
|
|
64026
|
my $changes; |
648
|
58126
|
|
|
|
|
64955
|
do { |
649
|
60806
|
|
|
|
|
67850
|
my @children; |
650
|
60806
|
|
|
|
|
65359
|
$changes = 0; |
651
|
|
|
|
|
|
|
|
652
|
60806
|
|
|
|
|
64825
|
for my $child (@{$ast->[1]}) { |
|
60806
|
|
|
|
|
99772
|
|
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# the current node in this loop is *always* a PAREN |
655
|
79393
|
100
|
100
|
|
|
237608
|
if (! ref $child or ! @$child or $child->[0] ne '-PAREN') { |
|
|
|
100
|
|
|
|
|
656
|
73166
|
|
|
|
|
119692
|
push @children, $child; |
657
|
73166
|
|
|
|
|
98760
|
next; |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
6227
|
|
|
|
|
9846
|
my $parent_op = $ast->[0]; |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# unroll nested parenthesis |
663
|
6227
|
|
100
|
|
|
11779
|
while ( $parent_op ne 'IN' and @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { |
|
6584
|
|
100
|
|
|
25138
|
|
664
|
452
|
|
|
|
|
1022
|
$child = $child->[1][0]; |
665
|
452
|
|
|
|
|
832
|
$changes++; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# set to CHILD in the case of PARENT ( CHILD ) |
669
|
|
|
|
|
|
|
# but NOT in the case of PARENT( CHILD1, CHILD2 ) |
670
|
6227
|
100
|
|
|
|
8375
|
my $single_child_op = (@{$child->[1]} == 1) ? $child->[1][0][0] : ''; |
|
6227
|
|
|
|
|
12447
|
|
671
|
|
|
|
|
|
|
|
672
|
6227
|
100
|
|
|
|
9653
|
my $child_op_argc = $single_child_op ? scalar @{$child->[1][0][1]} : undef; |
|
5591
|
|
|
|
|
8364
|
|
673
|
|
|
|
|
|
|
|
674
|
6227
|
100
|
100
|
|
|
22935
|
my $single_grandchild_op |
675
|
|
|
|
|
|
|
= ( $child_op_argc||0 == 1 and ref $child->[1][0][1][0] eq 'ARRAY' ) |
676
|
|
|
|
|
|
|
? $child->[1][0][1][0][0] |
677
|
|
|
|
|
|
|
: '' |
678
|
|
|
|
|
|
|
; |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
# if the parent operator explicitly allows it AND the child isn't a subselect |
681
|
|
|
|
|
|
|
# nuke the parenthesis |
682
|
6227
|
100
|
100
|
|
|
109974
|
if ($parent_op =~ $unrollable_ops_re and $single_child_op ne 'SELECT') { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
683
|
1454
|
|
|
|
|
2333
|
push @children, @{$child->[1]}; |
|
1454
|
|
|
|
|
2767
|
|
684
|
1454
|
|
|
|
|
2632
|
$changes++; |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list |
688
|
|
|
|
|
|
|
elsif ( |
689
|
|
|
|
|
|
|
$single_child_op eq $parent_op |
690
|
|
|
|
|
|
|
and |
691
|
|
|
|
|
|
|
( $parent_op eq 'AND' or $parent_op eq 'OR') |
692
|
|
|
|
|
|
|
) { |
693
|
131
|
|
|
|
|
228
|
push @children, @{$child->[1][0][1]}; |
|
131
|
|
|
|
|
245
|
|
694
|
131
|
|
|
|
|
269
|
$changes++; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
# only *ONE* LITERAL or placeholder element |
698
|
|
|
|
|
|
|
# as an AND/OR/NOT argument |
699
|
|
|
|
|
|
|
elsif ( |
700
|
|
|
|
|
|
|
( $single_child_op eq '-LITERAL' or $single_child_op eq '-PLACEHOLDER' ) |
701
|
|
|
|
|
|
|
and |
702
|
|
|
|
|
|
|
( $parent_op eq 'AND' or $parent_op eq 'OR' or $parent_op eq 'NOT' ) |
703
|
|
|
|
|
|
|
) { |
704
|
5
|
|
|
|
|
12
|
push @children, @{$child->[1]}; |
|
5
|
|
|
|
|
13
|
|
705
|
5
|
|
|
|
|
12
|
$changes++; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
# an AND/OR expression with only one binop in the parenthesis |
709
|
|
|
|
|
|
|
# with exactly two grandchildren |
710
|
|
|
|
|
|
|
# the only time when we can *not* unroll this is when both |
711
|
|
|
|
|
|
|
# the parent and the child are mathops (in which case we'll |
712
|
|
|
|
|
|
|
# break precedence) or when the child is BETWEEN (special |
713
|
|
|
|
|
|
|
# case) |
714
|
|
|
|
|
|
|
elsif ( |
715
|
|
|
|
|
|
|
($parent_op eq 'AND' or $parent_op eq 'OR') |
716
|
|
|
|
|
|
|
and |
717
|
|
|
|
|
|
|
$single_child_op =~ $binary_op_re |
718
|
|
|
|
|
|
|
and |
719
|
|
|
|
|
|
|
$single_child_op ne 'BETWEEN' |
720
|
|
|
|
|
|
|
and |
721
|
|
|
|
|
|
|
$child_op_argc == 2 |
722
|
|
|
|
|
|
|
and |
723
|
|
|
|
|
|
|
! ( |
724
|
|
|
|
|
|
|
$single_child_op =~ $alphanum_cmp_op_re |
725
|
|
|
|
|
|
|
and |
726
|
|
|
|
|
|
|
$parent_op =~ $alphanum_cmp_op_re |
727
|
|
|
|
|
|
|
) |
728
|
|
|
|
|
|
|
) { |
729
|
1715
|
|
|
|
|
3139
|
push @children, @{$child->[1]}; |
|
1715
|
|
|
|
|
3356
|
|
730
|
1715
|
|
|
|
|
3283
|
$changes++; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# a function binds tighter than a mathop - see if our ancestor is a |
734
|
|
|
|
|
|
|
# mathop, and our content is: |
735
|
|
|
|
|
|
|
# a single non-mathop child with a single PAREN grandchild which |
736
|
|
|
|
|
|
|
# would indicate mathop ( nonmathop ( ... ) ) |
737
|
|
|
|
|
|
|
# or a single non-mathop with a single LITERAL ( nonmathop foo ) |
738
|
|
|
|
|
|
|
# or a single non-mathop with a single PLACEHOLDER ( nonmathop ? ) |
739
|
|
|
|
|
|
|
elsif ( |
740
|
|
|
|
|
|
|
$single_child_op |
741
|
|
|
|
|
|
|
and |
742
|
|
|
|
|
|
|
$parent_op =~ $alphanum_cmp_op_re |
743
|
|
|
|
|
|
|
and |
744
|
|
|
|
|
|
|
$single_child_op !~ $alphanum_cmp_op_re |
745
|
|
|
|
|
|
|
and |
746
|
|
|
|
|
|
|
$child_op_argc == 1 |
747
|
|
|
|
|
|
|
and |
748
|
|
|
|
|
|
|
( |
749
|
|
|
|
|
|
|
$single_grandchild_op eq '-PAREN' |
750
|
|
|
|
|
|
|
or |
751
|
|
|
|
|
|
|
$single_grandchild_op eq '-LITERAL' |
752
|
|
|
|
|
|
|
or |
753
|
|
|
|
|
|
|
$single_grandchild_op eq '-PLACEHOLDER' |
754
|
|
|
|
|
|
|
) |
755
|
|
|
|
|
|
|
) { |
756
|
9
|
|
|
|
|
17
|
push @children, @{$child->[1]}; |
|
9
|
|
|
|
|
13
|
|
757
|
9
|
|
|
|
|
17
|
$changes++; |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
# a construct of ... ( somefunc ( ... ) ) ... can safely lose the outer parens |
761
|
|
|
|
|
|
|
# except for the case of ( NOT ( ... ) ) which has already been handled earlier |
762
|
|
|
|
|
|
|
# and except for the case of RNO, where the double are explicit syntax |
763
|
|
|
|
|
|
|
elsif ( |
764
|
|
|
|
|
|
|
$parent_op ne 'ROW_NUMBER() OVER' |
765
|
|
|
|
|
|
|
and |
766
|
|
|
|
|
|
|
$single_child_op |
767
|
|
|
|
|
|
|
and |
768
|
|
|
|
|
|
|
$single_child_op ne 'NOT' |
769
|
|
|
|
|
|
|
and |
770
|
|
|
|
|
|
|
$child_op_argc == 1 |
771
|
|
|
|
|
|
|
and |
772
|
|
|
|
|
|
|
$single_grandchild_op eq '-PAREN' |
773
|
|
|
|
|
|
|
) { |
774
|
5
|
|
|
|
|
9
|
push @children, @{$child->[1]}; |
|
5
|
|
|
|
|
8
|
|
775
|
5
|
|
|
|
|
19
|
$changes++; |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# otherwise no more mucking for this pass |
780
|
|
|
|
|
|
|
else { |
781
|
2908
|
|
|
|
|
7856
|
push @children, $child; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
60806
|
|
|
|
|
186853
|
$ast->[1] = \@children; |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
} while ($changes); |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
sub _strip_asc_from_order_by { |
791
|
48110
|
|
|
48110
|
|
72365
|
my ($self, $ast) = @_; |
792
|
|
|
|
|
|
|
|
793
|
48110
|
100
|
66
|
|
|
166234
|
return $ast if ( |
794
|
|
|
|
|
|
|
ref $ast ne 'ARRAY' |
795
|
|
|
|
|
|
|
or |
796
|
|
|
|
|
|
|
$ast->[0] ne 'ORDER BY' |
797
|
|
|
|
|
|
|
); |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
|
800
|
139
|
|
|
|
|
182
|
my $to_replace; |
801
|
|
|
|
|
|
|
|
802
|
139
|
100
|
66
|
|
|
169
|
if (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-ASC') { |
|
139
|
100
|
66
|
|
|
470
|
|
803
|
8
|
|
|
|
|
17
|
$to_replace = [ $ast->[1][0] ]; |
804
|
|
|
|
|
|
|
} |
805
|
131
|
|
|
|
|
443
|
elsif (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-LIST') { |
806
|
72
|
|
|
|
|
93
|
$to_replace = [ grep { $_->[0] eq '-ASC' } @{$ast->[1][0][1]} ]; |
|
200
|
|
|
|
|
375
|
|
|
72
|
|
|
|
|
136
|
|
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
|
809
|
139
|
|
|
|
|
284
|
@$_ = @{$_->[1][0]} for @$to_replace; |
|
65
|
|
|
|
|
149
|
|
810
|
|
|
|
|
|
|
|
811
|
139
|
|
|
|
|
331
|
$ast; |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
|
814
|
23
|
|
|
23
|
1
|
10585
|
sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } |
|
23
|
|
|
|
|
61
|
|
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
1; |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=pod |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head1 NAME |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
SQL::Abstract::Tree - Represent SQL as an AST |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head1 SYNOPSIS |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
# SELECT * |
831
|
|
|
|
|
|
|
# FROM foo |
832
|
|
|
|
|
|
|
# WHERE foo.a > 2 |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=head1 METHODS |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=head2 new |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
$args = { |
841
|
|
|
|
|
|
|
profile => 'console', # predefined profile to use (default: 'none') |
842
|
|
|
|
|
|
|
fill_in_placeholders => 1, # true for placeholder population |
843
|
|
|
|
|
|
|
placeholder_surround => # The strings that will be wrapped around |
844
|
|
|
|
|
|
|
[GREEN, RESET], # populated placeholders if the above is set |
845
|
|
|
|
|
|
|
indent_string => ' ', # the string used when indenting |
846
|
|
|
|
|
|
|
indent_amount => 2, # how many of above string to use for a single |
847
|
|
|
|
|
|
|
# indent level |
848
|
|
|
|
|
|
|
newline => "\n", # string for newline |
849
|
|
|
|
|
|
|
colormap => { |
850
|
|
|
|
|
|
|
select => [RED, RESET], # a pair of strings defining what to surround |
851
|
|
|
|
|
|
|
# the keyword with for colorization |
852
|
|
|
|
|
|
|
# ... |
853
|
|
|
|
|
|
|
}, |
854
|
|
|
|
|
|
|
indentmap => { |
855
|
|
|
|
|
|
|
select => 0, # A zero means that the keyword will start on |
856
|
|
|
|
|
|
|
# a new line |
857
|
|
|
|
|
|
|
from => 1, # Any other positive integer means that after |
858
|
|
|
|
|
|
|
on => 2, # said newline it will get that many indents |
859
|
|
|
|
|
|
|
# ... |
860
|
|
|
|
|
|
|
}, |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
Returns a new SQL::Abstract::Tree object. All arguments are optional. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=head3 profiles |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
There are four predefined profiles, C, C, C, |
868
|
|
|
|
|
|
|
and C. Typically a user will probably just use C or |
869
|
|
|
|
|
|
|
C, but if something about a profile bothers you, merely |
870
|
|
|
|
|
|
|
use the profile and override the parts that you don't like. |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
=head2 format |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
$sqlat->format('SELECT * FROM bar WHERE x = ?', [1]) |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
Takes C<$sql> and C<\@bindargs>. |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
Returns a formatting string based on the string passed in |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
=head2 parse |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
$sqlat->parse('SELECT * FROM bar WHERE x = ?') |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Returns a "tree" representing passed in SQL. Please do not depend on the |
885
|
|
|
|
|
|
|
structure of the returned tree. It may be stable at some point, but not yet. |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
=head2 unparse |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
$sqlat->unparse($tree_structure, \@bindargs) |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
Transform "tree" into SQL, applying various transforms on the way. |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=head2 format_keyword |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
$sqlat->format_keyword('SELECT') |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Currently this just takes a keyword and puts the C stuff around it. |
898
|
|
|
|
|
|
|
Later on it may do more and allow for coderef based transforms. |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=head2 pad_keyword |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
my ($before, $after) = @{$sqlat->pad_keyword('SELECT')}; |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
Returns whitespace to be inserted around a keyword. |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
=head2 fill_in_placeholder |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
my $value = $sqlat->fill_in_placeholder(\@bindargs) |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Removes last arg from passed arrayref and returns it, surrounded with |
911
|
|
|
|
|
|
|
the values in placeholder_surround, and then surrounded with single quotes. |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=head2 indent |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
Returns as many indent strings as indent amounts times the first argument. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=head1 ACCESSORS |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=head2 colormap |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
See L |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=head2 fill_in_placeholders |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
See L |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head2 indent_amount |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
See L |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
=head2 indent_string |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
See L |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=head2 indentmap |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
See L |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head2 newline |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
See L |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
=head2 placeholder_surround |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
See L |
946
|
|
|
|
|
|
|
|