File Coverage

blib/lib/SQL/SplitStatement/Tokenizer.pm
Criterion Covered Total %
statement 13 14 92.8
branch 2 4 50.0
condition n/a
subroutine 4 4 100.0
pod 1 1 100.0
total 20 23 86.9


line stmt bran cond sub pod time code
1 33     33   234 use strict;
  33         74  
  33         1027  
2 33     33   179 use warnings;
  33         62  
  33         1621  
3             package SQL::SplitStatement::Tokenizer;
4              
5              
6 33     33   171 use Exporter;
  33         70  
  33         18250  
7              
8             our @ISA = qw(Exporter);
9              
10             our @EXPORT_OK= qw(tokenize_sql);
11              
12             our $VERSION = '1.00022'; # TRIAL
13              
14             my $re= qr{
15             (
16             (?:--|\#)[\ \t\S]* # single line comments
17             |
18             (?:<>|<=>|>=|<=|==|=|!=|!|<<|>>|<|>|\|\||\||&&|&|-|\+|\*(?!/)|/(?!\*)|\%|~|\^|\?)
19             # operators and tests
20             |
21             [\[\]\(\)\{\},;.] # punctuation (parenthesis, comma)
22             |
23             \'\'(?!\') # empty single quoted string
24             |
25             \"\"(?!\"") # empty double quoted string
26             |
27             "(?>(?:(?>[^"\\]+)|""|\\.)*)+"
28             # anything inside double quotes, ungreedy
29             |
30             `(?>(?:(?>[^`\\]+)|``|\\.)*)+`
31             # anything inside backticks quotes, ungreedy
32             |
33             '(?>(?:(?>[^'\\]+)|''|\\.)*)+'
34             # anything inside single quotes, ungreedy.
35             |
36             /\*[\ \t\r\n\S]*?\*/ # C style comments
37             |
38             (?:[\w:@]+(?:\.(?:\w+|\*)?)*)
39             # words, standard named placeholders, db.table.*, db.*
40             |
41             (?: \$_\$ | \$\d+ | \${1,2} )
42             # dollar expressions - eg $_$ $3 $$
43             |
44             \n # newline
45             |
46             [\t\ ]+ # any kind of white spaces
47             )
48             }smx;
49              
50             sub tokenize_sql {
51 79     79 1 552 my ( $query, $remove_white_tokens )= @_;
52              
53 79         56291 my @query= $query =~ m{$re}smxg;
54              
55 79 50       587 if ($remove_white_tokens) {
56 0         0 @query= grep( !/^[\s\n\r]*$/, @query );
57             }
58              
59 79 50       24389 return wantarray ? @query : \@query;
60             }
61              
62             1;
63              
64             =pod
65              
66             =head1 NAME
67              
68             SQL::SplitStatement::Tokenizer - A simple SQL tokenizer.
69              
70             =head1 SYNOPSIS
71              
72             use SQL::SplitStatement::Tokenizer qw(tokenize_sql);
73              
74             my $query= q{SELECT 1 + 1};
75             my @tokens= tokenize_sql($query);
76              
77             # @tokens now contains ('SELECT', ' ', '1', ' ', '+', ' ', '1')
78              
79             =head1 DESCRIPTION
80              
81             SQL::SplitStatement::Tokenizer is a simple tokenizer for SQL queries. It does
82             not claim to be a parser or query verifier. It just creates sane tokens from a
83             valid SQL query.
84              
85             It supports SQL with comments like:
86              
87             -- This query is used to insert a message into
88             -- logs table
89             INSERT INTO log (application, message) VALUES (?, ?)
90              
91             It also supports the C<''>, C<""> and C<\'> escaping methods, so tokenizing queries
92             like the one below should not be a problem:
93              
94             INSERT INTO log (application, message)
95             VALUES ('myapp', 'Hey, this is a ''single quoted string''!')
96              
97             =head1 API
98              
99             =over 4
100              
101             =item tokenize_sql
102              
103             use SQL::SplitStatement::Tokenizer qw(tokenize_sql);
104              
105             my @tokens = tokenize_sql($query);
106             my $tokens = tokenize_sql($query);
107              
108             $tokens = tokenize_sql( $query, $remove_white_tokens );
109              
110             C<tokenize_sql> can be imported into the current namespace on request. It receives a
111             SQL query, and returns an array of tokens if called in list context, or an
112             arrayref if called in scalar context.
113              
114              
115             If C<$remove_white_tokens> is true, whitespace-only tokens are removed from
116             the result.
117              
118             =back
119              
120             =head1 ACKNOWLEDGEMENTS
121              
122             =over 4
123              
124             =item
125              
126             Igor Sutton Lopes for writing SQL::Tokenizer, which this was forked from.
127              
128             =item
129              
130             Evan Harris, for implementing Shell comment style and SQL operators.
131              
132             =item
133              
134             Charlie Hills, for spotting a lot of important issues I hadn't thought of.
135              
136             =item
137              
138             Jonas Kramer, for fixing MySQL quoted strings and treating dot as punctuation character correctly.
139              
140             =item
141              
142             Emanuele Zeppieri, for asking to fix SQL::Tokenizer to support dollars as well.
143              
144             =item
145              
146             Nigel Metheringham, for extending the dollar sign support.
147              
148             =item
149              
150             Devin Withers, for making it not choke on CR+LF in comments.
151              
152             =item
153              
154             Luc Lanthier, for simplifying the regex and making it not choke on backslashes.
155              
156             =back
157              
158             =head1 AUTHOR
159              
160             Copyright (c) 2007, 2008, 2009, 2010, 2011 Igor Sutton Lopes. All rights
161             reserved.
162              
163             Copyright (c) 2021 Veesh Goldman
164              
165             This module is free software; you can redistribute it and/or modify it under
166             the same terms as Perl itself.
167              
168             =cut
169