File Coverage

blib/lib/SQL/KeywordSearch.pm
Criterion Covered Total %
statement 36 37 97.3
branch 14 16 87.5
condition n/a
subroutine 5 5 100.0
pod 1 1 100.0
total 56 59 94.9


line stmt bran cond sub pod time code
1             package SQL::KeywordSearch;
2 1     1   27815 use Params::Validate ':all';
  1         17155  
  1         247  
3 1     1   9 use base 'Exporter';
  1         2  
  1         132  
4              
5             our $VERSION = 1.13;
6              
7             # Make the functions available by default
8             our @EXPORT = qw(
9             sql_keyword_search
10             );
11              
12 1     1   5 use warnings;
  1         6  
  1         22  
13 1     1   5 use strict;
  1         2  
  1         503  
14              
15             =head1 NAME
16              
17             SQL::KeywordSearch - generate SQL for simple keyword searches
18              
19              
20             =head1 SYNOPSIS
21              
22             use SQL::KeywordSearch;
23              
24             my ($search_sql,@bind) =
25             sql_keyword_search(
26             keywords => 'cat,brown,whiskers',
27             columns => ['pets','colors','names']
28             );
29              
30             my $sql = "SELECT title from articles
31             WHERE user_id = 5 AND ".$search_sql;
32              
33             =head1 About keyword searching
34              
35             The solution provided here is I, suitable for relatively
36             small numbers of rows and columns. It is also simple-minded in that
37             it I sort the results based on their relevance.
38              
39             For large data sets and more features, a full-text indexing and searching
40             solution is recommended to be used instead. Tsearch2 for PostgreSQL,
41             L is one such solution.
42              
43             =head1 Database Support
44              
45             This module was developed for use with PostgreSQL. It can work with other
46             databases by specifying the regular expression operator to use. The 'REGEXP'
47             operator should work for MySQL.
48              
49             Since a regular expression for word boundary checking is about the only fancy
50             database feature we used, other databases should work as well.
51              
52             =head1 Functions
53              
54             =head2 sql_keyword_search()
55              
56             ($sql,@bind) = sql_keyword_search(...);
57             (@interp) = sql_keyword_search(interp => 1, ...);
58              
59             B builds a sql statement based on a keyword field containing a
60             list of comma, space, semicolon or colon separated keywords. This prepares a
61             case-insensitive regular expression search.
62              
63             ($sql, @bind) =
64             sql_keyword_search(
65             keywords => 'cat,brown',
66             columns => ['pets','colors'],
67             every_column => 1,
68             every_word => 1,
69             whole_word => 1,
70             operator => 'REGEXP'
71             );
72              
73             Now the result would look like:
74              
75             $sql = qq{(
76             (lower(pets) ~ lower(?)
77             OR lower(colors) ~ lower(?)
78             )
79             OR
80             (lower(pets) ~ lower(?)
81             OR lower(colors) ~ lower(?)
82             ))};
83              
84             @bind = ('cat','cat','brown','brown');
85              
86             You can control the use of AND, OR and other aspects of the SQL generation
87             through the options below.
88              
89             =over 4
90              
91             =item B
92              
93             A string of comma,space,semicolon or color separated keywords. Required.
94              
95             =item B
96              
97             An anonymous array of columns to perform the keyword search on. Required.
98              
99             =item B (default: false)
100              
101             If you would like all words to match in all columns, you set this to 1.
102              
103             By default, words can match in one or more columns.
104              
105             =item B (default: false)
106              
107             If you would like all words to match in particular column for it to be
108             considered a match, set this value to 1
109              
110             By default, one or more words can match in a particular column.
111              
112             =item B (default: false)
113              
114             Set this to true to do only match against whole words. A substring search is
115             the default.
116              
117             =item B (default: ~)
118              
119             Set to 'REGEXP' if you are using MySQL. The default works for PostgreSQL.
120              
121             =item B (default: off)
122              
123             # integrate with DBIx::Interp
124             my $articles = $dbx->selectall_arrayref_i("
125             SELECT article_id, title, summary
126             FROM articles
127             WHERE ",
128             sql_keyword_search(
129             keywords => $q->param('q'),
130             columns => [qw/title summary/]
131             interp => 1,
132             )
133             ,attr(Slice=>{}));
134              
135             Turn this on to return an array of SQL like L or
136             L expect as input.
137              
138             =back
139              
140             =cut
141              
142             sub sql_keyword_search {
143 5     5 1 2660 my %p = validate(@_,{
144             keywords => { type => SCALAR },
145             every_column => { default => 0 },
146             every_word => { default => 0 },
147             whole_word => { default => 0 },
148             columns => { type => ARRAYREF },
149             operator => { default => '~' },
150             interp => { default => 0 },
151             });
152              
153 5         43 my (@sql,@bind);
154              
155 5         32 my @list = split /[\s\,\;\:]+/ , $p{keywords};
156 5         8 my @columns = @{ $p{columns} };
  5         11  
157              
158 5         10 push @sql, "(\n";
159 5         17 foreach (my $j = 0; $j <= $#list; $j++) {
160 10         15 push @sql, "(";
161 10         22 foreach (my $i = 0; $i <= $#columns; $i ++) {
162 20         26 my $word = $list[$j];
163 20 50       38 if (defined $word) {
164 20 50       44 if ($p{whole_word}) {
165 0         0 $word = "(^|[[:<:]])".$word.'([[:>:]]|$)';
166             }
167 20         42 push @sql, "lower($columns[$i]) $p{operator} ";
168 20 100       38 if ($p{interp}) {
169 4         7 push @sql, "lower(",\$word,")";
170             }
171             else {
172 16         19 push @sql, "lower(?)\n";
173 16         25 push @bind, $word;
174             }
175             }
176 20 100       82 push @sql, " ".($p{every_column} ? 'AND' : 'OR' )." " if $i != $#columns;
    100          
177             }
178 10         16 push @sql, ")";
179 10 100       44 push @sql, "\n ".($p{every_word} ? 'AND' : 'OR' )." \n" if $j != $#list;
    100          
180             }
181 5         9 push @sql, "\n)\n";
182              
183 5 100       21 if ($p{interp}) {
184 1         9 return @sql,
185             }
186             else {
187 4         44 return ((join '', @sql),@bind);
188             }
189             }
190              
191             1;
192              
193             __END__