| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::Thesaurus::Storage::SQLite; |
|
2
|
6
|
|
|
6
|
|
3606
|
use 5.010; |
|
|
6
|
|
|
|
|
12
|
|
|
3
|
6
|
|
|
6
|
|
21
|
use Moose; |
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
35
|
|
|
4
|
|
|
|
|
|
|
with 'Lingua::Thesaurus::Storage'; |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
6
|
|
|
6
|
|
37178
|
use DBI; |
|
|
6
|
|
|
|
|
70960
|
|
|
|
6
|
|
|
|
|
369
|
|
|
8
|
6
|
|
|
6
|
|
41
|
use Module::Load (); |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
99
|
|
|
9
|
6
|
|
|
6
|
|
19
|
use Carp qw(croak); |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
286
|
|
|
10
|
6
|
|
|
6
|
|
24
|
use namespace::clean -except => 'meta'; |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
49
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has 'dbname' => (is => 'ro', isa => 'Str', |
|
13
|
|
|
|
|
|
|
documentation => "database file (or might be ':memory:)"); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has 'dbh' => (is => 'ro', isa => 'DBI::db', |
|
16
|
|
|
|
|
|
|
lazy => 1, builder => '_dbh', |
|
17
|
|
|
|
|
|
|
documentation => "database handle"); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#====================================================================== |
|
21
|
|
|
|
|
|
|
# construction |
|
22
|
|
|
|
|
|
|
#====================================================================== |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
around BUILDARGS => sub { |
|
25
|
|
|
|
|
|
|
my $orig = shift; |
|
26
|
|
|
|
|
|
|
my $class = shift; |
|
27
|
|
|
|
|
|
|
if (@_ == 1 && !ref $_[0]) { |
|
28
|
|
|
|
|
|
|
# one single scalar arg => interpreted as dbname |
|
29
|
|
|
|
|
|
|
return $class->$orig(dbname => $_[0]); |
|
30
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
else { |
|
32
|
|
|
|
|
|
|
return $class->$orig(@_); |
|
33
|
|
|
|
|
|
|
} |
|
34
|
|
|
|
|
|
|
}; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _dbh { |
|
38
|
6
|
|
|
6
|
|
8
|
my ($self) = @_; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# connect to the SQLite database |
|
41
|
6
|
50
|
|
|
|
156
|
my $dbname = $self->dbname |
|
42
|
|
|
|
|
|
|
or croak "storage has no file"; |
|
43
|
|
|
|
|
|
|
|
|
44
|
6
|
50
|
|
|
|
73
|
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "","", |
|
45
|
|
|
|
|
|
|
{AutoCommit => 1, |
|
46
|
|
|
|
|
|
|
RaiseError => 1, |
|
47
|
|
|
|
|
|
|
private_was_connected_by => __PACKAGE__}) |
|
48
|
|
|
|
|
|
|
or croak $DBI::errstr; |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# activate foreign key control |
|
51
|
6
|
|
|
|
|
48806
|
$dbh->do('PRAGMA FOREIGN_KEYS = ON'); |
|
52
|
|
|
|
|
|
|
|
|
53
|
6
|
|
|
|
|
433
|
return $dbh; |
|
54
|
|
|
|
|
|
|
} |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub _params { |
|
57
|
3
|
|
|
3
|
|
5
|
my ($self) = @_; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# retrieve key-values that were stored in table _params during initialize() |
|
60
|
3
|
|
|
|
|
4
|
my %params; |
|
61
|
3
|
|
|
|
|
84
|
my $sth = $self->dbh->prepare('SELECT key, value FROM params'); |
|
62
|
3
|
|
|
|
|
1527
|
$sth->execute; |
|
63
|
3
|
|
|
|
|
33
|
while (my ($key, $value) = $sth->fetchrow_array) { |
|
64
|
1
|
|
|
|
|
5
|
$params{$key} = $value; |
|
65
|
|
|
|
|
|
|
} |
|
66
|
3
|
|
|
|
|
154
|
return \%params; |
|
67
|
|
|
|
|
|
|
} |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
#====================================================================== |
|
71
|
|
|
|
|
|
|
# methods for populating the database |
|
72
|
|
|
|
|
|
|
#====================================================================== |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub do_transaction { |
|
75
|
4
|
|
|
4
|
1
|
10
|
my ($self, $coderef) = @_; |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# poor man's transaction ... just for efficiency (don't care about rollback) |
|
78
|
4
|
|
|
|
|
156
|
$self->dbh->begin_work; |
|
79
|
4
|
|
|
|
|
121
|
$coderef->(); |
|
80
|
4
|
|
|
|
|
163
|
$self->dbh->commit; |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub initialize { |
|
84
|
3
|
|
|
3
|
0
|
5
|
my ($self) = @_; |
|
85
|
3
|
|
|
|
|
80
|
my $dbh = $self->dbh; |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# check that the database is empty |
|
88
|
3
|
50
|
|
|
|
28
|
!$dbh->tables(undef, undef, undef, 'TABLE') |
|
89
|
|
|
|
|
|
|
or croak "can't initialize(): database is not empty"; |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# params to be injected into the '_params' table |
|
92
|
3
|
100
|
|
|
|
2764
|
my $params = $self->has_params ? $self->params : {}; |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# default representation for the term table (regular table) |
|
95
|
3
|
|
|
|
|
5
|
my $term_table = "TABLE term(docid INTEGER PRIMARY KEY AUTOINCREMENT, |
|
96
|
|
|
|
|
|
|
content CHAR NOT NULL, |
|
97
|
|
|
|
|
|
|
origin CHAR, |
|
98
|
|
|
|
|
|
|
UNIQUE (content, origin))"; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# alternative representations for the term table : fulltext |
|
101
|
3
|
100
|
|
|
|
12
|
if ($params->{use_fulltext}) { |
|
102
|
2
|
|
|
|
|
23
|
DBD::SQLite->VERSION("1.54"); # because earlier versions have a bug |
|
103
|
|
|
|
|
|
|
# in tokenizer suport |
|
104
|
2
|
|
|
|
|
6
|
my $tokenizer = ""; |
|
105
|
2
|
100
|
|
|
|
6
|
if ($params->{use_unaccent}) { |
|
106
|
1
|
|
|
|
|
460
|
require Search::Tokenizer; |
|
107
|
1
|
|
|
|
|
15204
|
$tokenizer = ", tokenize=perl 'Search::Tokenizer::unaccent'"; |
|
108
|
|
|
|
|
|
|
# NOTE: currently, 'use_unaccent' may produce crashes in the END |
|
109
|
|
|
|
|
|
|
# phase of the user process (bug in DBD::SQLite tokenizers). So |
|
110
|
|
|
|
|
|
|
# 'use_unaccent' is not recommended in production. |
|
111
|
|
|
|
|
|
|
} |
|
112
|
2
|
|
|
|
|
8
|
$term_table = "VIRTUAL TABLE term USING fts4(content, origin $tokenizer)"; |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
3
|
|
|
|
|
23
|
$dbh->do(<<""); |
|
116
|
|
|
|
|
|
|
CREATE $term_table; |
|
117
|
|
|
|
|
|
|
|
|
118
|
3
|
|
|
|
|
420294
|
$dbh->do(<<""); |
|
119
|
|
|
|
|
|
|
CREATE TABLE rel_type ( |
|
120
|
|
|
|
|
|
|
rel_id CHAR PRIMARY KEY, |
|
121
|
|
|
|
|
|
|
description CHAR, |
|
122
|
|
|
|
|
|
|
is_external BOOL |
|
123
|
|
|
|
|
|
|
); |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# foreign key control : can't be used with fulltext, because 'docid' |
|
126
|
|
|
|
|
|
|
# is not a regular column that can be referenced |
|
127
|
3
|
100
|
|
|
|
115444
|
my $ref_docid = $params->{use_fulltext} ? '' : 'REFERENCES term(docid)'; |
|
128
|
|
|
|
|
|
|
|
|
129
|
3
|
|
|
|
|
69
|
$dbh->do(<<""); |
|
130
|
|
|
|
|
|
|
CREATE TABLE relation ( |
|
131
|
|
|
|
|
|
|
lead_term_id INTEGER NOT NULL $ref_docid, |
|
132
|
|
|
|
|
|
|
rel_id CHAR NOT NULL REFERENCES rel_type(rel_id), |
|
133
|
|
|
|
|
|
|
rel_order INTEGER DEFAULT 1, |
|
134
|
|
|
|
|
|
|
other_term_id INTEGER $ref_docid, |
|
135
|
|
|
|
|
|
|
external_info CHAR |
|
136
|
|
|
|
|
|
|
); |
|
137
|
|
|
|
|
|
|
|
|
138
|
3
|
|
|
|
|
102542
|
$dbh->do(<<""); |
|
139
|
|
|
|
|
|
|
CREATE INDEX ix_lead_term ON relation(lead_term_id); |
|
140
|
|
|
|
|
|
|
|
|
141
|
3
|
|
|
|
|
139323
|
$dbh->do(<<""); |
|
142
|
|
|
|
|
|
|
CREATE INDEX ix_other_term ON relation(other_term_id); |
|
143
|
|
|
|
|
|
|
|
|
144
|
3
|
|
|
|
|
86072
|
$dbh->do(<<""); |
|
145
|
|
|
|
|
|
|
CREATE TABLE params(key CHAR, value CHAR); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# store additional params into the '_params' table, so they can be |
|
148
|
|
|
|
|
|
|
# retrieved by other processes that will use this thesaurus |
|
149
|
3
|
|
|
|
|
120131
|
my $sth; |
|
150
|
3
|
|
|
|
|
94
|
while (my ($key, $value) = each %$params) { |
|
151
|
3
|
|
66
|
|
|
60
|
$sth //= $dbh->prepare('INSERT INTO params(key, value) VALUES (?, ?)'); |
|
152
|
3
|
|
|
|
|
71046
|
$sth->execute($key, $value); |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub store_term { |
|
158
|
2231
|
|
|
2231
|
1
|
2871
|
my ($self, $term_string, $origin) = @_; |
|
159
|
|
|
|
|
|
|
|
|
160
|
2231
|
|
|
|
|
2021
|
my $sql = 'INSERT INTO term(content, origin) VALUES(?, ?)'; |
|
161
|
2231
|
|
|
|
|
64118
|
my $sth = $self->dbh->prepare($sql); |
|
162
|
2231
|
|
|
|
|
123805
|
$sth->execute($term_string, $origin); |
|
163
|
2231
|
|
|
|
|
179107
|
return $self->dbh->last_insert_id('', '', '', ''); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub store_rel_type { |
|
168
|
30
|
|
|
30
|
0
|
97
|
my ($self, $rel_id, $description, $is_external) = @_; |
|
169
|
|
|
|
|
|
|
|
|
170
|
30
|
|
|
|
|
49
|
my $sql = 'INSERT INTO rel_type VALUES(?, ?, ?)'; |
|
171
|
30
|
|
|
|
|
1080
|
my $sth = $self->dbh->prepare($sql); |
|
172
|
30
|
|
|
|
|
753198
|
$sth->execute($rel_id, $description, $is_external); |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub store_relation { |
|
177
|
606
|
|
|
606
|
0
|
812
|
my ($self, $lead_term_id, $rel_id, $related, $is_external, $inverse_id) = @_; |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# make sure that $related is a list |
|
180
|
606
|
50
|
|
|
|
1028
|
$related = [$related] unless ref $related; |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# prepare insertion statement |
|
183
|
606
|
|
|
|
|
469
|
my $sql = 'INSERT INTO relation VALUES(?, ?, ?, ?, ?)'; |
|
184
|
606
|
|
|
|
|
16962
|
my $sth = $self->dbh->prepare($sql); |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# insertion loop |
|
187
|
606
|
|
|
|
|
24015
|
my $count = 1; |
|
188
|
606
|
|
|
|
|
951
|
foreach my $rel (@$related) { |
|
189
|
1941
|
100
|
|
|
|
3158
|
my ($other_term_id, $ext_info) = $is_external ? (undef, $rel) |
|
190
|
|
|
|
|
|
|
: ($rel, undef); |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# insert first relation |
|
193
|
1941
|
|
|
|
|
22961
|
$sth->execute($lead_term_id, $rel_id, $count++, $other_term_id, $ext_info); |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# insert inverse relation, if any |
|
196
|
1941
|
100
|
|
|
|
25254
|
$sth->execute($other_term_id, $inverse_id, 1, $lead_term_id, undef) |
|
197
|
|
|
|
|
|
|
if $inverse_id; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
0
|
0
|
|
sub finalize { |
|
203
|
|
|
|
|
|
|
# nothing to do -- db file is stored automatically by DBD::SQLite |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
#====================================================================== |
|
207
|
|
|
|
|
|
|
# retrieval methods |
|
208
|
|
|
|
|
|
|
#====================================================================== |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub search_terms { |
|
212
|
6
|
|
|
6
|
0
|
176
|
my ($self, $pattern, $origin) = @_; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# retrieve terms data from database |
|
215
|
6
|
|
|
|
|
12
|
my ($sql, @bind) = ('SELECT docid, content, origin FROM term'); |
|
216
|
6
|
50
|
|
|
|
17
|
if ($pattern) { |
|
217
|
6
|
100
|
|
|
|
159
|
if ($self->params->{use_fulltext}) { |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# make sure that Search::Tokenizer is loaded so that SQLite can call |
|
220
|
|
|
|
|
|
|
# the 'unaccent' tokenizer |
|
221
|
2
|
50
|
|
|
|
51
|
require Search::Tokenizer if $self->params->{use_unaccent}; |
|
222
|
|
|
|
|
|
|
|
|
223
|
2
|
|
|
|
|
5
|
$sql .= " WHERE content MATCH ?"; |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# SQLITE's fulltext engine doesn't like unbalanced parenthesis |
|
226
|
|
|
|
|
|
|
# in a MATCH term. Besides, it replaces parenthesis by white |
|
227
|
|
|
|
|
|
|
# space, which results in OR-ing the terms. So what we do is |
|
228
|
|
|
|
|
|
|
# explicitly replace parenthesis by white space, and wrap the |
|
229
|
|
|
|
|
|
|
# whole thing in a phrase query, to get more precise answers. |
|
230
|
2
|
|
|
|
|
6
|
my $n_paren = $pattern =~ tr/()/ /; |
|
231
|
2
|
50
|
33
|
|
|
8
|
$pattern = qq{"$pattern"} if $n_paren and $pattern !~ /"/; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
else { |
|
234
|
4
|
|
|
|
|
6
|
$sql .= " WHERE content LIKE ?"; |
|
235
|
4
|
|
|
|
|
8
|
$pattern =~ tr/*/%/; |
|
236
|
4
|
|
|
|
|
4
|
$pattern =~ tr/?/_/; |
|
237
|
|
|
|
|
|
|
}; |
|
238
|
6
|
|
|
|
|
12
|
@bind = ($pattern); |
|
239
|
|
|
|
|
|
|
} |
|
240
|
6
|
100
|
|
|
|
17
|
if (defined $origin) { |
|
241
|
1
|
50
|
|
|
|
5
|
$sql .= ($pattern ? ' AND ' : ' WHERE ') . 'origin = ?'; |
|
242
|
1
|
|
|
|
|
1
|
push @bind, $origin; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
6
|
|
|
|
|
162
|
my $sth = $self->dbh->prepare($sql); |
|
245
|
6
|
|
|
|
|
1461
|
$sth->execute(@bind); |
|
246
|
6
|
|
|
|
|
581
|
my $rows = $sth->fetchall_arrayref; |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# build term objects |
|
249
|
6
|
|
|
|
|
212
|
my $term_class = $self->term_class; |
|
250
|
6
|
|
|
|
|
13
|
return map {$term_class->new(storage => $self, |
|
|
286
|
|
|
|
|
9179
|
|
|
251
|
|
|
|
|
|
|
id => $_->[0], |
|
252
|
|
|
|
|
|
|
string => $_->[1], |
|
253
|
|
|
|
|
|
|
origin => $_->[2])} @$rows; |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub fetch_term { |
|
257
|
5
|
|
|
5
|
0
|
65
|
my ($self, $term_string, $origin) = @_; |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# retrieve term data from database |
|
260
|
5
|
|
|
|
|
7
|
my $sql = 'SELECT docid, content, origin FROM term WHERE content = ?'; |
|
261
|
5
|
|
|
|
|
12
|
my @bind = ($term_string); |
|
262
|
5
|
100
|
|
|
|
10
|
if (defined $origin) { |
|
263
|
2
|
|
|
|
|
4
|
$sql .= ' AND origin = ?'; |
|
264
|
2
|
|
|
|
|
3
|
push @bind, $origin; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
5
|
|
|
|
|
134
|
my $sth = $self->dbh->prepare($sql); |
|
267
|
5
|
|
|
|
|
663
|
$sth->execute(@bind); |
|
268
|
5
|
50
|
|
|
|
74
|
(my $id, $term_string, $origin) = $sth->fetchrow_array |
|
269
|
|
|
|
|
|
|
or return; |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# build term object |
|
272
|
5
|
|
|
|
|
226
|
return $self->term_class->new(storage => $self, |
|
273
|
|
|
|
|
|
|
id => $id, |
|
274
|
|
|
|
|
|
|
string => $term_string, |
|
275
|
|
|
|
|
|
|
origin => $origin); |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub fetch_term_id { |
|
280
|
10
|
|
|
10
|
0
|
10
|
my ($self, $id, $origin) = @_; |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# retrieve term data from database |
|
283
|
10
|
|
|
|
|
9
|
my $sql = 'SELECT content, origin FROM term WHERE docid = ?'; |
|
284
|
10
|
|
|
|
|
13
|
my @bind = ($id); |
|
285
|
10
|
50
|
|
|
|
18
|
if (defined $origin) { |
|
286
|
0
|
|
|
|
|
0
|
$sql .= ' AND origin = ?'; |
|
287
|
0
|
|
|
|
|
0
|
push @bind, $origin; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
10
|
|
|
|
|
242
|
my $sth = $self->dbh->prepare($sql); |
|
290
|
10
|
|
|
|
|
638
|
$sth->execute(@bind); |
|
291
|
10
|
50
|
|
|
|
81
|
(my $term_string, $origin) = $sth->fetchrow_array |
|
292
|
|
|
|
|
|
|
or return; |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# build term object |
|
295
|
10
|
|
|
|
|
330
|
return $self->term_class->new(storage => $self, |
|
296
|
|
|
|
|
|
|
id => $id, |
|
297
|
|
|
|
|
|
|
string => $term_string, |
|
298
|
|
|
|
|
|
|
origin => $origin); |
|
299
|
|
|
|
|
|
|
} |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub related { |
|
303
|
5
|
|
|
5
|
0
|
6
|
my ($self, $term_id, $rel_ids) = @_; |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# construct the SQL request |
|
306
|
5
|
|
|
|
|
8
|
my $sql = 'SELECT rel_id, other_term_id, external_info FROM relation ' |
|
307
|
|
|
|
|
|
|
. 'WHERE lead_term_id = ?'; |
|
308
|
5
|
|
|
|
|
8
|
my @bind = ($term_id); |
|
309
|
5
|
50
|
|
|
|
21
|
if ($rel_ids) { |
|
310
|
|
|
|
|
|
|
# optional restriction on one or several relation ids |
|
311
|
5
|
50
|
|
|
|
22
|
$rel_ids = [$rel_ids] unless ref $rel_ids; |
|
312
|
5
|
|
|
|
|
15
|
my $placeholders = join ", ", ('?') x @$rel_ids; |
|
313
|
5
|
|
|
|
|
9
|
push @bind, @$rel_ids; |
|
314
|
5
|
|
|
|
|
12
|
$sql .= " AND rel_id IN ($placeholders)"; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
5
|
|
|
|
|
7
|
$sql .= " ORDER BY rel_id, rel_order"; |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# query database |
|
319
|
5
|
|
|
|
|
164
|
my $sth = $self->dbh->prepare($sql); |
|
320
|
5
|
|
|
|
|
702
|
$sth->execute(@bind); |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# build array of results |
|
323
|
5
|
|
|
|
|
9
|
my @results; |
|
324
|
|
|
|
|
|
|
my %rel_types; |
|
325
|
5
|
|
|
|
|
50
|
while (my ($rel_id, $other_term_id, $external_info) = $sth->fetchrow_array) { |
|
326
|
14
|
|
66
|
|
|
43
|
my $rel_type = $rel_types{$rel_id} //= $self->fetch_rel_type($rel_id); |
|
327
|
14
|
100
|
|
|
|
5013
|
my $related |
|
328
|
|
|
|
|
|
|
= $rel_type->is_external ? $external_info |
|
329
|
|
|
|
|
|
|
: $self->fetch_term_id($other_term_id); |
|
330
|
14
|
|
|
|
|
128
|
push @results, [$rel_type, $related]; |
|
331
|
|
|
|
|
|
|
} |
|
332
|
|
|
|
|
|
|
|
|
333
|
5
|
|
|
|
|
105
|
return @results; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub rel_types { |
|
338
|
3
|
|
|
3
|
0
|
15
|
my ($self) = @_; |
|
339
|
3
|
|
|
|
|
4
|
my $sql = 'SELECT rel_id FROM rel_type'; |
|
340
|
3
|
|
|
|
|
88
|
my $rel_types = $self->dbh->selectcol_arrayref($sql); |
|
341
|
3
|
|
|
|
|
529
|
return @$rel_types; |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub fetch_rel_type { |
|
347
|
5
|
|
|
5
|
0
|
6
|
my ($self, $rel_id) = @_; |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# retrieve rel_type data from database |
|
350
|
5
|
|
|
|
|
6
|
my $sql = 'SELECT * FROM rel_type WHERE rel_id = ?'; |
|
351
|
5
|
|
|
|
|
170
|
my $sth = $self->dbh->prepare($sql); |
|
352
|
5
|
|
|
|
|
437
|
$sth->execute($rel_id); |
|
353
|
5
|
50
|
|
|
|
109
|
my $data = $sth->fetchrow_hashref |
|
354
|
|
|
|
|
|
|
or return; |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# build RelType object |
|
357
|
5
|
|
|
|
|
39
|
return $self->_relType_class->new(%$data); |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
1; # End of Lingua::Thesaurus::Storage::SQLite |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
__END__ |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=encoding ISO8859-1 |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 NAME |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Lingua::Thesaurus::Storage::SQLite - Thesaurus storage in an SQLite database |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
This class implements the L<Lingua::Thesaurus::Storage> role, |
|
376
|
|
|
|
|
|
|
by storing thesaurus data in a L<DBD::SQLite> database. |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=head1 METHODS |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head2 new |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
my $storage = Lingua::Thesaurus::Storage::SQLite->new($dbname); |
|
384
|
|
|
|
|
|
|
my $storage = Lingua::Thesaurus::Storage::SQLite->new(%args); |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
If C<new()> has only one scalar argument, this is interpreted |
|
387
|
|
|
|
|
|
|
as C<< new(dbname => $arg) >>. Otherwise, parameters should be |
|
388
|
|
|
|
|
|
|
passed as a hash or hashref, with the following options : |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item dbname |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Filename for storing the L<DBD::SQLite> database. |
|
395
|
|
|
|
|
|
|
This could also be C<:memory:> for an in-memory database. |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item dbh |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Optional handle to an already connected database (in that |
|
400
|
|
|
|
|
|
|
case, the C<dbname> parameter will not be used). |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=item params |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
Hashref of key-value pairs that will be stored into the database, |
|
405
|
|
|
|
|
|
|
and can be retrieved by other processes using the thesaurus. |
|
406
|
|
|
|
|
|
|
This package interprets the following keys : |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=over |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item use_fulltext |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
If true, the C<term> table will use SQLite's fulltext functionalities. |
|
413
|
|
|
|
|
|
|
This means that C<< $thesaurus->search_terms('sci*') >> will also |
|
414
|
|
|
|
|
|
|
retrieve C<'computer science'>; you can also issue boolean |
|
415
|
|
|
|
|
|
|
queries like C<< 'sci* AND NOT comp*' >>. |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
If true, the C<term> table is just a regular SQLite table, and queries |
|
418
|
|
|
|
|
|
|
will be interpreted through SQLite's C<'LIKE'> operator. |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=item use_unaccent |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
This parameter only makes sense together with C<use_fulltext>. |
|
423
|
|
|
|
|
|
|
It will activate L<Search::Tokenizer/unaccent>, so that a |
|
424
|
|
|
|
|
|
|
query for C<thésaurus> will also find C<thesaurus>, or vice-versa. |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item term_class |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Name of the class for instanciating terms. |
|
429
|
|
|
|
|
|
|
Default is L<Lingua::Thesaurus::Term>. |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item relType_class |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Name of the class for instanciating "relation types". |
|
434
|
|
|
|
|
|
|
Default is L<Lingua::Thesaurus::RelType>. |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=back |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=back |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 Retrieval methods |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
See L<Lingua::Thesaurus::Storage/"Retrieval methods"> |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=head2 Populating the database |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
See L<Lingua::Thesaurus::Storage/"Populating the database"> for the API. |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
Below are some particular notes about the SQLite implementation. |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head3 do_transaction |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
This method just performs C<begin_work> .. C<commit>, because |
|
453
|
|
|
|
|
|
|
inserts into an SQLite database are much faster under a transaction. |
|
454
|
|
|
|
|
|
|
No support for rollbacks is programmed, because in this context |
|
455
|
|
|
|
|
|
|
there is no need for it. |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head3 store_term |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
If C<use_fulltext> is false, terms are stored in a regular table |
|
460
|
|
|
|
|
|
|
with a UNIQUE constraint, so it is not possible to store the same |
|
461
|
|
|
|
|
|
|
term string twice. |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
If C<use_fulltext> is true, no constraint is enforced. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=cut |