File Coverage

blib/lib/NNexus/DB/API.pm
Criterion Covered Total %
statement 29 153 18.9
branch 0 42 0.0
condition 0 49 0.0
subroutine 4 15 26.6
pod 11 12 91.6
total 44 271 16.2


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | NNexus Autolinker | #
3             # | Backend API Module | #
4             # |=====================================================================| #
5             # | Part of the Planetary project: http://trac.mathweb.org/planetary | #
6             # | Research software, produced as part of work done by: | #
7             # | the KWARC group at Jacobs University | #
8             # | Copyright (c) 2012 | #
9             # | Released under the MIT License (MIT) | #
10             # |---------------------------------------------------------------------| #
11             # | Adapted from the original NNexus code by | #
12             # | James Gardner and Aaron Krowne | #
13             # |---------------------------------------------------------------------| #
14             # | Deyan Ginev #_# | #
15             # | http://kwarc.info/people/dginev (o o) | #
16             # \=========================================================ooo==U==ooo=/ #
17             package NNexus::DB::API;
18 3     3   13 use strict;
  3         10  
  3         102  
19 3     3   11 use warnings;
  3         5  
  3         256  
20              
21             require Exporter;
22             our @ISA = qw(Exporter);
23             our @EXPORT = qw(add_object_by select_object_by select_concepts_by last_inserted_id
24             add_concept_by delete_concept_by invalidate_by reset_db
25             select_firstword_matches
26             select_linkscache_by delete_linkscache_by add_linkscache_by);
27              
28 3     3   1189 use NNexus::Morphology qw(firstword_split);
  3         5  
  3         3933  
29              
30             ### API for Table: Objects
31              
32             sub add_object_by {
33 0     0 1 0 my ($db,%options) = @_;
34 0         0 my ($url, $domain) = map {$options{$_}} qw(url domain);
  0         0  
35 0 0 0     0 return unless $url && $domain;
36 0         0 my $sth = $db->prepare("INSERT into objects (url, domain) values (?, ?)");
37 0         0 $sth->execute($url,$domain);
38 0         0 $sth->finish();
39             # Return the object id in order to update the concepts and classification
40 0         0 return $db->last_inserted_id();
41             }
42              
43             sub select_object_by {
44 0     0 1 0 my ($db,%options) = @_;
45 0         0 my ($url,$objectid) = map {$options{$_}} qw/url objectid/;
  0         0  
46 0         0 my $sth;
47 0 0       0 if ($url) {
    0          
48 0         0 $sth = $db->prepare("select objectid, domain from objects where (url = ?)");
49 0         0 $sth->execute($url); }
50             elsif ($objectid) {
51 0         0 $sth = $db->prepare("select url from objects where (objectid = ?)");
52 0         0 $sth->execute($objectid); }
53              
54 0         0 my $object = $sth->fetchrow_hashref;
55 0         0 $sth->finish();
56 0         0 return $object;
57             }
58              
59             ### API for Table: Concept
60              
61             sub select_concepts_by {
62 0     0 0 0 my ($db,%options) = @_;
63 0         0 my ($concept,$category,$scheme,$objectid,$firstword,$tailwords) =
64 0         0 map {$options{$_}} qw/concept category scheme objectid firstword tailwords/;
65 0 0 0     0 if ($concept && (!$firstword)) {
66 0         0 ($firstword,$tailwords) = firstword_split($concept);
67             }
68 0         0 my $concepts = [];
69 0         0 my $sth;
70 0 0 0     0 if ($firstword && $category && $scheme && $objectid) {
    0 0        
      0        
71             # Selector for invalidation
72 0         0 $sth = $db->prepare("select * from concepts where (objectid = ? AND firstword = ? AND tailwords = ? AND scheme = ? AND category = ? )");
73 0         0 $sth->execute($objectid,$firstword,$tailwords,$scheme,$category);
74             } elsif ($objectid) {
75 0         0 $sth = $db->prepare("select * from concepts where (objectid = ?)");
76 0         0 $sth->execute($objectid);
77 0         0 } else { return []; } # Garbage in - garbage out. TODO: Error message?
78              
79 0         0 while (my $row = $sth->fetchrow_hashref()) {
80 0   0     0 $row->{tailwords} //= '';
81 0 0       0 $row->{concept} = $row->{firstword}.($row->{tailwords} ? " ".$row->{tailwords} : '');
82 0         0 push @$concepts, $row;
83             }
84 0         0 $sth->finish();
85            
86 0         0 return $concepts;
87             }
88              
89             sub delete_concept_by {
90 0     0 1 0 my ($db, %options) = @_;
91 0         0 my ($firstword, $tailwords, $concept, $category, $objectid) = map {$options{$_}} qw(firstword tailwords concept category objectid);
  0         0  
92 0 0 0     0 if ($concept && (!$firstword)) {
93 0         0 ($firstword,$tailwords) = firstword_split($concept);
94             }
95 0 0 0     0 return unless $firstword && $category && $objectid; # Mandatory fields. TODO: Raise error?
      0        
96 0         0 $firstword = lc($firstword); # We only record lower-cased concepts, avoid oversights
97 0   0     0 $tailwords = lc($tailwords)||''; # ditto
98 0         0 my $sth = $db->prepare("delete from concepts where (firstword = ? AND tailwords = ? AND category = ? AND objectid = ?)");
99 0         0 $sth->execute($firstword,$tailwords,$category,$objectid);
100 0         0 $sth->finish();
101             }
102              
103             sub add_concept_by {
104 0     0 1 0 my ($db, %options) = @_;
105 0         0 my ($concept, $category, $objectid, $domain, $link, $scheme, $firstword, $tailwords) =
106 0         0 map {$options{$_}} qw(concept category objectid domain link scheme firstword tailwords);
107 0 0 0     0 return unless ($firstword || $concept) && $category && $objectid && $link && $domain; # Mandatory fields. TODO: Raise error?
      0        
      0        
      0        
      0        
108 0 0       0 $scheme = 'msc' unless $scheme;
109 0 0       0 if (! $firstword) {
110 0         0 $concept = lc($concept); # Only record lower-cased concepts
111 0         0 ($firstword,$tailwords) = firstword_split($concept);
112             }
113 0 0       0 if (! $firstword) {
114 0         0 print STDERR "Error: No firstword for $concept at $link!\n\n";
115 0         0 return;
116             }
117 0         0 my $sth = $db->prepare("insert into concepts (firstword, tailwords, category, scheme, objectid, domain, link) values (?, ?, ?, ?, ?, ?, ?)");
118 0         0 $sth->execute($firstword, $tailwords, $category, $scheme, $objectid, $domain, $link);
119 0         0 $sth->finish();
120 0         0 return last_inserted_id($db);
121             }
122              
123             # get the possible matches based on the first word of a concept
124             # returns as an array containing a hash with newterm
125             sub select_firstword_matches {
126 0     0 1 0 my ($db,$word,%options) = @_;
127 0         0 my @matches = ();
128 0         0 my $domain = $options{domain};
129 0         0 my $sth;
130 0 0 0     0 if ($domain && ($domain ne 'all')) {
131 0         0 $sth = $db->prepare("SELECT conceptid, firstword, tailwords, category, scheme,
132             domain, link, objectid from concepts where firstword=? AND domain=?");
133 0         0 $sth->execute($word,$domain);
134             } else {
135 0         0 $sth = $db->prepare("SELECT conceptid, firstword, tailwords, category, scheme,
136             domain, link, objectid from concepts where firstword=?");
137 0         0 $sth->execute($word);
138             }
139              
140 0         0 my %row;
141 0         0 $sth->bind_columns( \( @row{ @{$sth->{NAME_lc} } } ));
  0         0  
142 0         0 while ($sth->fetch) {
143 0 0       0 $row{concept} = $row{firstword}.($row{tailwords} ? " ".$row{tailwords} : '');
144 0         0 push @matches, {%row};
145             }
146 0         0 $sth->finish();
147 0         0 return @matches;
148             }
149              
150             ### API for Table: Links_cache
151              
152             sub delete_linkscache_by {
153 0     0 1 0 my ($db,%options) = @_;
154 0         0 my $objectid = $options{objectid};
155 0 0       0 return unless $objectid;
156 0         0 my $sth = $db->prepare("delete from links_cache where objectid=?");
157 0         0 $sth->execute($objectid);
158 0         0 $sth->finish();
159             }
160              
161             sub add_linkscache_by{
162 0     0 1 0 my ($db,%options) = @_;
163 0         0 my $objectid = $options{objectid};
164 0         0 my $conceptid = $options{conceptid};
165 0 0 0     0 return unless $objectid && $conceptid;
166 0         0 my $sth = $db->prepare("insert into links_cache (conceptid,objectid) values (?,?) ");
167 0         0 $sth->execute($conceptid,$objectid);
168 0         0 $sth->finish();
169             }
170              
171             sub select_linkscache_by {
172 0     0 1 0 my ($db,%options)=@_;
173 0         0 my $conceptid = $options{conceptid};
174 0         0 my $objectid = $options{objectid};
175 0         0 my $sth;
176 0 0       0 if ($conceptid) {
    0          
177 0         0 $sth = $db->prepare("SELECT objectid from links_cache WHERE conceptid=?");
178 0         0 $sth->execute($conceptid); }
179 0         0 elsif ($objectid) {
180 0         0 $sth = $db->prepare("SELECT conceptid from links_cache WHERE objectid=?");
181 0         0 $sth->execute($objectid); }
182             else {return []; }
183 0         0 my $results = [];
184 0         0 while (my @row = $sth->fetchrow_array()) {
185 0         0 push @$results, @row;
186             }
187 0         0 $sth->finish();
188 0         0 return $results;
189             }
190              
191             # Alias, more semantic
192             sub invalidate_by {
193 0     0 1 0 my ($db,%options)=@_;
194 0         0 my $objectids = $db->select_linkscache_by(%options);
195 0         0 my @urls = ();
196 0         0 foreach my $objectid(@$objectids) {
197 0         0 push @urls, $db->select_object_by(objectid=>$objectid)->{url};
198             }
199 0         0 return @urls; }
200              
201             ### Generic DB API
202              
203             sub last_inserted_id {
204 0     0 1 0 my ($db) = @_;
205 0         0 my $objid;
206 0         0 my $dbms = $db->{dbms};
207 0 0       0 if ($dbms eq 'mysql') {
    0          
208 0         0 $objid = $db->{handle}->{'mysql_insertid'}; }
209             elsif ($dbms eq 'SQLite') {
210 0         0 $objid = $db->{handle}->sqlite_last_insert_rowid(); }
211 0         0 else { die 'No DBMS information provided! Failing...'; }
212 0         0 return $objid; }
213              
214             ### API for Initializing a SQLite Database:
215             sub reset_db {
216 1     1 1 2 my ($self) = @_;
217 1         4 $self = $self->safe; # unsafe but faster...
218             # Request a 20 MB cache size, reasonable on all modern systems:
219 1         6 $self->do("PRAGMA cache_size = 20000; ");
220             # Table structure for table object
221 1         58 $self->do("DROP TABLE IF EXISTS objects;");
222 1         61 $self->do("CREATE TABLE objects (
223             objectid integer primary key AUTOINCREMENT,
224             url varchar(2083) NOT NULL UNIQUE,
225             domain varchar(50),
226             -- TODO: Do we really care about modified?
227             modified timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
228             );");
229             # TODO: Rethink this trigger, do we need modified?
230 1         355 $self->do("CREATE TRIGGER ObjectModified
231             AFTER UPDATE ON objects
232             BEGIN
233             UPDATE objects SET modified = CURRENT_TIMESTAMP WHERE objectid = old.objectid;
234             END;");
235              
236             # Table structure for table concept
237             # A 'concept' has a 'firstword', belongs to a 'category' (e.g. 10-XX) with a certain 'scheme' (e.g. MSC) and is defined at a 'link', obtained while traversing an object known via 'objectid'. The concept inherits the 'domain' of the object (e.g. PlanetMath).
238             # The distinction between link and objectid allows for a level of indirection, e.g. in DLMF, where we would obtain the 'link's that define concepts while at a higher (e.g. index) webpage, only which we would register in the object table. The reindexing should be driven by the traversal process, while the linking should use the actual obtained URL for the concept definition.
239 1         192 $self->do("DROP TABLE IF EXISTS concepts;");
240 1         63 $self->do("CREATE TABLE concepts (
241             conceptid integer primary key AUTOINCREMENT,
242             firstword varchar(50) NOT NULL,
243             tailwords varchar(255),
244             category varchar(10) NOT NULL,
245             scheme varchar(10) NOT NULL DEFAULT 'msc',
246             domain varchar(50) NOT NULL,
247             link varchar(2053) NOT NULL,
248             objectid int(11) NOT NULL
249             );");
250             # TODO: Do we need this one?
251             #$self->do("CREATE INDEX conceptidx ON concept(concept);");
252 1         175 $self->do("CREATE INDEX conceptidx ON concepts(firstword);");
253 1         202 $self->do("CREATE INDEX objectididx ON concepts(objectid);");
254              
255             # Table structure for table candidates
256 1         140 $self->do("DROP TABLE IF EXISTS candidates;");
257 1         61 $self->do("CREATE TABLE candidates (
258             candidateid integer primary key AUTOINCREMENT,
259             firstword varchar(50) NOT NULL,
260             tailwords varchar(255) NOT NULL,
261             confidence real NOT NULL DEFAULT 0
262             );");
263              
264             # Table structure for table links_cache
265 1         151 $self->do("DROP TABLE IF EXISTS links_cache;");
266 1         60 $self->do("CREATE TABLE links_cache (
267             objectid integer NOT NULL,
268             conceptid integer NOT NULL,
269             PRIMARY KEY (objectid, conceptid)
270             );");
271 1         174 $self->do("CREATE INDEX linkscache_objectid_idx ON links_cache(objectid);");
272 1         142 $self->do("CREATE INDEX linkscache_conceptid_idx ON links_cache(conceptid);");
273              
274             # Table structure for table dangling_cache
275 1         151 $self->do("DROP TABLE IF EXISTS dangling_cache;");
276 1         66 $self->do("CREATE TABLE dangling_cache (
277             objectid integer NOT NULL,
278             candidateid integer NOT NULL,
279             PRIMARY KEY (objectid, candidateid)
280             );");
281 1         178 $self->do("CREATE INDEX danglingcache_objectid_idx ON links_cache(objectid);");
282 1         146 $self->do("CREATE INDEX danglingcache_concept_idx ON links_cache(conceptid);");
283              
284             }
285              
286             1;
287             __END__