File Coverage

blib/lib/NNexus/DB/API.pm
Criterion Covered Total %
statement 137 153 89.5
branch 24 42 57.1
condition 20 49 40.8
subroutine 14 15 93.3
pod 11 12 91.6
total 206 271 76.0


line stmt bran cond sub pod time code
1             # /=====================================================================\ #
2             # | NNexus Autolinker | #
3             # | Backend API Module | #
4             # |=====================================================================| #
5             # | Part of the Planetary project: http://trac.mathweb.org/planetary | #
6             # | Research software, produced as part of work done by: | #
7             # | the KWARC group at Jacobs University | #
8             # | Copyright (c) 2012 | #
9             # | Released under the MIT License (MIT) | #
10             # |---------------------------------------------------------------------| #
11             # | Adapted from the original NNexus code by | #
12             # | James Gardner and Aaron Krowne | #
13             # |---------------------------------------------------------------------| #
14             # | Deyan Ginev #_# | #
15             # | http://kwarc.info/people/dginev (o o) | #
16             # \=========================================================ooo==U==ooo=/ #
17             package NNexus::DB::API;
18 7     7   38 use strict;
  7         25  
  7         424  
19 7     7   32 use warnings;
  7         11  
  7         705  
20              
21             require Exporter;
22             our @ISA = qw(Exporter);
23             our @EXPORT = qw(add_object_by select_object_by select_concepts_by last_inserted_id
24             add_concept_by delete_concept_by invalidate_by reset_db
25             select_firstword_matches
26             select_linkscache_by delete_linkscache_by add_linkscache_by);
27              
28 7     7   1069 use NNexus::Morphology qw(firstword_split);
  7         16  
  7         9794  
29              
30             ### API for Table: Objects
31              
32             sub add_object_by {
33 8     8 1 46 my ($db,%options) = @_;
34 8         19 my ($url, $domain) = map {$options{$_}} qw(url domain);
  16         39  
35 8 50 33     48 return unless $url && $domain;
36 8         27 my $sth = $db->prepare("INSERT into objects (url, domain) values (?, ?)");
37 8         328 $sth->execute($url,$domain);
38 8         31 $sth->finish();
39             # Return the object id in order to update the concepts and classification
40 8         30 return $db->last_inserted_id();
41             }
42              
43             sub select_object_by {
44 11     11 1 27 my ($db,%options) = @_;
45 11         24 my ($url,$objectid) = map {$options{$_}} qw/url objectid/;
  22         53  
46 11         15 my $sth;
47 11 100       29 if ($url) {
    50          
48 9         51 $sth = $db->prepare("select objectid, domain from objects where (url = ?)");
49 9         415 $sth->execute($url); }
50             elsif ($objectid) {
51 2         6 $sth = $db->prepare("select url from objects where (objectid = ?)");
52 2         19 $sth->execute($objectid); }
53              
54 11         297 my $object = $sth->fetchrow_hashref;
55 11         55 $sth->finish();
56 11         47 return $object;
57             }
58              
59             ### API for Table: Concept
60              
61             sub select_concepts_by {
62 7     7 0 19 my ($db,%options) = @_;
63 42         48 my ($concept,$category,$scheme,$objectid,$firstword,$tailwords) =
64 7         15 map {$options{$_}} qw/concept category scheme objectid firstword tailwords/;
65 7 100 66     26 if ($concept && (!$firstword)) {
66 2         8 ($firstword,$tailwords) = firstword_split($concept);
67             }
68 7         11 my $concepts = [];
69 7         8 my $sth;
70 7 100 66     39 if ($firstword && $category && $scheme && $objectid) {
    50 66        
      33        
71             # Selector for invalidation
72 2         9 $sth = $db->prepare("select * from concepts where (objectid = ? AND firstword = ? AND tailwords = ? AND scheme = ? AND category = ? )");
73 2         70 $sth->execute($objectid,$firstword,$tailwords,$scheme,$category);
74             } elsif ($objectid) {
75 5         13 $sth = $db->prepare("select * from concepts where (objectid = ?)");
76 5         144 $sth->execute($objectid);
77 0         0 } else { return []; } # Garbage in - garbage out. TODO: Error message?
78              
79 7         159 while (my $row = $sth->fetchrow_hashref()) {
80 10   50     16 $row->{tailwords} //= '';
81 10 50       29 $row->{concept} = $row->{firstword}.($row->{tailwords} ? " ".$row->{tailwords} : '');
82 10         110 push @$concepts, $row;
83             }
84 7         23 $sth->finish();
85            
86 7         22 return $concepts;
87             }
88              
89             sub delete_concept_by {
90 2     2 1 6 my ($db, %options) = @_;
91 2         6 my ($firstword, $tailwords, $concept, $category, $objectid) = map {$options{$_}} qw(firstword tailwords concept category objectid);
  10         14  
92 2 50 33     13 if ($concept && (!$firstword)) {
93 2         5 ($firstword,$tailwords) = firstword_split($concept);
94             }
95 2 50 33     18 return unless $firstword && $category && $objectid; # Mandatory fields. TODO: Raise error?
      33        
96 2         67 $firstword = lc($firstword); # We only record lower-cased concepts, avoid oversights
97 2   50     6 $tailwords = lc($tailwords)||''; # ditto
98 2         7 my $sth = $db->prepare("delete from concepts where (firstword = ? AND tailwords = ? AND category = ? AND objectid = ?)");
99 2         102 $sth->execute($firstword,$tailwords,$category,$objectid);
100 2         15 $sth->finish();
101             }
102              
103             sub add_concept_by {
104 15     15 1 89 my ($db, %options) = @_;
105 120         178 my ($concept, $category, $objectid, $domain, $link, $scheme, $firstword, $tailwords) =
106 15         31 map {$options{$_}} qw(concept category objectid domain link scheme firstword tailwords);
107 15 50 33     220 return unless ($firstword || $concept) && $category && $objectid && $link && $domain; # Mandatory fields. TODO: Raise error?
      33        
      33        
      33        
      33        
108 15 100       34 $scheme = 'msc' unless $scheme;
109 15 50       43 if (! $firstword) {
110 15         33 $concept = lc($concept); # Only record lower-cased concepts
111 15         51 ($firstword,$tailwords) = firstword_split($concept);
112             }
113 15 50       39 if (! $firstword) {
114 0         0 print STDERR "Error: No firstword for $concept at $link!\n\n";
115 0         0 return;
116             }
117 15         57 my $sth = $db->prepare("insert into concepts (firstword, tailwords, category, scheme, objectid, domain, link) values (?, ?, ?, ?, ?, ?, ?)");
118 15         758 $sth->execute($firstword, $tailwords, $category, $scheme, $objectid, $domain, $link);
119 15         59 $sth->finish();
120 15         36 return last_inserted_id($db);
121             }
122              
123             # get the possible matches based on the first word of a concept
124             # returns as an array containing a hash with newterm
125             sub select_firstword_matches {
126 134     134 1 255 my ($db,$word,%options) = @_;
127 134         145 my @matches = ();
128 134         139 my $domain = $options{domain};
129 134         95 my $sth;
130 134 50 33     293 if ($domain && ($domain ne 'all')) {
131 0         0 $sth = $db->prepare("SELECT conceptid, firstword, tailwords, category, scheme,
132             domain, link, objectid from concepts where firstword=? AND domain=?");
133 0         0 $sth->execute($word,$domain);
134             } else {
135 134         306 $sth = $db->prepare("SELECT conceptid, firstword, tailwords, category, scheme,
136             domain, link, objectid from concepts where firstword=?");
137 134         2861 $sth->execute($word);
138             }
139              
140 134         188 my %row;
141 134         150 $sth->bind_columns( \( @row{ @{$sth->{NAME_lc} } } ));
  134         1739  
142 134         4442 while ($sth->fetch) {
143 176 100       320 $row{concept} = $row{firstword}.($row{tailwords} ? " ".$row{tailwords} : '');
144 176         1994 push @matches, {%row};
145             }
146 134         301 $sth->finish();
147 134         611 return @matches;
148             }
149              
150             ### API for Table: Links_cache
151              
152             sub delete_linkscache_by {
153 0     0 1 0 my ($db,%options) = @_;
154 0         0 my $objectid = $options{objectid};
155 0 0       0 return unless $objectid;
156 0         0 my $sth = $db->prepare("delete from links_cache where objectid=?");
157 0         0 $sth->execute($objectid);
158 0         0 $sth->finish();
159             }
160              
161             sub add_linkscache_by{
162 3     3 1 18 my ($db,%options) = @_;
163 3         6 my $objectid = $options{objectid};
164 3         5 my $conceptid = $options{conceptid};
165 3 50 33     23 return unless $objectid && $conceptid;
166 3         15 my $sth = $db->prepare("insert into links_cache (conceptid,objectid) values (?,?) ");
167 3         105 $sth->execute($conceptid,$objectid);
168 3         23 $sth->finish();
169             }
170              
171             sub select_linkscache_by {
172 14     14 1 22 my ($db,%options)=@_;
173 14         21 my $conceptid = $options{conceptid};
174 14         16 my $objectid = $options{objectid};
175 14         10 my $sth;
176 14 50       26 if ($conceptid) {
    0          
177 14         32 $sth = $db->prepare("SELECT objectid from links_cache WHERE conceptid=?");
178 14         178 $sth->execute($conceptid); }
179 0         0 elsif ($objectid) {
180 0         0 $sth = $db->prepare("SELECT conceptid from links_cache WHERE objectid=?");
181 0         0 $sth->execute($objectid); }
182             else {return []; }
183 14         32 my $results = [];
184 14         86 while (my @row = $sth->fetchrow_array()) {
185 2         12 push @$results, @row;
186             }
187 14         28 $sth->finish();
188 14         27 return $results;
189             }
190              
191             # Alias, more semantic
192             sub invalidate_by {
193 14     14 1 32 my ($db,%options)=@_;
194 14         42 my $objectids = $db->select_linkscache_by(%options);
195 14         22 my @urls = ();
196 14         28 foreach my $objectid(@$objectids) {
197 2         7 push @urls, $db->select_object_by(objectid=>$objectid)->{url};
198             }
199 14         51 return @urls; }
200              
201             ### Generic DB API
202              
203             sub last_inserted_id {
204 23     23 1 32 my ($db) = @_;
205 23         22 my $objid;
206 23         46 my $dbms = $db->{dbms};
207 23 50       84 if ($dbms eq 'mysql') {
    50          
208 0         0 $objid = $db->{handle}->{'mysql_insertid'}; }
209             elsif ($dbms eq 'SQLite') {
210 23         111 $objid = $db->{handle}->sqlite_last_insert_rowid(); }
211 0         0 else { die 'No DBMS information provided! Failing...'; }
212 23         106 return $objid; }
213              
214             ### API for Initializing a SQLite Database:
215             sub reset_db {
216 4     4 1 9 my ($self) = @_;
217 4         19 $self = $self->safe; # unsafe but faster...
218             # Request a 20 MB cache size, reasonable on all modern systems:
219 4         25 $self->do("PRAGMA cache_size = 20000; ");
220             # Table structure for table object
221 4         272 $self->do("DROP TABLE IF EXISTS objects;");
222 4         311 $self->do("CREATE TABLE objects (
223             objectid integer primary key AUTOINCREMENT,
224             url varchar(2083) NOT NULL UNIQUE,
225             domain varchar(50),
226             -- TODO: Do we really care about modified?
227             modified timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
228             );");
229             # TODO: Rethink this trigger, do we need modified?
230 4         1604 $self->do("CREATE TRIGGER ObjectModified
231             AFTER UPDATE ON objects
232             BEGIN
233             UPDATE objects SET modified = CURRENT_TIMESTAMP WHERE objectid = old.objectid;
234             END;");
235              
236             # Table structure for table concept
237             # A 'concept' has a 'firstword', belongs to a 'category' (e.g. 10-XX) with a certain 'scheme' (e.g. MSC) and is defined at a 'link', obtained while traversing an object known via 'objectid'. The concept inherits the 'domain' of the object (e.g. PlanetMath).
238             # The distinction between link and objectid allows for a level of indirection, e.g. in DLMF, where we would obtain the 'link's that define concepts while at a higher (e.g. index) webpage, only which we would register in the object table. The reindexing should be driven by the traversal process, while the linking should use the actual obtained URL for the concept definition.
239 4         621 $self->do("DROP TABLE IF EXISTS concepts;");
240 4         290 $self->do("CREATE TABLE concepts (
241             conceptid integer primary key AUTOINCREMENT,
242             firstword varchar(50) NOT NULL,
243             tailwords varchar(255),
244             category varchar(10) NOT NULL,
245             scheme varchar(10) NOT NULL DEFAULT 'msc',
246             domain varchar(50) NOT NULL,
247             link varchar(2053) NOT NULL,
248             objectid int(11) NOT NULL
249             );");
250             # TODO: Do we need this one?
251             #$self->do("CREATE INDEX conceptidx ON concept(concept);");
252 4         757 $self->do("CREATE INDEX conceptidx ON concepts(firstword);");
253 4         710 $self->do("CREATE INDEX objectididx ON concepts(objectid);");
254              
255             # Table structure for table candidates
256 4         676 $self->do("DROP TABLE IF EXISTS candidates;");
257 4         326 $self->do("CREATE TABLE candidates (
258             candidateid integer primary key AUTOINCREMENT,
259             firstword varchar(50) NOT NULL,
260             tailwords varchar(255) NOT NULL,
261             confidence real NOT NULL DEFAULT 0
262             );");
263              
264             # Table structure for table links_cache
265 4         765 $self->do("DROP TABLE IF EXISTS links_cache;");
266 4         320 $self->do("CREATE TABLE links_cache (
267             objectid integer NOT NULL,
268             conceptid integer NOT NULL,
269             PRIMARY KEY (objectid, conceptid)
270             );");
271 4         775 $self->do("CREATE INDEX linkscache_objectid_idx ON links_cache(objectid);");
272 4         686 $self->do("CREATE INDEX linkscache_conceptid_idx ON links_cache(conceptid);");
273              
274             # Table structure for table dangling_cache
275 4         644 $self->do("DROP TABLE IF EXISTS dangling_cache;");
276 4         288 $self->do("CREATE TABLE dangling_cache (
277             objectid integer NOT NULL,
278             candidateid integer NOT NULL,
279             PRIMARY KEY (objectid, candidateid)
280             );");
281 4         822 $self->do("CREATE INDEX danglingcache_objectid_idx ON links_cache(objectid);");
282 4         609 $self->do("CREATE INDEX danglingcache_concept_idx ON links_cache(conceptid);");
283              
284             }
285              
286             1;
287             __END__