| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# MySQL replacement for hdb |
|
2
|
|
|
|
|
|
|
# AA0 2002-09-30 |
|
3
|
|
|
|
|
|
|
# Modified Open to return DBI connection and HDB table name |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Combine::MySQLhdb; |
|
6
|
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
722
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
64
|
|
|
8
|
2
|
|
|
2
|
|
10
|
use Combine::XWI; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
35
|
|
|
9
|
2
|
|
|
2
|
|
1804
|
use HTTP::Date; |
|
|
2
|
|
|
|
|
8929
|
|
|
|
2
|
|
|
|
|
120
|
|
|
10
|
2
|
|
|
2
|
|
2116
|
use Encode; |
|
|
2
|
|
|
|
|
28962
|
|
|
|
2
|
|
|
|
|
272
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $sv; # holds the mysql connection |
|
13
|
|
|
|
|
|
|
my $table = ''; # holds the hdb table name |
|
14
|
|
|
|
|
|
|
my $savehtml; |
|
15
|
|
|
|
|
|
|
my $doOAI; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub Open { #needed?? |
|
18
|
2
|
|
|
2
|
|
587
|
use Combine::Config; |
|
|
2
|
|
|
|
|
7
|
|
|
|
2
|
|
|
|
|
1943
|
|
|
19
|
1
|
|
|
1
|
0
|
10
|
$sv = Combine::Config::Get('MySQLhandle'); |
|
20
|
0
|
|
|
|
|
0
|
$savehtml = Combine::Config::Get('saveHTML'); |
|
21
|
0
|
|
|
|
|
0
|
$doOAI = Combine::Config::Get('doOAI'); |
|
22
|
0
|
|
|
|
|
0
|
my $hdbd = 'hdb'; |
|
23
|
0
|
|
|
|
|
0
|
return ($sv,$hdbd); |
|
24
|
|
|
|
|
|
|
} |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub Close { |
|
27
|
|
|
|
|
|
|
# print "MySQLhdb::Close\n"; |
|
28
|
0
|
|
|
0
|
0
|
0
|
$sv->disconnect ; |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub DESTROY { |
|
32
|
0
|
|
|
0
|
|
0
|
print STDERR "MySQLhdb::DESTROY\n"; |
|
33
|
0
|
|
|
|
|
0
|
$sv->disconnect ; |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub Write { |
|
37
|
0
|
|
|
0
|
0
|
0
|
my ($xwi) = @_; |
|
38
|
0
|
0
|
|
|
|
0
|
return undef unless $xwi; |
|
39
|
0
|
0
|
|
|
|
0
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
|
0
|
|
|
|
|
0
|
|
|
40
|
0
|
|
|
|
|
0
|
my $md5 = $xwi->md5; |
|
41
|
0
|
|
|
|
|
0
|
my $recordid = $xwi->recordid; #Set by DataBase.pm |
|
42
|
|
|
|
|
|
|
#OAI |
|
43
|
0
|
0
|
|
|
|
0
|
if ($doOAI) { |
|
44
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO oai SET status='created', recordid=?, md5=?")->execute($recordid, $md5); |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
#OAI |
|
47
|
|
|
|
|
|
|
# $xwi->url_rewind; MORE THAN one URL?? |
|
48
|
|
|
|
|
|
|
# my $url = $xwi->url_get; |
|
49
|
0
|
|
|
|
|
0
|
my $urlid = $xwi->urlid; |
|
50
|
0
|
|
|
|
|
0
|
my $my_netlocid = $xwi->netlocid; |
|
51
|
0
|
|
|
|
|
0
|
my $type = $xwi->type; |
|
52
|
0
|
|
|
|
|
0
|
my $title = $xwi->title; |
|
53
|
|
|
|
|
|
|
#checkedDate is inserted/updated in DataBase.pm and harvpars.pl |
|
54
|
0
|
|
|
|
|
0
|
my $modifiedDate = $xwi->modifiedDate; |
|
55
|
0
|
0
|
|
|
|
0
|
if ( ! $modifiedDate) { $modifiedDate = $xwi->checkedDate; } |
|
|
0
|
|
|
|
|
0
|
|
|
56
|
0
|
|
|
|
|
0
|
my $expiryDate = $xwi->expiryDate; |
|
57
|
|
|
|
|
|
|
# if ($expiryDate) { $expiryDate = str2time($expiryDate) ; } |
|
58
|
|
|
|
|
|
|
# else { $expiryDate = 'NULL'; } |
|
59
|
0
|
|
|
|
|
0
|
my $length = $xwi->length; |
|
60
|
0
|
|
|
|
|
0
|
my $server = $xwi->server; |
|
61
|
0
|
|
|
|
|
0
|
my $etag = $xwi->etag; |
|
62
|
0
|
|
|
|
|
0
|
my $nheadings = $xwi->heading_count; |
|
63
|
0
|
|
|
|
|
0
|
my $headings=''; |
|
64
|
|
|
|
|
|
|
# headings |
|
65
|
0
|
|
|
|
|
0
|
$xwi->heading_rewind; |
|
66
|
0
|
|
|
|
|
0
|
while (1) { |
|
67
|
0
|
0
|
|
|
|
0
|
my $this = $xwi->heading_get or last; |
|
68
|
0
|
|
|
|
|
0
|
$headings .= $this . '; '; |
|
69
|
|
|
|
|
|
|
} |
|
70
|
0
|
|
|
|
|
0
|
my $nlinks = $xwi->link_count; |
|
71
|
0
|
|
|
|
|
0
|
my $this = $xwi->text; |
|
72
|
0
|
|
|
|
|
0
|
my $ip; |
|
73
|
0
|
0
|
|
|
|
0
|
if ($this) { |
|
74
|
0
|
|
|
|
|
0
|
$this = $$this; |
|
75
|
0
|
0
|
|
|
|
0
|
if ($xwi->truncated()) { |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# IMPORTANT! This document was truncated. Therefore: |
|
78
|
|
|
|
|
|
|
# |
|
79
|
|
|
|
|
|
|
# 1) Discard it if no space characters in it, because then it |
|
80
|
|
|
|
|
|
|
# could be binary. |
|
81
|
|
|
|
|
|
|
# |
|
82
|
|
|
|
|
|
|
# 2) If a space is found, then truncate after the last space, |
|
83
|
|
|
|
|
|
|
# so as to avoid erroneous indexing (since the truncation |
|
84
|
|
|
|
|
|
|
# most likely cut a word). |
|
85
|
|
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
0
|
my $last_blank = rindex($this,' '); |
|
87
|
0
|
0
|
|
|
|
0
|
if ($last_blank > 0) { |
|
88
|
0
|
|
|
|
|
0
|
$ip = substr($this, 0, $last_blank) ; |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
else { |
|
92
|
0
|
|
|
|
|
0
|
$ip = $this ; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
0
|
|
|
|
|
0
|
} else { my $t=''; $xwi->text(\$t); } #make sure xwi->text is defined |
|
|
0
|
|
|
|
|
0
|
|
|
95
|
|
|
|
|
|
|
#?? if (length($ip)>250000) {$ip = substr($ip, 0, 250000);} |
|
96
|
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO hdb VALUES (?, ?, ?, FROM_UNIXTIME( ? ), FROM_UNIXTIME( ? ), ?, ?, ?, ?, ?, ?, COMPRESS(?))")->execute( |
|
98
|
|
|
|
|
|
|
$recordid, $type, Encode::encode('utf8',$title), $modifiedDate, $expiryDate, $length, $server, $etag, $nheadings, $nlinks, Encode::encode('utf8',$headings), Encode::encode('utf8',$ip)); |
|
99
|
|
|
|
|
|
|
|
|
100
|
0
|
0
|
|
|
|
0
|
if ( $savehtml == 1 ) { |
|
101
|
0
|
|
|
|
|
0
|
my $html = $xwi->content; |
|
102
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO html SET html=COMPRESS(?), recordid=?")->execute(Encode::encode('utf8',$$html),$recordid); |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my $res; |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#save links |
|
108
|
0
|
|
|
|
|
0
|
my ( $urlstr, $anchor, $ltype); |
|
109
|
0
|
|
|
|
|
0
|
$xwi->link_rewind; |
|
110
|
0
|
|
|
|
|
0
|
my $link_count = 1; |
|
111
|
0
|
|
|
|
|
0
|
my $netlocid; |
|
112
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM links WHERE recordid='$recordid';}); #needed? |
|
113
|
0
|
|
|
|
|
0
|
while(1) { #links |
|
114
|
0
|
|
|
|
|
0
|
($urlstr, $netlocid, $urlid, $anchor, $ltype) = $xwi->link_get; |
|
115
|
0
|
0
|
|
|
|
0
|
if (defined($urlstr)) { |
|
116
|
|
|
|
|
|
|
#Convert urlstr to urlid,netlocid if needed |
|
117
|
0
|
0
|
0
|
|
|
0
|
if ( ($netlocid <= 0) || ($urlid <= 0) ) { |
|
118
|
0
|
0
|
|
|
|
0
|
if ( $urlstr eq '') { print STDERR "ERR MySQLhdb, save links, no info\n"; } ## sanity check -> log error |
|
|
0
|
|
|
|
|
0
|
|
|
119
|
2
|
|
|
2
|
|
1566
|
use Combine::selurl; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
3968
|
|
|
120
|
0
|
|
|
|
|
0
|
my $u; |
|
121
|
0
|
0
|
|
|
|
0
|
if ( $u = new Combine::selurl($urlstr) ) { |
|
122
|
0
|
|
|
|
|
0
|
$urlstr = $u->normalise(); |
|
123
|
0
|
|
|
|
|
0
|
my $netlocstr = $u->authority; |
|
124
|
0
|
|
|
|
|
0
|
my $path_query = $u->path_query; |
|
125
|
0
|
|
|
|
|
0
|
my $lsth = $sv->prepare(qq{SELECT netlocid,urlid FROM urls WHERE urlstr=?;}); |
|
126
|
0
|
|
|
|
|
0
|
$lsth->execute($urlstr); |
|
127
|
0
|
|
|
|
|
0
|
($netlocid,$urlid) = $lsth->fetchrow_array; |
|
128
|
0
|
0
|
|
|
|
0
|
if ( !defined($urlid) ) { |
|
129
|
0
|
|
|
|
|
0
|
$sv->prepare(qq{INSERT IGNORE INTO netlocs SET netlocstr=?;})->execute($netlocstr); |
|
130
|
|
|
|
|
|
|
# ($netlocid) = $sv->selectrow_array(qq{SELECT netlocid FROM netlocs WHERE netlocstr='$netlocstr';}); |
|
131
|
0
|
|
|
|
|
0
|
my $nlsth = $sv->prepare(qq{SELECT netlocid FROM netlocs WHERE netlocstr=?;}); |
|
132
|
0
|
|
|
|
|
0
|
$nlsth->execute($netlocstr); |
|
133
|
0
|
|
|
|
|
0
|
($netlocid) = $nlsth->fetchrow_array(); |
|
134
|
0
|
|
|
|
|
0
|
$sv->prepare(qq{INSERT IGNORE INTO urls SET urlstr=?, netlocid=?, path=?;})->execute($urlstr,$netlocid,$path_query); |
|
135
|
0
|
|
|
|
|
0
|
$lsth->execute($urlstr); |
|
136
|
0
|
|
|
|
|
0
|
($netlocid,$urlid) = $lsth->fetchrow_array; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO links (recordid,mynetlocid,urlid,netlocid,anchor,linktype) VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid,$my_netlocid,$urlid,$netlocid,Encode::encode('utf8',$anchor),$ltype); |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
} else { |
|
141
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO links (recordid,mynetlocid,urlid,netlocid,anchor,linktype) VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid,$my_netlocid,$urlid,$netlocid,Encode::encode('utf8',$anchor),$ltype); |
|
142
|
|
|
|
|
|
|
} |
|
143
|
0
|
|
|
|
|
0
|
} else { last; } |
|
144
|
0
|
0
|
|
|
|
0
|
last if ($link_count++ >= 500); # limit on number of links |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
#save metadata |
|
148
|
0
|
|
|
|
|
0
|
$xwi->meta_rewind; |
|
149
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM meta WHERE recordid='$recordid';}); #needed? |
|
150
|
0
|
|
|
|
|
0
|
my ($name,$content); |
|
151
|
0
|
|
|
|
|
0
|
while (1) { |
|
152
|
0
|
|
|
|
|
0
|
($name,$content) = $xwi->meta_get; |
|
153
|
0
|
0
|
|
|
|
0
|
last unless $name; |
|
154
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO meta VALUES (?, ?, ?)")->execute($recordid, Encode::encode('utf8',$name), Encode::encode('utf8',$content)); |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
#OLD |
|
158
|
|
|
|
|
|
|
#save URLs |
|
159
|
|
|
|
|
|
|
# $xwi->url_rewind; |
|
160
|
|
|
|
|
|
|
# $res = $sv->do(qq{DELETE FROM urls WHERE recordid='$recordid';}); |
|
161
|
|
|
|
|
|
|
# while (1) { |
|
162
|
|
|
|
|
|
|
# $this = $xwi->url_get or last; |
|
163
|
|
|
|
|
|
|
## $res = $sv->do(qq{INSERT INTO urls VALUES ('$recordid','$this');}); |
|
164
|
|
|
|
|
|
|
# my $machine = $this; |
|
165
|
|
|
|
|
|
|
# $machine =~ s|http://([^:/]+)[:/]?.*|$1|; |
|
166
|
|
|
|
|
|
|
# $sv->prepare("INSERT INTO urls VALUES (?, ?, ?)")->execute($recordid, $this, $machine); |
|
167
|
|
|
|
|
|
|
# } |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
#save robot data in analys table (relies on the URL being stored) |
|
170
|
0
|
|
|
|
|
0
|
$xwi->robot_rewind; |
|
171
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM analys WHERE recordid='$recordid';}); #needed? |
|
172
|
0
|
|
|
|
|
0
|
while (1) { |
|
173
|
0
|
|
|
|
|
0
|
($name,$content) = $xwi->robot_get; |
|
174
|
0
|
0
|
|
|
|
0
|
last unless $name; |
|
175
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, $name, Encode::encode('utf8',$content)); |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
## my $alinks = calclinks($recordid,$machine); #? |
|
178
|
|
|
|
|
|
|
#What if link-stats are inserted twice after a Get and a following Write? |
|
179
|
0
|
|
|
|
|
0
|
my $sth = $sv->prepare(qq{SELECT COUNT(DISTINCT(links.recordid)), COUNT(DISTINCT(mynetlocid)) FROM links,recordurl WHERE recordurl.recordid= ? AND |
|
180
|
|
|
|
|
|
|
links.urlid = recordurl.urlid AND mynetlocid<>links.netlocid;}); |
|
181
|
0
|
|
|
|
|
0
|
$sth->execute($recordid); |
|
182
|
0
|
|
|
|
|
0
|
my ($inlinks,$hostinlinks)=$sth->fetchrow_array; |
|
183
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'inlinks', $inlinks); |
|
184
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'hostinlinks', $hostinlinks); |
|
185
|
0
|
|
|
|
|
0
|
$sth = $sv->prepare(qq{SELECT count(distinct(netlocid)) FROM links WHERE recordid=?;}); |
|
186
|
0
|
|
|
|
|
0
|
$sth->execute($recordid); |
|
187
|
0
|
|
|
|
|
0
|
my ($outlinks)=$sth->fetchrow_array; |
|
188
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO analys VALUES (?, ?, ?)")->execute($recordid, 'outlinks', $outlinks); |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
#save topic, ie result of autoclassification |
|
191
|
0
|
|
|
|
|
0
|
$xwi->topic_rewind; |
|
192
|
0
|
|
|
|
|
0
|
$res = $sv->do(qq{DELETE FROM topic WHERE recordid='$recordid';}); #needed? |
|
193
|
0
|
|
|
|
|
0
|
my ($cls,$absscore, $relscore, $terms, $alg); |
|
194
|
0
|
|
|
|
|
0
|
while (1) { |
|
195
|
0
|
|
|
|
|
0
|
($cls,$absscore, $relscore,$terms, $alg) = $xwi->topic_get; |
|
196
|
0
|
0
|
|
|
|
0
|
last unless $cls; |
|
197
|
0
|
|
|
|
|
0
|
$sv->prepare("INSERT INTO topic VALUES (?, ?, ?, ?, ?, ?)")->execute($recordid, Encode::encode('utf8',$cls), $absscore, $relscore, Encode::encode('utf8',$terms), $alg); |
|
198
|
|
|
|
|
|
|
} |
|
199
|
0
|
0
|
|
|
|
0
|
if (my $zh = Combine::Config::Get('ZebraHost')) { |
|
200
|
0
|
|
|
|
|
0
|
require Combine::Zebra; |
|
201
|
0
|
|
|
|
|
0
|
Combine::Zebra::update($zh,$xwi); |
|
202
|
|
|
|
|
|
|
} |
|
203
|
0
|
0
|
|
|
|
0
|
if (Combine::Config::Get('MySQLfulltext')) { |
|
204
|
0
|
|
|
|
|
0
|
$sv->prepare("REPLACE INTO search VALUES (?, ?)")->execute($recordid, Encode::encode('utf8',$title .' '. $ip)); |
|
205
|
|
|
|
|
|
|
} |
|
206
|
0
|
0
|
|
|
|
0
|
if (my $sh = Combine::Config::Get('SolrHost')) { |
|
207
|
0
|
|
|
|
|
0
|
require Combine::Solr; |
|
208
|
0
|
|
|
|
|
0
|
Combine::Solr::update($sh,$xwi); |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub Delete { #Used?? |
|
213
|
0
|
|
|
0
|
0
|
0
|
my ($xwi) = @_; |
|
214
|
0
|
0
|
|
|
|
0
|
return undef unless $xwi; |
|
215
|
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
0
|
my $recordid = $xwi->recordid; |
|
217
|
|
|
|
|
|
|
#print "MySQLhdb::DeleteMD5 $recordid\n"; |
|
218
|
0
|
|
|
|
|
0
|
DeleteKey($recordid, $xwi->md5); |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub DeleteKey { |
|
222
|
1
|
|
|
1
|
0
|
18
|
my ($key, $md5) = @_; |
|
223
|
1
|
50
|
|
|
|
19
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
|
1
|
|
|
|
|
15
|
|
|
224
|
|
|
|
|
|
|
#OAI |
|
225
|
0
|
0
|
|
|
|
|
if ($doOAI) { |
|
226
|
|
|
|
|
|
|
# $sv->prepare("REPLACE INTO oai SET status='deleted', recordid=?, md5=?")->execute($key,$md5); |
|
227
|
|
|
|
|
|
|
##WRONG: recordurl is updated in Database.pm. FIX! |
|
228
|
0
|
|
|
|
|
|
$sv->prepare("REPLACE INTO oai SELECT recordid,md5,NOW(),'deleted' FROM recordurl WHERE recordid=?")->execute($key); |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
#OAI |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
#Zebra |
|
233
|
0
|
0
|
|
|
|
|
if (my $zh = Combine::Config::Get('ZebraHost')) { |
|
234
|
0
|
|
|
|
|
|
require Combine::Zebra; |
|
235
|
|
|
|
|
|
|
#Not needed: if ($md5 eq '') { ($md5)=$sv->selectrow_array('SELECT md5 FROM recordurl WHERE recordid=$key'); } |
|
236
|
0
|
|
|
|
|
|
Combine::Zebra::delete($zh, $md5, $key); |
|
237
|
|
|
|
|
|
|
} |
|
238
|
0
|
0
|
|
|
|
|
if (my $sh = Combine::Config::Get('SolrHost')) { |
|
239
|
0
|
|
|
|
|
|
require Combine::Solr; |
|
240
|
0
|
|
|
|
|
|
Combine::Solr::delete($sh, $md5, $key); |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#print "MySQLhdb::DeleteKey $key\n"; |
|
244
|
0
|
|
|
|
|
|
my $res = $sv->do(qq{DELETE FROM hdb WHERE recordid=$key;}); |
|
245
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM html WHERE recordid=$key;}); |
|
246
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM search WHERE recordid=$key;}); |
|
247
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM meta WHERE recordid=$key;}); |
|
248
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM analys WHERE recordid=$key}); |
|
249
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM links WHERE recordid=$key;}); |
|
250
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM topic WHERE recordid=$key;}); |
|
251
|
0
|
|
|
|
|
|
$res = $sv->do(qq{DELETE FROM recordurl WHERE recordid=$key;}); |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub Get { |
|
255
|
0
|
|
|
0
|
0
|
|
my ($key) = @_; |
|
256
|
|
|
|
|
|
|
#should return an initialized xwi-object |
|
257
|
0
|
0
|
|
|
|
|
if (!defined($sv)) { Open(); } #Init $sv CHANGE? |
|
|
0
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
|
259
|
0
|
|
|
|
|
|
my ($type, $title, $modifiedDate, $expiryDate, $length, $server, $etag, $nheadings, $nlinks, $headings, $ip) = |
|
260
|
|
|
|
|
|
|
$sv->selectrow_array(qq{SELECT type,title, |
|
261
|
|
|
|
|
|
|
UNIX_TIMESTAMP(mdate),IF(expiredate,UNIX_TIMESTAMP(expiredate),0), |
|
262
|
|
|
|
|
|
|
length,server,etag,nheadings,nlinks,headings,UNCOMPRESS(ip) |
|
263
|
|
|
|
|
|
|
FROM hdb WHERE recordid='$key';}); |
|
264
|
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
my $xwi = new Combine::XWI ; |
|
266
|
0
|
|
|
|
|
|
$xwi->recordid($key); |
|
267
|
|
|
|
|
|
|
#url: relies on all urls being in table urls |
|
268
|
0
|
|
|
|
|
|
$xwi->type($type); |
|
269
|
0
|
|
|
|
|
|
$xwi->title(Encode::decode('utf8',$title)); |
|
270
|
0
|
|
|
|
|
|
$xwi->modifiedDate($modifiedDate); |
|
271
|
0
|
0
|
|
|
|
|
if ($expiryDate>0) {$xwi->expiryDate($expiryDate)}; |
|
|
0
|
|
|
|
|
|
|
|
272
|
0
|
|
|
|
|
|
$xwi->length($length); |
|
273
|
0
|
|
|
|
|
|
$xwi->server($server); |
|
274
|
0
|
|
|
|
|
|
$xwi->etag($etag); |
|
275
|
0
|
|
|
|
|
|
$xwi->nheadings($nheadings); |
|
276
|
0
|
|
|
|
|
|
$xwi->nlinks($nlinks); |
|
277
|
0
|
|
|
|
|
|
$headings =~ s/; $//; |
|
278
|
0
|
|
|
|
|
|
$xwi->heading_add(Encode::decode('utf8',$headings)) ; |
|
279
|
0
|
|
|
|
|
|
my $ip1=Encode::decode('utf8',$ip); |
|
280
|
0
|
|
|
|
|
|
$xwi->text(\$ip1); |
|
281
|
0
|
|
|
|
|
|
my ($html1) = $sv->selectrow_array(qq{SELECT UNCOMPRESS(html) FROM html WHERE recordid='$key';}); |
|
282
|
0
|
|
|
|
|
|
my $html = Encode::decode('utf8',$html1); |
|
283
|
0
|
|
|
|
|
|
$xwi->content(\$html); |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
my ($urlpath) = $sv->selectrow_array(qq{SELECT path FROM urls,recordurl WHERE recordid='$key' AND recordurl.urlid=urls.urlid;}); |
|
286
|
0
|
|
|
|
|
|
$xwi->urlpath($urlpath); |
|
287
|
|
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
|
my ($url,$anchor,$lty,$name,$value,$heading); |
|
289
|
|
|
|
|
|
|
#links |
|
290
|
0
|
|
|
|
|
|
my $sth = $sv->prepare(qq{SELECT urlid,netlocid,anchor,linktype from links WHERE recordid='$key';}); |
|
291
|
0
|
|
|
|
|
|
$sth->execute; |
|
292
|
0
|
|
|
|
|
|
my ($urlid,$netlocid,$checkedDate,$md5,$fingerprint,$cls,$absscore,$relscore,$terms,$alg); |
|
293
|
0
|
|
|
|
|
|
while (($urlid,$netlocid,$anchor,$lty)=$sth->fetchrow_array) { |
|
294
|
0
|
|
|
|
|
|
$xwi->link_add('', $netlocid, $urlid, Encode::decode('utf8',$anchor), $lty) ; #no URLstr add? |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#meta |
|
298
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT name,value from meta WHERE recordid='$key';}); |
|
299
|
0
|
|
|
|
|
|
$sth->execute; |
|
300
|
0
|
|
|
|
|
|
while (($name,$value)=$sth->fetchrow_array) { |
|
301
|
0
|
|
|
|
|
|
$xwi->meta_add(Encode::decode('utf8',$name),Encode::decode('utf8',$value)) ; |
|
302
|
|
|
|
|
|
|
} |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# analys -> robot |
|
305
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT name,value FROM analys WHERE recordid='$key';}); |
|
306
|
0
|
|
|
|
|
|
$sth->execute; |
|
307
|
0
|
|
|
|
|
|
while (($name,$value)=$sth->fetchrow_array) { |
|
308
|
0
|
|
|
|
|
|
$xwi->robot_add($name,Encode::decode('utf8',$value)) ; |
|
309
|
|
|
|
|
|
|
} |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
# topic |
|
312
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT notation,abscore,relscore,terms,algorithm FROM topic WHERE recordid='$key';}); |
|
313
|
0
|
|
|
|
|
|
$sth->execute; |
|
314
|
0
|
|
|
|
|
|
while (($cls,$absscore,$relscore,$terms,$alg)=$sth->fetchrow_array) { |
|
315
|
0
|
|
|
|
|
|
$xwi->topic_add(Encode::decode('utf8',$cls),$absscore,$relscore,Encode::decode('utf8',$terms),$alg) ; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
#recordurl |
|
319
|
0
|
|
|
|
|
|
$sth = $sv->prepare(qq{SELECT urlid,UNIX_TIMESTAMP(lastchecked),md5,fingerprint FROM recordurl WHERE recordid='$key';}); |
|
320
|
0
|
|
|
|
|
|
$sth->execute; |
|
321
|
0
|
|
|
|
|
|
while (($urlid,$checkedDate,$md5,$fingerprint)=$sth->fetchrow_array) { |
|
322
|
0
|
|
|
|
|
|
$xwi->urlid($urlid); |
|
323
|
0
|
|
|
|
|
|
$xwi->checkedDate($checkedDate); |
|
324
|
0
|
|
|
|
|
|
$xwi->md5($md5); |
|
325
|
0
|
|
|
|
|
|
$xwi->fingerprint($fingerprint); |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
0
|
|
|
|
|
|
return $xwi; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
1; |