File Coverage

blib/lib/MyConText/Blob.pm
Criterion Covered Total %
statement 3 129 2.3
branch 0 52 0.0
condition n/a
subroutine 1 8 12.5
pod 0 6 0.0
total 4 195 2.0


line stmt bran cond sub pod time code
1              
2             package MyConText::Blob;
3 1     1   637 use strict;
  1         2  
  1         2098  
4              
5             # Open in the backend just sets the object
6             sub open {
7 0     0 0   my ($class, $ctx) = @_;
8 0           return bless { 'ctx' => $ctx }, $class;
9             }
10             # Create creates the table(s) according to the parameters
11             sub _create_tables {
12 0     0     my ($class, $ctx) = @_;
13 0           my $CREATE_DATA = <
14             create table $ctx->{'data_table'} (
15             word varchar($ctx->{'word_length'}) binary
16             default '' not null,
17             idx longblob default '' not null,
18             primary key (word)
19             )
20             EOF
21 0           my $dbh = $ctx->{'dbh'};
22 0 0         $dbh->do($CREATE_DATA) or return $dbh->errstr;
23 0           push @{$ctx->{'created_tables'}}, $ctx->{'data_table'};
  0            
24 0           return;
25             }
26              
27             sub add_document {
28 0     0 0   my ($self, $id, $words) = @_;
29 0           my $ctx = $self->{'ctx'};
30 0           my $dbh = $ctx->{'dbh'};
31 0           my $data_table = $ctx->{'data_table'};
32              
33 0 0         my $update_sth = ( defined $self->{'adding_update_sth'}
34             ? $self->{'adding_update_sth'}
35             : $self->{'adding_update_sth'} = $dbh->prepare(
36             "update $data_table set idx = concat(idx, ?)
37             where word = ?") );
38              
39 0 0         my $insert_sth = ( defined $self->{'insert_sth'}
40             ? $self->{'insert_sth'}
41             : $self->{'insert_sth'} = $dbh->prepare("
42             insert into $data_table values (?, ?)") );
43              
44 0           my $packstring = $MyConText::BITS_TO_PACK{$ctx->{'doc_id_bits'}}
45             . $MyConText::BITS_TO_PACK{$ctx->{'count_bits'}};
46 0           my $num_words = 0;
47 0           for my $word ( keys %$words ) {
48             ### print STDERR "$word($id) adding\n";
49             # here we will want to parametrize the bit size of the
50             # data
51 0           my $value = pack $packstring, $id, $words->{$word};
52 0           my $rows = $update_sth->execute($value, $word);
53 0 0         $insert_sth->execute($word, $value) if $rows == 0;
54 0           $num_words += $words->{$word};
55             }
56            
57 0           return $num_words;
58             }
59              
60             sub delete_document {
61 0     0 0   my $self = shift;
62 0           for my $id (@_) { $self->update_document($id, {}); }
  0            
63             }
64             sub update_document {
65 0     0 0   my ($self, $id, $words) = @_;
66 0           my $ctx = $self->{'ctx'};
67 0           my $dbh = $ctx->{'dbh'};
68 0           my $data_table = $ctx->{'data_table'};
69              
70 0 0         my $insert_sth = ( defined $self->{'insert_sth'}
71             ? $self->{'insert_sth'}
72             : $self->{'insert_sth'} = $dbh->prepare("
73             insert into $data_table values (?, ?)") );
74              
75 0 0         my $update_sth = ( defined $self->{'update_update_sth'}
76             ? $self->{'update_update_sth'}
77             : $self->{'update_update_sth'} =
78             $dbh->prepare("update $data_table set idx =
79             concat(substring(idx, 1, ?), ?, substring(idx, ?))
80             where word = ?") );
81              
82              
83 0           $dbh->do("lock tables $data_table write");
84              
85 0           my $select_sth = $dbh->prepare("select word from $data_table");
86 0           $select_sth->execute;
87              
88 0           my $packstring = $MyConText::BITS_TO_PACK{$ctx->{'doc_id_bits'}}
89             . $MyConText::BITS_TO_PACK{$ctx->{'count_bits'}};
90 0           my ($packnulls) = pack $packstring, 0, 0;
91 0           my $packlength = length $packnulls;
92 0           my $num_words = 0;
93 0           while (my ($word) = $select_sth->fetchrow_array) {
94 0 0         my $value = (defined $words->{$word} ?
95             pack($packstring, $id, $words->{$word}) : '');
96              
97             # the method find_position finds the position of the
98             # "record" for document $id with word $word; returned is
99             # the position in bytes and yes/no values specifying if
100             # the record is already present in the blob; if it is,
101             # we need to replace it, otherwise just insert.
102              
103 0           my ($pos, $shift) = $self->find_position($word, $id);
104 0 0         if (not defined $pos) {
105 0           $insert_sth->execute($word, $value);
106             }
107             else {
108 0           my $spos = $pos + 1; # I'm not sure why this
109 0 0         $spos += $packlength if $shift;
110 0           $update_sth->execute($pos, $value, $spos, $word);
111             }
112 0           delete $words->{$word};
113 0 0         $num_words++ if defined $value;
114             }
115              
116 0           for my $word ( keys %$words ) {
117 0           my $value = pack $packstring, $id, $words->{$word};
118 0           $insert_sth->execute($word, $value);
119 0           $num_words++;
120             }
121 0           $dbh->do("unlock tables");
122              
123 0           return $num_words;
124             }
125             sub find_position {
126 0     0 0   my ($self, $word, $id) = @_;
127             # here, with the calculation of where in the blob we have the
128             # docid and where the count of words and how long they are, we
129             # should really look at the parameters (num of bits of various
130             # structures and values) given to create
131              
132 0           my $ctx = $self->{'ctx'};
133 0           my $dbh = $ctx->{'dbh'};
134 0           my $data_table = $ctx->{'data_table'};
135              
136             # Sth to read the length of the blob holding the document/count info
137 0 0         my $get_length_sth = ( defined $self->{'get_length_sth'}
138             ? $self->{'get_length_sth'}
139             : $self->{'get_length_sth'} = $dbh->prepare("select
140             length(idx) from $data_table where word = ?"));
141 0           my $length = $dbh->selectrow_array($get_length_sth, {}, $word);
142              
143 0           my $packstring = $MyConText::BITS_TO_PACK{$ctx->{'doc_id_bits'}}
144             . $MyConText::BITS_TO_PACK{$ctx->{'count_bits'}};
145 0           my ($packnulls) = pack $packstring, 0, 0;
146 0           my $packlength = length $packnulls;
147              
148 0 0         if (not defined $length) { return; }
  0            
149 0           $length = int($length/$packlength);
150            
151 0           my ($bot, $top, $med, $val) = (0, $length);
152              
153 0 0         if (not defined $ctx->{'max_doc_id'})
154 0           { $med = int(($top - $bot) / 2); }
155             else
156 0           { $med = int($top * $id / $ctx->{'max_doc_id'}); }
157              
158 0           my $blob_direct_fetch = $ctx->{'blob_direct_fetch'};
159             # we divide the interval
160 0           while ($bot != $top) {
161 0 0         $med = $top - 1 if $med >= $top;
162 0 0         $med = $bot if $med < $bot;
163              
164 0 0         if ($top - $bot <= $blob_direct_fetch) {
165 0 0         my $get_interval_sth = (
166             defined $self->{'get_interval_sth'}
167             ? $self->{'get_interval_sth'}
168             : $self->{'get_interval_sth'} = $dbh->prepare("select substring(idx,?,?) from $data_table where word = ?"));
169 0           my $alldata = $dbh->selectrow_array($get_interval_sth,
170             {},
171             $bot * $packlength + 1,
172             ($top - $bot) * $packlength,
173             $word);
174 0 0         return unless defined $alldata;
175              
176 0           my @docs;
177 0           my $i = 0;
178 0           while ($i < length $alldata) {
179 0           push @docs, unpack $packstring,
180             substr $alldata, $i, $packlength;
181 0           $i += $packlength;
182             }
183 0           for (my $i = 0; $i < @docs; $i += 2) {
184 0 0         if ($docs[$i] == $id) { return (($bot+($i/2))*$packlength, 1); }
  0            
185 0 0         if ($docs[$i] > $id) { return (($bot+($i/2))*$packlength, 0); }
  0            
186             }
187 0           return ($top * $packlength, 0);
188             }
189 0           ($val) = $dbh->selectrow_array(
190             "select substring(idx, ?, 2) from $data_table
191             where word = ?", {}, ($med * $packlength) + 1, $word);
192 0           ($val) = unpack $packstring, $val;
193              
194 0 0         if (not defined $val) { return; }
  0            
195 0 0         if ($val == $id) { return ($med * $packlength, 1); }
  0 0          
196              
197 0           elsif ($val < $id) { $bot = $med + 1; }
198 0           else { $top = $med; }
199              
200 0           $med = int($med * $id / $val);
201             }
202 0           return ($bot * $packlength, 0);
203             }
204              
205             sub contains_hashref {
206 0     0 0   my $self = shift;
207 0           my $ctx = $self->{'ctx'};
208 0           my $dbh = $ctx->{'dbh'};
209 0           my $data_table = $ctx->{'data_table'};
210              
211 0           my $packstring = $MyConText::BITS_TO_PACK{$ctx->{'doc_id_bits'}}
212             . $MyConText::BITS_TO_PACK{$ctx->{'count_bits'}};
213 0           my ($packnulls) = pack $packstring, 0, 0;
214 0           my $packlength = length $packnulls;
215              
216 0 0         my $sth = ( defined $self->{'get_idx_sth'} ?
217             $self->{'get_idx_sth'} :
218             $self->{'get_idx_sth'} =
219             $dbh->prepare(
220             "select idx from $data_table where word like ?"
221             ));
222            
223 0           my $out = {};
224 0           for my $word (@_) {
225 0           $sth->execute($word);
226 0           while (my ($blob) = $sth->fetchrow_array) {
227 0 0         next unless defined $blob;
228 0           my @data;
229 0           my $i = 0;
230 0           while ($i < length $blob) {
231 0           push @data, unpack $packstring,
232             substr $blob, $i, $packlength;
233 0           $i += $packlength;
234             }
235 0           while (@data) {
236 0           my $doc = shift @data;
237 0           my $count = shift @data;
238 0 0         unless (defined $out->{$doc}) { $out->{$doc} = 0; }
  0            
239 0           $out->{$doc} += $count;
240             }
241             }
242 0           $sth->finish;
243             }
244 0           $out;
245             }
246              
247             *parse_and_index_data = \&MyConText::parse_and_index_data_count;
248              
249             1;
250