File Coverage

blib/lib/Class/Indexed.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Class::Indexed;
2              
3             ################################################################################
4             # Indexed - An abstract class providing fulltext indexing for classes
5             # (c) Copyright 2003 Aaron Trevena
6             # Based on my article for www.perl.com
7             # inspired by Bulletproof Monk and Tenacious D
8             #
9             # Couldn't remember the code to the greatest reverse index in the world,
10             # this is just a tribute.
11             #
12             # The peculiar thing is this my friends,
13             # the piece of code that I wrote that fateful day looks nothing like this code.
14             # This is just a tribute. You've got to believe me, and I wish you were there.
15              
16             =head1 NAME
17              
18             Class::Indexed : An abstract class providing fine-grained and incremental update fulltext indexing for classes
19              
20             =head1 SYNOPSIS
21              
22             use Class::Indexed;
23              
24             our @ISA = qw(Class::Indexed);
25              
26             # build the index and metadata tables
27             Class::Indexed->build_index_tables(database=>$db,host=>$host,username=>$user,password=>$password);
28              
29             # set which attributes / fields are to be indexed and their weighting, etc
30             $self->indexed_fields (
31             dbh=>$dbh, key=>'Pub_ID',
32             fields=>[
33             { name=>'Pub_Name', weight=>1 },
34             ],
35             );
36              
37             # index an object
38             $self->index_object();
39              
40             # index a field or attribute of an object
41             $self->index_field($self->{Pub_ID}, $field, $value);
42              
43             # remove the object from the metadata and index tables
44             $self->delete_location();
45              
46             # add the object to the metadata table
47             $self->add_location();
48              
49             =head1 DESCRIPTION
50              
51             This abstract class provides inherited indexing functionality to any
52             class using it as a superclass.
53              
54             Class::Indexed is designed to provide most of the functionality described
55             in the article : 'Adding Search Functionality to Perl Applications'
56             ( http://www.perl.com/pub/a/2003/09/25/searching.html ) and I recommend
57             you read it through to gain understanding of the code and principles
58             involved.
59              
60             see the examples for the best explaination of how to use this class
61              
62             =head1 EXPORT
63              
64             None by default.
65              
66             =cut
67              
68 1     1   7212 use strict;
  1         1  
  1         29  
69 1     1   1374 use DBI;
  0            
  0            
70             use Class::Indexed::Words;
71              
72             our $VERSION = 0.01;
73              
74             ################################################################################
75             # Public methods
76              
77             =head1 METHODS
78              
79             =head2 build_index_tables
80              
81             builds the index and metadata tables, you need to run this before you can use the indexing
82              
83             my $success = Class::Indexed->build_index_tables(database=>$db,host=>$host,username=>$user,password=>$password);
84              
85             =cut
86              
87             sub build_index_tables {
88             my ($self,%options) = @_;
89             my $success = 0;
90             my $dbh = DBI->connect("dbi:mysql:$options{database}:$options{host}", $options{username}, $options{password})
91             or die " couldn't connect to db : $options{database} host : $options{host} ";
92              
93             my $indextable = $options{indexname} || 'CIRIND';
94             my $metadatatable = $options{indexmetadata} || 'CIMETA';
95              
96             # create index table
97             my $query = <
98             create table $indextable (
99             CIRIND_Word varchar(64) not null,
100             CIRIND_Score float,
101             CIMETA_ID int not null,
102             CIRIND_Fields varchar(255),
103             primary key ( CIRIND_Word, CIMETA_ID )
104             )
105             endindex
106             my $rv = $dbh->do($query);
107              
108             # create index metadata table
109             $query = <
110             create table $metadatatable (
111             CIMETA_ID integer primary key auto_increment,
112             CIMETA_Title varchar(64),
113             CIMETA_Type varchar(16),
114             CIMETA_Key varchar(32),
115             CIMETA_KeyValue varchar(128),
116             CIMETA_URL varchar(255),
117             CIMETA_Summary text
118             )
119             endmetadata
120             $rv = $dbh->do($query);
121             return $success;
122             }
123              
124             =head2 index_object
125              
126             indexes the object, updates the metadata if required
127              
128             $self->index_object();
129              
130             before you can call index_object you must set the fields to be indexed
131             with the indexed_fields method
132              
133             =cut
134              
135             sub index_object {
136             my $self = shift;
137             foreach my $field (keys %{$self->{_RIND_fields}}) {
138             warn "index object : $field : $self->{$field}\n";
139             my $success = $self->index_field($field,$self->{$field});
140             warn "success : $success\n";
141             }
142             }
143              
144             =head2 index_field
145              
146             indexes a particular field or attribute of the object
147              
148             $self->index_field($fieldname,$value)
149              
150             takes the name of the attribute/field and the new value
151              
152             before you can call index_field you must set the fields to be indexed
153             with the indexed_fields method
154              
155             =cut
156              
157             sub index_field {
158             my ($self,$field,$value) = @_;
159             warn "index_field : $field,$value \n";
160             return 0 unless ($self->{_RIND_fields}{$field});
161             $self->{_RIND_index_table} ||= 'CIRIND';
162             $self->{_RIND_location_table} ||= 'CIMETA';
163              
164             my %newwords;
165             my @newwords;
166              
167             # extract new words from current field or lookup or replacement text
168             if ((defined $value and $value ne '') or ( $self->{_RIND_fields}{$field}{replace} || $self->{_RIND_fields}{$field}{lookup} )) {
169             MODE: {
170             if (defined $self->{_RIND_fields}{$field}{replace}) {
171             $value = get_words($self->{_RIND_fields}{$field}{replace});
172             last;
173             }
174             if (defined $self->{_RIND_fields}{$field}{lookup} ) {
175             my $column = $self->{_RIND_fields}{$field}{lookup};
176             my $table = $self->{_RIND_fields}{$field}{lookup_table};
177             my $query = $self->{_RIND_fields}{$field}{query};
178             warn "value : $value / column : $column / query : $query \n";
179             unless (defined $query and $query ne '') {
180             $query = qq{select $column from $table where $field = };
181             if ($value =~ /\D/) {
182             $value =~ s/(['"])/\\$1/g;
183             $query .= qq{'$value'};
184             } else {
185             $query .= $value;
186             }
187             }
188             warn "query : $query \n";
189             $value = join ( ' ',@{$self->{_RIND_dbh}->selectcol_arrayref($query)} );
190             last;
191             }
192             } # end of MODE switch
193              
194             warn "value : $value \n";
195             # get words from value
196             @newwords = get_words($value);
197             foreach ( @newwords ) {
198             next if $stopwords{$_};
199             $newwords{$_} += $self->{_RIND_fields}{$field}{weight};
200             }
201             warn "new words : ", @newwords, "\n";
202             }
203              
204             # get old words from reverse index for current object
205             my $location = $self->{_RIND_location};
206             my $query = "select * from $self->{_RIND_index_table} where CIMETA_ID = ?";
207             my $sth = $self->{_RIND_dbh}->prepare($query);
208             my $rv = $sth->execute($location);
209              
210             # update reverse index words for this field of this object
211             warn "update reverse index \n";
212             while ( my $row = $sth->fetchrow_hashref() ) {
213             next unless ($row->{CIRIND_Fields} =~ m/'$field'/); # skip unless this word was in the old value of this field
214             $self->{__RIND_locationwords}{$row->{CIRIND_Word}} = $row;
215             if (exists $newwords{$row->{CIRIND_Word}}) {
216             $self->_RIND_UpdateFieldEntry($row,$field,$newwords{$row->{CIRIND_Word}});
217             delete $newwords{$row->{CIRIND_Word}}
218             } else {
219             $self->_RIND_RemoveFieldEntry($row,$field,$location);
220             }
221             }
222              
223             warn "add to reverse index", keys %newwords , "\n";
224             foreach (keys %newwords) {
225             warn "adding field entry $_ : $newwords{$_} : $field \n";
226             $self->_RIND_AddFieldEntry($location,$_,$newwords{$_},$field);
227             }
228             return 1;
229             }
230              
231             =head2 delete_location
232              
233             remove the object from the metadata and index tables
234              
235             $self->delete_location();
236              
237             =cut
238              
239             sub delete_location {
240             my $self = shift;
241             $self->{_RIND_index_table} ||= 'CIRIND';
242             $self->{_RIND_location_table} ||= 'CIMETA';
243             my $query = "delete from $self->{_RIND_index_table} where CIMETA_ID = ?";
244             my $sth = $self->{_RIND_dbh}->prepare($query);
245             my $rv1 = $sth->execute($self->{_RIND_location});
246             $query = "delete from $self->{_RIND_location_table} where CIMETA_ID = ?";
247             $sth = $self->{_RIND_dbh}->prepare($query);
248             my $rv2 = $sth->execute($self->{_RIND_location});
249              
250             return "$rv1:$rv2";
251             }
252              
253             =head2 add_location
254              
255             add the object to the metadata table
256              
257             $self->add_location();
258              
259             =cut
260              
261             sub add_location {
262             my ($self,%options) = @_;
263             $self->{_RIND_index_table} ||= 'CIRIND';
264             $self->{_RIND_location_table} ||= 'CIMETA';
265             my $dbh = $options{dbh} || $self->{_dbh};
266             my $query = qq{ insert into $self->{_RIND_location_table}
267             ( CIMETA_Title,CIMETA_Type, CIMETA_Key, CIMETA_KeyValue, CIMETA_URL, CIMETA_Summary )
268             values (?,?,?,?,?,?) };
269             warn "query : $query \n";
270             my $location_sth = $dbh->prepare($query);
271             my @values = map { $_ || 'null' } @options{qw(Title Type Key KeyValue URL Summary)};
272             my $rv = $location_sth->execute(@values);
273             $self->{_RIND_location} = $location_sth->{mysql_insertid};
274             return $rv;
275             }
276              
277             =head2 indexed_fields
278              
279             set which attributes / fields are to be indexed and their weighting, etc
280             $self->indexed_fields (
281             dbh=>$dbh, key=>'Pub_ID',
282             fields=>[
283             { name=>'Pub_Name', weight=>1 },
284             ],
285             );
286              
287              
288             =cut
289              
290             sub indexed_fields {
291             my ($self,%args) = @_;
292             $self->{_RIND_index_table} ||= 'CIRIND';
293             $self->{_RIND_location_table} ||= 'CIMETA';
294             if (keys %args) {
295             $self->{_RIND_dbh} = $args{dbh} if defined $args{dbh};
296             if ( defined $args{key} ) {
297             $self->{RIND_key} = $args{key};
298             ($self->{_RIND_location}) = $args{dbh}->selectrow_array("Select CIMETA_ID from $self->{_RIND_location_table} where CIMETA_Key = '$args{key}' and CIMETA_KeyValue = " . $args{dbh}->quote($self->{$args{key}}));
299             }
300             if ( defined $args{fields} ) {
301             foreach ( @{$args{fields}} ) {
302             $self->{_RIND_fields}{$_->{name}} = $_;
303             }
304             }
305             }
306             return @{$self->{_RIND_fields}} if wantarray;
307             }
308              
309             =head1 AUTHOR
310              
311             Aaron J. Trevena, Eaaron.trevena@droogs.orgE
312              
313             =head1 SEE ALSO
314              
315             L.
316              
317             =cut
318              
319             ####################################################################################
320             # private methods : don't touch below here
321             #
322             # I don't reccomend mucking about with these as they are very self-referential
323              
324             sub _RIND_UpdateFieldEntry {
325             my ($self,$row, $field, $score) = @_;
326             my %fields = ( $row->{CIRIND_Fields} =~ m/'(.*?)':([\d.]+)/g );
327              
328             # recalculate total score
329             my $newscore = ($row->{CIRIND_Score} - $fields{$field} ) + $score;
330             return 1 if ($fields{$field} == $score); # skip if score unchanged
331              
332             # update entry
333             $fields{$field} = $score;
334             my $newfields;
335             foreach (keys %fields) {
336             $newfields .= "'$_':$fields{$_}";
337             }
338             $self->_RIND_UpdateIndex( word=>$row->{CIRIND_Word},location=>$row->{CIMETA_ID},
339             newscore=>$newscore,newfields=>$newfields );
340             }
341              
342             sub _RIND_AddFieldEntry {
343             my ($self,$location, $word, $score, $field) = @_;
344             warn "_RIND_AddFieldEntry : ($location, $word, $score, $field) \n";
345             # check if record already exists for this location and update/insert entry
346             if (exists $self->{__RIND_locationwords}{$word}) {
347             # recalculate total score
348             my $newscore = $self->{__RIND_locationwords}{$word}{CIRIND_Score} + $score;
349             # update entry, appending field and score to end
350             my $newfields = $self->{__RIND_locationwords}{$word}{CIRIND_Fields} . "'$field':$score";
351             $self->_RIND_UpdateIndex( word=>$word,location=>$location, newscore=>$newscore,newfields=>$newfields );
352             } else {
353             # insert new entry
354             $self->_RIND_UpdateIndex( insert=>1, word=>$word,location=>$location, newscore=>$score,newfields=>"'$field':$score" );
355             }
356             }
357              
358             sub _RIND_RemoveFieldEntry {
359             my ($self,$row, $field, $location) = @_;
360              
361             # check if record contains scores from other fields
362             my %fields = ( $row->{CIRIND_Fields} =~ m/'(.*?)':([d.]+)/g ) ;
363             if ( keys %fields > 1 ) {
364             # recalculate total score
365             my $newscore = $row->{CIRIND_Score} - $fields{$field};
366             delete $fields{$field};
367             my $newfields;
368             foreach (keys %fields) {
369             $newfields .= "'$_':$fields{$_}";
370             }
371             # update entry
372             $self->_RIND_UpdateIndex( word=>$row->{CIRIND_Word},location=>$location, newscore=>$newscore,newfields=>$newfields );
373             } else {
374             # delete entry
375             $self->_RIND_UpdateIndex( delete=>1, word=>$row->{CIRIND_Word}, location=>$location);
376             }
377             }
378              
379             sub _RIND_UpdateIndex {
380             my ($self,%args) = @_;
381             my $query = qq{ update $self->{_RIND_index_table}
382             set CIRIND_Score = ?, CIRIND_Fields = ?
383             where CIRIND_Word = ? and CIMETA_ID = ? };
384             my @args = ($args{newscore},$args{newfields},$args{word},$args{location});
385              
386             MODE:{
387             if ($args{insert}) {
388             $query = qq{ insert into $self->{_RIND_index_table} ( CIRIND_Score, CIRIND_Fields, CIRIND_Word, CIMETA_ID)
389             values (?,?,?,?) };
390             last;
391             }
392             if ($args{delete}) {
393             $query = "delete from $self->{_RIND_index_table} where CIRIND_Word = ? and CIMETA_ID = ?";
394             shift(@args); shift(@args); # remove unused arguments
395             last;
396             }
397             } # end of MODE switch
398             my $sth = $self->{_RIND_dbh}->prepare($query);
399             warn " .. _RIND_UpdateIndex ";
400             warn " args : @args";
401             my $rv = $sth->execute(@args);
402             return $rv;
403             }
404              
405              
406             ##################################################################################
407              
408             1;
409              
410             ##################################################################################
411             ##################################################################################
412