File Coverage

blib/lib/TM/IndexAble.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package TM::IndexAble;
2              
3 1     1   694 use strict;
  1         1  
  1         38  
4 1     1   5 use warnings;
  1         2  
  1         27  
5              
6 1     1   5 use Data::Dumper;
  1         2  
  1         49  
7 1     1   487 use Class::Trait 'base';
  0            
  0            
8              
9             =pod
10              
11             =head1 NAME
12              
13             TM::IndexAble - Topic Maps, Trait to provide lazy and eager indices
14              
15             =head1 SYNOPSIS
16              
17             my $tm = new TM... # get any map
18             use Class::Trait;
19             Class::Trait->apply ($tm, "TM::IndexAble"); # apply the trait
20              
21             # add a lazy cache for subclassing and instanceOf
22             $tm->index ({ axis => 'taxo' });
23             $tm->index ({ axis => 'taxo', closed => 0}); # the same, lazy is NOT closed
24              
25             # add eager cache (= index) for taxonometrics
26             $tm->index ({ axis => 'taxo', closed => 1}); # eager is closed, will take some time
27              
28             # add index for characteristics
29             $tm->index ({ axis => 'char'});
30             $tm->index ({ axis => 'char', closed => 1}); # can be closed as well
31              
32             # ditto for reification
33             $tm->index ({ axis => 'reify'});
34             $tm->index ({ axis => 'reify', closed => 1});
35              
36             # create index/caches, but separate from map itself
37             $tm->index ({ axis => 'reify', closed => 0, detached => {} });
38              
39              
40             my %stats = $tm->index; # get current indices + statistics
41              
42             =head1 DESCRIPTION
43              
44             Like L, this package also adds index/caching capabilities to any topic map stored via
45             L (or any of its subclasses). The difference, though, is that the index/caching functionality
46             is added as a trait, and not via an explicit attachment. The indices are - by default - part of
47             the map, and not standalone objects as with L.
48              
49             When you add an index/cache then you simply use precomputed navigation results for the TM methods
50             C and C (but not used for C).
51              
52             As with L you can create caching (lazy indexing) and full indices (eager precaching).
53              
54             =head2 Map Attachment
55              
56             To enrich a map with an index/cache, you call the method C provided here. The index/cache
57             will by default be stored I the map. That may be convenient in most cases.
58              
59             If not - as with some storage techniques - you can detach the index to live within your scope. For
60             that purpose you simply pass in an empty hash reference. It is then your responsibility to get rid
61             of it afterwards.
62              
63             Having the index detached also opens the way for you to make the index persistent.
64              
65             =head1 INTERFACE
66              
67             =head2 Methods
68              
69             Following methods are mixed into the class/object:
70              
71             =over
72              
73             =item B
74              
75             I<$tm>->index ({ %spec }, ...)
76              
77             This method establishes one or more indices/caches to the topic map. Each cache/index is described
78             with its own hash reference.
79              
80             Which navigation axes should be covered by a single cache/index is specified with the C field. It
81             can have as value one of the axes in L, or one of the following values:
82              
83             =over
84              
85             =item C
86              
87             Shortcut for the axes: C C C C
88              
89             =item C
90              
91             Shortcut for the axes: C C C C C
92              
93             =item C
94              
95             =back
96              
97             To control whether a cache (lazy indexing) or a full index (eager caching) should be
98             used, the field C can have two values (default is C<0>):
99              
100             =over
101              
102             =item C<0>:
103              
104             The default is to keep the index I. In this mode the index is empty at the start and it will
105             learn more and more on its own. In this sense, the index lives under an I
106             (hence the name), as the absence of information does not mean that there is no result.
107              
108             =item C<1>:
109              
110             A I index has to be populated to be useful. If a query is launched and the result is
111             stored in the index, then it will be used, like for an open index. If no result in the index is
112             found for a query, the empty result will be assumed.
113              
114             =back
115              
116             Additionally, a field C can be passed in for one cache/index. It MUST contain a hash
117             reference.
118              
119             Example:
120              
121             $tm->index (
122             { axis => 'reify', closed => 0, detached => {} },
123             { axis => 'char', closed => 1 }
124             );
125              
126              
127             The method returns a hash with some statistical information for every axis:
128              
129             =over
130              
131             =item C
132              
133             Number of requests since inception of the index.
134              
135             =item C
136              
137             Number of cache hits since inception. For an eager cache (i.e. index) this number
138             should be the same as C
139              
140             =back
141              
142              
143             =cut
144              
145             # map
146             # +---+
147             # | |
148             # | | cache
149             # | | index +-----+ _data +-------+ _data # can be local or detached
150             # | |------->| |-------------->| | key = query _hits # integer
151             # +---+ | | | | value = array ref of LIDs _requests # integer
152             # | |-->HASH(0x123) | |
153             # +-----+ | |
154             # +-------+
155              
156             our %cachesets; # here the detached ones go
157              
158             # +---+ index (detached) # provided by caller
159             # | | HASH(0x123) +-------+ cache
160             # | |-------------->| | +-------+
161             # | | | |------>| |
162             # | | | | | |
163              
164             sub index {
165             my $self = shift;
166              
167             my $index = ($self->{index} || {}); # local handle on all things indexed
168              
169             foreach my $idx (@_) { # whatever we are given by the user
170             my @a = _expand_axes ($idx->{axis});
171              
172              
173             my $index2; # could be detached or local
174             if (my $detached = $idx->{detached}) { # if user provided a detachable index, we take that
175             $cachesets{"$detached"} = $detached; # register that locally (as key this will be stringified)
176             $index->{$_} = "$detached" foreach @a; # and memorize that the real information is in a detached one, not inside the map
177             $index2 = $detached; # from now on we work with that
178             } else {
179             $index2 = $index;
180             }
181              
182             foreach my $a (@a) { # walk over all axes now
183              
184             #warn "indexable index $a";
185             $index2->{"${a}_hits"} //= 0; # initialize stats
186             $index2->{"${a}_requests"} //= 0;
187              
188             next if $index2->{"${a}_closed"}; # if we already got a closed one, we do not touch it
189             #warn "AFTER CLOSED!";
190              
191             $index2->{"${a}_data"} //= {}; # we need to have a place for data
192              
193             next unless $idx->{closed}; # only continue here when we want to close it
194             my $data = $index2->{"${a}_data"}; # this is a potentially expensive operation
195              
196             if ($a eq 'reify') { # this is a special case
197             my $mid2iid = $self->{mid2iid}; # not necessarily cheap
198            
199             %$data = map { $mid2iid->{$_}->[TM->ADDRESS] => $_ } # invert the index
200             grep { $mid2iid->{$_}->[TM->ADDRESS] } # only those which "reify" something survive
201             keys %{$mid2iid}; # all toplet tids
202            
203             } else {
204             my $enum = $TM::forall_handlers{$a}->{enum} # how to generate all assertions of that axes
205             or die "unsupported index axes $a"; # complain if enumeration is not yet supported
206             my $key = $TM::forall_handlers{$a}->{key}; # how to derive a full cache key from one assertion
207            
208             my %as; # collect the assertions for that axis $a
209             map { push @{ $as{ &$key ($self, $_) } } , $_->[TM->LID] } # sort them according to the key
210             &$enum ($self) ; # generate all assertions fitting this axis
211            
212             map { $data->{$_} = $as{$_} } # store the corresponding lists into the cache
213             keys %as; # walk through keys
214             }
215             #warn "after axis $a ". Dumper $data;
216             $index2->{"${a}_data"} = $data; # this is only for MLDBM backed indices (yes, I know a PITA)
217             $index2->{"${a}_closed"} = 1;
218             }
219              
220             }
221             $self->{index} = $index; # kick MLDBM in the ...
222             # warn Dumper ($self->{index}, \%cachesets);
223              
224             return _collect_stats ($index) if (wantarray);
225              
226             sub _collect_stats {
227             my $index = shift;
228             my %s;
229             map {
230             $s{ $1 }->{$2} = $index->{$_} if $_ =~ /(.+)_(.+)/
231             }
232             keys %{$index};
233              
234             %s = (%s , map { _collect_stats ($_) } # and compute the stats from there
235             map { $cachesets{ $index->{$_} } } # these are detached ones, get them
236             grep { $_ !~ /_/ } # but only look for those without a _
237             keys %{$index} # go back to all indices
238             );
239             return %s;
240             }
241             }
242              
243             sub _expand_axes {
244             my $a = shift;
245             use feature 'switch';
246             given ( $a ) {
247             when ('taxo') { # "taxo" shortcuts some axes
248             return qw(subclass.type superclass.type class.type instance.type);
249             }
250             when ('char') { # char shortcut
251             return qw(char.topic char.value char.type char.type.value char.topic.type);
252             }
253             when ('reify') { # this is a special one
254             return qw(reify);
255             }
256             default { # take that as-is
257             return ( $a );
258             }
259             }
260             }
261              
262              
263             =pod
264              
265             =item B
266              
267             I<$tm>->deindex (I<$axis>, ....)
268              
269             I<$tm>->deindex (I<$index>, ....)
270              
271             This method gets rid of certain indices/caches, as specified by their axes.
272              
273             Since v1.55: You can also pass in the hash reference of the index (in the detached
274             case).
275              
276             Since v1.55: Also the expansion of axes (like for C) works now.
277              
278             =cut
279              
280             sub deindex {
281             my $self = shift;
282              
283             my $index = $self->{index};
284             #warn "deindex cacheset keys before ".Dumper [ keys %cachesets ];
285             #warn Dumper $index;
286             foreach my $a (map { _expand_axes ($_) } @_) {
287             #warn "deleting " . $a;
288             if (ref ($a)) { # this is a hash ref, obviously the index
289             delete $cachesets{ "$a" };
290             map { delete $index->{$_} } # delete those index entries
291             grep { $index->{$_} eq "$a" } # which carry data from the {} we passed in
292             keys %$index;
293             } elsif (ref ($index->{$a})) { # not detached
294             delete $index->{$a}; # so we simply get rid of it
295             } else { # this is also a detached one, but this time via an axis (not the index itself)
296             my $h = delete $index->{$a}; # get the hash (stringified) and in one go delete it
297             delete $cachesets{$h};
298             }
299             }
300             #warn "deindex cacheset keys before ".Dumper [ keys %cachesets ];
301             #warn Dumper $index;
302             $self->{index} = $index;
303             }
304              
305             =pod
306              
307             =cut
308              
309             #-- trait mixins
310              
311             sub match_forall {
312             my $self = shift;
313             my %query = @_;
314             #warn "forall ".Dumper \%query;
315              
316             my @skeys = sort keys %query; # all fields make up the key
317             my $skeys = join ('.', @skeys);
318             my @svals = map { $query{$_} } # lookup that key in the incoming query
319             @skeys; # take these query keys
320             my $key = "$skeys:" . join ('.',
321             map { ref ($_) ? @$_ : $_ } # if we have a value, take that and its datatype
322             @svals);
323              
324             #warn "i match ".$skeys;
325             #warn "i match whole key >>$key<<";
326             my $index = $self->{index}; # just a handle
327              
328             if (my $detached = $index->{$skeys}) { # axis is pointing to a detached
329             $index = $cachesets{ $index->{$skeys} };
330             }
331             #warn Dumper $index;
332             unless ( my $data = $index->{"${skeys}_data"} ) {
333             #warn "no index";
334             return TM::_dispatch_forall ($self, \%query, $skeys, @svals);
335              
336             } else {
337             #warn "-> using index! $data";
338             $index->{"${skeys}_requests"}++;
339             #warn "DATA keys ".scalar keys %$data;
340             #warn "DATA ".Dumper $data;
341             if (my $lids = $data->{ $key }) {
342             #warn "and HIT";
343             $index->{"${skeys}_hits"}++;
344              
345             my $asserts = $self->{assertions}; # just in case we have a tied hash ... we create a handle
346             return map { $asserts->{$_} } @$lids; # and return fully fledged assocs
347             }
348             return () if $index->{"${skeys}_closed"}; # the end of wisdom ?????????????????????????? SUSPICIOUS
349             my @as = TM::_dispatch_forall ($self, \%query, $skeys, @svals);
350             $data->{ $key } = [ map { $_->[TM->LID] } @as ];
351             return @as;
352             }
353             }
354              
355              
356              
357             sub is_reified {
358             my $self = shift; # the map
359             my $a = shift; # the thing (assertion or otherwise)
360              
361             my $index = $self->{index};
362             if (my $detached = $index->{'reify'}) { # axis is pointing to a detached
363             $index = $cachesets{ $index->{'reify'} };
364             }
365              
366             unless ( my $data = $index->{'reify_data'} ) { # if an index over reify has NOT been activated
367             return $self->_is_reified ($a); # we look only at the source map
368              
369             } else { # we have an index!
370             $index->{'reify_requests'}++; # bookkeeping
371              
372             my $k = ref ($a) ? $a->[TM->LID] : $a;
373             if (my $tid = $data->{ $k }) { # cache always holds list references
374             $index->{'reify_hits'}++; # bookkeeping
375             return ($tid);
376             }
377             return () if $index->{'reify_closed'}; # the end of wisdom
378             # warn "no hit!";
379             my @tids = $self->_is_reified ($a); # returns a list (strangely)
380             $data->{ $k } = $tids[0]; # tuck it into the cache
381             return @tids; # and give it back to the caller
382             }
383             }
384              
385             =pod
386              
387             =back
388              
389             =head1 SEE ALSO
390              
391             L, L
392              
393             =head1 COPYRIGHT AND LICENSE
394              
395             Copyright 20(10) by Robert Barta, Edrrho@cpan.orgE
396              
397             This library is free software; you can redistribute it and/or modify it under the same terms as Perl
398             itself.
399              
400             =cut
401              
402             our $VERSION = 0.7;
403              
404             1;
405              
406             __END__