File Coverage

blib/lib/Tie/Cache.pm
Criterion Covered Total %
statement 134 215 62.3
branch 56 122 45.9
condition 21 38 55.2
subroutine 14 21 66.6
pod 0 9 0.0
total 225 405 55.5


line stmt bran cond sub pod time code
1             #!/usr/local/bin/perl -w
2              
3             package Tie::Cache;
4 1     1   710 use strict;
  1         3  
  1         59  
5 1         3362 use vars qw(
6             $VERSION $Debug $STRUCT_SIZE $REF_SIZE
7             $BEFORE $AFTER $KEY $VALUE $BYTES $DIRTY
8 1     1   7 );
  1         3  
9              
10             $VERSION = .21;
11             $Debug = 0; # set to 1 for summary, 2 for debug output
12             $STRUCT_SIZE = 240; # per cached elem bytes overhead, approximate
13             $REF_SIZE = 16;
14              
15             # NODE ARRAY STRUCT
16             $KEY = 0;
17             $VALUE = 1;
18             $BYTES = 2;
19             $BEFORE = 3;
20             $AFTER = 4;
21             $DIRTY = 5;
22              
23             =pod
24              
25             =head1 NAME
26              
27             Tie::Cache - LRU Cache in Memory
28              
29             =head1 SYNOPSIS
30              
31             use Tie::Cache;
32             tie %cache, 'Tie::Cache', 100, { Debug => 1 };
33             tie %cache2, 'Tie::Cache', { MaxCount => 100, MaxBytes => 50000 };
34             tie %cache3, 'Tie::Cache', 100, { Debug => 1 , WriteSync => 0};
35              
36             # Options ##################################################################
37             #
38             # Debug => 0 - DEFAULT, no debugging output
39             # 1 - prints cache statistics upon destroying
40             # 2 - prints detailed debugging info
41             #
42             # MaxCount => Maximum entries in cache.
43             #
44             # MaxBytes => Maximum bytes taken in memory for cache based on approximate
45             # size of total cache structure in memory
46             #
47             # There is approximately 240 bytes used per key/value pair in the cache for
48             # the cache data structures, so a cache of 5000 entries would take
49             # at approximately 1.2M plus the size of the data being cached.
50             #
51             # MaxSize => Maximum size of each cache entry. Larger entries are not cached.
52             # This helps prevent much of the cache being flushed when
53             # you set an exceptionally large entry. Defaults to MaxBytes/10
54             #
55             # WriteSync => 1 - DEFAULT, write() when data is dirtied for
56             # TRUE CACHE (see below)
57             # 0 - write() dirty data as late as possible, when leaving
58             # cache, or when cache is being DESTROY'd
59             #
60             ############################################################################
61              
62             # cache supports normal tied hash functions
63             $cache{1} = 2; # STORE
64             print "$cache{1}\n"; # FETCH
65              
66             # FIRSTKEY, NEXTKEY
67             while(($k, $v) = each %cache) { print "$k: $v\n"; }
68            
69             delete $cache{1}; # DELETE
70             %cache = (); # CLEAR
71              
72             =head1 DESCRIPTION
73              
74             This module implements a least recently used (LRU) cache in memory
75             through a tie interface. Any time data is stored in the tied hash,
76             that key/value pair has an entry time associated with it, and
77             as the cache fills up, those members of the cache that are
78             the oldest are removed to make room for new entries.
79              
80             So, the cache only "remembers" the last written entries, up to the
81             size of the cache. This can be especially useful if you access
82             great amounts of data, but only access a minority of the data a
83             majority of the time.
84              
85             The implementation is a hash, for quick lookups,
86             overlaying a doubly linked list for quick insertion and deletion.
87             On a WinNT PII 300, writes to the hash were done at a rate
88             3100 per second, and reads from the hash at 6300 per second.
89             Work has been done to optimize refreshing cache entries that are
90             frequently read from, code like $cache{entry}, which moves the
91             entry to the end of the linked list internally.
92              
93             =cut
94              
95             # Documentation continues at the end of the module.
96              
97             sub TIEHASH {
98 2     2   32 my($class, $max_count, $options) = @_;
99              
100 2 100       9 if(ref($max_count)) {
101 1         2 $options = $max_count;
102 1         3 $max_count = $options->{MaxCount};
103             }
104            
105 2 50 33     10 unless($max_count || $options->{MaxBytes}) {
106 0         0 die('you must specify cache size with either MaxBytes or MaxCount');
107             }
108              
109 2 50       9 my $sync = exists($options->{WriteSync}) ? $options->{WriteSync} : 1;
110              
111 2 50 66     46 my $self = bless
      33        
112             {
113             # how many items to cache
114             max_count=> $max_count,
115            
116             # max bytes to cache
117             max_bytes => $options->{MaxBytes},
118            
119             # max size (in bytes) of an individual cache entry
120             max_size => $options->{MaxSize} || ($options->{MaxBytes} ? (int($options->{MaxBytes}/10) + 1) : 0),
121            
122             # class track, so know if overridden subs should be used
123             'class' => $class,
124             'subclass' => $class ne 'Tie::Cache' ? 1 : 0,
125            
126             # current sizes
127             count=>0,
128             bytes=>0,
129            
130             # inner structures
131             head=>0,
132             tail=>0,
133             nodes=>{},
134             'keys'=>[],
135            
136             # statistics
137             hit => 0,
138             miss => 0,
139            
140             # config
141             sync => $sync,
142             dbg => $options->{Debug} || $Debug
143            
144            
145             }, $class;
146            
147 2 50 66     21 if (($self->{max_bytes} && ! $self->{max_size})) {
148 0         0 die("MaxSize must be defined when MaxBytes is");
149             }
150              
151 2 50 66     15 if($self->{max_bytes} and $self->{max_bytes} < 1000) {
152 0         0 die("cannot set MaxBytes to under 1000, each raw entry takes $STRUCT_SIZE bytes alone");
153             }
154              
155 2 50 66     14 if($self->{max_size} && $self->{max_size} < 3) {
156 0         0 die("cannot set MaxSize to under 3 bytes, assuming error in config");
157             }
158              
159 2         8 $self;
160             }
161              
162             # override to write data leaving cache
163 0     0 0 0 sub write { undef; }
164             # commented this section out for speed
165             # my($self, $key, $value) = @_;
166             # 1;
167             #}
168              
169             # override to get data if not in cache, should return $value
170             # associated with $key
171 0     0 0 0 sub read { undef; }
172             # commented this section out for speed
173             # my($self, $key) = @_;
174             # undef;
175             #}
176              
177             sub FETCH {
178 30005     30005   1405303 my($self, $key) = @_;
179              
180 30005         143060 my $node = $self->{nodes}{$key};
181 30005 100       68868 if($node) {
182             # refresh node's entry
183 15004         23914 $self->{hit}++; # if $self->{dbg};
184              
185             # we used to call delete then insert, but we streamlined code
186 15004 100       37485 if(my $after = $node->[$AFTER]) {
187 15001 50       42971 $self->{dbg} > 1 and $self->print("update() node $node to tail of list");
188             # reconnect the nodes
189 15001         37285 my $before = $after->[$BEFORE] = $node->[$BEFORE];
190 15001 50       29907 if($before) {
191 0         0 $before->[$AFTER] = $after;
192             } else {
193 15001         28199 $self->{head} = $after;
194             }
195              
196             # place at the end
197 15001         25063 $self->{tail}[$AFTER] = $node;
198 15001         23855 $node->[$BEFORE] = $self->{tail};
199 15001         22860 $node->[$AFTER] = undef;
200 15001         46696 $self->{tail} = $node; # always true after this
201             } else {
202             # if there is nothing after node, then we are at the end already
203             # so don't do anything to move the nodes around
204 3 50       15 die("this node is the tail, so something's wrong")
205             unless($self->{tail} eq $node);
206             }
207              
208 15004 50       57672 $self->print("FETCH [$key, $node->[$VALUE]]") if ($self->{dbg} > 1);
209 15004         562593 $node->[$VALUE];
210             } else {
211             # we have a cache miss here
212 15001         25542 $self->{miss}++; # if $self->{dbg};
213              
214             # its fine to always insert a node, even when we have an undef,
215             # because even if we aren't a sub-class, we should assume use
216             # that would then set the entry. This model works well with
217             # sub-classing and reads() that might want to return undef as
218             # a valid value.
219 15001         16764 my $value;
220 15001 50       50812 if ($self->{subclass}) {
221 0 0       0 $self->print("read() for key $key") if $self->{dbg} > 1;
222 0         0 $value = $self->read($key);
223             }
224              
225 15001 50       32265 if(defined $value) {
226 0         0 my $length;
227 0 0       0 if($self->{max_size}) {
228             # check max size of entry, that it not exceed max size
229 0         0 $length = &_get_data_length(\$key, \$value);
230 0 0       0 if($length > $self->{max_size}) {
231 0 0       0 $self->print("direct read() [$key, $value]") if ($self->{dbg} > 1);
232 0         0 return $value;
233             }
234             }
235             # if we get here, we should insert the new node
236 0         0 $node = &create_node($self, \$key, \$value, $length);
237 0         0 &insert($self, $node);
238 0         0 $value;
239             } else {
240 15001         411780 undef;
241             }
242             }
243             }
244              
245             sub STORE {
246 30003     30003   1388761 my($self, $key, $value) = @_;
247 30003         37048 my $node;
248              
249 30003 50       114370 $self->print("STORE [$key,$value]") if ($self->{dbg} > 1);
250              
251             # do not cache undefined values
252 30003 100       284408 defined($value) || return(undef);
253              
254             # check max size of entry, that it not exceed max size
255 20003         40667 my $length;
256 20003 100       52948 if($self->{max_size}) {
257 15003         47833 $length = &_get_data_length(\$key, \$value);
258 15003 50       61808 if($length > $self->{max_size}) {
259 0 0       0 if ($self->{subclass}) {
260 0 0       0 $self->print("direct write() [$key, $value]") if ($self->{dbg} > 1);
261 0         0 $self->write($key, $value);
262             }
263 0         0 return $value;
264             }
265             }
266              
267             # do we have node already ?
268 20003 100       84988 if($self->{nodes}{$key}) {
269 1         36 $node = &delete($self, $key);
270             # $node = &delete($self, $key);
271             # $node->[$VALUE] = $value;
272             # $node->[$BYTES] = $length || &_get_data_length(\$key, \$value);
273             }
274              
275             # insert new node
276 20003         57207 $node = &create_node($self, \$key, \$value, $length);
277             # $node ||= &create_node($self, \$key, \$value, $length);
278 20003         48596 &insert($self, $node);
279              
280             # if the data is sync'd call write now, otherwise defer the data
281             # writing, but mark it dirty so it can be cleanup up at the end
282 20003 50       63114 if ($self->{subclass}) {
283 0 0       0 if($self->{sync}) {
284 0 0       0 $self->print("sync write() [$key, $value]") if $self->{dbg} > 1;
285 0         0 $self->write($key, $value);
286             } else {
287 0         0 $node->[$DIRTY] = 1;
288             }
289             }
290              
291 20003         758815 $value;
292             }
293              
294             sub DELETE {
295 10001     10001   625580 my($self, $key) = @_;
296              
297 10001 50       47802 $self->print("DELETE $key") if ($self->{dbg} > 1);
298 10001         20352 my $node = $self->delete($key);
299 10001 100       299448 $node ? $node->[$VALUE] : undef;
300             }
301              
302             sub CLEAR {
303 1     1   307 my($self) = @_;
304              
305 1 50       10 $self->print("CLEAR CACHE") if ($self->{dbg} > 1);
306              
307 1 50       6 if($self->{subclass}) {
308 0         0 my $flushed = $self->flush();
309 0 0       0 $self->print("FLUSH COUNT $flushed") if ($self->{dbg} > 1);
310             }
311              
312 1         2 my $node;
313 1         6 while($node = $self->{head}) {
314 5000         26564 $self->delete($self->{head}[$KEY]);
315             }
316              
317 1         14 1;
318             }
319              
320             sub EXISTS {
321 1     1   54 my($self, $key) = @_;
322 1         5 exists $self->{nodes}{$key};
323             }
324            
325             # firstkey / nextkey emulate keys() and each() behavior by
326             # taking a snapshot of all the nodes at firstkey, and
327             # iterating through the keys with nextkey
328             #
329             # this method therefore will only supports one each() / keys()
330             # happening during any given time.
331             #
332             sub FIRSTKEY {
333 3     3   2221 my($self) = @_;
334              
335 3         12 $self->{'keys'} = [];
336 3         12 my $node = $self->{head};
337 3         13 while($node) {
338 14998         16176 push(@{$self->{'keys'}}, $node->[$KEY]);
  14998         46318  
339 14998         33920 $node = $node->[$AFTER];
340             }
341              
342 3         11 shift @{$self->{'keys'}};
  3         58  
343             }
344              
345             sub NEXTKEY {
346 14998     14998   24734 my($self, $lastkey) = @_;
347 14998         20596 shift @{$self->{'keys'}};
  14998         81230  
348             }
349              
350             sub DESTROY {
351 0     0   0 my($self) = @_;
352              
353             # if debugging, snapshot cache before clearing
354 0 0       0 if($self->{dbg}) {
355 0 0 0     0 if($self->{hit} || $self->{miss}) {
356 0         0 $self->{hit_ratio} =
357             sprintf("%4.3f", $self->{hit} / ($self->{hit} + $self->{miss}));
358             }
359 0         0 $self->print($self->pretty_self());
360 0 0       0 if($self->{dbg} > 1) {
361 0         0 $self->print($self->pretty_chains());
362             }
363             }
364            
365 0 0       0 $self->print("DESTROYING") if $self->{dbg} > 1;
366 0         0 $self->CLEAR();
367            
368 0         0 1;
369             }
370              
371             ####PERL##LRU##TIE##CACHE##PERL##LRU##TIE##CACHE##PERL##LRU##TIE##CACHE
372             ## Helper Routines
373             ####PERL##LRU##TIE##CACHE##PERL##LRU##TIE##CACHE##PERL##LRU##TIE##CACHE
374              
375             # we use scalar_refs for the data for speed
376             sub create_node {
377 20003     20003 0 48323 my($self, $key, $value, $length) = @_;
378 20003 50 33     128714 (defined($$key) && defined($$value))
379             || die("need more localized data than $$key and $$value");
380            
381             # max_size always defined when max_bytes is
382 20003 100       69190 if (($self->{max_size})) {
383 15003 50       33345 $length = defined $length ? $length : &_get_data_length($key, $value)
384             } else {
385 5000         7918 $length = 0;
386             }
387            
388             # ORDER SPECIFIC, see top for NODE ARRAY STRUCT
389 20003         104165 my $node = [ $$key, $$value, $length ];
390             }
391              
392             sub _get_data_length {
393 15003     15003   21495 my($key, $value) = @_;
394 15003         21462 my $length = 0;
395 15003         16409 my %refs;
396              
397 15003         50370 my @data = ($$key, $$value);
398 15003         61390 while(my $elem = shift @data) {
399 30006 100       121130 next if $refs{$elem};
400 15006         35707 $refs{$elem} = 1;
401 15006 100 66     48715 if(ref $elem && ref($elem) =~ /^(SCALAR|HASH|ARRAY)$/) {
402 2         9 my $type = $1;
403 2         4 $length += $REF_SIZE; # guess, 16 bytes per ref, probably more
404 2 50       21 if (($type eq 'SCALAR')) {
    100          
    50          
405 0         0 $length += length($$elem);
406             } elsif (($type eq 'HASH')) {
407 1         9 while (my($k,$v) = each %$elem) {
408 1         3 for my $kv($k,$v) {
409 2 50       5 if ((ref $kv)) {
410 0         0 push(@data, $kv);
411             } else {
412 2         11 $length += length($kv);
413             }
414             }
415             }
416             } elsif (($type eq 'ARRAY')) {
417 1         4 for my $val (@$elem){
418 1 50       4 if ((ref $val)) {
419 0         0 push(@data, $val);
420             } else {
421 1         8 $length += length($val);
422             }
423             }
424             }
425             } else {
426 15004         55106 $length += length($elem);
427             }
428             }
429              
430 15003         44758 $length;
431             }
432              
433             sub insert {
434 20003     20003 0 39693 my($self, $new_node) = @_;
435            
436 20003         58192 $new_node->[$AFTER] = 0;
437 20003         40901 $new_node->[$BEFORE] = $self->{tail};
438 20003 50       59744 $self->print("insert() [$new_node->[$KEY], $new_node->[$VALUE]]") if ($self->{dbg} > 1);
439            
440 20003         67249 $self->{nodes}{$new_node->[$KEY]} = $new_node;
441              
442             # current sizes
443 20003         49017 $self->{count}++;
444 20003         45255 $self->{bytes} += $new_node->[$BYTES] + $STRUCT_SIZE;
445              
446 20003 100       56454 if($self->{tail}) {
447 20000         44323 $self->{tail}[$AFTER] = $new_node;
448             } else {
449 3         7 $self->{head} = $new_node;
450             }
451 20003         49979 $self->{tail} = $new_node;
452              
453             ## if we are too big now, remove head
454 20003   66     219246 while(($self->{max_count} && ($self->{count} > $self->{max_count})) ||
      66        
      66        
455             ($self->{max_bytes} && ($self->{bytes} > $self->{max_bytes})))
456             {
457 5001 50       17123 if($self->{dbg} > 1) {
458 0         0 $self->print("current/max: ".
459             "bytes ($self->{bytes}/$self->{max_bytes}) ".
460             "count ($self->{count}/$self->{max_count}) "
461             );
462             }
463 5001         17560 my $old_node = $self->delete($self->{head}[$KEY]);
464 5001 50       90309 if ($self->{subclass}) {
465 0 0       0 if($old_node->[$DIRTY]) {
466 0 0       0 $self->print("dirty write() [$old_node->[$KEY], $old_node->[$VALUE]]")
467             if ($self->{dbg} > 1);
468 0         0 $self->write($old_node->[$KEY], $old_node->[$VALUE]);
469             }
470             }
471             # if($self->{dbg} > 1) {
472             # $self->print("after delete - bytes $self->{bytes}; count $self->{count}");
473             # }
474             }
475            
476 20003         34686 1;
477             }
478              
479             sub delete {
480 20003     20003 0 58353 my($self, $key) = @_;
481 20003   100     91587 my $node = $self->{nodes}{$key} || return;
482             # return unless $node;
483              
484 15003 50       37384 $self->print("delete() [$key, $node->[$VALUE]]") if ($self->{dbg} > 1);
485              
486 15003         35379 my $before = $node->[$BEFORE];
487 15003         22843 my $after = $node->[$AFTER];
488              
489             # my($before, $after) = $node->{before,after};
490 15003 100       48587 if($before) {
491 2         6 ($before->[$AFTER] = $after);
492             } else {
493 15001         33620 $self->{head} = $after;
494             }
495              
496 15003 100       31416 if($after) {
497 15000         42415 ($after->[$BEFORE] = $before);
498             } else {
499 3         9 $self->{tail} = $before;
500             }
501              
502 15003         39396 delete $self->{nodes}{$key};
503 15003         43497 $self->{bytes} -= ($node->[$BYTES] + $STRUCT_SIZE);
504 15003         23305 $self->{count}--;
505            
506 15003         45209 $node;
507             }
508              
509             sub flush {
510 0     0 0   my $self = shift;
511              
512 0 0         $self->print("FLUSH CACHE") if ($self->{dbg} > 1);
513              
514 0           my $node = $self->{head};
515 0           my $flush_count = 0;
516 0           while($node) {
517 0 0         if($node->[$DIRTY]) {
518 0 0         $self->print("flush dirty write() [$node->[$KEY], $node->[$VALUE]]")
519             if ($self->{dbg} > 1);
520 0           $self->write($node->[$KEY], $node->[$VALUE]);
521 0           $node->[$DIRTY] = 0;
522 0           $flush_count++;
523             }
524 0           $node = $node->[$AFTER];
525             }
526              
527 0           $flush_count;
528             }
529              
530             sub print {
531 0     0 0   my($self, $msg) = @_;
532 0           print "$self: $msg\n";
533             }
534              
535             sub pretty_self {
536 0     0 0   my($self) = @_;
537            
538 0           my(@prints);
539 0           for(sort keys %{$self}) {
  0            
540 0 0         next unless defined $self->{$_};
541 0           push(@prints, "$_=>$self->{$_}");
542             }
543              
544 0           "{ " . join(", ", @prints) . " }";
545             }
546              
547             sub pretty_chains {
548 0     0 0   my($self) = @_;
549 0           my($str);
550 0           my $k = $self->FIRSTKEY();
551              
552 0           $str .= "[head]->";
553 0           my($curr_node) = $self->{head};
554 0           while($curr_node) {
555 0           $str .= "[$curr_node->[$KEY],$curr_node->[$VALUE]]->";
556 0           $curr_node = $curr_node->[$AFTER];
557             }
558 0           $str .= "[tail]->";
559              
560 0           $curr_node = $self->{tail};
561 0           while($curr_node) {
562 0           $str .= "[$curr_node->[$KEY],$curr_node->[$VALUE]]->";
563 0           $curr_node = $curr_node->[$BEFORE];
564             }
565 0           $str .= "[head]";
566              
567 0           $str;
568             }
569              
570             1;
571              
572             __END__