File Coverage

blib/lib/Tie/File.pm
Criterion Covered Total %
statement 850 1000 85.0
branch 350 458 76.4
condition 56 82 68.2
subroutine 102 112 91.0
pod 7 8 87.5
total 1365 1660 82.2


line stmt bran cond sub pod time code
1             package Tie::File;
2              
3             require 5.005;
4              
5 38     38   205967 use strict;
  38         296  
  38         1075  
6 38     38   200 use warnings;
  38         67  
  38         1268  
7              
8 38     38   236 use Carp ':DEFAULT', 'confess';
  38         71  
  38         6284  
9 38     38   22200 use POSIX 'SEEK_SET';
  38         218722  
  38         203  
10 38     38   40008 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
  38         72  
  38         294716  
11             sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
12              
13              
14             our $VERSION = "1.07";
15             my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
16             my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
17             my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
18              
19             my %good_opt = map {$_ => 1, "-$_" => 1}
20             qw(memory dw_size mode recsep discipline
21             autodefer autochomp autodefer_threshhold concurrent);
22              
23             our $DIAGNOSTIC = 0;
24             our @OFF; # used as a temporary alias in some subroutines.
25             our @H; # used as a temporary alias in _annotate_ad_history
26              
27             sub TIEARRAY {
28 2987 50   2987   1828561 if (@_ % 2 != 0) {
29 0         0 croak "usage: tie \@array, $_[0], filename, [option => value]...";
30             }
31 2987         7592 my ($pack, $file, %opts) = @_;
32              
33             # transform '-foo' keys into 'foo' keys
34 2987         9941 for my $key (keys %opts) {
35 57 50       180 unless ($good_opt{$key}) {
36 0         0 croak("$pack: Unrecognized option '$key'\n");
37             }
38 57         108 my $okey = $key;
39 57 50       203 if ($key =~ s/^-+//) {
40 0         0 $opts{$key} = delete $opts{$okey};
41             }
42             }
43              
44 2987 50       7555 if ($opts{concurrent}) {
45 0         0 croak("$pack: concurrent access not supported yet\n");
46             }
47              
48 2987 100       6539 unless (defined $opts{memory}) {
49             # default is the larger of the default cache size and the
50             # deferred-write buffer size (if specified)
51 2983         5771 $opts{memory} = $DEFAULT_MEMORY_SIZE;
52             $opts{memory} = $opts{dw_size}
53 2983 50 33     6857 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
54             # Dora Winifred Read
55             }
56 2987 100       6968 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
57 2987 50       5897 if ($opts{dw_size} > $opts{memory}) {
58 0         0 croak("$pack: dw_size may not be larger than total memory allocation\n");
59             }
60             # are we in deferred-write mode?
61 2987 50       6034 $opts{defer} = 0 unless defined $opts{defer};
62 2987         5492 $opts{deferred} = {}; # no records are presently deferred
63 2987         4717 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
64 2987         4221 $opts{deferred_max} = -1; # empty
65              
66             # What's a good way to arrange that this class can be overridden?
67 2987         15399 $opts{cache} = Tie::File::Cache->new($opts{memory});
68              
69             # autodeferment is enabled by default
70 2987 100       10815 $opts{autodefer} = 1 unless defined $opts{autodefer};
71 2987         5073 $opts{autodeferring} = 0; # but is not initially active
72 2987         4713 $opts{ad_history} = [];
73             $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
74 2987 50       6398 unless defined $opts{autodefer_threshhold};
75             $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
76 2987 50       6334 unless defined $opts{autodefer_filelen_threshhold};
77              
78 2987         4984 $opts{offsets} = [0];
79 2987         5238 $opts{filename} = $file;
80 2987 100       5403 unless (defined $opts{recsep}) {
81 2974         5694 $opts{recsep} = _default_recsep();
82             }
83 2987         8290 $opts{recseplen} = length($opts{recsep});
84 2987 50       6502 if ($opts{recseplen} == 0) {
85 0         0 croak "Empty record separator not supported by $pack";
86             }
87              
88 2987 100       5747 $opts{autochomp} = 1 unless defined $opts{autochomp};
89              
90 2987 100       6225 $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
91 2987         6605 $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
92 2987         4479 $opts{sawlastrec} = undef;
93              
94 2987         3708 my $fh;
95              
96 2987 100       15980 if (UNIVERSAL::isa($file, 'GLOB')) {
    50          
97             # We use 1 here on the theory that some systems
98             # may not indicate failure if we use 0.
99             # MSWin32 does not indicate failure with 0, but I don't know if
100             # it will indicate failure with 1 or not.
101 2 100       24 unless (seek $file, 1, SEEK_SET) {
102 1         210 croak "$pack: your filehandle does not appear to be seekable";
103             }
104 1         7 seek $file, 0, SEEK_SET; # put it back
105 1         3 $fh = $file; # setting binmode is the user's problem
106             } elsif (ref $file) {
107 0         0 croak "usage: tie \@array, $pack, filename, [option => value]...";
108             } else {
109             # $fh = \do { local *FH }; # XXX this is buggy
110 2985 50       6552 if ($] < 5.006) {
111             # perl 5.005 and earlier don't autovivify filehandles
112 0         0 require Symbol;
113 0         0 $fh = Symbol::gensym();
114             }
115 2985 50       112399 sysopen $fh, $file, $opts{mode}, 0666 or return;
116 2985         14057 binmode $fh;
117 2985         7690 ++$opts{ourfh};
118             }
119 2986         4147 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  2986         10437  
  2986         7513  
  2986         8174  
120 2986 50 33     7716 if (defined $opts{discipline} && $] >= 5.006) {
121             # This avoids a compile-time warning under 5.005
122 0         0 eval 'binmode($fh, $opts{discipline})';
123 0 0       0 croak $@ if $@ =~ /unknown discipline/i;
124 0 0       0 die if $@;
125             }
126 2986         4774 $opts{fh} = $fh;
127              
128 2986         18980 bless \%opts => $pack;
129             }
130              
131             sub FETCH {
132 1047     1047   11464 my ($self, $n) = @_;
133 1047         1275 my $rec;
134              
135             # check the defer buffer
136 1047 100       2070 $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n};
137 1047 100       2580 $rec = $self->_fetch($n) unless defined $rec;
138              
139             # inlined _chomp1
140             substr($rec, - $self->{recseplen}) = ""
141 1047 100 100     3941 if defined $rec && $self->{autochomp};
142 1047         3887 $rec;
143             }
144              
145             # Chomp many records in-place; return nothing useful
146             sub _chomp {
147 145     145   224 my $self = shift;
148 145 100       317 return unless $self->{autochomp};
149 48 50       101 if ($self->{autochomp}) {
150 48         92 for (@_) {
151 68 50       121 next unless defined;
152 68         128 substr($_, - $self->{recseplen}) = "";
153             }
154             }
155             }
156              
157             # Chomp one record in-place; return modified record
158             sub _chomp1 {
159 246     246   453 my ($self, $rec) = @_;
160 246 100       641 return $rec unless $self->{autochomp};
161 221 100       601 return unless defined $rec;
162 137         283 substr($rec, - $self->{recseplen}) = "";
163 137         443 $rec;
164             }
165              
166             sub _fetch {
167 1851     1851   2918 my ($self, $n) = @_;
168              
169             # check the record cache
170 1851         2240 { my $cached = $self->{cache}->lookup($n);
  1851         3460  
171 1851 100       4229 return $cached if defined $cached;
172             }
173              
174 1207 100       1581 if ($#{$self->{offsets}} < $n) {
  1207         2700  
175 23 100       79 return if $self->{eof}; # request for record beyond end of file
176 11         43 my $o = $self->_fill_offsets_to($n);
177             # If it's still undefined, there is no such record, so return 'undef'
178 11 100       38 return unless defined $o;
179             }
180              
181 1189         1856 my $fh = $self->{FH};
182 1189         2833 $self->_seek($n); # we can do this now that offsets is populated
183 1189         3522 my $rec = $self->_read_record;
184              
185             # If we happen to have just read the first record, check to see if
186             # the length of the record matches what 'tell' says. If not, Tie::File
187             # won't work, and should drop dead.
188             #
189             # if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
190             # if (defined $self->{discipline}) {
191             # croak "I/O discipline $self->{discipline} not supported";
192             # } else {
193             # croak "File encoding not supported";
194             # }
195             # }
196              
197 1189 100 66     5999 $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
198 1189         2443 $rec;
199             }
200              
201             sub STORE {
202 371     371   4211 my ($self, $n, $rec) = @_;
203 371 50       750 die "STORE called from _check_integrity!" if $DIAGNOSTIC;
204              
205 371         910 $self->_fixrecs($rec);
206              
207 371 100       842 if ($self->{autodefer}) {
208 231         498 $self->_annotate_ad_history($n);
209             }
210              
211 371 100       684 return $self->_store_deferred($n, $rec) if $self->_is_deferring;
212              
213              
214             # We need this to decide whether the new record will fit
215             # It incidentally populates the offsets table
216             # Note we have to do this before we alter the cache
217             # 20020324 Wait, but this DOES alter the cache. TODO BUG?
218 300         671 my $oldrec = $self->_fetch($n);
219              
220 300 100       648 if (not defined $oldrec) {
221             # We're storing a record beyond the end of the file
222 40         154 $self->_extend_file_to($n+1);
223 40         94 $oldrec = $self->{recsep};
224             }
225             # return if $oldrec eq $rec; # don't bother
226 300         504 my $len_diff = length($rec) - length($oldrec);
227              
228             # length($oldrec) here is not consistent with text mode TODO XXX BUG
229 300         911 $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
230 300         1222 $self->_oadjust([$n, 1, $rec]);
231 300         823 $self->{cache}->update($n, $rec);
232             }
233              
234             sub _store_deferred {
235 73     73   161 my ($self, $n, $rec) = @_;
236 73         173 $self->{cache}->remove($n);
237 73         115 my $old_deferred = $self->{deferred}{$n};
238              
239 73 100 100     254 if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
240 68         111 $self->{deferred_max} = $n;
241             }
242 73         139 $self->{deferred}{$n} = $rec;
243              
244 73         97 my $len_diff = length($rec);
245 73 100       123 $len_diff -= length($old_deferred) if defined $old_deferred;
246 73         115 $self->{deferred_s} += $len_diff;
247 73         209 $self->{cache}->adj_limit(-$len_diff);
248 73 100       199 if ($self->{deferred_s} > $self->{dw_size}) {
    100          
249 1         20 $self->_flush;
250             } elsif ($self->_cache_too_full) {
251 2         5 $self->_cache_flush;
252             }
253             }
254              
255             # Remove a single record from the deferred-write buffer without writing it
256             # The record need not be present
257             sub _delete_deferred {
258 6     6   19 my ($self, $n) = @_;
259 6         15 my $rec = delete $self->{deferred}{$n};
260 6 100       16 return unless defined $rec;
261              
262 4 50 33     26 if (defined $self->{deferred_max}
263             && $n == $self->{deferred_max}) {
264 4         8 undef $self->{deferred_max};
265             }
266              
267 4         16 $self->{deferred_s} -= length $rec;
268 4         12 $self->{cache}->adj_limit(length $rec);
269             }
270              
271             sub FETCHSIZE {
272 592     592   2824 my $self = shift;
273 592 100       1535 my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets;
  568         1025  
274              
275 592         1188 my $top_deferred = $self->_defer_max;
276 592 100 66     2187 $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
277 592         1455 $n;
278             }
279              
280             sub STORESIZE {
281 17     17   404 my ($self, $len) = @_;
282              
283 17 100       47 if ($self->{autodefer}) {
284 16         39 $self->_annotate_ad_history('STORESIZE');
285             }
286              
287 17         37 my $olen = $self->FETCHSIZE;
288 17 50       49 return if $len == $olen; # Woo-hoo!
289              
290             # file gets longer
291 17 100       38 if ($len > $olen) {
292 6 100       12 if ($self->_is_deferring) {
293 1         4 for ($olen .. $len-1) {
294 2         18 $self->_store_deferred($_, $self->{recsep});
295             }
296             } else {
297 5         15 $self->_extend_file_to($len);
298             }
299 6         22 return;
300             }
301              
302             # file gets shorter
303 11 100       42 if ($self->_is_deferring) {
304             # TODO maybe replace this with map-plus-assignment?
305 2         5 for (grep $_ >= $len, keys %{$self->{deferred}}) {
  2         13  
306 2         6 $self->_delete_deferred($_);
307             }
308 2         6 $self->{deferred_max} = $len-1;
309             }
310              
311 11         37 $self->_seek($len);
312 11         55 $self->_chop_file;
313 11         73 $#{$self->{offsets}} = $len;
  11         45  
314             # $self->{offsets}[0] = 0; # in case we just chopped this
315              
316 11         42 $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys);
317             }
318              
319             ### OPTIMIZE ME
320             ### It should not be necessary to do FETCHSIZE
321             ### Just seek to the end of the file.
322             sub PUSH {
323 6     6   332 my $self = shift;
324 6         21 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
325              
326             # No need to return:
327             # $self->FETCHSIZE; # because av.c takes care of this for me
328             }
329              
330             sub POP {
331 4     4   263 my $self = shift;
332 4         10 my $size = $self->FETCHSIZE;
333 4 100       24 return if $size == 0;
334             # print STDERR "# POPPITY POP POP POP\n";
335 3         20 scalar $self->SPLICE($size-1, 1);
336             }
337              
338             sub SHIFT {
339 4     4   271 my $self = shift;
340 4         12 scalar $self->SPLICE(0, 1);
341             }
342              
343             sub UNSHIFT {
344 4     4   332 my $self = shift;
345 4         15 $self->SPLICE(0, 0, @_);
346             # $self->FETCHSIZE; # av.c takes care of this for me
347             }
348              
349             sub CLEAR {
350 31     31   1380 my $self = shift;
351              
352 31 100       119 if ($self->{autodefer}) {
353 21         62 $self->_annotate_ad_history('CLEAR');
354             }
355              
356 31         118 $self->_seekb(0);
357 31         122 $self->_chop_file;
358 31         180 $self->{cache}->set_limit($self->{memory});
359 31         105 $self->{cache}->empty;
360 31         51 @{$self->{offsets}} = (0);
  31         97  
361 31         54 %{$self->{deferred}}= ();
  31         76  
362 31         58 $self->{deferred_s} = 0;
363 31         129 $self->{deferred_max} = -1;
364             }
365              
366             sub EXTEND {
367 28     28   90 my ($self, $n) = @_;
368              
369             # No need to pre-extend anything in this case
370 28 100       77 return if $self->_is_deferring;
371              
372 26         115 $self->_fill_offsets_to($n);
373 26         73 $self->_extend_file_to($n);
374             }
375              
376             sub DELETE {
377 9     9   469 my ($self, $n) = @_;
378              
379 9 100       23 if ($self->{autodefer}) {
380 4         9 $self->_annotate_ad_history('DELETE');
381             }
382              
383 9         19 my $lastrec = $self->FETCHSIZE-1;
384 9         23 my $rec = $self->FETCH($n);
385 9 100       21 $self->_delete_deferred($n) if $self->_is_deferring;
386 9 100       31 if ($n == $lastrec) {
    100          
387 4         24 $self->_seek($n);
388 4         17 $self->_chop_file;
389 4         23 $#{$self->{offsets}}--;
  4         19  
390 4         16 $self->{cache}->remove($n);
391             # perhaps in this case I should also remove trailing null records?
392             # 20020316
393             # Note that delete @a[-3..-1] deletes the records in the wrong order,
394             # so we only chop the very last one out of the file. We could repair this
395             # by tracking deleted records inside the object.
396             } elsif ($n < $lastrec) {
397 4         9 $self->STORE($n, "");
398             }
399 9         168 $rec;
400             }
401              
402             sub EXISTS {
403 11     11   372 my ($self, $n) = @_;
404 11 100       29 return 1 if exists $self->{deferred}{$n};
405 10         21 $n < $self->FETCHSIZE;
406             }
407              
408             sub SPLICE {
409 393     393   11729 my $self = shift;
410              
411 393 100       867 if ($self->{autodefer}) {
412 339         742 $self->_annotate_ad_history('SPLICE');
413             }
414              
415 393 100       758 $self->_flush if $self->_is_deferring; # move this up?
416 393 100       813 if (wantarray) {
417 145         330 $self->_chomp(my @a = $self->_splice(@_));
418 145         701 @a;
419             } else {
420 248         586 $self->_chomp1(scalar $self->_splice(@_));
421             }
422             }
423              
424             sub DESTROY {
425 2986     2986   30982 my $self = shift;
426 2986 100       7176 $self->flush if $self->_is_deferring;
427 2986 50       11574 $self->{cache}->delink if defined $self->{cache}; # break circular link
428 2986 100 66     10980 if ($self->{fh} and $self->{ourfh}) {
429 2985         6310 delete $self->{ourfh};
430 2985         59238 close delete $self->{fh};
431             }
432             }
433              
434             sub _splice {
435 393     393   961 my ($self, $pos, $nrecs, @data) = @_;
436 393         510 my @result;
437              
438 393 100       744 $pos = 0 unless defined $pos;
439              
440             # Deal with negative and other out-of-range positions
441             # Also set default for $nrecs
442             {
443 393         484 my $oldsize = $self->FETCHSIZE;
  393         741  
444 393 100       736 $nrecs = $oldsize unless defined $nrecs;
445 393         555 my $oldpos = $pos;
446              
447 393 100       718 if ($pos < 0) {
448 73         97 $pos += $oldsize;
449 73 100       160 if ($pos < 0) {
450 2         444 croak "Modification of non-creatable array value attempted, " .
451             "subscript $oldpos";
452             }
453             }
454              
455 391 100       701 if ($pos > $oldsize) {
456 14 100       47 return unless @data;
457 8         18 $pos = $oldsize; # This is what perl does for normal arrays
458             }
459              
460             # The manual is very unclear here
461 385 100       744 if ($nrecs < 0) {
462 10         16 $nrecs = $oldsize - $pos + $nrecs;
463 10 100       21 $nrecs = 0 if $nrecs < 0;
464             }
465              
466             # nrecs is too big---it really means "until the end"
467             # 20030507
468 385 100       759 if ($nrecs + $pos > $oldsize) {
469 33         68 $nrecs = $oldsize - $pos;
470             }
471             }
472              
473 385         934 $self->_fixrecs(@data);
474 385         835 my $data = join '', @data;
475 385         529 my $datalen = length $data;
476 385         518 my $oldlen = 0;
477              
478             # compute length of data being removed
479 385         851 for ($pos .. $pos+$nrecs-1) {
480 512 50       1008 last unless defined $self->_fill_offsets_to($_);
481 512         992 my $rec = $self->_fetch($_);
482 512 50       1013 last unless defined $rec;
483 512         881 push @result, $rec;
484              
485             # Why don't we just use length($rec) here?
486             # Because that record might have come from the cache. _splice
487             # might have been called to flush out the deferred-write records,
488             # and in this case length($rec) is the length of the record to be
489             # *written*, not the length of the actual record in the file. But
490             # the offsets are still true. 20020322
491             $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
492 512 50       1561 if defined $self->{offsets}[$_+1];
493             }
494 385         1058 $self->_fill_offsets_to($pos+$nrecs);
495              
496             # Modify the file
497 385         1020 $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
498             # Adjust the offsets table
499 385         1817 $self->_oadjust([$pos, $nrecs, @data]);
500              
501             { # Take this read cache stuff out into a separate function
502             # You made a half-attempt to put it into _oadjust.
503             # Finish something like that up eventually.
504             # STORE also needs to do something similarish
505              
506             # update the read cache, part 1
507             # modified records
508 385         773 for ($pos .. $pos+$nrecs-1) {
  385         728  
509 512         819 my $new = $data[$_-$pos];
510 512 100       852 if (defined $new) {
511 194         395 $self->{cache}->update($_, $new);
512             } else {
513 318         617 $self->{cache}->remove($_);
514             }
515             }
516            
517             # update the read cache, part 2
518             # moved records - records past the site of the change
519             # need to be renumbered
520             # Maybe merge this with the previous block?
521             {
522 385         521 my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
  385         817  
523 385         874 my @newkeys = map $_-$nrecs+@data, @oldkeys;
524 385         868 $self->{cache}->rekey(\@oldkeys, \@newkeys);
525             }
526              
527             # Now there might be too much data in the cache, if we spliced out
528             # some short records and spliced in some long ones. If so, flush
529             # the cache.
530 385         925 $self->_cache_flush;
531             }
532              
533             # Yes, the return value of 'splice' *is* actually this complicated
534 385 100       1833 wantarray ? @result : @result ? $result[-1] : undef;
    100          
535             }
536              
537              
538             # write data into the file
539             # $data is the data to be written.
540             # it should be written at position $pos, and should overwrite
541             # exactly $len of the following bytes.
542             # Note that if length($data) > $len, the subsequent bytes will have to
543             # be moved up, and if length($data) < $len, they will have to
544             # be moved down
545             sub _twrite {
546 179     179   1057 my ($self, $data, $pos, $len) = @_;
547              
548 179 50       347 unless (defined $pos) {
549 0         0 die "\$pos was undefined in _twrite";
550             }
551              
552 179         309 my $len_diff = length($data) - $len;
553              
554 179 100       332 if ($len_diff == 0) { # Woo-hoo!
555 30         50 my $fh = $self->{fh};
556 30         79 $self->_seekb($pos);
557 30         135 $self->_write_record($data);
558 30         99 return; # well, that was easy.
559             }
560              
561             # the two records are of different lengths
562             # our strategy here: rewrite the tail of the file,
563             # reading ahead one buffer at a time
564             # $bufsize is required to be at least as large as the data we're overwriting
565 149         258 my $bufsize = _bufsize($len_diff);
566 149         278 my ($writepos, $readpos) = ($pos, $pos+$len);
567 149         200 my $next_block;
568             my $more_data;
569              
570             # Seems like there ought to be a way to avoid the repeated code
571             # and the special case here. The read(1) is also a little weird.
572             # Think about this.
573 149         175 do {
574 376         949 $self->_seekb($readpos);
575 376         5379 my $br = read $self->{fh}, $next_block, $bufsize;
576 376         2584 $more_data = read $self->{fh}, my($dummy), 1;
577 376         1087 $self->_seekb($writepos);
578 376         1187 $self->_write_record($data);
579 376         795 $readpos += $br;
580 376         509 $writepos += length $data;
581 376         1269 $data = $next_block;
582             } while $more_data;
583 149         393 $self->_seekb($writepos);
584 149         419 $self->_write_record($next_block);
585              
586             # There might be leftover data at the end of the file
587 149 100       558 $self->_chop_file if $len_diff < 0;
588             }
589              
590             # _iwrite(D, S, E)
591             # Insert text D at position S.
592             # Let C = E-S-|D|. If C < 0; die.
593             # Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).
594             # Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.
595             #
596             # In a later version, don't read the entire intervening area into
597             # memory at once; do the copying block by block.
598             sub _iwrite {
599 101     101   520 my $self = shift;
600 101         261 my ($D, $s, $e) = @_;
601 101         169 my $d = length $D;
602 101         168 my $c = $e-$s-$d;
603 101         290 local *FH = $self->{fh};
604 101 50       227 confess "Not enough space to insert $d bytes between $s and $e"
605             if $c < 0;
606 101 50       165 confess "[$s,$e) is an invalid insertion range" if $e < $s;
607              
608 101         254 $self->_seekb($s);
609 101         1901 read FH, my $buf, $e-$s;
610              
611 101         856 $D .= substr($buf, 0, $c, "");
612              
613 101         308 $self->_seekb($s);
614 101         356 $self->_write_record($D);
615              
616 101         695 return $buf;
617             }
618              
619             # Like _twrite, but the data-pos-len triple may be repeated; you may
620             # write several chunks. All the writing will be done in
621             # one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
622             sub _mtwrite {
623 2959     2959   12116 my $self = shift;
624 2959         4194 my $unwritten = "";
625 2959         4003 my $delta = 0;
626              
627 2959 50       7124 @_ % 3 == 0
628             or die "Arguments to _mtwrite did not come in groups of three";
629              
630 2959         7466 while (@_) {
631 5143         14394 my ($data, $pos, $len) = splice @_, 0, 3;
632 5143         8101 my $end = $pos + $len; # The OLD end of the segment to be replaced
633 5143         20968 $data = $unwritten . $data;
634 5143         8744 $delta -= length($unwritten);
635 5143         7106 $unwritten = "";
636 5143         6532 $pos += $delta; # This is where the data goes now
637 5143         6445 my $dlen = length $data;
638 5143         12341 $self->_seekb($pos);
639 5143 100       12514 if ($len >= $dlen) { # the data will fit
640 4283         10407 $self->_write_record($data);
641 4283         9285 $delta += ($dlen - $len); # everything following moves down by this much
642 4283         6805 $data = ""; # All the data in the buffer has been written
643             } else { # won't fit
644 860         2497 my $writable = substr($data, 0, $len - $delta, "");
645 860         2204 $self->_write_record($writable);
646 860         2106 $delta += ($dlen - $len); # everything following moves down by this much
647             }
648              
649             # At this point we've written some but maybe not all of the data.
650             # There might be a gap to close up, or $data might still contain a
651             # bunch of unwritten data that didn't fit.
652 5143         6955 my $ndlen = length $data;
653 5143 100       10799 if ($delta == 0) {
    100          
654 926         1847 $self->_write_record($data);
655             } elsif ($delta < 0) {
656             # upcopy (close up gap)
657 3370 100       6019 if (@_) {
658 1263         3314 $self->_upcopy($end, $end + $delta, $_[1] - $end);
659             } else {
660 2107         5184 $self->_upcopy($end, $end + $delta);
661             }
662             } else {
663             # downcopy (insert data that didn't fit; replace this data in memory
664             # with _later_ data that doesn't fit)
665 847 100       1551 if (@_) {
666 346         920 $unwritten = $self->_downcopy($data, $end, $_[1] - $end);
667             } else {
668             # Make the file longer to accommodate the last segment that doesn't
669 501         1153 $unwritten = $self->_downcopy($data, $end);
670             }
671             }
672             }
673             }
674              
675             # Copy block of data of length $len from position $spos to position $dpos
676             # $dpos must be <= $spos
677             #
678             # If $len is undefined, go all the way to the end of the file
679             # and then truncate it ($spos - $dpos bytes will be removed)
680             sub _upcopy {
681 3424     3424   6528 my $blocksize = 8192;
682 3424         6647 my ($self, $spos, $dpos, $len) = @_;
683 3424 50       7826 if ($dpos > $spos) {
    100          
684 0         0 die "source ($spos) was upstream of destination ($dpos) in _upcopy";
685             } elsif ($dpos == $spos) {
686 16         46 return;
687             }
688              
689 3408   100     11364 while (! defined ($len) || $len > 0) {
690 7965 100       14906 my $readsize = ! defined($len) ? $blocksize
    100          
691             : $len > $blocksize ? $blocksize
692             : $len;
693            
694 7965         12774 my $fh = $self->{fh};
695 7965         18066 $self->_seekb($spos);
696 7965         86770 my $bytes_read = read $fh, my($data), $readsize;
697 7965         24443 $self->_seekb($dpos);
698 7965 100       21961 if ($data eq "") {
699 2121         5927 $self->_chop_file;
700 2121         23606 last;
701             }
702 5844         13879 $self->_write_record($data);
703 5844         12417 $spos += $bytes_read;
704 5844         6999 $dpos += $bytes_read;
705 5844 100       22226 $len -= $bytes_read if defined $len;
706             }
707             }
708              
709             # Write $data into a block of length $len at position $pos,
710             # moving everything in the block forwards to make room.
711             # Instead of writing the last length($data) bytes from the block
712             # (because there isn't room for them any longer) return them.
713             #
714             # Undefined $len means 'until the end of the file'
715             sub _downcopy {
716 1205     1205   11340 my $blocksize = 8192;
717 1205         2825 my ($self, $data, $pos, $len) = @_;
718 1205         1927 my $fh = $self->{fh};
719              
720 1205   100     3909 while (! defined $len || $len > 0) {
721 2348 100       4786 my $readsize = ! defined($len) ? $blocksize
    100          
722             : $len > $blocksize? $blocksize : $len;
723 2348         5564 $self->_seekb($pos);
724 2348         29522 read $fh, my($old), $readsize;
725 2348         6250 my $last_read_was_short = length($old) < $readsize;
726 2348         8287 $data .= $old;
727 2348         3104 my $writable;
728 2348 100       4208 if ($last_read_was_short) {
729             # If last read was short, then $data now contains the entire rest
730             # of the file, so there's no need to write only one block of it
731 680         1392 $writable = $data;
732 680         945 $data = "";
733             } else {
734 1668         6909 $writable = substr($data, 0, $readsize, "");
735             }
736 2348 100       5103 last if $writable eq "";
737 2343         6396 $self->_seekb($pos);
738 2343         7144 $self->_write_record($writable);
739 2343 100 66     8911 last if $last_read_was_short && $data eq "";
740 1668 100       3006 $len -= $readsize if defined $len;
741 1668         5261 $pos += $readsize;
742             }
743 1205         6003 return $data;
744             }
745              
746             # Adjust the object data structures following an '_mtwrite'
747             # Arguments are
748             # [$pos, $nrecs, @length] items
749             # indicating that $nrecs records were removed at $recpos (a record offset)
750             # and replaced with records of length @length...
751             # Arguments guarantee that $recpos is strictly increasing.
752             # No return value
753             sub _oadjust {
754 708     708   1064 my $self = shift;
755 708         899 my $delta = 0;
756 708         923 my $delta_recs = 0;
757 708         896 my $prev_end = -1;
758              
759 708         1350 for (@_) {
760 710         1663 my ($pos, $nrecs, @data) = @$_;
761 710         993 $pos += $delta_recs;
762              
763             # Adjust the offsets of the records after the previous batch up
764             # to the first new one of this batch
765 710         1644 for my $i ($prev_end+2 .. $pos - 1) {
766 1910         2692 $self->{offsets}[$i] += $delta;
767             }
768              
769 710         1118 $prev_end = $pos + @data - 1; # last record moved on this pass
770              
771             # Remove the offsets for the removed records;
772             # replace with the offsets for the inserted records
773 710         1466 my @newoff = ($self->{offsets}[$pos] + $delta);
774 710         1399 for my $i (0 .. $#data) {
775 819         1147 my $newlen = length $data[$i];
776 819         1271 push @newoff, $newoff[$i] + $newlen;
777 819         1261 $delta += $newlen;
778             }
779              
780 710         1225 for my $i ($pos .. $pos+$nrecs-1) {
781 868 100       1160 last if $i+1 > $#{$self->{offsets}};
  868         1990  
782 858         1567 my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
783 858         1234 $delta -= $oldlen;
784             }
785              
786             # replace old offsets with new
787 710         900 splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
  710         1758  
788             # What if we just spliced out the end of the offsets table?
789             # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO
790              
791 710         1572 $delta_recs += @data - $nrecs; # net change in total number of records
792             }
793              
794             # The trailing records at the very end of the file
795 708 100       1371 if ($delta) {
796 567         752 for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
  567         1131  
797 1521         2226 $self->{offsets}[$i] += $delta;
798             }
799             }
800              
801             # If we scrubbed out all known offsets, regenerate the trivial table
802             # that knows that the file does indeed start at 0.
803 708 50       960 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
  708         1449  
804             # If the file got longer, the offsets table is no longer complete
805             # $self->{eof} = 0 if $delta_recs > 0;
806              
807             # Now there might be too much data in the cache, if we spliced out
808             # some short records and spliced in some long ones. If so, flush
809             # the cache.
810 708         1476 $self->_cache_flush;
811             }
812              
813             # If a record does not already end with the appropriate terminator
814             # string, append one.
815             sub _fixrecs {
816 756     756   1013 my $self = shift;
817 756         1423 for (@_) {
818 834 100       1474 $_ = "" unless defined $_;
819             $_ .= $self->{recsep}
820 834 100       2791 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
821             }
822             }
823              
824              
825             ################################################################
826             #
827             # Basic read, write, and seek
828             #
829              
830             # seek to the beginning of record #$n
831             # Assumes that the offsets table is already correctly populated
832             #
833             # Note that $n=-1 has a special meaning here: It means the start of
834             # the last known record; this may or may not be the very last record
835             # in the file, depending on whether the offsets table is fully populated.
836             #
837             sub _seek {
838 1378     1378   2186 my ($self, $n) = @_;
839 1378         2055 my $o = $self->{offsets}[$n];
840 1378 50       2392 defined($o)
841             or confess("logic error: undefined offset for record $n");
842 1378 50       16460 seek $self->{fh}, $o, SEEK_SET
843             or confess "Couldn't seek filehandle: $!"; # "Should never happen."
844             }
845              
846             # seek to byte $b in the file
847             sub _seekb {
848 26928     26928   41499 my ($self, $b) = @_;
849 26928 50       233223 seek $self->{fh}, $b, SEEK_SET
850             or die "Couldn't seek filehandle: $!"; # "Should never happen."
851             }
852              
853             # populate the offsets table up to the beginning of record $n
854             # return the offset of record $n
855             sub _fill_offsets_to {
856 960     960   1497 my ($self, $n) = @_;
857              
858 960 100       2495 return $self->{offsets}[$n] if $self->{eof};
859              
860 18         51 my $fh = $self->{fh};
861 18         42 local *OFF = $self->{offsets};
862 18         41 my $rec;
863              
864 18         178 until ($#OFF >= $n) {
865 69         238 $self->_seek(-1); # tricky -- see comment at _seek
866 69         203 $rec = $self->_read_record;
867 69 100       166 if (defined $rec) {
868 56         178 push @OFF, int(tell $fh); # Tels says that int() saves memory here
869             } else {
870 13         44 $self->{eof} = 1;
871 13         113 return; # It turns out there is no such record
872             }
873             }
874              
875             # we have now read all the records up to record n-1,
876             # so we can return the offset of record n
877 5         18 $OFF[$n];
878             }
879              
880             sub _fill_offsets {
881 24     24   81 my ($self) = @_;
882              
883 24         51 my $fh = $self->{fh};
884 24         66 local *OFF = $self->{offsets};
885              
886 24         86 $self->_seek(-1); # tricky -- see comment at _seek
887              
888             # Tels says that inlining read_record() would make this loop
889             # five times faster. 20030508
890 24         106 while ( defined $self->_read_record()) {
891             # int() saves us memory here
892 214         567 push @OFF, int(tell $fh);
893             }
894              
895 24         67 $self->{eof} = 1;
896 24         93 $#OFF;
897             }
898              
899             # assumes that $rec is already suitably terminated
900             sub _write_record {
901 15140     15140   31523 my ($self, $rec) = @_;
902 15140         22410 my $fh = $self->{fh};
903 15140         51305 local $\ = "";
904 15140 50       248767 print $fh $rec
905             or die "Couldn't write record: $!"; # "Should never happen."
906             # $self->{_written} += length($rec);
907             }
908              
909             sub _read_record {
910 1496     1496   2125 my $self = shift;
911 1496         1809 my $rec;
912 1496         1823 { local $/ = $self->{recsep};
  1496         5537  
913 1496         2247 my $fh = $self->{fh};
914 1496         14690 $rec = <$fh>;
915             }
916 1496 100       4078 return unless defined $rec;
917 1427 100       3727 if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
918             # improperly terminated final record --- quietly fix it.
919             # my $ac = substr($rec, -$self->{recseplen});
920             # $ac =~ s/\n/\\n/g;
921 7         14 $self->{sawlastrec} = 1;
922 7 100       21 unless ($self->{rdonly}) {
923 4         13 local $\ = "";
924 4         7 my $fh = $self->{fh};
925 4         75 print $fh $self->{recsep};
926             }
927 7         19 $rec .= $self->{recsep};
928             }
929             # $self->{_read} += length($rec) if defined $rec;
930 1427         2968 $rec;
931             }
932              
933             sub _rw_stats {
934 0     0   0 my $self = shift;
935 0         0 @{$self}{'_read', '_written'};
  0         0  
936             }
937              
938             ################################################################
939             #
940             # Read cache management
941              
942             sub _cache_flush {
943 1095     1095   1804 my ($self) = @_;
944 1095         2716 $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
945             }
946              
947             sub _cache_too_full {
948 72     72   95 my $self = shift;
949 72         126 $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
950             }
951              
952             ################################################################
953             #
954             # File custodial services
955             #
956              
957              
958             # We have read to the end of the file and have the offsets table
959             # entirely populated. Now we need to write a new record beyond
960             # the end of the file. We prepare for this by writing
961             # empty records into the file up to the position we want
962             #
963             # assumes that the offsets table already contains the offset of record $n,
964             # if it exists, and extends to the end of the file if not.
965             sub _extend_file_to {
966 81     81   169 my ($self, $n) = @_;
967 81         239 $self->_seek(-1); # position after the end of the last record
968 81         211 my $pos = $self->{offsets}[-1];
969              
970             # the offsets table has one entry more than the total number of records
971 81         162 my $extras = $n - $#{$self->{offsets}};
  81         217  
972              
973             # Todo : just use $self->{recsep} x $extras here?
974 81         287 while ($extras-- > 0) {
975 228         637 $self->_write_record($self->{recsep});
976 228         488 push @{$self->{offsets}}, int(tell $self->{fh});
  228         1074  
977             }
978             }
979              
980             # Truncate the file at the current position
981             sub _chop_file {
982 2238     2238   3899 my $self = shift;
983 2238         1141213 truncate $self->{fh}, tell($self->{fh});
984             }
985              
986              
987             # compute the size of a buffer suitable for moving
988             # all the data in a file forward $n bytes
989             # ($n may be negative)
990             # The result should be at least $n.
991             sub _bufsize {
992 149     149   224 my $n = shift;
993 149 100       312 return 8192 if $n <= 0;
994 78         104 my $b = $n & ~8191;
995 78 100       151 $b += 8192 if $n & 8191;
996 78         158 $b;
997             }
998              
999             ################################################################
1000             #
1001             # Miscellaneous public methods
1002             #
1003              
1004             # Lock the file
1005             sub flock {
1006 2     2 1 24 my ($self, $op) = @_;
1007 2 50       6 unless (@_ <= 3) {
1008 0         0 my $pack = ref $self;
1009 0         0 croak "Usage: $pack\->flock([OPERATION])";
1010             }
1011 2         3 my $fh = $self->{fh};
1012 2 100       5 $op = LOCK_EX unless defined $op;
1013 2         19 my $locked = flock $fh, $op;
1014              
1015 2 100 66     16 if ($locked && ($op & (LOCK_EX | LOCK_SH))) {
1016             # If you're locking the file, then presumably it's because
1017             # there might have been a write access by another process.
1018             # In that case, the read cache contents and the offsets table
1019             # might be invalid, so discard them. 20030508
1020 1         3 $self->{offsets} = [0];
1021 1         4 $self->{cache}->empty;
1022             }
1023              
1024 2         4 $locked;
1025             }
1026              
1027             # Get/set autochomp option
1028             sub autochomp {
1029 4     4 1 42 my $self = shift;
1030 4 100       12 if (@_) {
1031 2         3 my $old = $self->{autochomp};
1032 2         3 $self->{autochomp} = shift;
1033 2         5 $old;
1034             } else {
1035 2         6 $self->{autochomp};
1036             }
1037             }
1038              
1039             # Get offset table entries; returns offset of nth record
1040             sub offset {
1041 22     22 1 1191 my ($self, $n) = @_;
1042              
1043 22 100       25 if ($#{$self->{offsets}} < $n) {
  22         59  
1044 4 100       24 return if $self->{eof}; # request for record beyond the end of file
1045 1         4 my $o = $self->_fill_offsets_to($n);
1046             # If it's still undefined, there is no such record, so return 'undef'
1047 1 50       6 return unless defined $o;
1048             }
1049              
1050 18         99 $self->{offsets}[$n];
1051             }
1052              
1053             sub discard_offsets {
1054 0     0 0 0 my $self = shift;
1055 0         0 $self->{offsets} = [0];
1056             }
1057              
1058             ################################################################
1059             #
1060             # Matters related to deferred writing
1061             #
1062              
1063             # Defer writes
1064             sub defer {
1065 22     22 1 1100 my $self = shift;
1066 22         64 $self->_stop_autodeferring;
1067 22         37 @{$self->{ad_history}} = ();
  22         56  
1068 22         69 $self->{defer} = 1;
1069             }
1070              
1071             # Flush deferred writes
1072             #
1073             # This could be better optimized to write the file in one pass, instead
1074             # of one pass per block of records. But that will require modifications
1075             # to _twrite, so I should have a good _twrite test suite first.
1076             sub flush {
1077 16     16 1 586 my $self = shift;
1078              
1079 16         44 $self->_flush;
1080 16         60 $self->{defer} = 0;
1081             }
1082              
1083             sub _old_flush {
1084 0     0   0 my $self = shift;
1085 0         0 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  0         0  
  0         0  
1086              
1087 0         0 while (@writable) {
1088             # gather all consecutive records from the front of @writable
1089 0         0 my $first_rec = shift @writable;
1090 0         0 my $last_rec = $first_rec+1;
1091 0   0     0 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1092 0         0 --$last_rec;
1093 0         0 $self->_fill_offsets_to($last_rec);
1094 0         0 $self->_extend_file_to($last_rec);
1095             $self->_splice($first_rec, $last_rec-$first_rec+1,
1096 0         0 @{$self->{deferred}}{$first_rec .. $last_rec});
  0         0  
1097             }
1098              
1099 0         0 $self->_discard; # clear out defered-write-cache
1100             }
1101              
1102             sub _flush {
1103 23     23   33 my $self = shift;
1104 23         34 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  53         123  
  23         96  
1105 23         50 my @args;
1106             my @adjust;
1107              
1108 23         65 while (@writable) {
1109             # gather all consecutive records from the front of @writable
1110 25         43 my $first_rec = shift @writable;
1111 25         42 my $last_rec = $first_rec+1;
1112 25   100     128 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1113 25         32 --$last_rec;
1114 25         63 my $end = $self->_fill_offsets_to($last_rec+1);
1115 25 100       59 if (not defined $end) {
1116 10         62 $self->_extend_file_to($last_rec);
1117 10         23 $end = $self->{offsets}[$last_rec];
1118             }
1119 25         45 my ($start) = $self->{offsets}[$first_rec];
1120             push @args,
1121 25         54 join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
  25         101  
1122             $start, # position
1123             $end-$start; # length
1124             push @adjust, [$first_rec, # starting at this position...
1125             $last_rec-$first_rec+1, # this many records...
1126             # are replaced with these...
1127 25         65 @{$self->{deferred}}{$first_rec .. $last_rec},
  25         109  
1128             ];
1129             }
1130              
1131 23         82 $self->_mtwrite(@args); # write multiple record groups
1132 23         70 $self->_discard; # clear out defered-write-cache
1133 23         57 $self->_oadjust(@adjust);
1134             }
1135              
1136             # Discard deferred writes and disable future deferred writes
1137             sub discard {
1138 6     6 1 255 my $self = shift;
1139 6         24 $self->_discard;
1140 6         23 $self->{defer} = 0;
1141             }
1142              
1143             # Discard deferred writes, but retain old deferred writing mode
1144             sub _discard {
1145 29     29   47 my $self = shift;
1146 29         38 %{$self->{deferred}} = ();
  29         78  
1147 29         52 $self->{deferred_s} = 0;
1148 29         45 $self->{deferred_max} = -1;
1149 29         69 $self->{cache}->set_limit($self->{memory});
1150             }
1151              
1152             # Deferred writing is enabled, either explicitly ($self->{defer})
1153             # or automatically ($self->{autodeferring})
1154             sub _is_deferring {
1155 4564     4564   6548 my $self = shift;
1156 4564 100       16986 $self->{defer} || $self->{autodeferring};
1157             }
1158              
1159             # The largest record number of any deferred record
1160             sub _defer_max {
1161 592     592   793 my $self = shift;
1162 592 100       1479 return $self->{deferred_max} if defined $self->{deferred_max};
1163 1         7 my $max = -1;
1164 1         1 for my $key (keys %{$self->{deferred}}) {
  1         5  
1165 1 50       4 $max = $key if $key > $max;
1166             }
1167 1         3 $self->{deferred_max} = $max;
1168 1         2 $max;
1169             }
1170              
1171             ################################################################
1172             #
1173             # Matters related to autodeferment
1174             #
1175              
1176             # Get/set autodefer option
1177             sub autodefer {
1178 2     2 1 101 my $self = shift;
1179 2 50       8 if (@_) {
1180 2         6 my $old = $self->{autodefer};
1181 2         3 $self->{autodefer} = shift;
1182 2 100       6 if ($old) {
1183 1         3 $self->_stop_autodeferring;
1184 1         7 @{$self->{ad_history}} = ();
  1         4  
1185             }
1186 2         6 $old;
1187             } else {
1188 0         0 $self->{autodefer};
1189             }
1190             }
1191              
1192             # The user is trying to store record #$n Record that in the history,
1193             # and then enable (or disable) autodeferment if that seems useful.
1194             # Note that it's OK for $n to be a non-number, as long as the function
1195             # is prepared to deal with that. Nobody else looks at the ad_history.
1196             #
1197             # Now, what does the ad_history mean, and what is this function doing?
1198             # Essentially, the idea is to enable autodeferring when we see that the
1199             # user has made three consecutive STORE calls to three consecutive records.
1200             # ("Three" is actually ->{autodefer_threshhold}.)
1201             # A STORE call for record #$n inserts $n into the autodefer history,
1202             # and if the history contains three consecutive records, we enable
1203             # autodeferment. An ad_history of [X, Y] means that the most recent
1204             # STOREs were for records X, X+1, ..., Y, in that order.
1205             #
1206             # Inserting a nonconsecutive number erases the history and starts over.
1207             #
1208             # Performing a special operation like SPLICE erases the history.
1209             #
1210             # There's one special case: CLEAR means that CLEAR was just called.
1211             # In this case, we prime the history with [-2, -1] so that if the next
1212             # write is for record 0, autodeferring goes on immediately. This is for
1213             # the common special case of "@a = (...)".
1214             #
1215             sub _annotate_ad_history {
1216 611     611   1056 my ($self, $n) = @_;
1217 611 50       1237 return unless $self->{autodefer}; # feature is disabled
1218 611 100       1157 return if $self->{defer}; # already in explicit defer mode
1219 545 100       1383 return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
1220              
1221 25         48 local *H = $self->{ad_history};
1222 25 100       123 if ($n eq 'CLEAR') {
    50          
1223 2         5 @H = (-2, -1); # prime the history with fake records
1224 2         4 $self->_stop_autodeferring;
1225             } elsif ($n =~ /^\d+$/) {
1226 23 100       48 if (@H == 0) {
1227 1         5 @H = ($n, $n);
1228             } else { # @H == 2
1229 22 100       39 if ($H[1] == $n-1) { # another consecutive record
1230 19         28 $H[1]++;
1231 19 100       45 if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
1232 16         32 $self->{autodeferring} = 1;
1233             }
1234             } else { # nonconsecutive- erase and start over
1235 3         7 @H = ($n, $n);
1236 3         8 $self->_stop_autodeferring;
1237             }
1238             }
1239             } else { # SPLICE or STORESIZE or some such
1240 0         0 @H = ();
1241 0         0 $self->_stop_autodeferring;
1242             }
1243             }
1244              
1245             # If autodeferring was enabled, cut it out and discard the history
1246             sub _stop_autodeferring {
1247 28     28   43 my $self = shift;
1248 28 100       72 if ($self->{autodeferring}) {
1249 5         10 $self->_flush;
1250             }
1251 28         51 $self->{autodeferring} = 0;
1252             }
1253              
1254             ################################################################
1255              
1256              
1257             # This is NOT a method. It is here for two reasons:
1258             # 1. To factor a fairly complicated block out of the constructor
1259             # 2. To provide access for the test suite, which need to be sure
1260             # files are being written properly.
1261             sub _default_recsep {
1262 2998     2998   7919 my $recsep = $/;
1263 2998 50       9862 if ($^O eq 'MSWin32') { # Dos too?
1264             # Windows users expect files to be terminated with \r\n
1265             # But $/ is set to \n instead
1266             # Note that this also transforms \n\n into \r\n\r\n.
1267             # That is a feature.
1268 0         0 $recsep =~ s/\n/\r\n/g;
1269             }
1270 2998         7374 $recsep;
1271             }
1272              
1273             # Utility function for _check_integrity
1274             sub _ci_warn {
1275 0     0   0 my $msg = shift;
1276 0         0 $msg =~ s/\n/\\n/g;
1277 0         0 $msg =~ s/\r/\\r/g;
1278 0         0 print "# $msg\n";
1279             }
1280              
1281             # Given a file, make sure the cache is consistent with the
1282             # file contents and the internal data structures are consistent with
1283             # each other. Returns true if everything checks out, false if not
1284             #
1285             # The $file argument is no longer used. It is retained for compatibility
1286             # with the existing test suite.
1287             sub _check_integrity {
1288 380     380   6909 my ($self, $file, $warn) = @_;
1289 380         685 my $rsl = $self->{recseplen};
1290 380         622 my $rs = $self->{recsep};
1291 380         489 my $good = 1;
1292 380         725 local *_; # local $_ does not work here
1293 380         602 local $DIAGNOSTIC = 1;
1294              
1295 380 50       1266 if (not defined $rs) {
    50          
    50          
1296 0         0 _ci_warn("recsep is undef!");
1297 0         0 $good = 0;
1298             } elsif ($rs eq "") {
1299 0         0 _ci_warn("recsep is empty!");
1300 0         0 $good = 0;
1301             } elsif ($rsl != length $rs) {
1302 0         0 my $ln = length $rs;
1303 0         0 _ci_warn("recsep <$rs> has length $ln, should be $rsl");
1304 0         0 $good = 0;
1305             }
1306              
1307 380 50       1019 if (not defined $self->{offsets}[0]) {
    50          
1308 0         0 _ci_warn("offset 0 is missing!");
1309 0         0 $good = 0;
1310              
1311             } elsif ($self->{offsets}[0] != 0) {
1312 0         0 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
1313 0         0 $good = 0;
1314             }
1315              
1316 380         510 my $cached = 0;
1317             {
1318 380         472 local *F = $self->{fh};
  380         852  
1319 380         3080 seek F, 0, SEEK_SET;
1320 380         1696 local $. = 0;
1321 380         1132 local $/ = $rs;
1322              
1323 380         4190 while () {
1324 2132         3604 my $n = $. - 1;
1325 2132         3743 my $cached = $self->{cache}->_produce($n);
1326 2132         3402 my $offset = $self->{offsets}[$.];
1327 2132         2865 my $ao = tell F;
1328 2132 50 66     5914 if (defined $offset && $offset != $ao) {
1329 0         0 _ci_warn("rec $n: offset <$offset> actual <$ao>");
1330 0         0 $good = 0;
1331             }
1332 2132 50 66     4748 if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
      33        
1333 0         0 $good = 0;
1334 0         0 _ci_warn("rec $n: cached <$cached> actual <$_>");
1335             }
1336 2132 50 66     4737 if (defined $cached && substr($cached, -$rsl) ne $rs) {
1337 0         0 $good = 0;
1338 0         0 _ci_warn("rec $n in the cache is missing the record separator");
1339             }
1340 2132 50 66     7889 if (! defined $offset && $self->{eof}) {
1341 0         0 $good = 0;
1342 0         0 _ci_warn("The offset table was marked complete, but it is missing " .
1343             "element $.");
1344             }
1345             }
1346 380 50       823 if (@{$self->{offsets}} > $.+1) {
  380         1411  
1347 0         0 $good = 0;
1348 0         0 my $n = @{$self->{offsets}};
  0         0  
1349 0         0 _ci_warn("The offset table has $n items, but the file has only $.");
1350             }
1351              
1352 380         970 my $deferring = $self->_is_deferring;
1353 380         958 for my $n ($self->{cache}->ckeys) {
1354 938         1550 my $r = $self->{cache}->_produce($n);
1355 938         1317 $cached += length($r);
1356 938 50       2243 next if $n+1 <= $.; # checked this already
1357 0         0 _ci_warn("spurious caching of record $n");
1358 0         0 $good = 0;
1359             }
1360 380         832 my $b = $self->{cache}->bytes;
1361 380 50       1637 if ($cached != $b) {
1362 0         0 _ci_warn("cache size is $b, should be $cached");
1363 0         0 $good = 0;
1364             }
1365             }
1366              
1367             # That cache has its own set of tests
1368 380 50       813 $good = 0 unless $self->{cache}->_check_integrity;
1369              
1370             # Now let's check the deferbuffer
1371             # Unless deferred writing is enabled, it should be empty
1372 380 50 66     760 if (! $self->_is_deferring && %{$self->{deferred}}) {
  331         906  
1373 0         0 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1374 0         0 $good = 0;
1375             }
1376              
1377             # Any record in the deferbuffer should *not* be present in the readcache
1378 380         640 my $deferred_s = 0;
1379 380         548 while (my ($n, $r) = each %{$self->{deferred}}) {
  467         1375  
1380 87         120 $deferred_s += length($r);
1381 87 50       158 if (defined $self->{cache}->_produce($n)) {
1382 0         0 _ci_warn("record $n is in the deferbuffer *and* the readcache");
1383 0         0 $good = 0;
1384             }
1385 87 50       209 if (substr($r, -$rsl) ne $rs) {
1386 0         0 _ci_warn("rec $n in the deferbuffer is missing the record separator");
1387 0         0 $good = 0;
1388             }
1389             }
1390              
1391             # Total size of deferbuffer should match internal total
1392 380 50       809 if ($deferred_s != $self->{deferred_s}) {
1393 0         0 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1394 0         0 $good = 0;
1395             }
1396              
1397             # Total size of deferbuffer should not exceed the specified limit
1398 380 50       788 if ($deferred_s > $self->{dw_size}) {
1399 0         0 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit " .
1400             "of $self->{dw_size}");
1401 0         0 $good = 0;
1402             }
1403              
1404             # Total size of cached data should not exceed the specified limit
1405 380 50       762 if ($deferred_s + $cached > $self->{memory}) {
1406 0         0 my $total = $deferred_s + $cached;
1407 0         0 _ci_warn("total stored data size is $total which exceeds the limit " .
1408             "of $self->{memory}");
1409 0         0 $good = 0;
1410             }
1411              
1412             # Stuff related to autodeferment
1413 380 50 66     782 if (!$self->{autodefer} && @{$self->{ad_history}}) {
  129         350  
1414 0         0 _ci_warn("autodefer is disabled, but ad_history is nonempty");
1415 0         0 $good = 0;
1416             }
1417 380 50 66     795 if ($self->{autodeferring} && $self->{defer}) {
1418 0         0 _ci_warn("both autodeferring and explicit deferring are active");
1419 0         0 $good = 0;
1420             }
1421 380 100       459 if (@{$self->{ad_history}} == 0) {
  380 50       796  
1422             # That's OK, no additional tests required
1423 14         25 } elsif (@{$self->{ad_history}} == 2) {
1424 14         20 my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
  14         82  
1425 14 50       62 if (@non_number) {
    50          
1426 0         0 my $msg;
1427 0         0 { local $" = ')(';
  0         0  
1428 0         0 $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
  0         0  
1429             }
1430 0         0 _ci_warn($msg);
1431 0         0 $good = 0;
1432             } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1433 0         0 _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
  0         0  
1434 0         0 $good = 0;
1435             }
1436             } else {
1437 0         0 _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
  0         0  
1438 0         0 $good = 0;
1439             }
1440              
1441 380         1291 $good;
1442             }
1443              
1444             ################################################################
1445             #
1446             # Tie::File::Cache
1447             #
1448             # Read cache
1449              
1450             package Tie::File::Cache;
1451             $Tie::File::Cache::VERSION = $Tie::File::VERSION;
1452 38     38   393 use Carp ':DEFAULT', 'confess';
  38         100  
  38         7374  
1453              
1454             sub HEAP () { 0 }
1455             sub HASH () { 1 }
1456             sub MAX () { 2 }
1457             sub BYTES() { 3 }
1458             #sub STAT () { 4 } # Array with request statistics for each record
1459             #sub MISS () { 5 } # Total number of cache misses
1460             #sub REQ () { 6 } # Total number of cache requests
1461 38     38   360 use strict 'vars';
  38         71  
  38         53580  
1462              
1463             sub new {
1464 2988     2988   5801 my ($pack, $max) = @_;
1465 2988         7519 local *_;
1466 2988 50       6343 croak "missing argument to ->new" unless defined $max;
1467 2988         5112 my $self = [];
1468 2988         5498 bless $self => $pack;
1469 2988         10992 @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1470 2988         8461 $self;
1471             }
1472              
1473             sub adj_limit {
1474 77     77   131 my ($self, $n) = @_;
1475 77         121 $self->[MAX] += $n;
1476             }
1477              
1478             sub set_limit {
1479 60     60   128 my ($self, $n) = @_;
1480 60         111 $self->[MAX] = $n;
1481             }
1482              
1483             # For internal use only
1484             # Will be called by the heap structure to notify us that a certain
1485             # piece of data has moved from one heap element to another.
1486             # $k is the hash key of the item
1487             # $n is the new index into the heap at which it is stored
1488             # If $n is undefined, the item has been removed from the heap.
1489             sub _heap_move {
1490 3590     3590   5807 my ($self, $k, $n) = @_;
1491 3590 100       5502 if (defined $n) {
1492 3157         5797 $self->[HASH]{$k} = $n;
1493             } else {
1494 433         866 delete $self->[HASH]{$k};
1495             }
1496             }
1497              
1498             sub insert {
1499 1227     1227   2792 my ($self, $key, $val) = @_;
1500 1227         2680 local *_;
1501 1227 50       2228 croak "missing argument to ->insert" unless defined $key;
1502 1227 50       2382 unless (defined $self->[MAX]) {
1503 0         0 confess "undefined max" ;
1504             }
1505 1227 50       1985 confess "undefined val" unless defined $val;
1506 1227 100       2405 return if length($val) > $self->[MAX];
1507              
1508             # if ($self->[STAT]) {
1509             # $self->[STAT][$key] = 1;
1510             # return;
1511             # }
1512              
1513 1144         1688 my $oldnode = $self->[HASH]{$key};
1514 1144 50       2035 if (defined $oldnode) {
1515 0         0 my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1516 0         0 $self->[BYTES] -= length($oldval);
1517             } else {
1518 1144         2330 $self->[HEAP]->insert($key, $val);
1519             }
1520 1144         1769 $self->[BYTES] += length($val);
1521 1144 100       2818 $self->flush if $self->[BYTES] > $self->[MAX];
1522             }
1523              
1524             sub expire {
1525 56     56   181 my $self = shift;
1526 56         83 my $old_data = $self->[HEAP]->popheap;
1527 56 100       107 return unless defined $old_data;
1528 50         60 $self->[BYTES] -= length $old_data;
1529 50         102 $old_data;
1530             }
1531              
1532             sub remove {
1533 437     437   796 my ($self, @keys) = @_;
1534 437         571 my @result;
1535              
1536             # if ($self->[STAT]) {
1537             # for my $key (@keys) {
1538             # $self->[STAT][$key] = 0;
1539             # }
1540             # return;
1541             # }
1542              
1543 437         654 for my $key (@keys) {
1544 438 100       1079 next unless exists $self->[HASH]{$key};
1545 339         710 my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1546 339         490 $self->[BYTES] -= length $old_data;
1547 339         667 push @result, $old_data;
1548             }
1549 437         883 @result;
1550             }
1551              
1552             sub lookup {
1553 1878     1878   3068 my ($self, $key) = @_;
1554 1878         3630 local *_;
1555 1878 50       3756 croak "missing argument to ->lookup" unless defined $key;
1556              
1557             # if ($self->[STAT]) {
1558             # $self->[MISS]++ if $self->[STAT][$key]++ == 0;
1559             # $self->[REQ]++;
1560             # my $hit_rate = 1 - $self->[MISS] / $self->[REQ];
1561             # # Do some testing to determine this threshhold
1562             # $#$self = STAT - 1 if $hit_rate > 0.20;
1563             # }
1564              
1565 1878 100       3608 if (exists $self->[HASH]{$key}) {
1566 669         1378 $self->[HEAP]->lookup($self->[HASH]{$key});
1567             } else {
1568 1209         2633 return;
1569             }
1570             }
1571              
1572             # For internal use only
1573             sub _produce {
1574 3204     3204   4821 my ($self, $key) = @_;
1575 3204         4905 my $loc = $self->[HASH]{$key};
1576 3204 100       5822 return unless defined $loc;
1577 1923         3872 $self->[HEAP][$loc][2];
1578             }
1579              
1580             # For internal use only
1581             sub _promote {
1582 5     5   23 my ($self, $key) = @_;
1583 5         9 $self->[HEAP]->promote($self->[HASH]{$key});
1584             }
1585              
1586             sub empty {
1587 86     86   2566 my ($self) = @_;
1588 86         130 %{$self->[HASH]} = ();
  86         298  
1589 86         187 $self->[BYTES] = 0;
1590 86         247 $self->[HEAP]->empty;
1591             # @{$self->[STAT]} = ();
1592             # $self->[MISS] = 0;
1593             # $self->[REQ] = 0;
1594             }
1595              
1596             sub is_empty {
1597 3     3   18 my ($self) = @_;
1598 3         4 keys %{$self->[HASH]} == 0;
  3         9  
1599             }
1600              
1601             sub update {
1602 505     505   1032 my ($self, $key, $val) = @_;
1603 505         1159 local *_;
1604 505 50       1001 croak "missing argument to ->update" unless defined $key;
1605 505 100       1439 if (length($val) > $self->[MAX]) {
    100          
1606 21         44 my ($oldval) = $self->remove($key);
1607 21 50       40 $self->[BYTES] -= length($oldval) if defined $oldval;
1608             } elsif (exists $self->[HASH]{$key}) {
1609 449         1082 my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1610 449         700 $self->[BYTES] += length($val);
1611 449 50       968 $self->[BYTES] -= length($oldval) if defined $oldval;
1612             } else {
1613 35         125 $self->[HEAP]->insert($key, $val);
1614 35         64 $self->[BYTES] += length($val);
1615             }
1616 505         946 $self->flush;
1617             }
1618              
1619             sub rekey {
1620 386     386   681 my ($self, $okeys, $nkeys) = @_;
1621 386         832 local *_;
1622 386         502 my %map;
1623 386         779 @map{@$okeys} = @$nkeys;
1624 386 50       752 croak "missing argument to ->rekey" unless defined $nkeys;
1625 386 50       732 croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1626 386         488 my %adjusted; # map new keys to heap indices
1627             # You should be able to cut this to one loop TODO XXX
1628 386         809 for (0 .. $#$okeys) {
1629 530         1662 $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1630             }
1631 386         1677 while (my ($nk, $ix) = each %adjusted) {
1632             # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1633 530         1100 $self->[HEAP]->rekey($ix, $nk);
1634 530         1620 $self->[HASH]{$nk} = $ix;
1635             }
1636             }
1637              
1638             sub ckeys {
1639 795     795   1355 my $self = shift;
1640 795         986 my @a = keys %{$self->[HASH]};
  795         2337  
1641 795         2623 @a;
1642             }
1643              
1644             # Return total amount of cached data
1645             sub bytes {
1646 507     507   993 my $self = shift;
1647 507         1112 $self->[BYTES];
1648             }
1649              
1650             # Expire oldest item from cache until cache size is smaller than $max
1651             sub reduce_size_to {
1652 1131     1131   1687 my ($self, $max) = @_;
1653 1131         2795 until ($self->[BYTES] <= $max) {
1654             # Note that Tie::File::Cache::expire has been inlined here
1655 44         76 my $old_data = $self->[HEAP]->popheap;
1656 44 50       102 return unless defined $old_data;
1657 44         194 $self->[BYTES] -= length $old_data;
1658             }
1659             }
1660              
1661             # Why not just $self->reduce_size_to($self->[MAX])?
1662             # Try this when things stabilize TODO XXX
1663             # If the cache is too full, expire the oldest records
1664             sub flush {
1665 526     526   692 my $self = shift;
1666 526 100       2317 $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX];
1667             }
1668              
1669             # For internal use only
1670             sub _produce_lru {
1671 1     1   8 my $self = shift;
1672 1         4 $self->[HEAP]->expire_order;
1673             }
1674              
1675 38     38   15720 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1676              
1677             sub _check_integrity { # For CACHE
1678 406     406   784 my $self = shift;
1679 406         537 my $good = 1;
1680              
1681             # Test HEAP
1682 406 50       885 $self->[HEAP]->_check_integrity or $good = 0;
1683              
1684             # Test HASH
1685 406         570 my $bytes = 0;
1686 406         543 for my $k (keys %{$self->[HASH]}) {
  406         938  
1687 1063 50 66     4340 if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) {
1688 0         0 $good = 0;
1689 0         0 _ci_warn "Cache hash key <$k> is non-numeric";
1690             }
1691              
1692 1063         1736 my $h = $self->[HASH]{$k};
1693 1063 50       2056 if (! defined $h) {
    50          
1694 0         0 $good = 0;
1695 0         0 _ci_warn "Heap index number for key $k is undefined";
1696             } elsif ($h == 0) {
1697 0         0 $good = 0;
1698 0         0 _ci_warn "Heap index number for key $k is zero";
1699             } else {
1700 1063         1398 my $j = $self->[HEAP][$h];
1701 1063 50       1435 if (! defined $j) {
1702 0         0 $good = 0;
1703 0         0 _ci_warn "Heap contents key $k (=> $h) are undefined";
1704             } else {
1705 1063         1347 $bytes += length($j->[2]);
1706 1063 50       2173 if ($k ne $j->[1]) {
1707 0         0 $good = 0;
1708 0         0 _ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k";
1709             }
1710             }
1711             }
1712             }
1713              
1714             # Test BYTES
1715 406 50       878 if ($bytes != $self->[BYTES]) {
1716 0         0 $good = 0;
1717 0         0 _ci_warn "Total data in cache is $bytes, expected $self->[BYTES]";
1718             }
1719              
1720             # Test MAX
1721 406 50       753 if ($bytes > $self->[MAX]) {
1722 0         0 $good = 0;
1723 0         0 _ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]";
1724             }
1725              
1726 406         824 return $good;
1727             }
1728              
1729             sub delink {
1730 2986     2986   4182 my $self = shift;
1731 2986         9441 $self->[HEAP] = undef; # Bye bye heap
1732             }
1733              
1734             ################################################################
1735             #
1736             # Tie::File::Heap
1737             #
1738             # Heap data structure for use by cache LRU routines
1739              
1740             package Tie::File::Heap;
1741 38     38   312 use Carp ':DEFAULT', 'confess';
  38         94  
  38         52620  
1742             $Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1743             sub SEQ () { 0 };
1744             sub KEY () { 1 };
1745             sub DAT () { 2 };
1746              
1747             sub new {
1748 2988     2988   5308 my ($pack, $cache) = @_;
1749             die "$pack: Parent cache object $cache does not support _heap_move method"
1750 2988 50       5634 unless eval { $cache->can('_heap_move') };
  2988         12333  
1751 2988         7317 my $self = [[0,$cache,0]];
1752 2988         10427 bless $self => $pack;
1753             }
1754              
1755             # Allocate a new sequence number, larger than all previously allocated numbers
1756             sub _nseq {
1757 2302     2302   3142 my $self = shift;
1758 2302         4133 $self->[0][0]++;
1759             }
1760              
1761             sub _cache {
1762 0     0   0 my $self = shift;
1763 0         0 $self->[0][1];
1764             }
1765              
1766             sub _nelts {
1767 0     0   0 my $self = shift;
1768 0         0 $self->[0][2];
1769             }
1770              
1771             sub _nelts_inc {
1772 1179     1179   1515 my $self = shift;
1773 1179         2086 ++$self->[0][2];
1774             }
1775              
1776             sub _nelts_dec {
1777 433     433   590 my $self = shift;
1778 433         578 --$self->[0][2];
1779             }
1780              
1781             sub is_empty {
1782 0     0   0 my $self = shift;
1783 0         0 $self->_nelts == 0;
1784             }
1785              
1786             sub empty {
1787 86     86   136 my $self = shift;
1788 86         477 $#$self = 0;
1789 86         146 $self->[0][2] = 0;
1790 86         191 $self->[0][0] = 0; # might as well reset the sequence numbers
1791             }
1792              
1793             # notify the parent cache object that we moved something
1794             sub _heap_move {
1795 0     0   0 my $self = shift;
1796 0         0 $self->_cache->_heap_move(@_);
1797             }
1798              
1799             # Insert a piece of data into the heap with the indicated sequence number.
1800             # The item with the smallest sequence number is always at the top.
1801             # If no sequence number is specified, allocate a new one and insert the
1802             # item at the bottom.
1803             sub insert {
1804 1179     1179   2192 my ($self, $key, $data, $seq) = @_;
1805 1179 50       2701 $seq = $self->_nseq unless defined $seq;
1806 1179         3514 $self->_insert_new([$seq, $key, $data]);
1807             }
1808              
1809             # Insert a new, fresh item at the bottom of the heap
1810             sub _insert_new {
1811 1179     1179   1957 my ($self, $item) = @_;
1812 1179         1688 my $i = @$self;
1813 1179         3452 $i = int($i/2) until defined $self->[$i/2];
1814 1179         1964 $self->[$i] = $item;
1815 1179         2924 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1816 1179         2070 $self->_nelts_inc;
1817             }
1818              
1819             # Insert [$data, $seq] pair at or below item $i in the heap.
1820             # If $i is omitted, default to 1 (the top element.)
1821             sub _insert {
1822 0     0   0 my ($self, $item, $i) = @_;
1823             # $self->_check_loc($i) if defined $i;
1824 0 0       0 $i = 1 unless defined $i;
1825 0         0 until (! defined $self->[$i]) {
1826 0 0       0 if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1827 0         0 ($self->[$i], $item) = ($item, $self->[$i]);
1828 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1829             }
1830             # If either is undefined, go that way. Otherwise, choose at random
1831 0         0 my $dir;
1832 0 0       0 $dir = 0 if !defined $self->[2*$i];
1833 0 0       0 $dir = 1 if !defined $self->[2*$i+1];
1834 0 0       0 $dir = int(rand(2)) unless defined $dir;
1835 0         0 $i = 2*$i + $dir;
1836             }
1837 0         0 $self->[$i] = $item;
1838 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1839 0         0 $self->_nelts_inc;
1840             }
1841              
1842             # Remove the item at node $i from the heap, moving child items upwards.
1843             # The item with the smallest sequence number is always at the top.
1844             # Moving items upwards maintains this condition.
1845             # Return the removed item. Return undef if there was no item at node $i.
1846             sub remove {
1847 439     439   746 my ($self, $i) = @_;
1848 439 50       775 $i = 1 unless defined $i;
1849 439         594 my $top = $self->[$i];
1850 439 100       877 return unless defined $top;
1851 433         556 while (1) {
1852 913         1061 my $ii;
1853 913         1410 my ($L, $R) = (2*$i, 2*$i+1);
1854              
1855             # If either is undefined, go the other way.
1856             # Otherwise, go towards the smallest.
1857 913 100 100     2510 last unless defined $self->[$L] || defined $self->[$R];
1858 480 100       856 $ii = $R if not defined $self->[$L];
1859 480 100       795 $ii = $L if not defined $self->[$R];
1860 480 100       772 unless (defined $ii) {
1861 158 100       320 $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1862             }
1863              
1864 480         616 $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1865 480         1026 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1866 480         597 $i = $ii; # Fill new vacated spot
1867             }
1868 433         1001 $self->[0][1]->_heap_move($top->[KEY], undef);
1869 433         584 undef $self->[$i];
1870 433         882 $self->_nelts_dec;
1871 433         1022 return $top->[DAT];
1872             }
1873              
1874             sub popheap {
1875 100     100   122 my $self = shift;
1876 100         159 $self->remove(1);
1877             }
1878              
1879             # set the sequence number of the indicated item to a higher number
1880             # than any other item in the heap, and bubble the item down to the
1881             # bottom.
1882             sub promote {
1883 1123     1123   1661 my ($self, $n) = @_;
1884             # $self->_check_loc($n);
1885 1123         1889 $self->[$n][SEQ] = $self->_nseq;
1886 1123         1592 my $i = $n;
1887 1123         1378 while (1) {
1888 1872         3076 my ($L, $R) = (2*$i, 2*$i+1);
1889 1872         2304 my $dir;
1890 1872 100 100     5531 last unless defined $self->[$L] || defined $self->[$R];
1891 749 100       3000 $dir = $R unless defined $self->[$L];
1892 749 100       1318 $dir = $L unless defined $self->[$R];
1893 749 100       1255 unless (defined $dir) {
1894 473 100       939 $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1895             }
1896 749         960 @{$self}[$i, $dir] = @{$self}[$dir, $i];
  749         1149  
  749         1106  
1897 749         1227 for ($i, $dir) {
1898 1498 50       3337 $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1899             }
1900 749         1010 $i = $dir;
1901             }
1902             }
1903              
1904             # Return item $n from the heap, promoting its LRU status
1905             sub lookup {
1906 669     669   1057 my ($self, $n) = @_;
1907             # $self->_check_loc($n);
1908 669         981 my $val = $self->[$n];
1909 669         1433 $self->promote($n);
1910 669         1739 $val->[DAT];
1911             }
1912              
1913              
1914             # Assign a new value for node $n, promoting it to the bottom of the heap
1915             sub set_val {
1916 449     449   820 my ($self, $n, $val) = @_;
1917             # $self->_check_loc($n);
1918 449         759 my $oval = $self->[$n][DAT];
1919 449         701 $self->[$n][DAT] = $val;
1920 449         1048 $self->promote($n);
1921 449         932 return $oval;
1922             }
1923              
1924             # The hash key has changed for an item;
1925             # alter the heap's record of the hash key
1926             sub rekey {
1927 530     530   804 my ($self, $n, $new_key) = @_;
1928             # $self->_check_loc($n);
1929 530         935 $self->[$n][KEY] = $new_key;
1930             }
1931              
1932             sub _check_loc {
1933 0     0   0 my ($self, $n) = @_;
1934 0         0 unless (1 || defined $self->[$n]) {
1935             confess "_check_loc($n) failed";
1936             }
1937             }
1938              
1939 38     38   18525 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1940              
1941             sub _check_integrity {
1942 406     406   510 my $self = shift;
1943 406         505 my $good = 1;
1944 406         513 my %seq;
1945              
1946 406 50       618 unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
  406         1682  
1947 0         0 _ci_warn "Element 0 of heap corrupt";
1948 0         0 $good = 0;
1949             }
1950 406 50       863 $good = 0 unless $self->_satisfies_heap_condition(1);
1951 406         586 for my $i (2 .. $#{$self}) {
  406         938  
1952 1933         2722 my $p = int($i/2); # index of parent node
1953 1933 50 66     4325 if (defined $self->[$i] && ! defined $self->[$p]) {
1954 0         0 _ci_warn "Element $i of heap defined, but parent $p isn't";
1955 0         0 $good = 0;
1956             }
1957              
1958 1933 100       3332 if (defined $self->[$i]) {
1959 824 50       1518 if ($seq{$self->[$i][SEQ]}) {
1960 0         0 my $seq = $self->[$i][SEQ];
1961 0         0 _ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq";
1962 0         0 $good = 0;
1963             } else {
1964 824         1681 $seq{$self->[$i][SEQ]} = $i;
1965             }
1966             }
1967             }
1968              
1969 406         1070 return $good;
1970             }
1971              
1972             sub _satisfies_heap_condition {
1973 1230     1230   1510 my $self = shift;
1974 1230   50     2011 my $n = shift || 1;
1975 1230         1510 my $good = 1;
1976 1230         1721 for (0, 1) {
1977 2460         3108 my $c = $n*2 + $_;
1978 2460 100       4450 next unless defined $self->[$c];
1979 824 50       1475 if ($self->[$n][SEQ] >= $self->[$c]) {
1980 0         0 _ci_warn "Node $n of heap does not predate node $c";
1981 0         0 $good = 0 ;
1982             }
1983 824 50       1377 $good = 0 unless $self->_satisfies_heap_condition($c);
1984             }
1985 1230         2405 return $good;
1986             }
1987              
1988             # Return a list of all the values, sorted by expiration order
1989             sub expire_order {
1990 1     1   2 my $self = shift;
1991 1         3 my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
  3         8  
1992 1         3 map { $_->[KEY] } @nodes;
  3         9  
1993             }
1994              
1995             sub _nodes {
1996 7     7   11 my $self = shift;
1997 7   100     13 my $i = shift || 1;
1998 7 100       30 return unless defined $self->[$i];
1999 3         9 ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
2000             }
2001              
2002             1;
2003              
2004             __END__