File Coverage

blib/lib/Tie/File.pm
Criterion Covered Total %
statement 850 1000 85.0
branch 350 458 76.4
condition 56 82 68.2
subroutine 102 112 91.0
pod 7 8 87.5
total 1365 1660 82.2


line stmt bran cond sub pod time code
1             package Tie::File;
2              
3             require 5.005;
4              
5 38     38   198536 use strict;
  38         275  
  38         1118  
6 38     38   171 use warnings;
  38         58  
  38         1294  
7              
8 38     38   192 use Carp ':DEFAULT', 'confess';
  38         62  
  38         6135  
9 38     38   11344 use POSIX 'SEEK_SET';
  38         183805  
  38         192  
10 38     38   33576 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
  38         70  
  38         263430  
11             sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
12              
13              
14             our $VERSION = "1.06";
15             my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
16             my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
17             my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
18              
19             my %good_opt = map {$_ => 1, "-$_" => 1}
20             qw(memory dw_size mode recsep discipline
21             autodefer autochomp autodefer_threshhold concurrent);
22              
23             our $DIAGNOSTIC = 0;
24             our @OFF; # used as a temporary alias in some subroutines.
25             our @H; # used as a temporary alias in _annotate_ad_history
26              
27             sub TIEARRAY {
28 2987 50   2987   1992046 if (@_ % 2 != 0) {
29 0         0 croak "usage: tie \@array, $_[0], filename, [option => value]...";
30             }
31 2987         6339 my ($pack, $file, %opts) = @_;
32              
33             # transform '-foo' keys into 'foo' keys
34 2987         8842 for my $key (keys %opts) {
35 57 50       153 unless ($good_opt{$key}) {
36 0         0 croak("$pack: Unrecognized option '$key'\n");
37             }
38 57         82 my $okey = $key;
39 57 50       175 if ($key =~ s/^-+//) {
40 0         0 $opts{$key} = delete $opts{$okey};
41             }
42             }
43              
44 2987 50       6897 if ($opts{concurrent}) {
45 0         0 croak("$pack: concurrent access not supported yet\n");
46             }
47              
48 2987 100       6299 unless (defined $opts{memory}) {
49             # default is the larger of the default cache size and the
50             # deferred-write buffer size (if specified)
51 2983         4493 $opts{memory} = $DEFAULT_MEMORY_SIZE;
52             $opts{memory} = $opts{dw_size}
53 2983 50 33     6070 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
54             # Dora Winifred Read
55             }
56 2987 100       5640 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
57 2987 50       6743 if ($opts{dw_size} > $opts{memory}) {
58 0         0 croak("$pack: dw_size may not be larger than total memory allocation\n");
59             }
60             # are we in deferred-write mode?
61 2987 50       5413 $opts{defer} = 0 unless defined $opts{defer};
62 2987         5158 $opts{deferred} = {}; # no records are presently deferred
63 2987         4384 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
64 2987         4705 $opts{deferred_max} = -1; # empty
65              
66             # What's a good way to arrange that this class can be overridden?
67 2987         15146 $opts{cache} = Tie::File::Cache->new($opts{memory});
68              
69             # autodeferment is enabled by default
70 2987 100       9191 $opts{autodefer} = 1 unless defined $opts{autodefer};
71 2987         4315 $opts{autodeferring} = 0; # but is not initially active
72 2987         4375 $opts{ad_history} = [];
73             $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
74 2987 50       5337 unless defined $opts{autodefer_threshhold};
75             $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
76 2987 50       5085 unless defined $opts{autodefer_filelen_threshhold};
77              
78 2987         4323 $opts{offsets} = [0];
79 2987         4237 $opts{filename} = $file;
80 2987 100       4435 unless (defined $opts{recsep}) {
81 2974         4909 $opts{recsep} = _default_recsep();
82             }
83 2987         8400 $opts{recseplen} = length($opts{recsep});
84 2987 50       5622 if ($opts{recseplen} == 0) {
85 0         0 croak "Empty record separator not supported by $pack";
86             }
87              
88 2987 100       5569 $opts{autochomp} = 1 unless defined $opts{autochomp};
89              
90 2987 100       4758 $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
91 2987         6247 $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
92 2987         4160 $opts{sawlastrec} = undef;
93              
94 2987         3295 my $fh;
95              
96 2987 100       14227 if (UNIVERSAL::isa($file, 'GLOB')) {
    50          
97             # We use 1 here on the theory that some systems
98             # may not indicate failure if we use 0.
99             # MSWin32 does not indicate failure with 0, but I don't know if
100             # it will indicate failure with 1 or not.
101 2 100       19 unless (seek $file, 1, SEEK_SET) {
102 1         224 croak "$pack: your filehandle does not appear to be seekable";
103             }
104 1         7 seek $file, 0, SEEK_SET; # put it back
105 1         2 $fh = $file; # setting binmode is the user's problem
106             } elsif (ref $file) {
107 0         0 croak "usage: tie \@array, $pack, filename, [option => value]...";
108             } else {
109             # $fh = \do { local *FH }; # XXX this is buggy
110 2985 50       6160 if ($] < 5.006) {
111             # perl 5.005 and earlier don't autovivify filehandles
112 0         0 require Symbol;
113 0         0 $fh = Symbol::gensym();
114             }
115 2985 50       102891 sysopen $fh, $file, $opts{mode}, 0666 or return;
116 2985         13449 binmode $fh;
117 2985         6391 ++$opts{ourfh};
118             }
119 2986         3327 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  2986         9866  
  2986         6788  
  2986         7580  
120 2986 50 33     6648 if (defined $opts{discipline} && $] >= 5.006) {
121             # This avoids a compile-time warning under 5.005
122 0         0 eval 'binmode($fh, $opts{discipline})';
123 0 0       0 croak $@ if $@ =~ /unknown discipline/i;
124 0 0       0 die if $@;
125             }
126 2986         3946 $opts{fh} = $fh;
127              
128 2986         15962 bless \%opts => $pack;
129             }
130              
131             sub FETCH {
132 1047     1047   9639 my ($self, $n) = @_;
133 1047         1014 my $rec;
134              
135             # check the defer buffer
136 1047 100       1783 $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n};
137 1047 100       2072 $rec = $self->_fetch($n) unless defined $rec;
138              
139             # inlined _chomp1
140             substr($rec, - $self->{recseplen}) = ""
141 1047 100 100     3159 if defined $rec && $self->{autochomp};
142 1047         3170 $rec;
143             }
144              
145             # Chomp many records in-place; return nothing useful
146             sub _chomp {
147 145     145   165 my $self = shift;
148 145 100       267 return unless $self->{autochomp};
149 48 50       75 if ($self->{autochomp}) {
150 48         66 for (@_) {
151 68 50       95 next unless defined;
152 68         113 substr($_, - $self->{recseplen}) = "";
153             }
154             }
155             }
156              
157             # Chomp one record in-place; return modified record
158             sub _chomp1 {
159 246     246   374 my ($self, $rec) = @_;
160 246 100       564 return $rec unless $self->{autochomp};
161 221 100       447 return unless defined $rec;
162 137         251 substr($rec, - $self->{recseplen}) = "";
163 137         324 $rec;
164             }
165              
166             sub _fetch {
167 1851     1851   2404 my ($self, $n) = @_;
168              
169             # check the record cache
170 1851         1897 { my $cached = $self->{cache}->lookup($n);
  1851         2941  
171 1851 100       3449 return $cached if defined $cached;
172             }
173              
174 1207 100       1306 if ($#{$self->{offsets}} < $n) {
  1207         2225  
175 23 100       68 return if $self->{eof}; # request for record beyond end of file
176 11         36 my $o = $self->_fill_offsets_to($n);
177             # If it's still undefined, there is no such record, so return 'undef'
178 11 100       32 return unless defined $o;
179             }
180              
181 1189         1500 my $fh = $self->{FH};
182 1189         2290 $self->_seek($n); # we can do this now that offsets is populated
183 1189         3115 my $rec = $self->_read_record;
184              
185             # If we happen to have just read the first record, check to see if
186             # the length of the record matches what 'tell' says. If not, Tie::File
187             # won't work, and should drop dead.
188             #
189             # if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
190             # if (defined $self->{discipline}) {
191             # croak "I/O discipline $self->{discipline} not supported";
192             # } else {
193             # croak "File encoding not supported";
194             # }
195             # }
196              
197 1189 100 66     5041 $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
198 1189         1978 $rec;
199             }
200              
201             sub STORE {
202 371     371   3422 my ($self, $n, $rec) = @_;
203 371 50       651 die "STORE called from _check_integrity!" if $DIAGNOSTIC;
204              
205 371         768 $self->_fixrecs($rec);
206              
207 371 100       668 if ($self->{autodefer}) {
208 231         412 $self->_annotate_ad_history($n);
209             }
210              
211 371 100       611 return $self->_store_deferred($n, $rec) if $self->_is_deferring;
212              
213              
214             # We need this to decide whether the new record will fit
215             # It incidentally populates the offsets table
216             # Note we have to do this before we alter the cache
217             # 20020324 Wait, but this DOES alter the cache. TODO BUG?
218 300         552 my $oldrec = $self->_fetch($n);
219              
220 300 100       510 if (not defined $oldrec) {
221             # We're storing a record beyond the end of the file
222 40         131 $self->_extend_file_to($n+1);
223 40         109 $oldrec = $self->{recsep};
224             }
225             # return if $oldrec eq $rec; # don't bother
226 300         413 my $len_diff = length($rec) - length($oldrec);
227              
228             # length($oldrec) here is not consistent with text mode TODO XXX BUG
229 300         822 $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
230 300         1021 $self->_oadjust([$n, 1, $rec]);
231 300         684 $self->{cache}->update($n, $rec);
232             }
233              
234             sub _store_deferred {
235 73     73   121 my ($self, $n, $rec) = @_;
236 73         147 $self->{cache}->remove($n);
237 73         102 my $old_deferred = $self->{deferred}{$n};
238              
239 73 100 100     218 if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
240 68         89 $self->{deferred_max} = $n;
241             }
242 73         138 $self->{deferred}{$n} = $rec;
243              
244 73         85 my $len_diff = length($rec);
245 73 100       99 $len_diff -= length($old_deferred) if defined $old_deferred;
246 73         90 $self->{deferred_s} += $len_diff;
247 73         190 $self->{cache}->adj_limit(-$len_diff);
248 73 100       174 if ($self->{deferred_s} > $self->{dw_size}) {
    100          
249 1         9 $self->_flush;
250             } elsif ($self->_cache_too_full) {
251 2         4 $self->_cache_flush;
252             }
253             }
254              
255             # Remove a single record from the deferred-write buffer without writing it
256             # The record need not be present
257             sub _delete_deferred {
258 6     6   18 my ($self, $n) = @_;
259 6         12 my $rec = delete $self->{deferred}{$n};
260 6 100       13 return unless defined $rec;
261              
262 4 50 33     18 if (defined $self->{deferred_max}
263             && $n == $self->{deferred_max}) {
264 4         6 undef $self->{deferred_max};
265             }
266              
267 4         7 $self->{deferred_s} -= length $rec;
268 4         8 $self->{cache}->adj_limit(length $rec);
269             }
270              
271             sub FETCHSIZE {
272 592     592   2390 my $self = shift;
273 592 100       1048 my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets;
  568         821  
274              
275 592         976 my $top_deferred = $self->_defer_max;
276 592 100 66     1787 $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
277 592         1230 $n;
278             }
279              
280             sub STORESIZE {
281 17     17   345 my ($self, $len) = @_;
282              
283 17 100       48 if ($self->{autodefer}) {
284 16         35 $self->_annotate_ad_history('STORESIZE');
285             }
286              
287 17         48 my $olen = $self->FETCHSIZE;
288 17 50       39 return if $len == $olen; # Woo-hoo!
289              
290             # file gets longer
291 17 100       36 if ($len > $olen) {
292 6 100       16 if ($self->_is_deferring) {
293 1         4 for ($olen .. $len-1) {
294 2         11 $self->_store_deferred($_, $self->{recsep});
295             }
296             } else {
297 5         14 $self->_extend_file_to($len);
298             }
299 6         21 return;
300             }
301              
302             # file gets shorter
303 11 100       23 if ($self->_is_deferring) {
304             # TODO maybe replace this with map-plus-assignment?
305 2         4 for (grep $_ >= $len, keys %{$self->{deferred}}) {
  2         11  
306 2         6 $self->_delete_deferred($_);
307             }
308 2         4 $self->{deferred_max} = $len-1;
309             }
310              
311 11         39 $self->_seek($len);
312 11         65 $self->_chop_file;
313 11         55 $#{$self->{offsets}} = $len;
  11         54  
314             # $self->{offsets}[0] = 0; # in case we just chopped this
315              
316 11         40 $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys);
317             }
318              
319             ### OPTIMIZE ME
320             ### It should not be necessary to do FETCHSIZE
321             ### Just seek to the end of the file.
322             sub PUSH {
323 6     6   378 my $self = shift;
324 6         17 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
325              
326             # No need to return:
327             # $self->FETCHSIZE; # because av.c takes care of this for me
328             }
329              
330             sub POP {
331 4     4   295 my $self = shift;
332 4         8 my $size = $self->FETCHSIZE;
333 4 100       12 return if $size == 0;
334             # print STDERR "# POPPITY POP POP POP\n";
335 3         16 scalar $self->SPLICE($size-1, 1);
336             }
337              
338             sub SHIFT {
339 4     4   215 my $self = shift;
340 4         9 scalar $self->SPLICE(0, 1);
341             }
342              
343             sub UNSHIFT {
344 4     4   298 my $self = shift;
345 4         15 $self->SPLICE(0, 0, @_);
346             # $self->FETCHSIZE; # av.c takes care of this for me
347             }
348              
349             sub CLEAR {
350 31     31   1147 my $self = shift;
351              
352 31 100       100 if ($self->{autodefer}) {
353 21         53 $self->_annotate_ad_history('CLEAR');
354             }
355              
356 31         84 $self->_seekb(0);
357 31         104 $self->_chop_file;
358 31         177 $self->{cache}->set_limit($self->{memory});
359 31         91 $self->{cache}->empty;
360 31         41 @{$self->{offsets}} = (0);
  31         73  
361 31         44 %{$self->{deferred}}= ();
  31         57  
362 31         53 $self->{deferred_s} = 0;
363 31         132 $self->{deferred_max} = -1;
364             }
365              
366             sub EXTEND {
367 28     28   75 my ($self, $n) = @_;
368              
369             # No need to pre-extend anything in this case
370 28 100       104 return if $self->_is_deferring;
371              
372 26         75 $self->_fill_offsets_to($n);
373 26         61 $self->_extend_file_to($n);
374             }
375              
376             sub DELETE {
377 9     9   371 my ($self, $n) = @_;
378              
379 9 100       19 if ($self->{autodefer}) {
380 4         7 $self->_annotate_ad_history('DELETE');
381             }
382              
383 9         16 my $lastrec = $self->FETCHSIZE-1;
384 9         19 my $rec = $self->FETCH($n);
385 9 100       18 $self->_delete_deferred($n) if $self->_is_deferring;
386 9 100       22 if ($n == $lastrec) {
    100          
387 4         17 $self->_seek($n);
388 4         14 $self->_chop_file;
389 4         17 $#{$self->{offsets}}--;
  4         16  
390 4         14 $self->{cache}->remove($n);
391             # perhaps in this case I should also remove trailing null records?
392             # 20020316
393             # Note that delete @a[-3..-1] deletes the records in the wrong order,
394             # so we only chop the very last one out of the file. We could repair this
395             # by tracking deleted records inside the object.
396             } elsif ($n < $lastrec) {
397 4         8 $self->STORE($n, "");
398             }
399 9         131 $rec;
400             }
401              
402             sub EXISTS {
403 11     11   326 my ($self, $n) = @_;
404 11 100       24 return 1 if exists $self->{deferred}{$n};
405 10         20 $n < $self->FETCHSIZE;
406             }
407              
408             sub SPLICE {
409 393     393   9508 my $self = shift;
410              
411 393 100       735 if ($self->{autodefer}) {
412 339         587 $self->_annotate_ad_history('SPLICE');
413             }
414              
415 393 100       608 $self->_flush if $self->_is_deferring; # move this up?
416 393 100       656 if (wantarray) {
417 145         252 $self->_chomp(my @a = $self->_splice(@_));
418 145         475 @a;
419             } else {
420 248         499 $self->_chomp1(scalar $self->_splice(@_));
421             }
422             }
423              
424             sub DESTROY {
425 2986     2986   27004 my $self = shift;
426 2986 100       5853 $self->flush if $self->_is_deferring;
427 2986 50       11233 $self->{cache}->delink if defined $self->{cache}; # break circular link
428 2986 100 66     10469 if ($self->{fh} and $self->{ourfh}) {
429 2985         4745 delete $self->{ourfh};
430 2985         154884 close delete $self->{fh};
431             }
432             }
433              
434             sub _splice {
435 393     393   742 my ($self, $pos, $nrecs, @data) = @_;
436 393         490 my @result;
437              
438 393 100       631 $pos = 0 unless defined $pos;
439              
440             # Deal with negative and other out-of-range positions
441             # Also set default for $nrecs
442             {
443 393         423 my $oldsize = $self->FETCHSIZE;
  393         595  
444 393 100       616 $nrecs = $oldsize unless defined $nrecs;
445 393         443 my $oldpos = $pos;
446              
447 393 100       590 if ($pos < 0) {
448 73         97 $pos += $oldsize;
449 73 100       109 if ($pos < 0) {
450 2         342 croak "Modification of non-creatable array value attempted, " .
451             "subscript $oldpos";
452             }
453             }
454              
455 391 100       620 if ($pos > $oldsize) {
456 14 100       44 return unless @data;
457 8         15 $pos = $oldsize; # This is what perl does for normal arrays
458             }
459              
460             # The manual is very unclear here
461 385 100       653 if ($nrecs < 0) {
462 10         11 $nrecs = $oldsize - $pos + $nrecs;
463 10 100       18 $nrecs = 0 if $nrecs < 0;
464             }
465              
466             # nrecs is too big---it really means "until the end"
467             # 20030507
468 385 100       695 if ($nrecs + $pos > $oldsize) {
469 33         52 $nrecs = $oldsize - $pos;
470             }
471             }
472              
473 385         778 $self->_fixrecs(@data);
474 385         1039 my $data = join '', @data;
475 385         443 my $datalen = length $data;
476 385         413 my $oldlen = 0;
477              
478             # compute length of data being removed
479 385         722 for ($pos .. $pos+$nrecs-1) {
480 512 50       813 last unless defined $self->_fill_offsets_to($_);
481 512         813 my $rec = $self->_fetch($_);
482 512 50       813 last unless defined $rec;
483 512         734 push @result, $rec;
484              
485             # Why don't we just use length($rec) here?
486             # Because that record might have come from the cache. _splice
487             # might have been called to flush out the deferred-write records,
488             # and in this case length($rec) is the length of the record to be
489             # *written*, not the length of the actual record in the file. But
490             # the offsets are still true. 20020322
491             $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
492 512 50       1319 if defined $self->{offsets}[$_+1];
493             }
494 385         839 $self->_fill_offsets_to($pos+$nrecs);
495              
496             # Modify the file
497 385         858 $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
498             # Adjust the offsets table
499 385         1583 $self->_oadjust([$pos, $nrecs, @data]);
500              
501             { # Take this read cache stuff out into a separate function
502             # You made a half-attempt to put it into _oadjust.
503             # Finish something like that up eventually.
504             # STORE also needs to do something similarish
505              
506             # update the read cache, part 1
507             # modified records
508 385         573 for ($pos .. $pos+$nrecs-1) {
  385         604  
509 512         655 my $new = $data[$_-$pos];
510 512 100       695 if (defined $new) {
511 194         315 $self->{cache}->update($_, $new);
512             } else {
513 318         504 $self->{cache}->remove($_);
514             }
515             }
516            
517             # update the read cache, part 2
518             # moved records - records past the site of the change
519             # need to be renumbered
520             # Maybe merge this with the previous block?
521             {
522 385         455 my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
  385         691  
523 385         780 my @newkeys = map $_-$nrecs+@data, @oldkeys;
524 385         707 $self->{cache}->rekey(\@oldkeys, \@newkeys);
525             }
526              
527             # Now there might be too much data in the cache, if we spliced out
528             # some short records and spliced in some long ones. If so, flush
529             # the cache.
530 385         717 $self->_cache_flush;
531             }
532              
533             # Yes, the return value of 'splice' *is* actually this complicated
534 385 100       1500 wantarray ? @result : @result ? $result[-1] : undef;
    100          
535             }
536              
537              
538             # write data into the file
539             # $data is the data to be written.
540             # it should be written at position $pos, and should overwrite
541             # exactly $len of the following bytes.
542             # Note that if length($data) > $len, the subsequent bytes will have to
543             # be moved up, and if length($data) < $len, they will have to
544             # be moved down
545             sub _twrite {
546 179     179   809 my ($self, $data, $pos, $len) = @_;
547              
548 179 50       277 unless (defined $pos) {
549 0         0 die "\$pos was undefined in _twrite";
550             }
551              
552 179         223 my $len_diff = length($data) - $len;
553              
554 179 100       275 if ($len_diff == 0) { # Woo-hoo!
555 30         42 my $fh = $self->{fh};
556 30         65 $self->_seekb($pos);
557 30         82 $self->_write_record($data);
558 30         92 return; # well, that was easy.
559             }
560              
561             # the two records are of different lengths
562             # our strategy here: rewrite the tail of the file,
563             # reading ahead one buffer at a time
564             # $bufsize is required to be at least as large as the data we're overwriting
565 149         227 my $bufsize = _bufsize($len_diff);
566 149         239 my ($writepos, $readpos) = ($pos, $pos+$len);
567 149         165 my $next_block;
568             my $more_data;
569              
570             # Seems like there ought to be a way to avoid the repeated code
571             # and the special case here. The read(1) is also a little weird.
572             # Think about this.
573 149         153 do {
574 376         772 $self->_seekb($readpos);
575 376         5492 my $br = read $self->{fh}, $next_block, $bufsize;
576 376         2016 $more_data = read $self->{fh}, my($dummy), 1;
577 376         879 $self->_seekb($writepos);
578 376         908 $self->_write_record($data);
579 376         687 $readpos += $br;
580 376         424 $writepos += length $data;
581 376         990 $data = $next_block;
582             } while $more_data;
583 149         343 $self->_seekb($writepos);
584 149         375 $self->_write_record($next_block);
585              
586             # There might be leftover data at the end of the file
587 149 100       525 $self->_chop_file if $len_diff < 0;
588             }
589              
590             # _iwrite(D, S, E)
591             # Insert text D at position S.
592             # Let C = E-S-|D|. If C < 0; die.
593             # Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).
594             # Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.
595             #
596             # In a later version, don't read the entire intervening area into
597             # memory at once; do the copying block by block.
598             sub _iwrite {
599 101     101   389 my $self = shift;
600 101         227 my ($D, $s, $e) = @_;
601 101         123 my $d = length $D;
602 101         130 my $c = $e-$s-$d;
603 101         238 local *FH = $self->{fh};
604 101 50       179 confess "Not enough space to insert $d bytes between $s and $e"
605             if $c < 0;
606 101 50       132 confess "[$s,$e) is an invalid insertion range" if $e < $s;
607              
608 101         218 $self->_seekb($s);
609 101         1549 read FH, my $buf, $e-$s;
610              
611 101         906 $D .= substr($buf, 0, $c, "");
612              
613 101         264 $self->_seekb($s);
614 101         316 $self->_write_record($D);
615              
616 101         583 return $buf;
617             }
618              
619             # Like _twrite, but the data-pos-len triple may be repeated; you may
620             # write several chunks. All the writing will be done in
621             # one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
622             sub _mtwrite {
623 2959     2959   10748 my $self = shift;
624 2959         3948 my $unwritten = "";
625 2959         3132 my $delta = 0;
626              
627 2959 50       6147 @_ % 3 == 0
628             or die "Arguments to _mtwrite did not come in groups of three";
629              
630 2959         6031 while (@_) {
631 5143         12689 my ($data, $pos, $len) = splice @_, 0, 3;
632 5143         6995 my $end = $pos + $len; # The OLD end of the segment to be replaced
633 5143         17172 $data = $unwritten . $data;
634 5143         7547 $delta -= length($unwritten);
635 5143         6815 $unwritten = "";
636 5143         5616 $pos += $delta; # This is where the data goes now
637 5143         5406 my $dlen = length $data;
638 5143         11496 $self->_seekb($pos);
639 5143 100       12560 if ($len >= $dlen) { # the data will fit
640 4283         9433 $self->_write_record($data);
641 4283         8475 $delta += ($dlen - $len); # everything following moves down by this much
642 4283         5742 $data = ""; # All the data in the buffer has been written
643             } else { # won't fit
644 860         2216 my $writable = substr($data, 0, $len - $delta, "");
645 860         2012 $self->_write_record($writable);
646 860         1846 $delta += ($dlen - $len); # everything following moves down by this much
647             }
648              
649             # At this point we've written some but maybe not all of the data.
650             # There might be a gap to close up, or $data might still contain a
651             # bunch of unwritten data that didn't fit.
652 5143         6274 my $ndlen = length $data;
653 5143 100       10020 if ($delta == 0) {
    100          
654 926         1648 $self->_write_record($data);
655             } elsif ($delta < 0) {
656             # upcopy (close up gap)
657 3370 100       8768 if (@_) {
658 1263         3259 $self->_upcopy($end, $end + $delta, $_[1] - $end);
659             } else {
660 2107         4208 $self->_upcopy($end, $end + $delta);
661             }
662             } else {
663             # downcopy (insert data that didn't fit; replace this data in memory
664             # with _later_ data that doesn't fit)
665 847 100       1322 if (@_) {
666 346         718 $unwritten = $self->_downcopy($data, $end, $_[1] - $end);
667             } else {
668             # Make the file longer to accommodate the last segment that doesn't
669 501         977 $unwritten = $self->_downcopy($data, $end);
670             }
671             }
672             }
673             }
674              
675             # Copy block of data of length $len from position $spos to position $dpos
676             # $dpos must be <= $spos
677             #
678             # If $len is undefined, go all the way to the end of the file
679             # and then truncate it ($spos - $dpos bytes will be removed)
680             sub _upcopy {
681 3424     3424   5195 my $blocksize = 8192;
682 3424         5577 my ($self, $spos, $dpos, $len) = @_;
683 3424 50       6448 if ($dpos > $spos) {
    100          
684 0         0 die "source ($spos) was upstream of destination ($dpos) in _upcopy";
685             } elsif ($dpos == $spos) {
686 16         80 return;
687             }
688              
689 3408   100     10189 while (! defined ($len) || $len > 0) {
690 7965 100       12320 my $readsize = ! defined($len) ? $blocksize
    100          
691             : $len > $blocksize ? $blocksize
692             : $len;
693            
694 7965         10079 my $fh = $self->{fh};
695 7965         17320 $self->_seekb($spos);
696 7965         86337 my $bytes_read = read $fh, my($data), $readsize;
697 7965         23870 $self->_seekb($dpos);
698 7965 100       19837 if ($data eq "") {
699 2121         5106 $self->_chop_file;
700 2121         20081 last;
701             }
702 5844         14547 $self->_write_record($data);
703 5844         12296 $spos += $bytes_read;
704 5844         6333 $dpos += $bytes_read;
705 5844 100       20485 $len -= $bytes_read if defined $len;
706             }
707             }
708              
709             # Write $data into a block of length $len at position $pos,
710             # moving everything in the block forwards to make room.
711             # Instead of writing the last length($data) bytes from the block
712             # (because there isn't room for them any longer) return them.
713             #
714             # Undefined $len means 'until the end of the file'
715             sub _downcopy {
716 1205     1205   8870 my $blocksize = 8192;
717 1205         2288 my ($self, $data, $pos, $len) = @_;
718 1205         1587 my $fh = $self->{fh};
719              
720 1205   100     3435 while (! defined $len || $len > 0) {
721 2348 100       3897 my $readsize = ! defined($len) ? $blocksize
    100          
722             : $len > $blocksize? $blocksize : $len;
723 2348         4699 $self->_seekb($pos);
724 2348         23800 read $fh, my($old), $readsize;
725 2348         5883 my $last_read_was_short = length($old) < $readsize;
726 2348         7670 $data .= $old;
727 2348         2468 my $writable;
728 2348 100       3682 if ($last_read_was_short) {
729             # If last read was short, then $data now contains the entire rest
730             # of the file, so there's no need to write only one block of it
731 680         1122 $writable = $data;
732 680         845 $data = "";
733             } else {
734 1668         6656 $writable = substr($data, 0, $readsize, "");
735             }
736 2348 100       4177 last if $writable eq "";
737 2343         4928 $self->_seekb($pos);
738 2343         6383 $self->_write_record($writable);
739 2343 100 66     7801 last if $last_read_was_short && $data eq "";
740 1668 100       2656 $len -= $readsize if defined $len;
741 1668         4695 $pos += $readsize;
742             }
743 1205         5086 return $data;
744             }
745              
746             # Adjust the object data structures following an '_mtwrite'
747             # Arguments are
748             # [$pos, $nrecs, @length] items
749             # indicating that $nrecs records were removed at $recpos (a record offset)
750             # and replaced with records of length @length...
751             # Arguments guarantee that $recpos is strictly increasing.
752             # No return value
753             sub _oadjust {
754 708     708   864 my $self = shift;
755 708         817 my $delta = 0;
756 708         707 my $delta_recs = 0;
757 708         790 my $prev_end = -1;
758              
759 708         1146 for (@_) {
760 710         1412 my ($pos, $nrecs, @data) = @$_;
761 710         814 $pos += $delta_recs;
762              
763             # Adjust the offsets of the records after the previous batch up
764             # to the first new one of this batch
765 710         1391 for my $i ($prev_end+2 .. $pos - 1) {
766 1910         2208 $self->{offsets}[$i] += $delta;
767             }
768              
769 710         959 $prev_end = $pos + @data - 1; # last record moved on this pass
770              
771             # Remove the offsets for the removed records;
772             # replace with the offsets for the inserted records
773 710         1239 my @newoff = ($self->{offsets}[$pos] + $delta);
774 710         1225 for my $i (0 .. $#data) {
775 819         948 my $newlen = length $data[$i];
776 819         1051 push @newoff, $newoff[$i] + $newlen;
777 819         1032 $delta += $newlen;
778             }
779              
780 710         993 for my $i ($pos .. $pos+$nrecs-1) {
781 868 100       925 last if $i+1 > $#{$self->{offsets}};
  868         1839  
782 858         1342 my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
783 858         1034 $delta -= $oldlen;
784             }
785              
786             # replace old offsets with new
787 710         765 splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
  710         1574  
788             # What if we just spliced out the end of the offsets table?
789             # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO
790              
791 710         1401 $delta_recs += @data - $nrecs; # net change in total number of records
792             }
793              
794             # The trailing records at the very end of the file
795 708 100       1119 if ($delta) {
796 567         616 for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
  567         915  
797 1521         1720 $self->{offsets}[$i] += $delta;
798             }
799             }
800              
801             # If we scrubbed out all known offsets, regenerate the trivial table
802             # that knows that the file does indeed start at 0.
803 708 50       763 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
  708         1202  
804             # If the file got longer, the offsets table is no longer complete
805             # $self->{eof} = 0 if $delta_recs > 0;
806              
807             # Now there might be too much data in the cache, if we spliced out
808             # some short records and spliced in some long ones. If so, flush
809             # the cache.
810 708         1315 $self->_cache_flush;
811             }
812              
813             # If a record does not already end with the appropriate terminator
814             # string, append one.
815             sub _fixrecs {
816 756     756   888 my $self = shift;
817 756         1212 for (@_) {
818 834 100       1267 $_ = "" unless defined $_;
819             $_ .= $self->{recsep}
820 834 100       2336 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
821             }
822             }
823              
824              
825             ################################################################
826             #
827             # Basic read, write, and seek
828             #
829              
830             # seek to the beginning of record #$n
831             # Assumes that the offsets table is already correctly populated
832             #
833             # Note that $n=-1 has a special meaning here: It means the start of
834             # the last known record; this may or may not be the very last record
835             # in the file, depending on whether the offsets table is fully populated.
836             #
837             sub _seek {
838 1378     1378   1843 my ($self, $n) = @_;
839 1378         1676 my $o = $self->{offsets}[$n];
840 1378 50       2350 defined($o)
841             or confess("logic error: undefined offset for record $n");
842 1378 50       14248 seek $self->{fh}, $o, SEEK_SET
843             or confess "Couldn't seek filehandle: $!"; # "Should never happen."
844             }
845              
846             # seek to byte $b in the file
847             sub _seekb {
848 26928     26928   37028 my ($self, $b) = @_;
849 26928 50       209966 seek $self->{fh}, $b, SEEK_SET
850             or die "Couldn't seek filehandle: $!"; # "Should never happen."
851             }
852              
853             # populate the offsets table up to the beginning of record $n
854             # return the offset of record $n
855             sub _fill_offsets_to {
856 960     960   1273 my ($self, $n) = @_;
857              
858 960 100       2048 return $self->{offsets}[$n] if $self->{eof};
859              
860 18         41 my $fh = $self->{fh};
861 18         41 local *OFF = $self->{offsets};
862 18         31 my $rec;
863              
864 18         152 until ($#OFF >= $n) {
865 69         198 $self->_seek(-1); # tricky -- see comment at _seek
866 69         194 $rec = $self->_read_record;
867 69 100       155 if (defined $rec) {
868 56         155 push @OFF, int(tell $fh); # Tels says that int() saves memory here
869             } else {
870 13         32 $self->{eof} = 1;
871 13         108 return; # It turns out there is no such record
872             }
873             }
874              
875             # we have now read all the records up to record n-1,
876             # so we can return the offset of record n
877 5         15 $OFF[$n];
878             }
879              
880             sub _fill_offsets {
881 24     24   53 my ($self) = @_;
882              
883 24         44 my $fh = $self->{fh};
884 24         70 local *OFF = $self->{offsets};
885              
886 24         88 $self->_seek(-1); # tricky -- see comment at _seek
887              
888             # Tels says that inlining read_record() would make this loop
889             # five times faster. 20030508
890 24         100 while ( defined $self->_read_record()) {
891             # int() saves us memory here
892 214         466 push @OFF, int(tell $fh);
893             }
894              
895 24         59 $self->{eof} = 1;
896 24         82 $#OFF;
897             }
898              
899             # assumes that $rec is already suitably terminated
900             sub _write_record {
901 15140     15140   29353 my ($self, $rec) = @_;
902 15140         20052 my $fh = $self->{fh};
903 15140         45634 local $\ = "";
904 15140 50       219709 print $fh $rec
905             or die "Couldn't write record: $!"; # "Should never happen."
906             # $self->{_written} += length($rec);
907             }
908              
909             sub _read_record {
910 1496     1496   1961 my $self = shift;
911 1496         1459 my $rec;
912 1496         1649 { local $/ = $self->{recsep};
  1496         5030  
913 1496         1854 my $fh = $self->{fh};
914 1496         12989 $rec = <$fh>;
915             }
916 1496 100       3641 return unless defined $rec;
917 1427 100       3218 if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
918             # improperly terminated final record --- quietly fix it.
919             # my $ac = substr($rec, -$self->{recseplen});
920             # $ac =~ s/\n/\\n/g;
921 7         11 $self->{sawlastrec} = 1;
922 7 100       16 unless ($self->{rdonly}) {
923 4         12 local $\ = "";
924 4         5 my $fh = $self->{fh};
925 4         63 print $fh $self->{recsep};
926             }
927 7         15 $rec .= $self->{recsep};
928             }
929             # $self->{_read} += length($rec) if defined $rec;
930 1427         2524 $rec;
931             }
932              
933             sub _rw_stats {
934 0     0   0 my $self = shift;
935 0         0 @{$self}{'_read', '_written'};
  0         0  
936             }
937              
938             ################################################################
939             #
940             # Read cache management
941              
942             sub _cache_flush {
943 1095     1095   1475 my ($self) = @_;
944 1095         2347 $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
945             }
946              
947             sub _cache_too_full {
948 72     72   85 my $self = shift;
949 72         115 $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
950             }
951              
952             ################################################################
953             #
954             # File custodial services
955             #
956              
957              
958             # We have read to the end of the file and have the offsets table
959             # entirely populated. Now we need to write a new record beyond
960             # the end of the file. We prepare for this by writing
961             # empty records into the file up to the position we want
962             #
963             # assumes that the offsets table already contains the offset of record $n,
964             # if it exists, and extends to the end of the file if not.
965             sub _extend_file_to {
966 81     81   179 my ($self, $n) = @_;
967 81         192 $self->_seek(-1); # position after the end of the last record
968 81         191 my $pos = $self->{offsets}[-1];
969              
970             # the offsets table has one entry more than the total number of records
971 81         184 my $extras = $n - $#{$self->{offsets}};
  81         196  
972              
973             # Todo : just use $self->{recsep} x $extras here?
974 81         228 while ($extras-- > 0) {
975 228         540 $self->_write_record($self->{recsep});
976 228         432 push @{$self->{offsets}}, int(tell $self->{fh});
  228         987  
977             }
978             }
979              
980             # Truncate the file at the current position
981             sub _chop_file {
982 2238     2238   4395 my $self = shift;
983 2238         1457397 truncate $self->{fh}, tell($self->{fh});
984             }
985              
986              
987             # compute the size of a buffer suitable for moving
988             # all the data in a file forward $n bytes
989             # ($n may be negative)
990             # The result should be at least $n.
991             sub _bufsize {
992 149     149   186 my $n = shift;
993 149 100       280 return 8192 if $n <= 0;
994 78         94 my $b = $n & ~8191;
995 78 100       121 $b += 8192 if $n & 8191;
996 78         97 $b;
997             }
998              
999             ################################################################
1000             #
1001             # Miscellaneous public methods
1002             #
1003              
1004             # Lock the file
1005             sub flock {
1006 2     2 1 20 my ($self, $op) = @_;
1007 2 50       5 unless (@_ <= 3) {
1008 0         0 my $pack = ref $self;
1009 0         0 croak "Usage: $pack\->flock([OPERATION])";
1010             }
1011 2         3 my $fh = $self->{fh};
1012 2 100       4 $op = LOCK_EX unless defined $op;
1013 2         15 my $locked = flock $fh, $op;
1014              
1015 2 100 66     11 if ($locked && ($op & (LOCK_EX | LOCK_SH))) {
1016             # If you're locking the file, then presumably it's because
1017             # there might have been a write access by another process.
1018             # In that case, the read cache contents and the offsets table
1019             # might be invalid, so discard them. 20030508
1020 1         3 $self->{offsets} = [0];
1021 1         4 $self->{cache}->empty;
1022             }
1023              
1024 2         4 $locked;
1025             }
1026              
1027             # Get/set autochomp option
1028             sub autochomp {
1029 4     4 1 35 my $self = shift;
1030 4 100       9 if (@_) {
1031 2         3 my $old = $self->{autochomp};
1032 2         3 $self->{autochomp} = shift;
1033 2         4 $old;
1034             } else {
1035 2         5 $self->{autochomp};
1036             }
1037             }
1038              
1039             # Get offset table entries; returns offset of nth record
1040             sub offset {
1041 22     22 1 717 my ($self, $n) = @_;
1042              
1043 22 100       18 if ($#{$self->{offsets}} < $n) {
  22         53  
1044 4 100       14 return if $self->{eof}; # request for record beyond the end of file
1045 1         3 my $o = $self->_fill_offsets_to($n);
1046             # If it's still undefined, there is no such record, so return 'undef'
1047 1 50       5 return unless defined $o;
1048             }
1049              
1050 18         56 $self->{offsets}[$n];
1051             }
1052              
1053             sub discard_offsets {
1054 0     0 0 0 my $self = shift;
1055 0         0 $self->{offsets} = [0];
1056             }
1057              
1058             ################################################################
1059             #
1060             # Matters related to deferred writing
1061             #
1062              
1063             # Defer writes
1064             sub defer {
1065 22     22 1 955 my $self = shift;
1066 22         63 $self->_stop_autodeferring;
1067 22         28 @{$self->{ad_history}} = ();
  22         42  
1068 22         63 $self->{defer} = 1;
1069             }
1070              
1071             # Flush deferred writes
1072             #
1073             # This could be better optimized to write the file in one pass, instead
1074             # of one pass per block of records. But that will require modifications
1075             # to _twrite, so I should have a good _twrite test suite first.
1076             sub flush {
1077 16     16 1 472 my $self = shift;
1078              
1079 16         41 $self->_flush;
1080 16         48 $self->{defer} = 0;
1081             }
1082              
1083             sub _old_flush {
1084 0     0   0 my $self = shift;
1085 0         0 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  0         0  
  0         0  
1086              
1087 0         0 while (@writable) {
1088             # gather all consecutive records from the front of @writable
1089 0         0 my $first_rec = shift @writable;
1090 0         0 my $last_rec = $first_rec+1;
1091 0   0     0 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1092 0         0 --$last_rec;
1093 0         0 $self->_fill_offsets_to($last_rec);
1094 0         0 $self->_extend_file_to($last_rec);
1095             $self->_splice($first_rec, $last_rec-$first_rec+1,
1096 0         0 @{$self->{deferred}}{$first_rec .. $last_rec});
  0         0  
1097             }
1098              
1099 0         0 $self->_discard; # clear out defered-write-cache
1100             }
1101              
1102             sub _flush {
1103 23     23   37 my $self = shift;
1104 23         37 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  63         147  
  23         116  
1105 23         51 my @args;
1106             my @adjust;
1107              
1108 23         53 while (@writable) {
1109             # gather all consecutive records from the front of @writable
1110 25         33 my $first_rec = shift @writable;
1111 25         34 my $last_rec = $first_rec+1;
1112 25   100     116 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1113 25         28 --$last_rec;
1114 25         52 my $end = $self->_fill_offsets_to($last_rec+1);
1115 25 100       52 if (not defined $end) {
1116 10         27 $self->_extend_file_to($last_rec);
1117 10         19 $end = $self->{offsets}[$last_rec];
1118             }
1119 25         49 my ($start) = $self->{offsets}[$first_rec];
1120             push @args,
1121 25         54 join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
  25         90  
1122             $start, # position
1123             $end-$start; # length
1124             push @adjust, [$first_rec, # starting at this position...
1125             $last_rec-$first_rec+1, # this many records...
1126             # are replaced with these...
1127 25         55 @{$self->{deferred}}{$first_rec .. $last_rec},
  25         90  
1128             ];
1129             }
1130              
1131 23         69 $self->_mtwrite(@args); # write multiple record groups
1132 23         67 $self->_discard; # clear out defered-write-cache
1133 23         44 $self->_oadjust(@adjust);
1134             }
1135              
1136             # Discard deferred writes and disable future deferred writes
1137             sub discard {
1138 6     6 1 202 my $self = shift;
1139 6         16 $self->_discard;
1140 6         18 $self->{defer} = 0;
1141             }
1142              
1143             # Discard deferred writes, but retain old deferred writing mode
1144             sub _discard {
1145 29     29   43 my $self = shift;
1146 29         29 %{$self->{deferred}} = ();
  29         65  
1147 29         48 $self->{deferred_s} = 0;
1148 29         35 $self->{deferred_max} = -1;
1149 29         66 $self->{cache}->set_limit($self->{memory});
1150             }
1151              
1152             # Deferred writing is enabled, either explicitly ($self->{defer})
1153             # or automatically ($self->{autodeferring})
1154             sub _is_deferring {
1155 4564     4564   5704 my $self = shift;
1156 4564 100       14869 $self->{defer} || $self->{autodeferring};
1157             }
1158              
1159             # The largest record number of any deferred record
1160             sub _defer_max {
1161 592     592   667 my $self = shift;
1162 592 100       1228 return $self->{deferred_max} if defined $self->{deferred_max};
1163 1         2 my $max = -1;
1164 1         2 for my $key (keys %{$self->{deferred}}) {
  1         3  
1165 1 50       4 $max = $key if $key > $max;
1166             }
1167 1         3 $self->{deferred_max} = $max;
1168 1         2 $max;
1169             }
1170              
1171             ################################################################
1172             #
1173             # Matters related to autodeferment
1174             #
1175              
1176             # Get/set autodefer option
1177             sub autodefer {
1178 2     2 1 91 my $self = shift;
1179 2 50       9 if (@_) {
1180 2         3 my $old = $self->{autodefer};
1181 2         5 $self->{autodefer} = shift;
1182 2 100       5 if ($old) {
1183 1         3 $self->_stop_autodeferring;
1184 1         2 @{$self->{ad_history}} = ();
  1         2  
1185             }
1186 2         5 $old;
1187             } else {
1188 0         0 $self->{autodefer};
1189             }
1190             }
1191              
1192             # The user is trying to store record #$n Record that in the history,
1193             # and then enable (or disable) autodeferment if that seems useful.
1194             # Note that it's OK for $n to be a non-number, as long as the function
1195             # is prepared to deal with that. Nobody else looks at the ad_history.
1196             #
1197             # Now, what does the ad_history mean, and what is this function doing?
1198             # Essentially, the idea is to enable autodeferring when we see that the
1199             # user has made three consecutive STORE calls to three consecutive records.
1200             # ("Three" is actually ->{autodefer_threshhold}.)
1201             # A STORE call for record #$n inserts $n into the autodefer history,
1202             # and if the history contains three consecutive records, we enable
1203             # autodeferment. An ad_history of [X, Y] means that the most recent
1204             # STOREs were for records X, X+1, ..., Y, in that order.
1205             #
1206             # Inserting a nonconsecutive number erases the history and starts over.
1207             #
1208             # Performing a special operation like SPLICE erases the history.
1209             #
1210             # There's one special case: CLEAR means that CLEAR was just called.
1211             # In this case, we prime the history with [-2, -1] so that if the next
1212             # write is for record 0, autodeferring goes on immediately. This is for
1213             # the common special case of "@a = (...)".
1214             #
1215             sub _annotate_ad_history {
1216 611     611   868 my ($self, $n) = @_;
1217 611 50       995 return unless $self->{autodefer}; # feature is disabled
1218 611 100       994 return if $self->{defer}; # already in explicit defer mode
1219 545 100       1354 return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
1220              
1221 25         37 local *H = $self->{ad_history};
1222 25 100       104 if ($n eq 'CLEAR') {
    50          
1223 2         4 @H = (-2, -1); # prime the history with fake records
1224 2         6 $self->_stop_autodeferring;
1225             } elsif ($n =~ /^\d+$/) {
1226 23 100       35 if (@H == 0) {
1227 1         4 @H = ($n, $n);
1228             } else { # @H == 2
1229 22 100       45 if ($H[1] == $n-1) { # another consecutive record
1230 19         23 $H[1]++;
1231 19 100       36 if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
1232 16         27 $self->{autodeferring} = 1;
1233             }
1234             } else { # nonconsecutive- erase and start over
1235 3         7 @H = ($n, $n);
1236 3         12 $self->_stop_autodeferring;
1237             }
1238             }
1239             } else { # SPLICE or STORESIZE or some such
1240 0         0 @H = ();
1241 0         0 $self->_stop_autodeferring;
1242             }
1243             }
1244              
1245             # If autodeferring was enabled, cut it out and discard the history
1246             sub _stop_autodeferring {
1247 28     28   34 my $self = shift;
1248 28 100       65 if ($self->{autodeferring}) {
1249 5         9 $self->_flush;
1250             }
1251 28         47 $self->{autodeferring} = 0;
1252             }
1253              
1254             ################################################################
1255              
1256              
1257             # This is NOT a method. It is here for two reasons:
1258             # 1. To factor a fairly complicated block out of the constructor
1259             # 2. To provide access for the test suite, which need to be sure
1260             # files are being written properly.
1261             sub _default_recsep {
1262 2998     2998   8744 my $recsep = $/;
1263 2998 50       8588 if ($^O eq 'MSWin32') { # Dos too?
1264             # Windows users expect files to be terminated with \r\n
1265             # But $/ is set to \n instead
1266             # Note that this also transforms \n\n into \r\n\r\n.
1267             # That is a feature.
1268 0         0 $recsep =~ s/\n/\r\n/g;
1269             }
1270 2998         5980 $recsep;
1271             }
1272              
1273             # Utility function for _check_integrity
1274             sub _ci_warn {
1275 0     0   0 my $msg = shift;
1276 0         0 $msg =~ s/\n/\\n/g;
1277 0         0 $msg =~ s/\r/\\r/g;
1278 0         0 print "# $msg\n";
1279             }
1280              
1281             # Given a file, make sure the cache is consistent with the
1282             # file contents and the internal data structures are consistent with
1283             # each other. Returns true if everything checks out, false if not
1284             #
1285             # The $file argument is no longer used. It is retained for compatibility
1286             # with the existing test suite.
1287             sub _check_integrity {
1288 380     380   5660 my ($self, $file, $warn) = @_;
1289 380         622 my $rsl = $self->{recseplen};
1290 380         478 my $rs = $self->{recsep};
1291 380         395 my $good = 1;
1292 380         616 local *_; # local $_ does not work here
1293 380         499 local $DIAGNOSTIC = 1;
1294              
1295 380 50       1060 if (not defined $rs) {
    50          
    50          
1296 0         0 _ci_warn("recsep is undef!");
1297 0         0 $good = 0;
1298             } elsif ($rs eq "") {
1299 0         0 _ci_warn("recsep is empty!");
1300 0         0 $good = 0;
1301             } elsif ($rsl != length $rs) {
1302 0         0 my $ln = length $rs;
1303 0         0 _ci_warn("recsep <$rs> has length $ln, should be $rsl");
1304 0         0 $good = 0;
1305             }
1306              
1307 380 50       847 if (not defined $self->{offsets}[0]) {
    50          
1308 0         0 _ci_warn("offset 0 is missing!");
1309 0         0 $good = 0;
1310              
1311             } elsif ($self->{offsets}[0] != 0) {
1312 0         0 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
1313 0         0 $good = 0;
1314             }
1315              
1316 380         436 my $cached = 0;
1317             {
1318 380         409 local *F = $self->{fh};
  380         722  
1319 380         2646 seek F, 0, SEEK_SET;
1320 380         1544 local $. = 0;
1321 380         1109 local $/ = $rs;
1322              
1323 380         3493 while () {
1324 2132         3107 my $n = $. - 1;
1325 2132         3173 my $cached = $self->{cache}->_produce($n);
1326 2132         2755 my $offset = $self->{offsets}[$.];
1327 2132         2249 my $ao = tell F;
1328 2132 50 66     4814 if (defined $offset && $offset != $ao) {
1329 0         0 _ci_warn("rec $n: offset <$offset> actual <$ao>");
1330 0         0 $good = 0;
1331             }
1332 2132 50 66     3974 if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
      33        
1333 0         0 $good = 0;
1334 0         0 _ci_warn("rec $n: cached <$cached> actual <$_>");
1335             }
1336 2132 50 66     3840 if (defined $cached && substr($cached, -$rsl) ne $rs) {
1337 0         0 $good = 0;
1338 0         0 _ci_warn("rec $n in the cache is missing the record separator");
1339             }
1340 2132 50 66     6477 if (! defined $offset && $self->{eof}) {
1341 0         0 $good = 0;
1342 0         0 _ci_warn("The offset table was marked complete, but it is missing " .
1343             "element $.");
1344             }
1345             }
1346 380 50       702 if (@{$self->{offsets}} > $.+1) {
  380         1222  
1347 0         0 $good = 0;
1348 0         0 my $n = @{$self->{offsets}};
  0         0  
1349 0         0 _ci_warn("The offset table has $n items, but the file has only $.");
1350             }
1351              
1352 380         867 my $deferring = $self->_is_deferring;
1353 380         779 for my $n ($self->{cache}->ckeys) {
1354 938         1315 my $r = $self->{cache}->_produce($n);
1355 938         1013 $cached += length($r);
1356 938 50       1954 next if $n+1 <= $.; # checked this already
1357 0         0 _ci_warn("spurious caching of record $n");
1358 0         0 $good = 0;
1359             }
1360 380         658 my $b = $self->{cache}->bytes;
1361 380 50       1449 if ($cached != $b) {
1362 0         0 _ci_warn("cache size is $b, should be $cached");
1363 0         0 $good = 0;
1364             }
1365             }
1366              
1367             # That cache has its own set of tests
1368 380 50       694 $good = 0 unless $self->{cache}->_check_integrity;
1369              
1370             # Now let's check the deferbuffer
1371             # Unless deferred writing is enabled, it should be empty
1372 380 50 66     618 if (! $self->_is_deferring && %{$self->{deferred}}) {
  331         765  
1373 0         0 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1374 0         0 $good = 0;
1375             }
1376              
1377             # Any record in the deferbuffer should *not* be present in the readcache
1378 380         448 my $deferred_s = 0;
1379 380         435 while (my ($n, $r) = each %{$self->{deferred}}) {
  467         1187  
1380 87         95 $deferred_s += length($r);
1381 87 50       133 if (defined $self->{cache}->_produce($n)) {
1382 0         0 _ci_warn("record $n is in the deferbuffer *and* the readcache");
1383 0         0 $good = 0;
1384             }
1385 87 50       177 if (substr($r, -$rsl) ne $rs) {
1386 0         0 _ci_warn("rec $n in the deferbuffer is missing the record separator");
1387 0         0 $good = 0;
1388             }
1389             }
1390              
1391             # Total size of deferbuffer should match internal total
1392 380 50       651 if ($deferred_s != $self->{deferred_s}) {
1393 0         0 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1394 0         0 $good = 0;
1395             }
1396              
1397             # Total size of deferbuffer should not exceed the specified limit
1398 380 50       612 if ($deferred_s > $self->{dw_size}) {
1399 0         0 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit " .
1400             "of $self->{dw_size}");
1401 0         0 $good = 0;
1402             }
1403              
1404             # Total size of cached data should not exceed the specified limit
1405 380 50       614 if ($deferred_s + $cached > $self->{memory}) {
1406 0         0 my $total = $deferred_s + $cached;
1407 0         0 _ci_warn("total stored data size is $total which exceeds the limit " .
1408             "of $self->{memory}");
1409 0         0 $good = 0;
1410             }
1411              
1412             # Stuff related to autodeferment
1413 380 50 66     683 if (!$self->{autodefer} && @{$self->{ad_history}}) {
  129         275  
1414 0         0 _ci_warn("autodefer is disabled, but ad_history is nonempty");
1415 0         0 $good = 0;
1416             }
1417 380 50 66     602 if ($self->{autodeferring} && $self->{defer}) {
1418 0         0 _ci_warn("both autodeferring and explicit deferring are active");
1419 0         0 $good = 0;
1420             }
1421 380 100       405 if (@{$self->{ad_history}} == 0) {
  380 50       664  
1422             # That's OK, no additional tests required
1423 14         24 } elsif (@{$self->{ad_history}} == 2) {
1424 14         14 my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
  14         68  
1425 14 50       46 if (@non_number) {
    50          
1426 0         0 my $msg;
1427 0         0 { local $" = ')(';
  0         0  
1428 0         0 $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
  0         0  
1429             }
1430 0         0 _ci_warn($msg);
1431 0         0 $good = 0;
1432             } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1433 0         0 _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
  0         0  
1434 0         0 $good = 0;
1435             }
1436             } else {
1437 0         0 _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
  0         0  
1438 0         0 $good = 0;
1439             }
1440              
1441 380         1069 $good;
1442             }
1443              
1444             ################################################################
1445             #
1446             # Tie::File::Cache
1447             #
1448             # Read cache
1449              
1450             package Tie::File::Cache;
1451             $Tie::File::Cache::VERSION = $Tie::File::VERSION;
1452 38     38   379 use Carp ':DEFAULT', 'confess';
  38         102  
  38         7163  
1453              
1454             sub HEAP () { 0 }
1455             sub HASH () { 1 }
1456             sub MAX () { 2 }
1457             sub BYTES() { 3 }
1458             #sub STAT () { 4 } # Array with request statistics for each record
1459             #sub MISS () { 5 } # Total number of cache misses
1460             #sub REQ () { 6 } # Total number of cache requests
1461 38     38   264 use strict 'vars';
  38         224  
  38         50577  
1462              
1463             sub new {
1464 2988     2988   5131 my ($pack, $max) = @_;
1465 2988         6388 local *_;
1466 2988 50       5179 croak "missing argument to ->new" unless defined $max;
1467 2988         5003 my $self = [];
1468 2988         5099 bless $self => $pack;
1469 2988         9453 @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1470 2988         7504 $self;
1471             }
1472              
1473             sub adj_limit {
1474 77     77   107 my ($self, $n) = @_;
1475 77         96 $self->[MAX] += $n;
1476             }
1477              
1478             sub set_limit {
1479 60     60   111 my ($self, $n) = @_;
1480 60         100 $self->[MAX] = $n;
1481             }
1482              
1483             # For internal use only
1484             # Will be called by the heap structure to notify us that a certain
1485             # piece of data has moved from one heap element to another.
1486             # $k is the hash key of the item
1487             # $n is the new index into the heap at which it is stored
1488             # If $n is undefined, the item has been removed from the heap.
1489             sub _heap_move {
1490 3587     3587   4736 my ($self, $k, $n) = @_;
1491 3587 100       4417 if (defined $n) {
1492 3154         4772 $self->[HASH]{$k} = $n;
1493             } else {
1494 433         706 delete $self->[HASH]{$k};
1495             }
1496             }
1497              
1498             sub insert {
1499 1227     1227   2327 my ($self, $key, $val) = @_;
1500 1227         2300 local *_;
1501 1227 50       1801 croak "missing argument to ->insert" unless defined $key;
1502 1227 50       2303 unless (defined $self->[MAX]) {
1503 0         0 confess "undefined max" ;
1504             }
1505 1227 50       1594 confess "undefined val" unless defined $val;
1506 1227 100       1935 return if length($val) > $self->[MAX];
1507              
1508             # if ($self->[STAT]) {
1509             # $self->[STAT][$key] = 1;
1510             # return;
1511             # }
1512              
1513 1144         1382 my $oldnode = $self->[HASH]{$key};
1514 1144 50       1656 if (defined $oldnode) {
1515 0         0 my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1516 0         0 $self->[BYTES] -= length($oldval);
1517             } else {
1518 1144         2016 $self->[HEAP]->insert($key, $val);
1519             }
1520 1144         1480 $self->[BYTES] += length($val);
1521 1144 100       2519 $self->flush if $self->[BYTES] > $self->[MAX];
1522             }
1523              
1524             sub expire {
1525 56     56   145 my $self = shift;
1526 56         69 my $old_data = $self->[HEAP]->popheap;
1527 56 100       83 return unless defined $old_data;
1528 50         50 $self->[BYTES] -= length $old_data;
1529 50         87 $old_data;
1530             }
1531              
1532             sub remove {
1533 437     437   687 my ($self, @keys) = @_;
1534 437         440 my @result;
1535              
1536             # if ($self->[STAT]) {
1537             # for my $key (@keys) {
1538             # $self->[STAT][$key] = 0;
1539             # }
1540             # return;
1541             # }
1542              
1543 437         583 for my $key (@keys) {
1544 438 100       898 next unless exists $self->[HASH]{$key};
1545 339         595 my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1546 339         427 $self->[BYTES] -= length $old_data;
1547 339         573 push @result, $old_data;
1548             }
1549 437         732 @result;
1550             }
1551              
1552             sub lookup {
1553 1878     1878   2418 my ($self, $key) = @_;
1554 1878         2947 local *_;
1555 1878 50       2754 croak "missing argument to ->lookup" unless defined $key;
1556              
1557             # if ($self->[STAT]) {
1558             # $self->[MISS]++ if $self->[STAT][$key]++ == 0;
1559             # $self->[REQ]++;
1560             # my $hit_rate = 1 - $self->[MISS] / $self->[REQ];
1561             # # Do some testing to determine this threshhold
1562             # $#$self = STAT - 1 if $hit_rate > 0.20;
1563             # }
1564              
1565 1878 100       3290 if (exists $self->[HASH]{$key}) {
1566 669         1137 $self->[HEAP]->lookup($self->[HASH]{$key});
1567             } else {
1568 1209         2246 return;
1569             }
1570             }
1571              
1572             # For internal use only
1573             sub _produce {
1574 3204     3204   3979 my ($self, $key) = @_;
1575 3204         3925 my $loc = $self->[HASH]{$key};
1576 3204 100       4682 return unless defined $loc;
1577 1923         3171 $self->[HEAP][$loc][2];
1578             }
1579              
1580             # For internal use only
1581             sub _promote {
1582 5     5   16 my ($self, $key) = @_;
1583 5         9 $self->[HEAP]->promote($self->[HASH]{$key});
1584             }
1585              
1586             sub empty {
1587 86     86   7574 my ($self) = @_;
1588 86         131 %{$self->[HASH]} = ();
  86         262  
1589 86         171 $self->[BYTES] = 0;
1590 86         263 $self->[HEAP]->empty;
1591             # @{$self->[STAT]} = ();
1592             # $self->[MISS] = 0;
1593             # $self->[REQ] = 0;
1594             }
1595              
1596             sub is_empty {
1597 3     3   14 my ($self) = @_;
1598 3         3 keys %{$self->[HASH]} == 0;
  3         9  
1599             }
1600              
1601             sub update {
1602 505     505   930 my ($self, $key, $val) = @_;
1603 505         1012 local *_;
1604 505 50       833 croak "missing argument to ->update" unless defined $key;
1605 505 100       1180 if (length($val) > $self->[MAX]) {
    100          
1606 21         38 my ($oldval) = $self->remove($key);
1607 21 50       38 $self->[BYTES] -= length($oldval) if defined $oldval;
1608             } elsif (exists $self->[HASH]{$key}) {
1609 449         909 my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1610 449         612 $self->[BYTES] += length($val);
1611 449 50       861 $self->[BYTES] -= length($oldval) if defined $oldval;
1612             } else {
1613 35         117 $self->[HEAP]->insert($key, $val);
1614 35         57 $self->[BYTES] += length($val);
1615             }
1616 505         842 $self->flush;
1617             }
1618              
1619             sub rekey {
1620 386     386   566 my ($self, $okeys, $nkeys) = @_;
1621 386         676 local *_;
1622 386         411 my %map;
1623 386         673 @map{@$okeys} = @$nkeys;
1624 386 50       595 croak "missing argument to ->rekey" unless defined $nkeys;
1625 386 50       622 croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1626 386         417 my %adjusted; # map new keys to heap indices
1627             # You should be able to cut this to one loop TODO XXX
1628 386         670 for (0 .. $#$okeys) {
1629 530         1017 $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1630             }
1631 386         1287 while (my ($nk, $ix) = each %adjusted) {
1632             # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1633 530         906 $self->[HEAP]->rekey($ix, $nk);
1634 530         1291 $self->[HASH]{$nk} = $ix;
1635             }
1636             }
1637              
1638             sub ckeys {
1639 795     795   1161 my $self = shift;
1640 795         810 my @a = keys %{$self->[HASH]};
  795         1930  
1641 795         2205 @a;
1642             }
1643              
1644             # Return total amount of cached data
1645             sub bytes {
1646 507     507   814 my $self = shift;
1647 507         862 $self->[BYTES];
1648             }
1649              
1650             # Expire oldest item from cache until cache size is smaller than $max
1651             sub reduce_size_to {
1652 1131     1131   1416 my ($self, $max) = @_;
1653 1131         2354 until ($self->[BYTES] <= $max) {
1654             # Note that Tie::File::Cache::expire has been inlined here
1655 44         69 my $old_data = $self->[HEAP]->popheap;
1656 44 50       76 return unless defined $old_data;
1657 44         155 $self->[BYTES] -= length $old_data;
1658             }
1659             }
1660              
1661             # Why not just $self->reduce_size_to($self->[MAX])?
1662             # Try this when things stabilize TODO XXX
1663             # If the cache is too full, expire the oldest records
1664             sub flush {
1665 526     526   580 my $self = shift;
1666 526 100       1993 $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX];
1667             }
1668              
1669             # For internal use only
1670             sub _produce_lru {
1671 1     1   7 my $self = shift;
1672 1         4 $self->[HEAP]->expire_order;
1673             }
1674              
1675 38     38   13605 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1676              
1677             sub _check_integrity { # For CACHE
1678 406     406   650 my $self = shift;
1679 406         446 my $good = 1;
1680              
1681             # Test HEAP
1682 406 50       754 $self->[HEAP]->_check_integrity or $good = 0;
1683              
1684             # Test HASH
1685 406         479 my $bytes = 0;
1686 406         433 for my $k (keys %{$self->[HASH]}) {
  406         772  
1687 1063 50 66     3784 if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) {
1688 0         0 $good = 0;
1689 0         0 _ci_warn "Cache hash key <$k> is non-numeric";
1690             }
1691              
1692 1063         1472 my $h = $self->[HASH]{$k};
1693 1063 50       1620 if (! defined $h) {
    50          
1694 0         0 $good = 0;
1695 0         0 _ci_warn "Heap index number for key $k is undefined";
1696             } elsif ($h == 0) {
1697 0         0 $good = 0;
1698 0         0 _ci_warn "Heap index number for key $k is zero";
1699             } else {
1700 1063         1131 my $j = $self->[HEAP][$h];
1701 1063 50       1178 if (! defined $j) {
1702 0         0 $good = 0;
1703 0         0 _ci_warn "Heap contents key $k (=> $h) are undefined";
1704             } else {
1705 1063         1130 $bytes += length($j->[2]);
1706 1063 50       1778 if ($k ne $j->[1]) {
1707 0         0 $good = 0;
1708 0         0 _ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k";
1709             }
1710             }
1711             }
1712             }
1713              
1714             # Test BYTES
1715 406 50       721 if ($bytes != $self->[BYTES]) {
1716 0         0 $good = 0;
1717 0         0 _ci_warn "Total data in cache is $bytes, expected $self->[BYTES]";
1718             }
1719              
1720             # Test MAX
1721 406 50       641 if ($bytes > $self->[MAX]) {
1722 0         0 $good = 0;
1723 0         0 _ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]";
1724             }
1725              
1726 406         650 return $good;
1727             }
1728              
1729             sub delink {
1730 2986     2986   3766 my $self = shift;
1731 2986         8688 $self->[HEAP] = undef; # Bye bye heap
1732             }
1733              
1734             ################################################################
1735             #
1736             # Tie::File::Heap
1737             #
1738             # Heap data structure for use by cache LRU routines
1739              
1740             package Tie::File::Heap;
1741 38     38   260 use Carp ':DEFAULT', 'confess';
  38         72  
  38         46914  
1742             $Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1743             sub SEQ () { 0 };
1744             sub KEY () { 1 };
1745             sub DAT () { 2 };
1746              
1747             sub new {
1748 2988     2988   4448 my ($pack, $cache) = @_;
1749             die "$pack: Parent cache object $cache does not support _heap_move method"
1750 2988 50       4791 unless eval { $cache->can('_heap_move') };
  2988         10727  
1751 2988         6623 my $self = [[0,$cache,0]];
1752 2988         9037 bless $self => $pack;
1753             }
1754              
1755             # Allocate a new sequence number, larger than all previously allocated numbers
1756             sub _nseq {
1757 2302     2302   2630 my $self = shift;
1758 2302         3483 $self->[0][0]++;
1759             }
1760              
1761             sub _cache {
1762 0     0   0 my $self = shift;
1763 0         0 $self->[0][1];
1764             }
1765              
1766             sub _nelts {
1767 0     0   0 my $self = shift;
1768 0         0 $self->[0][2];
1769             }
1770              
1771             sub _nelts_inc {
1772 1179     1179   1288 my $self = shift;
1773 1179         1656 ++$self->[0][2];
1774             }
1775              
1776             sub _nelts_dec {
1777 433     433   479 my $self = shift;
1778 433         506 --$self->[0][2];
1779             }
1780              
1781             sub is_empty {
1782 0     0   0 my $self = shift;
1783 0         0 $self->_nelts == 0;
1784             }
1785              
1786             sub empty {
1787 86     86   116 my $self = shift;
1788 86         394 $#$self = 0;
1789 86         129 $self->[0][2] = 0;
1790 86         153 $self->[0][0] = 0; # might as well reset the sequence numbers
1791             }
1792              
1793             # notify the parent cache object that we moved something
1794             sub _heap_move {
1795 0     0   0 my $self = shift;
1796 0         0 $self->_cache->_heap_move(@_);
1797             }
1798              
1799             # Insert a piece of data into the heap with the indicated sequence number.
1800             # The item with the smallest sequence number is always at the top.
1801             # If no sequence number is specified, allocate a new one and insert the
1802             # item at the bottom.
1803             sub insert {
1804 1179     1179   1741 my ($self, $key, $data, $seq) = @_;
1805 1179 50       2292 $seq = $self->_nseq unless defined $seq;
1806 1179         2884 $self->_insert_new([$seq, $key, $data]);
1807             }
1808              
1809             # Insert a new, fresh item at the bottom of the heap
1810             sub _insert_new {
1811 1179     1179   1521 my ($self, $item) = @_;
1812 1179         1468 my $i = @$self;
1813 1179         2880 $i = int($i/2) until defined $self->[$i/2];
1814 1179         1607 $self->[$i] = $item;
1815 1179         2530 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1816 1179         1757 $self->_nelts_inc;
1817             }
1818              
1819             # Insert [$data, $seq] pair at or below item $i in the heap.
1820             # If $i is omitted, default to 1 (the top element.)
1821             sub _insert {
1822 0     0   0 my ($self, $item, $i) = @_;
1823             # $self->_check_loc($i) if defined $i;
1824 0 0       0 $i = 1 unless defined $i;
1825 0         0 until (! defined $self->[$i]) {
1826 0 0       0 if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1827 0         0 ($self->[$i], $item) = ($item, $self->[$i]);
1828 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1829             }
1830             # If either is undefined, go that way. Otherwise, choose at random
1831 0         0 my $dir;
1832 0 0       0 $dir = 0 if !defined $self->[2*$i];
1833 0 0       0 $dir = 1 if !defined $self->[2*$i+1];
1834 0 0       0 $dir = int(rand(2)) unless defined $dir;
1835 0         0 $i = 2*$i + $dir;
1836             }
1837 0         0 $self->[$i] = $item;
1838 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1839 0         0 $self->_nelts_inc;
1840             }
1841              
1842             # Remove the item at node $i from the heap, moving child items upwards.
1843             # The item with the smallest sequence number is always at the top.
1844             # Moving items upwards maintains this condition.
1845             # Return the removed item. Return undef if there was no item at node $i.
1846             sub remove {
1847 439     439   615 my ($self, $i) = @_;
1848 439 50       618 $i = 1 unless defined $i;
1849 439         484 my $top = $self->[$i];
1850 439 100       625 return unless defined $top;
1851 433         445 while (1) {
1852 916         912 my $ii;
1853 916         1170 my ($L, $R) = (2*$i, 2*$i+1);
1854              
1855             # If either is undefined, go the other way.
1856             # Otherwise, go towards the smallest.
1857 916 100 100     1995 last unless defined $self->[$L] || defined $self->[$R];
1858 483 100       683 $ii = $R if not defined $self->[$L];
1859 483 100       668 $ii = $L if not defined $self->[$R];
1860 483 100       799 unless (defined $ii) {
1861 158 100       274 $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1862             }
1863              
1864 483         511 $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1865 483         854 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1866 483         516 $i = $ii; # Fill new vacated spot
1867             }
1868 433         843 $self->[0][1]->_heap_move($top->[KEY], undef);
1869 433         467 undef $self->[$i];
1870 433         746 $self->_nelts_dec;
1871 433         838 return $top->[DAT];
1872             }
1873              
1874             sub popheap {
1875 100     100   101 my $self = shift;
1876 100         135 $self->remove(1);
1877             }
1878              
1879             # set the sequence number of the indicated item to a higher number
1880             # than any other item in the heap, and bubble the item down to the
1881             # bottom.
1882             sub promote {
1883 1123     1123   1387 my ($self, $n) = @_;
1884             # $self->_check_loc($n);
1885 1123         1602 $self->[$n][SEQ] = $self->_nseq;
1886 1123         1300 my $i = $n;
1887 1123         1144 while (1) {
1888 1869         2440 my ($L, $R) = (2*$i, 2*$i+1);
1889 1869         1913 my $dir;
1890 1869 100 100     4526 last unless defined $self->[$L] || defined $self->[$R];
1891 746 100       1036 $dir = $R unless defined $self->[$L];
1892 746 100       1052 $dir = $L unless defined $self->[$R];
1893 746 100       1069 unless (defined $dir) {
1894 471 100       730 $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1895             }
1896 746         770 @{$self}[$i, $dir] = @{$self}[$dir, $i];
  746         924  
  746         925  
1897 746         990 for ($i, $dir) {
1898 1492 50       2706 $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1899             }
1900 746         799 $i = $dir;
1901             }
1902             }
1903              
1904             # Return item $n from the heap, promoting its LRU status
1905             sub lookup {
1906 669     669   864 my ($self, $n) = @_;
1907             # $self->_check_loc($n);
1908 669         757 my $val = $self->[$n];
1909 669         1159 $self->promote($n);
1910 669         1402 $val->[DAT];
1911             }
1912              
1913              
1914             # Assign a new value for node $n, promoting it to the bottom of the heap
1915             sub set_val {
1916 449     449   639 my ($self, $n, $val) = @_;
1917             # $self->_check_loc($n);
1918 449         630 my $oval = $self->[$n][DAT];
1919 449         542 $self->[$n][DAT] = $val;
1920 449         839 $self->promote($n);
1921 449         755 return $oval;
1922             }
1923              
1924             # The hash key has changed for an item;
1925             # alter the heap's record of the hash key
1926             sub rekey {
1927 530     530   632 my ($self, $n, $new_key) = @_;
1928             # $self->_check_loc($n);
1929 530         719 $self->[$n][KEY] = $new_key;
1930             }
1931              
1932             sub _check_loc {
1933 0     0   0 my ($self, $n) = @_;
1934 0         0 unless (1 || defined $self->[$n]) {
1935             confess "_check_loc($n) failed";
1936             }
1937             }
1938              
1939 38     38   16765 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1940              
1941             sub _check_integrity {
1942 406     406   414 my $self = shift;
1943 406         409 my $good = 1;
1944 406         429 my %seq;
1945              
1946 406 50       517 unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
  406         1379  
1947 0         0 _ci_warn "Element 0 of heap corrupt";
1948 0         0 $good = 0;
1949             }
1950 406 50       761 $good = 0 unless $self->_satisfies_heap_condition(1);
1951 406         457 for my $i (2 .. $#{$self}) {
  406         791  
1952 1933         2276 my $p = int($i/2); # index of parent node
1953 1933 50 66     3477 if (defined $self->[$i] && ! defined $self->[$p]) {
1954 0         0 _ci_warn "Element $i of heap defined, but parent $p isn't";
1955 0         0 $good = 0;
1956             }
1957              
1958 1933 100       2760 if (defined $self->[$i]) {
1959 824 50       1289 if ($seq{$self->[$i][SEQ]}) {
1960 0         0 my $seq = $self->[$i][SEQ];
1961 0         0 _ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq";
1962 0         0 $good = 0;
1963             } else {
1964 824         1423 $seq{$self->[$i][SEQ]} = $i;
1965             }
1966             }
1967             }
1968              
1969 406         887 return $good;
1970             }
1971              
1972             sub _satisfies_heap_condition {
1973 1230     1230   1282 my $self = shift;
1974 1230   50     1630 my $n = shift || 1;
1975 1230         1240 my $good = 1;
1976 1230         1406 for (0, 1) {
1977 2460         2764 my $c = $n*2 + $_;
1978 2460 100       3719 next unless defined $self->[$c];
1979 824 50       1240 if ($self->[$n][SEQ] >= $self->[$c]) {
1980 0         0 _ci_warn "Node $n of heap does not predate node $c";
1981 0         0 $good = 0 ;
1982             }
1983 824 50       1146 $good = 0 unless $self->_satisfies_heap_condition($c);
1984             }
1985 1230         2407 return $good;
1986             }
1987              
1988             # Return a list of all the values, sorted by expiration order
1989             sub expire_order {
1990 1     1   2 my $self = shift;
1991 1         4 my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
  3         8  
1992 1         3 map { $_->[KEY] } @nodes;
  3         8  
1993             }
1994              
1995             sub _nodes {
1996 7     7   9 my $self = shift;
1997 7   100     12 my $i = shift || 1;
1998 7 100       38 return unless defined $self->[$i];
1999 3         8 ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
2000             }
2001              
2002             1;
2003              
2004             __END__