File Coverage

lib/Tie/File.pm
Criterion Covered Total %
statement 846 996 84.9
branch 350 458 76.4
condition 56 82 68.2
subroutine 100 110 90.9
pod 7 8 87.5
total 1359 1654 82.1


line stmt bran cond sub pod time code
1              
2             package Tie::File;
3             require 5.005;
4 37     37   201926 use Carp ':DEFAULT', 'confess';
  37         300  
  37         5882  
5 37     37   12725 use POSIX 'SEEK_SET';
  37         208954  
  37         210  
6 37     37   38152 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
  37         68  
  37         281634  
7             sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
8              
9              
10             $VERSION = "1.05";
11             my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
12             my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
13             my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
14              
15             my %good_opt = map {$_ => 1, "-$_" => 1}
16             qw(memory dw_size mode recsep discipline
17             autodefer autochomp autodefer_threshhold concurrent);
18              
19             sub TIEARRAY {
20 2985 50   2985   1614489 if (@_ % 2 != 0) {
21 0         0 croak "usage: tie \@array, $_[0], filename, [option => value]...";
22             }
23 2985         7762 my ($pack, $file, %opts) = @_;
24              
25             # transform '-foo' keys into 'foo' keys
26 2985         10507 for my $key (keys %opts) {
27 57 50       183 unless ($good_opt{$key}) {
28 0         0 croak("$pack: Unrecognized option '$key'\n");
29             }
30 57         294 my $okey = $key;
31 57 50       202 if ($key =~ s/^-+//) {
32 0         0 $opts{$key} = delete $opts{$okey};
33             }
34             }
35              
36 2985 50       7727 if ($opts{concurrent}) {
37 0         0 croak("$pack: concurrent access not supported yet\n");
38             }
39              
40 2985 100       6687 unless (defined $opts{memory}) {
41             # default is the larger of the default cache size and the
42             # deferred-write buffer size (if specified)
43 2981         5229 $opts{memory} = $DEFAULT_MEMORY_SIZE;
44             $opts{memory} = $opts{dw_size}
45 2981 50 33     7136 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
46             # Dora Winifred Read
47             }
48 2985 100       6795 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
49 2985 50       6072 if ($opts{dw_size} > $opts{memory}) {
50 0         0 croak("$pack: dw_size may not be larger than total memory allocation\n");
51             }
52             # are we in deferred-write mode?
53 2985 50       6347 $opts{defer} = 0 unless defined $opts{defer};
54 2985         5640 $opts{deferred} = {}; # no records are presently deferred
55 2985         4986 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
56 2985         4815 $opts{deferred_max} = -1; # empty
57              
58             # What's a good way to arrange that this class can be overridden?
59 2985         15466 $opts{cache} = Tie::File::Cache->new($opts{memory});
60              
61             # autodeferment is enabled by default
62 2985 100       11006 $opts{autodefer} = 1 unless defined $opts{autodefer};
63 2985         5029 $opts{autodeferring} = 0; # but is not initially active
64 2985         5028 $opts{ad_history} = [];
65             $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
66 2985 50       6418 unless defined $opts{autodefer_threshhold};
67             $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
68 2985 50       6520 unless defined $opts{autodefer_filelen_threshhold};
69              
70 2985         5454 $opts{offsets} = [0];
71 2985         5363 $opts{filename} = $file;
72 2985 100       6008 unless (defined $opts{recsep}) {
73 2972         6616 $opts{recsep} = _default_recsep();
74             }
75 2985         8878 $opts{recseplen} = length($opts{recsep});
76 2985 50       6039 if ($opts{recseplen} == 0) {
77 0         0 croak "Empty record separator not supported by $pack";
78             }
79              
80 2985 100       5988 $opts{autochomp} = 1 unless defined $opts{autochomp};
81              
82 2985 100       5943 $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
83 2985         6835 $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
84 2985         4668 $opts{sawlastrec} = undef;
85              
86 2985         4032 my $fh;
87              
88 2985 100       15571 if (UNIVERSAL::isa($file, 'GLOB')) {
    50          
89             # We use 1 here on the theory that some systems
90             # may not indicate failure if we use 0.
91             # MSWin32 does not indicate failure with 0, but I don't know if
92             # it will indicate failure with 1 or not.
93 2 100       18 unless (seek $file, 1, SEEK_SET) {
94 1         227 croak "$pack: your filehandle does not appear to be seekable";
95             }
96 1         6 seek $file, 0, SEEK_SET; # put it back
97 1         3 $fh = $file; # setting binmode is the user's problem
98             } elsif (ref $file) {
99 0         0 croak "usage: tie \@array, $pack, filename, [option => value]...";
100             } else {
101             # $fh = \do { local *FH }; # XXX this is buggy
102 2983 50       6789 if ($] < 5.006) {
103             # perl 5.005 and earlier don't autovivify filehandles
104 0         0 require Symbol;
105 0         0 $fh = Symbol::gensym();
106             }
107 2983 50       106230 sysopen $fh, $file, $opts{mode}, 0666 or return;
108 2983         13777 binmode $fh;
109 2983         8100 ++$opts{ourfh};
110             }
111 2984         4458 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  2984         11106  
  2984         7782  
  2984         9002  
112 2984 50 33     8218 if (defined $opts{discipline} && $] >= 5.006) {
113             # This avoids a compile-time warning under 5.005
114 0         0 eval 'binmode($fh, $opts{discipline})';
115 0 0       0 croak $@ if $@ =~ /unknown discipline/i;
116 0 0       0 die if $@;
117             }
118 2984         5425 $opts{fh} = $fh;
119              
120 2984         18587 bless \%opts => $pack;
121             }
122              
123             sub FETCH {
124 995     995   9650 my ($self, $n) = @_;
125 995         1249 my $rec;
126              
127             # check the defer buffer
128 995 100       2107 $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n};
129 995 100       2313 $rec = $self->_fetch($n) unless defined $rec;
130              
131             # inlined _chomp1
132             substr($rec, - $self->{recseplen}) = ""
133 995 100 100     3637 if defined $rec && $self->{autochomp};
134 995         3864 $rec;
135             }
136              
137             # Chomp many records in-place; return nothing useful
138             sub _chomp {
139 144     144   210 my $self = shift;
140 144 100       315 return unless $self->{autochomp};
141 47 50       100 if ($self->{autochomp}) {
142 47         85 for (@_) {
143 65 50       108 next unless defined;
144 65         133 substr($_, - $self->{recseplen}) = "";
145             }
146             }
147             }
148              
149             # Chomp one record in-place; return modified record
150             sub _chomp1 {
151 242     242   451 my ($self, $rec) = @_;
152 242 100       618 return $rec unless $self->{autochomp};
153 217 100       519 return unless defined $rec;
154 135         282 substr($rec, - $self->{recseplen}) = "";
155 135         468 $rec;
156             }
157              
158             sub _fetch {
159 1753     1753   2736 my ($self, $n) = @_;
160              
161             # check the record cache
162 1753         2169 { my $cached = $self->{cache}->lookup($n);
  1753         3384  
163 1753 100       3961 return $cached if defined $cached;
164             }
165              
166 1157 100       1395 if ($#{$self->{offsets}} < $n) {
  1157         2480  
167 22 100       97 return if $self->{eof}; # request for record beyond end of file
168 10         37 my $o = $self->_fill_offsets_to($n);
169             # If it's still undefined, there is no such record, so return 'undef'
170 10 100       35 return unless defined $o;
171             }
172              
173 1139         1722 my $fh = $self->{FH};
174 1139         2688 $self->_seek($n); # we can do this now that offsets is populated
175 1139         3138 my $rec = $self->_read_record;
176              
177             # If we happen to have just read the first record, check to see if
178             # the length of the record matches what 'tell' says. If not, Tie::File
179             # won't work, and should drop dead.
180             #
181             # if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
182             # if (defined $self->{discipline}) {
183             # croak "I/O discipline $self->{discipline} not supported";
184             # } else {
185             # croak "File encoding not supported";
186             # }
187             # }
188              
189 1139 100 66     5541 $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
190 1139         2343 $rec;
191             }
192              
193             sub STORE {
194 330     330   4322 my ($self, $n, $rec) = @_;
195 330 50       664 die "STORE called from _check_integrity!" if $DIAGNOSTIC;
196              
197 330         821 $self->_fixrecs($rec);
198              
199 330 100       729 if ($self->{autodefer}) {
200 190         408 $self->_annotate_ad_history($n);
201             }
202              
203 330 100       607 return $self->_store_deferred($n, $rec) if $self->_is_deferring;
204              
205              
206             # We need this to decide whether the new record will fit
207             # It incidentally populates the offsets table
208             # Note we have to do this before we alter the cache
209             # 20020324 Wait, but this DOES alter the cache. TODO BUG?
210 259         596 my $oldrec = $self->_fetch($n);
211              
212 259 100       553 if (not defined $oldrec) {
213             # We're storing a record beyond the end of the file
214 40         123 $self->_extend_file_to($n+1);
215 40         117 $oldrec = $self->{recsep};
216             }
217             # return if $oldrec eq $rec; # don't bother
218 259         464 my $len_diff = length($rec) - length($oldrec);
219              
220             # length($oldrec) here is not consistent with text mode TODO XXX BUG
221 259         783 $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
222 259         1058 $self->_oadjust([$n, 1, $rec]);
223 259         754 $self->{cache}->update($n, $rec);
224             }
225              
226             sub _store_deferred {
227 73     73   135 my ($self, $n, $rec) = @_;
228 73         165 $self->{cache}->remove($n);
229 73         118 my $old_deferred = $self->{deferred}{$n};
230              
231 73 100 100     275 if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
232 68         109 $self->{deferred_max} = $n;
233             }
234 73         138 $self->{deferred}{$n} = $rec;
235              
236 73         107 my $len_diff = length($rec);
237 73 100       136 $len_diff -= length($old_deferred) if defined $old_deferred;
238 73         107 $self->{deferred_s} += $len_diff;
239 73         184 $self->{cache}->adj_limit(-$len_diff);
240 73 100       204 if ($self->{deferred_s} > $self->{dw_size}) {
    100          
241 1         4 $self->_flush;
242             } elsif ($self->_cache_too_full) {
243 2         13 $self->_cache_flush;
244             }
245             }
246              
247             # Remove a single record from the deferred-write buffer without writing it
248             # The record need not be present
249             sub _delete_deferred {
250 6     6   11 my ($self, $n) = @_;
251 6         15 my $rec = delete $self->{deferred}{$n};
252 6 100       22 return unless defined $rec;
253              
254 4 50 33     20 if (defined $self->{deferred_max}
255             && $n == $self->{deferred_max}) {
256 4         10 undef $self->{deferred_max};
257             }
258              
259 4         16 $self->{deferred_s} -= length $rec;
260 4         11 $self->{cache}->adj_limit(length $rec);
261             }
262              
263             sub FETCHSIZE {
264 530     530   1647 my $self = shift;
265 530 100       1072 my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets;
  508         895  
266              
267 530         1071 my $top_deferred = $self->_defer_max;
268 530 100 66     1907 $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
269 530         1137 $n;
270             }
271              
272             sub STORESIZE {
273 16     16   432 my ($self, $len) = @_;
274              
275 16 100       53 if ($self->{autodefer}) {
276 15         34 $self->_annotate_ad_history('STORESIZE');
277             }
278              
279 16         40 my $olen = $self->FETCHSIZE;
280 16 50       41 return if $len == $olen; # Woo-hoo!
281              
282             # file gets longer
283 16 100       39 if ($len > $olen) {
284 6 100       13 if ($self->_is_deferring) {
285 1         4 for ($olen .. $len-1) {
286 2         5 $self->_store_deferred($_, $self->{recsep});
287             }
288             } else {
289 5         13 $self->_extend_file_to($len);
290             }
291 6         23 return;
292             }
293              
294             # file gets shorter
295 10 100       28 if ($self->_is_deferring) {
296             # TODO maybe replace this with map-plus-assignment?
297 2         4 for (grep $_ >= $len, keys %{$self->{deferred}}) {
  2         14  
298 2         7 $self->_delete_deferred($_);
299             }
300 2         5 $self->{deferred_max} = $len-1;
301             }
302              
303 10         32 $self->_seek($len);
304 10         52 $self->_chop_file;
305 10         37 $#{$self->{offsets}} = $len;
  10         42  
306             # $self->{offsets}[0] = 0; # in case we just chopped this
307              
308 10         46 $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys);
309             }
310              
311             ### OPTIMIZE ME
312             ### It should not be necessary to do FETCHSIZE
313             ### Just seek to the end of the file.
314             sub PUSH {
315 5     5   151 my $self = shift;
316 5         14 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
317              
318             # No need to return:
319             # $self->FETCHSIZE; # because av.c takes care of this for me
320             }
321              
322             sub POP {
323 3     3   111 my $self = shift;
324 3         7 my $size = $self->FETCHSIZE;
325 3 100       8 return if $size == 0;
326             # print STDERR "# POPPITY POP POP POP\n";
327 2         9 scalar $self->SPLICE($size-1, 1);
328             }
329              
330             sub SHIFT {
331 3     3   113 my $self = shift;
332 3         7 scalar $self->SPLICE(0, 1);
333             }
334              
335             sub UNSHIFT {
336 3     3   155 my $self = shift;
337 3         23 $self->SPLICE(0, 0, @_);
338             # $self->FETCHSIZE; # av.c takes care of this for me
339             }
340              
341             sub CLEAR {
342 31     31   1323 my $self = shift;
343              
344 31 100       94 if ($self->{autodefer}) {
345 21         152 $self->_annotate_ad_history('CLEAR');
346             }
347              
348 31         110 $self->_seekb(0);
349 31         134 $self->_chop_file;
350 31         181 $self->{cache}->set_limit($self->{memory});
351 31         94 $self->{cache}->empty;
352 31         54 @{$self->{offsets}} = (0);
  31         86  
353 31         51 %{$self->{deferred}}= ();
  31         58  
354 31         55 $self->{deferred_s} = 0;
355 31         133 $self->{deferred_max} = -1;
356             }
357              
358             sub EXTEND {
359 28     28   103 my ($self, $n) = @_;
360              
361             # No need to pre-extend anything in this case
362 28 100       69 return if $self->_is_deferring;
363              
364 26         89 $self->_fill_offsets_to($n);
365 26         81 $self->_extend_file_to($n);
366             }
367              
368             sub DELETE {
369 9     9   431 my ($self, $n) = @_;
370              
371 9 100       59 if ($self->{autodefer}) {
372 4         8 $self->_annotate_ad_history('DELETE');
373             }
374              
375 9         21 my $lastrec = $self->FETCHSIZE-1;
376 9         23 my $rec = $self->FETCH($n);
377 9 100       25 $self->_delete_deferred($n) if $self->_is_deferring;
378 9 100       29 if ($n == $lastrec) {
    100          
379 4         15 $self->_seek($n);
380 4         17 $self->_chop_file;
381 4         12 $#{$self->{offsets}}--;
  4         18  
382 4         23 $self->{cache}->remove($n);
383             # perhaps in this case I should also remove trailing null records?
384             # 20020316
385             # Note that delete @a[-3..-1] deletes the records in the wrong order,
386             # so we only chop the very last one out of the file. We could repair this
387             # by tracking deleted records inside the object.
388             } elsif ($n < $lastrec) {
389 4         11 $self->STORE($n, "");
390             }
391 9         158 $rec;
392             }
393              
394             sub EXISTS {
395 11     11   409 my ($self, $n) = @_;
396 11 100       31 return 1 if exists $self->{deferred}{$n};
397 10         20 $n < $self->FETCHSIZE;
398             }
399              
400             sub SPLICE {
401 388     388   11051 my $self = shift;
402              
403 388 100       955 if ($self->{autodefer}) {
404 334         713 $self->_annotate_ad_history('SPLICE');
405             }
406              
407 388 100       750 $self->_flush if $self->_is_deferring; # move this up?
408 388 100       758 if (wantarray) {
409 144         316 $self->_chomp(my @a = $self->_splice(@_));
410 144         642 @a;
411             } else {
412 244         629 $self->_chomp1(scalar $self->_splice(@_));
413             }
414             }
415              
416             sub DESTROY {
417 2984     2984   29480 my $self = shift;
418 2984 100       6790 $self->flush if $self->_is_deferring;
419 2984 50       11207 $self->{cache}->delink if defined $self->{cache}; # break circular link
420 2984 100 66     11763 if ($self->{fh} and $self->{ourfh}) {
421 2983         6479 delete $self->{ourfh};
422 2983         53147 close delete $self->{fh};
423             }
424             }
425              
426             sub _splice {
427 388     388   880 my ($self, $pos, $nrecs, @data) = @_;
428 388         526 my @result;
429              
430 388 100       698 $pos = 0 unless defined $pos;
431              
432             # Deal with negative and other out-of-range positions
433             # Also set default for $nrecs
434             {
435 388         511 my $oldsize = $self->FETCHSIZE;
  388         757  
436 388 100       746 $nrecs = $oldsize unless defined $nrecs;
437 388         544 my $oldpos = $pos;
438              
439 388 100       675 if ($pos < 0) {
440 73         112 $pos += $oldsize;
441 73 100       142 if ($pos < 0) {
442 2         429 croak "Modification of non-creatable array value attempted, " .
443             "subscript $oldpos";
444             }
445             }
446              
447 386 100       700 if ($pos > $oldsize) {
448 14 100       47 return unless @data;
449 8         27 $pos = $oldsize; # This is what perl does for normal arrays
450             }
451              
452             # The manual is very unclear here
453 380 100       607 if ($nrecs < 0) {
454 10         18 $nrecs = $oldsize - $pos + $nrecs;
455 10 100       21 $nrecs = 0 if $nrecs < 0;
456             }
457              
458             # nrecs is too big---it really means "until the end"
459             # 20030507
460 380 100       745 if ($nrecs + $pos > $oldsize) {
461 32         87 $nrecs = $oldsize - $pos;
462             }
463             }
464              
465 380         951 $self->_fixrecs(@data);
466 380         828 my $data = join '', @data;
467 380         518 my $datalen = length $data;
468 380         446 my $oldlen = 0;
469              
470             # compute length of data being removed
471 380         955 for ($pos .. $pos+$nrecs-1) {
472 507 50       975 last unless defined $self->_fill_offsets_to($_);
473 507         944 my $rec = $self->_fetch($_);
474 507 50       959 last unless defined $rec;
475 507         873 push @result, $rec;
476              
477             # Why don't we just use length($rec) here?
478             # Because that record might have come from the cache. _splice
479             # might have been called to flush out the deferred-write records,
480             # and in this case length($rec) is the length of the record to be
481             # *written*, not the length of the actual record in the file. But
482             # the offsets are still true. 20020322
483             $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
484 507 50       1523 if defined $self->{offsets}[$_+1];
485             }
486 380         978 $self->_fill_offsets_to($pos+$nrecs);
487              
488             # Modify the file
489 380         1030 $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
490             # Adjust the offsets table
491 380         1734 $self->_oadjust([$pos, $nrecs, @data]);
492              
493             { # Take this read cache stuff out into a separate function
494             # You made a half-attempt to put it into _oadjust.
495             # Finish something like that up eventually.
496             # STORE also needs to do something similarish
497              
498             # update the read cache, part 1
499             # modified records
500 380         702 for ($pos .. $pos+$nrecs-1) {
  380         698  
501 507         793 my $new = $data[$_-$pos];
502 507 100       876 if (defined $new) {
503 191         395 $self->{cache}->update($_, $new);
504             } else {
505 316         616 $self->{cache}->remove($_);
506             }
507             }
508            
509             # update the read cache, part 2
510             # moved records - records past the site of the change
511             # need to be renumbered
512             # Maybe merge this with the previous block?
513             {
514 380         497 my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
  380         783  
515 380         853 my @newkeys = map $_-$nrecs+@data, @oldkeys;
516 380         853 $self->{cache}->rekey(\@oldkeys, \@newkeys);
517             }
518              
519             # Now there might be too much data in the cache, if we spliced out
520             # some short records and spliced in some long ones. If so, flush
521             # the cache.
522 380         875 $self->_cache_flush;
523             }
524              
525             # Yes, the return value of 'splice' *is* actually this complicated
526 380 100       1672 wantarray ? @result : @result ? $result[-1] : undef;
    100          
527             }
528              
529              
530             # write data into the file
531             # $data is the data to be written.
532             # it should be written at position $pos, and should overwrite
533             # exactly $len of the following bytes.
534             # Note that if length($data) > $len, the subsequent bytes will have to
535             # be moved up, and if length($data) < $len, they will have to
536             # be moved down
537             sub _twrite {
538 179     179   1190 my ($self, $data, $pos, $len) = @_;
539              
540 179 50       433 unless (defined $pos) {
541 0         0 die "\$pos was undefined in _twrite";
542             }
543              
544 179         303 my $len_diff = length($data) - $len;
545              
546 179 100       365 if ($len_diff == 0) { # Woo-hoo!
547 30         61 my $fh = $self->{fh};
548 30         77 $self->_seekb($pos);
549 30         104 $self->_write_record($data);
550 30         92 return; # well, that was easy.
551             }
552              
553             # the two records are of different lengths
554             # our strategy here: rewrite the tail of the file,
555             # reading ahead one buffer at a time
556             # $bufsize is required to be at least as large as the data we're overwriting
557 149         338 my $bufsize = _bufsize($len_diff);
558 149         333 my ($writepos, $readpos) = ($pos, $pos+$len);
559 149         236 my $next_block;
560             my $more_data;
561              
562             # Seems like there ought to be a way to avoid the repeated code
563             # and the special case here. The read(1) is also a little weird.
564             # Think about this.
565 149         225 do {
566 376         961 $self->_seekb($readpos);
567 376         4568 my $br = read $self->{fh}, $next_block, $bufsize;
568 376         2327 $more_data = read $self->{fh}, my($dummy), 1;
569 376         1095 $self->_seekb($writepos);
570 376         1201 $self->_write_record($data);
571 376         782 $readpos += $br;
572 376         513 $writepos += length $data;
573 376         1277 $data = $next_block;
574             } while $more_data;
575 149         379 $self->_seekb($writepos);
576 149         453 $self->_write_record($next_block);
577              
578             # There might be leftover data at the end of the file
579 149 100       577 $self->_chop_file if $len_diff < 0;
580             }
581              
582             # _iwrite(D, S, E)
583             # Insert text D at position S.
584             # Let C = E-S-|D|. If C < 0; die.
585             # Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).
586             # Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.
587             #
588             # In a later version, don't read the entire intervening area into
589             # memory at once; do the copying block by block.
590             sub _iwrite {
591 101     101   481 my $self = shift;
592 101         263 my ($D, $s, $e) = @_;
593 101         180 my $d = length $D;
594 101         157 my $c = $e-$s-$d;
595 101         281 local *FH = $self->{fh};
596 101 50       216 confess "Not enough space to insert $d bytes between $s and $e"
597             if $c < 0;
598 101 50       164 confess "[$s,$e) is an invalid insertion range" if $e < $s;
599              
600 101         233 $self->_seekb($s);
601 101         1711 read FH, my $buf, $e-$s;
602              
603 101         787 $D .= substr($buf, 0, $c, "");
604              
605 101         283 $self->_seekb($s);
606 101         368 $self->_write_record($D);
607              
608 101         698 return $buf;
609             }
610              
611             # Like _twrite, but the data-pos-len triple may be repeated; you may
612             # write several chunks. All the writing will be done in
613             # one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
614             sub _mtwrite {
615 2913     2913   12611 my $self = shift;
616 2913         4809 my $unwritten = "";
617 2913         4020 my $delta = 0;
618              
619 2913 50       7204 @_ % 3 == 0
620             or die "Arguments to _mtwrite did not come in groups of three";
621              
622 2913         7114 while (@_) {
623 5097         15290 my ($data, $pos, $len) = splice @_, 0, 3;
624 5097         7771 my $end = $pos + $len; # The OLD end of the segment to be replaced
625 5097         19776 $data = $unwritten . $data;
626 5097         9704 $delta -= length($unwritten);
627 5097         7946 $unwritten = "";
628 5097         6901 $pos += $delta; # This is where the data goes now
629 5097         7191 my $dlen = length $data;
630 5097         13100 $self->_seekb($pos);
631 5097 100       13982 if ($len >= $dlen) { # the data will fit
632 4240         11188 $self->_write_record($data);
633 4240         8904 $delta += ($dlen - $len); # everything following moves down by this much
634 4240         7275 $data = ""; # All the data in the buffer has been written
635             } else { # won't fit
636 857         2562 my $writable = substr($data, 0, $len - $delta, "");
637 857         2256 $self->_write_record($writable);
638 857         2177 $delta += ($dlen - $len); # everything following moves down by this much
639             }
640              
641             # At this point we've written some but maybe not all of the data.
642             # There might be a gap to close up, or $data might still contain a
643             # bunch of unwritten data that didn't fit.
644 5097         7584 my $ndlen = length $data;
645 5097 100       11152 if ($delta == 0) {
    100          
646 885         1771 $self->_write_record($data);
647             } elsif ($delta < 0) {
648             # upcopy (close up gap)
649 3368 100       6009 if (@_) {
650 1263         3472 $self->_upcopy($end, $end + $delta, $_[1] - $end);
651             } else {
652 2105         5034 $self->_upcopy($end, $end + $delta);
653             }
654             } else {
655             # downcopy (insert data that didn't fit; replace this data in memory
656             # with _later_ data that doesn't fit)
657 844 100       1587 if (@_) {
658 346         905 $unwritten = $self->_downcopy($data, $end, $_[1] - $end);
659             } else {
660             # Make the file longer to accommodate the last segment that doesn't
661 498         1152 $unwritten = $self->_downcopy($data, $end);
662             }
663             }
664             }
665             }
666              
667             # Copy block of data of length $len from position $spos to position $dpos
668             # $dpos must be <= $spos
669             #
670             # If $len is undefined, go all the way to the end of the file
671             # and then truncate it ($spos - $dpos bytes will be removed)
672             sub _upcopy {
673 3422     3422   5972 my $blocksize = 8192;
674 3422         6790 my ($self, $spos, $dpos, $len) = @_;
675 3422 50       8335 if ($dpos > $spos) {
    100          
676 0         0 die "source ($spos) was upstream of destination ($dpos) in _upcopy";
677             } elsif ($dpos == $spos) {
678 16         45 return;
679             }
680              
681 3406   100     11002 while (! defined ($len) || $len > 0) {
682 7962 100       15106 my $readsize = ! defined($len) ? $blocksize
    100          
683             : $len > $blocksize ? $blocksize
684             : $len;
685            
686 7962         13971 my $fh = $self->{fh};
687 7962         20164 $self->_seekb($spos);
688 7962         80408 my $bytes_read = read $fh, my($data), $readsize;
689 7962         24649 $self->_seekb($dpos);
690 7962 100       20968 if ($data eq "") {
691 2119         5521 $self->_chop_file;
692 2119         21941 last;
693             }
694 5843         13697 $self->_write_record($data);
695 5843         12302 $spos += $bytes_read;
696 5843         7531 $dpos += $bytes_read;
697 5843 100       23642 $len -= $bytes_read if defined $len;
698             }
699             }
700              
701             # Write $data into a block of length $len at position $pos,
702             # moving everything in the block forwards to make room.
703             # Instead of writing the last length($data) bytes from the block
704             # (because there isn't room for them any longer) return them.
705             #
706             # Undefined $len means 'until the end of the file'
707             sub _downcopy {
708 1202     1202   10525 my $blocksize = 8192;
709 1202         2797 my ($self, $data, $pos, $len) = @_;
710 1202         2061 my $fh = $self->{fh};
711              
712 1202   100     3957 while (! defined $len || $len > 0) {
713 2345 100       4642 my $readsize = ! defined($len) ? $blocksize
    100          
714             : $len > $blocksize? $blocksize : $len;
715 2345         5738 $self->_seekb($pos);
716 2345         25066 read $fh, my($old), $readsize;
717 2345         6202 my $last_read_was_short = length($old) < $readsize;
718 2345         8269 $data .= $old;
719 2345         3108 my $writable;
720 2345 100       4223 if ($last_read_was_short) {
721             # If last read was short, then $data now contains the entire rest
722             # of the file, so there's no need to write only one block of it
723 677         1392 $writable = $data;
724 677         1073 $data = "";
725             } else {
726 1668         6790 $writable = substr($data, 0, $readsize, "");
727             }
728 2345 100       5036 last if $writable eq "";
729 2340         6007 $self->_seekb($pos);
730 2340         7301 $self->_write_record($writable);
731 2340 100 66     8766 last if $last_read_was_short && $data eq "";
732 1668 100       3041 $len -= $readsize if defined $len;
733 1668         5180 $pos += $readsize;
734             }
735 1202         5973 return $data;
736             }
737              
738             # Adjust the object data structures following an '_mtwrite'
739             # Arguments are
740             # [$pos, $nrecs, @length] items
741             # indicating that $nrecs records were removed at $recpos (a record offset)
742             # and replaced with records of length @length...
743             # Arguments guarantee that $recpos is strictly increasing.
744             # No return value
745             sub _oadjust {
746 662     662   1016 my $self = shift;
747 662         854 my $delta = 0;
748 662         805 my $delta_recs = 0;
749 662         852 my $prev_end = -1;
750 662         845 my %newkeys;
751              
752 662         1228 for (@_) {
753 664         1627 my ($pos, $nrecs, @data) = @$_;
754 664         909 $pos += $delta_recs;
755              
756             # Adjust the offsets of the records after the previous batch up
757             # to the first new one of this batch
758 664         1564 for my $i ($prev_end+2 .. $pos - 1) {
759 1047         1476 $self->{offsets}[$i] += $delta;
760 1047         1930 $newkey{$i} = $i + $delta_recs;
761             }
762              
763 664         1111 $prev_end = $pos + @data - 1; # last record moved on this pass
764              
765             # Remove the offsets for the removed records;
766             # replace with the offsets for the inserted records
767 664         1360 my @newoff = ($self->{offsets}[$pos] + $delta);
768 664         1308 for my $i (0 .. $#data) {
769 767         1138 my $newlen = length $data[$i];
770 767         1217 push @newoff, $newoff[$i] + $newlen;
771 767         1199 $delta += $newlen;
772             }
773              
774 664         1342 for my $i ($pos .. $pos+$nrecs-1) {
775 822 100       1079 last if $i+1 > $#{$self->{offsets}};
  822         1809  
776 812         1454 my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
777 812         1189 $delta -= $oldlen;
778             }
779              
780             # # also this data has changed, so update it in the cache
781             # for (0 .. $#data) {
782             # $self->{cache}->update($pos + $_, $data[$_]);
783             # }
784             # if ($delta_recs) {
785             # my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys;
786             # my @newkeys = map $_ + $delta_recs, @oldkeys;
787             # $self->{cache}->rekey(\@oldkeys, \@newkeys);
788             # }
789              
790             # replace old offsets with new
791 664         868 splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
  664         1627  
792             # What if we just spliced out the end of the offsets table?
793             # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO
794              
795 664         1461 $delta_recs += @data - $nrecs; # net change in total number of records
796             }
797              
798             # The trailing records at the very end of the file
799 662 100       1305 if ($delta) {
800 562         789 for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
  562         1149  
801 1393         1894 $self->{offsets}[$i] += $delta;
802             }
803             }
804              
805             # If we scrubbed out all known offsets, regenerate the trivial table
806             # that knows that the file does indeed start at 0.
807 662 50       900 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
  662         1353  
808             # If the file got longer, the offsets table is no longer complete
809             # $self->{eof} = 0 if $delta_recs > 0;
810              
811             # Now there might be too much data in the cache, if we spliced out
812             # some short records and spliced in some long ones. If so, flush
813             # the cache.
814 662         1445 $self->_cache_flush;
815             }
816              
817             # If a record does not already end with the appropriate terminator
818             # string, append one.
819             sub _fixrecs {
820 710     710   980 my $self = shift;
821 710         1415 for (@_) {
822 782 100       1383 $_ = "" unless defined $_;
823             $_ .= $self->{recsep}
824 782 100       2622 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
825             }
826             }
827              
828              
829             ################################################################
830             #
831             # Basic read, write, and seek
832             #
833              
834             # seek to the beginning of record #$n
835             # Assumes that the offsets table is already correctly populated
836             #
837             # Note that $n=-1 has a special meaning here: It means the start of
838             # the last known record; this may or may not be the very last record
839             # in the file, depending on whether the offsets table is fully populated.
840             #
841             sub _seek {
842 1283     1283   1937 my ($self, $n) = @_;
843 1283         1881 my $o = $self->{offsets}[$n];
844 1283 50       2233 defined($o)
845             or confess("logic error: undefined offset for record $n");
846 1283 50       13324 seek $self->{fh}, $o, SEEK_SET
847             or confess "Couldn't seek filehandle: $!"; # "Should never happen."
848             }
849              
850             # seek to byte $b in the file
851             sub _seekb {
852 26870     26870   42704 my ($self, $b) = @_;
853 26870 50       211375 seek $self->{fh}, $b, SEEK_SET
854             or die "Couldn't seek filehandle: $!"; # "Should never happen."
855             }
856              
857             # populate the offsets table up to the beginning of record $n
858             # return the offset of record $n
859             sub _fill_offsets_to {
860 949     949   1530 my ($self, $n) = @_;
861              
862 949 100       2410 return $self->{offsets}[$n] if $self->{eof};
863              
864 17         56 my $fh = $self->{fh};
865 17         60 local *OFF = $self->{offsets};
866 17         33 my $rec;
867              
868 17         76 until ($#OFF >= $n) {
869 27         95 $self->_seek(-1); # tricky -- see comment at _seek
870 27         96 $rec = $self->_read_record;
871 27 100       91 if (defined $rec) {
872 14         61 push @OFF, int(tell $fh); # Tels says that int() saves memory here
873             } else {
874 13         35 $self->{eof} = 1;
875 13         42 return; # It turns out there is no such record
876             }
877             }
878              
879             # we have now read all the records up to record n-1,
880             # so we can return the offset of record n
881 4         13 $OFF[$n];
882             }
883              
884             sub _fill_offsets {
885 22     22   82 my ($self) = @_;
886              
887 22         47 my $fh = $self->{fh};
888 22         77 local *OFF = $self->{offsets};
889              
890 22         92 $self->_seek(-1); # tricky -- see comment at _seek
891              
892             # Tels says that inlining read_record() would make this loop
893             # five times faster. 20030508
894 22         91 while ( defined $self->_read_record()) {
895             # int() saves us memory here
896 166         441 push @OFF, int(tell $fh);
897             }
898              
899 22         67 $self->{eof} = 1;
900 22         71 $#OFF;
901             }
902              
903             # assumes that $rec is already suitably terminated
904             sub _write_record {
905 15049     15049   33759 my ($self, $rec) = @_;
906 15049         24428 my $fh = $self->{fh};
907 15049         52150 local $\ = "";
908 15049 50       222407 print $fh $rec
909             or die "Couldn't write record: $!"; # "Should never happen."
910             # $self->{_written} += length($rec);
911             }
912              
913             sub _read_record {
914 1354     1354   1941 my $self = shift;
915 1354         1607 my $rec;
916 1354         1748 { local $/ = $self->{recsep};
  1354         5378  
917 1354         1991 my $fh = $self->{fh};
918 1354         11548 $rec = <$fh>;
919             }
920 1354 100       3555 return unless defined $rec;
921 1287 100       3648 if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
922             # improperly terminated final record --- quietly fix it.
923             # my $ac = substr($rec, -$self->{recseplen});
924             # $ac =~ s/\n/\\n/g;
925 7         12 $self->{sawlastrec} = 1;
926 7 100       16 unless ($self->{rdonly}) {
927 4         11 local $\ = "";
928 4         7 my $fh = $self->{fh};
929 4         67 print $fh $self->{recsep};
930             }
931 7         21 $rec .= $self->{recsep};
932             }
933             # $self->{_read} += length($rec) if defined $rec;
934 1287         2734 $rec;
935             }
936              
937             sub _rw_stats {
938 0     0   0 my $self = shift;
939 0         0 @{$self}{'_read', '_written'};
  0         0  
940             }
941              
942             ################################################################
943             #
944             # Read cache management
945              
946             sub _cache_flush {
947 1044     1044   1696 my ($self) = @_;
948 1044         2497 $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
949             }
950              
951             sub _cache_too_full {
952 72     72   104 my $self = shift;
953 72         132 $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
954             }
955              
956             ################################################################
957             #
958             # File custodial services
959             #
960              
961              
962             # We have read to the end of the file and have the offsets table
963             # entirely populated. Now we need to write a new record beyond
964             # the end of the file. We prepare for this by writing
965             # empty records into the file up to the position we want
966             #
967             # assumes that the offsets table already contains the offset of record $n,
968             # if it exists, and extends to the end of the file if not.
969             sub _extend_file_to {
970 81     81   163 my ($self, $n) = @_;
971 81         225 $self->_seek(-1); # position after the end of the last record
972 81         274 my $pos = $self->{offsets}[-1];
973              
974             # the offsets table has one entry more than the total number of records
975 81         145 my $extras = $n - $#{$self->{offsets}};
  81         209  
976              
977             # Todo : just use $self->{recsep} x $extras here?
978 81         258 while ($extras-- > 0) {
979 228         666 $self->_write_record($self->{recsep});
980 228         480 push @{$self->{offsets}}, int(tell $self->{fh});
  228         1153  
981             }
982             }
983              
984             # Truncate the file at the current position
985             sub _chop_file {
986 2235     2235   3493 my $self = shift;
987 2235         659838 truncate $self->{fh}, tell($self->{fh});
988             }
989              
990              
991             # compute the size of a buffer suitable for moving
992             # all the data in a file forward $n bytes
993             # ($n may be negative)
994             # The result should be at least $n.
995             sub _bufsize {
996 149     149   273 my $n = shift;
997 149 100       375 return 8192 if $n <= 0;
998 78         107 my $b = $n & ~8191;
999 78 100       171 $b += 8192 if $n & 8191;
1000 78         126 $b;
1001             }
1002              
1003             ################################################################
1004             #
1005             # Miscellaneous public methods
1006             #
1007              
1008             # Lock the file
1009             sub flock {
1010 2     2 1 22 my ($self, $op) = @_;
1011 2 50       6 unless (@_ <= 3) {
1012 0         0 my $pack = ref $self;
1013 0         0 croak "Usage: $pack\->flock([OPERATION])";
1014             }
1015 2         3 my $fh = $self->{fh};
1016 2 100       5 $op = LOCK_EX unless defined $op;
1017 2         17 my $locked = flock $fh, $op;
1018              
1019 2 100 66     12 if ($locked && ($op & (LOCK_EX | LOCK_SH))) {
1020             # If you're locking the file, then presumably it's because
1021             # there might have been a write access by another process.
1022             # In that case, the read cache contents and the offsets table
1023             # might be invalid, so discard them. 20030508
1024 1         3 $self->{offsets} = [0];
1025 1         4 $self->{cache}->empty;
1026             }
1027              
1028 2         5 $locked;
1029             }
1030              
1031             # Get/set autochomp option
1032             sub autochomp {
1033 4     4 1 31 my $self = shift;
1034 4 100       10 if (@_) {
1035 2         5 my $old = $self->{autochomp};
1036 2         4 $self->{autochomp} = shift;
1037 2         6 $old;
1038             } else {
1039 2         5 $self->{autochomp};
1040             }
1041             }
1042              
1043             # Get offset table entries; returns offset of nth record
1044             sub offset {
1045 22     22 1 894 my ($self, $n) = @_;
1046              
1047 22 100       25 if ($#{$self->{offsets}} < $n) {
  22         64  
1048 4 100       20 return if $self->{eof}; # request for record beyond the end of file
1049 1         4 my $o = $self->_fill_offsets_to($n);
1050             # If it's still undefined, there is no such record, so return 'undef'
1051 1 50       8 return unless defined $o;
1052             }
1053              
1054 18         79 $self->{offsets}[$n];
1055             }
1056              
1057             sub discard_offsets {
1058 0     0 0 0 my $self = shift;
1059 0         0 $self->{offsets} = [0];
1060             }
1061              
1062             ################################################################
1063             #
1064             # Matters related to deferred writing
1065             #
1066              
1067             # Defer writes
1068             sub defer {
1069 22     22 1 1153 my $self = shift;
1070 22         71 $self->_stop_autodeferring;
1071 22         35 @{$self->{ad_history}} = ();
  22         45  
1072 22         69 $self->{defer} = 1;
1073             }
1074              
1075             # Flush deferred writes
1076             #
1077             # This could be better optimized to write the file in one pass, instead
1078             # of one pass per block of records. But that will require modifications
1079             # to _twrite, so I should have a good _twrite test suite first.
1080             sub flush {
1081 16     16 1 549 my $self = shift;
1082              
1083 16         45 $self->_flush;
1084 16         62 $self->{defer} = 0;
1085             }
1086              
1087             sub _old_flush {
1088 0     0   0 my $self = shift;
1089 0         0 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  0         0  
  0         0  
1090              
1091 0         0 while (@writable) {
1092             # gather all consecutive records from the front of @writable
1093 0         0 my $first_rec = shift @writable;
1094 0         0 my $last_rec = $first_rec+1;
1095 0   0     0 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1096 0         0 --$last_rec;
1097 0         0 $self->_fill_offsets_to($last_rec);
1098 0         0 $self->_extend_file_to($last_rec);
1099             $self->_splice($first_rec, $last_rec-$first_rec+1,
1100 0         0 @{$self->{deferred}}{$first_rec .. $last_rec});
  0         0  
1101             }
1102              
1103 0         0 $self->_discard; # clear out defered-write-cache
1104             }
1105              
1106             sub _flush {
1107 23     23   30 my $self = shift;
1108 23         44 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
  55         173  
  23         104  
1109 23         47 my @args;
1110             my @adjust;
1111              
1112 23         59 while (@writable) {
1113             # gather all consecutive records from the front of @writable
1114 25         55 my $first_rec = shift @writable;
1115 25         41 my $last_rec = $first_rec+1;
1116 25   100     161 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1117 25         38 --$last_rec;
1118 25         68 my $end = $self->_fill_offsets_to($last_rec+1);
1119 25 100       53 if (not defined $end) {
1120 10         29 $self->_extend_file_to($last_rec);
1121 10         22 $end = $self->{offsets}[$last_rec];
1122             }
1123 25         58 my ($start) = $self->{offsets}[$first_rec];
1124             push @args,
1125 25         56 join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
  25         111  
1126             $start, # position
1127             $end-$start; # length
1128             push @adjust, [$first_rec, # starting at this position...
1129             $last_rec-$first_rec+1, # this many records...
1130             # are replaced with these...
1131 25         158 @{$self->{deferred}}{$first_rec .. $last_rec},
  25         146  
1132             ];
1133             }
1134              
1135 23         82 $self->_mtwrite(@args); # write multiple record groups
1136 23         72 $self->_discard; # clear out defered-write-cache
1137 23         59 $self->_oadjust(@adjust);
1138             }
1139              
1140             # Discard deferred writes and disable future deferred writes
1141             sub discard {
1142 6     6 1 238 my $self = shift;
1143 6         22 $self->_discard;
1144 6         21 $self->{defer} = 0;
1145             }
1146              
1147             # Discard deferred writes, but retain old deferred writing mode
1148             sub _discard {
1149 29     29   43 my $self = shift;
1150 29         69 %{$self->{deferred}} = ();
  29         87  
1151 29         51 $self->{deferred_s} = 0;
1152 29         58 $self->{deferred_max} = -1;
1153 29         71 $self->{cache}->set_limit($self->{memory});
1154             }
1155              
1156             # Deferred writing is enabled, either explicitly ($self->{defer})
1157             # or automatically ($self->{autodeferring})
1158             sub _is_deferring {
1159 4515     4515   6287 my $self = shift;
1160 4515 100       16774 $self->{defer} || $self->{autodeferring};
1161             }
1162              
1163             # The largest record number of any deferred record
1164             sub _defer_max {
1165 530     530   717 my $self = shift;
1166 530 100       1347 return $self->{deferred_max} if defined $self->{deferred_max};
1167 1         2 my $max = -1;
1168 1         2 for my $key (keys %{$self->{deferred}}) {
  1         3  
1169 1 50       5 $max = $key if $key > $max;
1170             }
1171 1         3 $self->{deferred_max} = $max;
1172 1         2 $max;
1173             }
1174              
1175             ################################################################
1176             #
1177             # Matters related to autodeferment
1178             #
1179              
1180             # Get/set autodefer option
1181             sub autodefer {
1182 2     2 1 102 my $self = shift;
1183 2 50       8 if (@_) {
1184 2         4 my $old = $self->{autodefer};
1185 2         3 $self->{autodefer} = shift;
1186 2 100       8 if ($old) {
1187 1         3 $self->_stop_autodeferring;
1188 1         2 @{$self->{ad_history}} = ();
  1         3  
1189             }
1190 2         5 $old;
1191             } else {
1192 0         0 $self->{autodefer};
1193             }
1194             }
1195              
1196             # The user is trying to store record #$n Record that in the history,
1197             # and then enable (or disable) autodeferment if that seems useful.
1198             # Note that it's OK for $n to be a non-number, as long as the function
1199             # is prepared to deal with that. Nobody else looks at the ad_history.
1200             #
1201             # Now, what does the ad_history mean, and what is this function doing?
1202             # Essentially, the idea is to enable autodeferring when we see that the
1203             # user has made three consecutive STORE calls to three consecutive records.
1204             # ("Three" is actually ->{autodefer_threshhold}.)
1205             # A STORE call for record #$n inserts $n into the autodefer history,
1206             # and if the history contains three consecutive records, we enable
1207             # autodeferment. An ad_history of [X, Y] means that the most recent
1208             # STOREs were for records X, X+1, ..., Y, in that order.
1209             #
1210             # Inserting a nonconsecutive number erases the history and starts over.
1211             #
1212             # Performing a special operation like SPLICE erases the history.
1213             #
1214             # There's one special case: CLEAR means that CLEAR was just called.
1215             # In this case, we prime the history with [-2, -1] so that if the next
1216             # write is for record 0, autodeferring goes on immediately. This is for
1217             # the common special case of "@a = (...)".
1218             #
1219             sub _annotate_ad_history {
1220 564     564   1023 my ($self, $n) = @_;
1221 564 50       1087 return unless $self->{autodefer}; # feature is disabled
1222 564 100       1069 return if $self->{defer}; # already in explicit defer mode
1223 498 100       1243 return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
1224              
1225 25         45 local *H = $self->{ad_history};
1226 25 100       128 if ($n eq 'CLEAR') {
    50          
1227 2         5 @H = (-2, -1); # prime the history with fake records
1228 2         7 $self->_stop_autodeferring;
1229             } elsif ($n =~ /^\d+$/) {
1230 23 100       48 if (@H == 0) {
1231 1         5 @H = ($n, $n);
1232             } else { # @H == 2
1233 22 100       42 if ($H[1] == $n-1) { # another consecutive record
1234 19         38 $H[1]++;
1235 19 100       48 if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
1236 16         34 $self->{autodeferring} = 1;
1237             }
1238             } else { # nonconsecutive- erase and start over
1239 3         7 @H = ($n, $n);
1240 3         22 $self->_stop_autodeferring;
1241             }
1242             }
1243             } else { # SPLICE or STORESIZE or some such
1244 0         0 @H = ();
1245 0         0 $self->_stop_autodeferring;
1246             }
1247             }
1248              
1249             # If autodeferring was enabled, cut it out and discard the history
1250             sub _stop_autodeferring {
1251 28     28   41 my $self = shift;
1252 28 100       80 if ($self->{autodeferring}) {
1253 5         11 $self->_flush;
1254             }
1255 28         102 $self->{autodeferring} = 0;
1256             }
1257              
1258             ################################################################
1259              
1260              
1261             # This is NOT a method. It is here for two reasons:
1262             # 1. To factor a fairly complicated block out of the constructor
1263             # 2. To provide access for the test suite, which need to be sure
1264             # files are being written properly.
1265             sub _default_recsep {
1266 2996     2996   8090 my $recsep = $/;
1267 2996 50       9413 if ($^O eq 'MSWin32') { # Dos too?
1268             # Windows users expect files to be terminated with \r\n
1269             # But $/ is set to \n instead
1270             # Note that this also transforms \n\n into \r\n\r\n.
1271             # That is a feature.
1272 0         0 $recsep =~ s/\n/\r\n/g;
1273             }
1274 2996         7155 $recsep;
1275             }
1276              
1277             # Utility function for _check_integrity
1278             sub _ci_warn {
1279 0     0   0 my $msg = shift;
1280 0         0 $msg =~ s/\n/\\n/g;
1281 0         0 $msg =~ s/\r/\\r/g;
1282 0         0 print "# $msg\n";
1283             }
1284              
1285             # Given a file, make sure the cache is consistent with the
1286             # file contents and the internal data structures are consistent with
1287             # each other. Returns true if everything checks out, false if not
1288             #
1289             # The $file argument is no longer used. It is retained for compatibility
1290             # with the existing test suite.
1291             sub _check_integrity {
1292 380     380   6523 my ($self, $file, $warn) = @_;
1293 380         674 my $rsl = $self->{recseplen};
1294 380         591 my $rs = $self->{recsep};
1295 380         523 my $good = 1;
1296 380         684 local *_; # local $_ does not work here
1297 380         599 local $DIAGNOSTIC = 1;
1298              
1299 380 50       1277 if (not defined $rs) {
    50          
    50          
1300 0         0 _ci_warn("recsep is undef!");
1301 0         0 $good = 0;
1302             } elsif ($rs eq "") {
1303 0         0 _ci_warn("recsep is empty!");
1304 0         0 $good = 0;
1305             } elsif ($rsl != length $rs) {
1306 0         0 my $ln = length $rs;
1307 0         0 _ci_warn("recsep <$rs> has length $ln, should be $rsl");
1308 0         0 $good = 0;
1309             }
1310              
1311 380 50       948 if (not defined $self->{offsets}[0]) {
    50          
1312 0         0 _ci_warn("offset 0 is missing!");
1313 0         0 $good = 0;
1314              
1315             } elsif ($self->{offsets}[0] != 0) {
1316 0         0 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
1317 0         0 $good = 0;
1318             }
1319              
1320 380         511 my $cached = 0;
1321             {
1322 380         461 local *F = $self->{fh};
  380         811  
1323 380         2943 seek F, 0, SEEK_SET;
1324 380         1697 local $. = 0;
1325 380         1132 local $/ = $rs;
1326              
1327 380         3433 while () {
1328 2132         3652 my $n = $. - 1;
1329 2132         3750 my $cached = $self->{cache}->_produce($n);
1330 2132         3277 my $offset = $self->{offsets}[$.];
1331 2132         2704 my $ao = tell F;
1332 2132 50 66     5825 if (defined $offset && $offset != $ao) {
1333 0         0 _ci_warn("rec $n: offset <$offset> actual <$ao>");
1334 0         0 $good = 0;
1335             }
1336 2132 50 66     4552 if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
      33        
1337 0         0 $good = 0;
1338 0         0 _ci_warn("rec $n: cached <$cached> actual <$_>");
1339             }
1340 2132 50 66     4592 if (defined $cached && substr($cached, -$rsl) ne $rs) {
1341 0         0 $good = 0;
1342 0         0 _ci_warn("rec $n in the cache is missing the record separator");
1343             }
1344 2132 50 66     7169 if (! defined $offset && $self->{eof}) {
1345 0         0 $good = 0;
1346 0         0 _ci_warn("The offset table was marked complete, but it is missing " .
1347             "element $.");
1348             }
1349             }
1350 380 50       739 if (@{$self->{offsets}} > $.+1) {
  380         1407  
1351 0         0 $good = 0;
1352 0         0 my $n = @{$self->{offsets}};
  0         0  
1353 0         0 _ci_warn("The offset table has $n items, but the file has only $.");
1354             }
1355              
1356 380         1022 my $deferring = $self->_is_deferring;
1357 380         900 for my $n ($self->{cache}->ckeys) {
1358 938         1592 my $r = $self->{cache}->_produce($n);
1359 938         1313 $cached += length($r);
1360 938 50       2274 next if $n+1 <= $.; # checked this already
1361 0         0 _ci_warn("spurious caching of record $n");
1362 0         0 $good = 0;
1363             }
1364 380         842 my $b = $self->{cache}->bytes;
1365 380 50       1627 if ($cached != $b) {
1366 0         0 _ci_warn("cache size is $b, should be $cached");
1367 0         0 $good = 0;
1368             }
1369             }
1370              
1371             # That cache has its own set of tests
1372 380 50       886 $good = 0 unless $self->{cache}->_check_integrity;
1373              
1374             # Now let's check the deferbuffer
1375             # Unless deferred writing is enabled, it should be empty
1376 380 50 66     740 if (! $self->_is_deferring && %{$self->{deferred}}) {
  331         913  
1377 0         0 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1378 0         0 $good = 0;
1379             }
1380              
1381             # Any record in the deferbuffer should *not* be present in the readcache
1382 380         553 my $deferred_s = 0;
1383 380         535 while (my ($n, $r) = each %{$self->{deferred}}) {
  467         1331  
1384 87         117 $deferred_s += length($r);
1385 87 50       150 if (defined $self->{cache}->_produce($n)) {
1386 0         0 _ci_warn("record $n is in the deferbuffer *and* the readcache");
1387 0         0 $good = 0;
1388             }
1389 87 50       229 if (substr($r, -$rsl) ne $rs) {
1390 0         0 _ci_warn("rec $n in the deferbuffer is missing the record separator");
1391 0         0 $good = 0;
1392             }
1393             }
1394              
1395             # Total size of deferbuffer should match internal total
1396 380 50       832 if ($deferred_s != $self->{deferred_s}) {
1397 0         0 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1398 0         0 $good = 0;
1399             }
1400              
1401             # Total size of deferbuffer should not exceed the specified limit
1402 380 50       731 if ($deferred_s > $self->{dw_size}) {
1403 0         0 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit " .
1404             "of $self->{dw_size}");
1405 0         0 $good = 0;
1406             }
1407              
1408             # Total size of cached data should not exceed the specified limit
1409 380 50       808 if ($deferred_s + $cached > $self->{memory}) {
1410 0         0 my $total = $deferred_s + $cached;
1411 0         0 _ci_warn("total stored data size is $total which exceeds the limit " .
1412             "of $self->{memory}");
1413 0         0 $good = 0;
1414             }
1415              
1416             # Stuff related to autodeferment
1417 380 50 66     787 if (!$self->{autodefer} && @{$self->{ad_history}}) {
  129         331  
1418 0         0 _ci_warn("autodefer is disabled, but ad_history is nonempty");
1419 0         0 $good = 0;
1420             }
1421 380 50 66     723 if ($self->{autodeferring} && $self->{defer}) {
1422 0         0 _ci_warn("both autodeferring and explicit deferring are active");
1423 0         0 $good = 0;
1424             }
1425 380 100       459 if (@{$self->{ad_history}} == 0) {
  380 50       790  
1426             # That's OK, no additional tests required
1427 14         23 } elsif (@{$self->{ad_history}} == 2) {
1428 14         18 my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
  14         80  
1429 14 50       53 if (@non_number) {
    50          
1430 0         0 my $msg;
1431 0         0 { local $" = ')(';
  0         0  
1432 0         0 $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
  0         0  
1433             }
1434 0         0 _ci_warn($msg);
1435 0         0 $good = 0;
1436             } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1437 0         0 _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
  0         0  
1438 0         0 $good = 0;
1439             }
1440             } else {
1441 0         0 _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
  0         0  
1442 0         0 $good = 0;
1443             }
1444              
1445 380         1256 $good;
1446             }
1447              
1448             ################################################################
1449             #
1450             # Tie::File::Cache
1451             #
1452             # Read cache
1453              
1454             package Tie::File::Cache;
1455             $Tie::File::Cache::VERSION = $Tie::File::VERSION;
1456 37     37   350 use Carp ':DEFAULT', 'confess';
  37         72  
  37         6924  
1457              
1458             sub HEAP () { 0 }
1459             sub HASH () { 1 }
1460             sub MAX () { 2 }
1461             sub BYTES() { 3 }
1462             #sub STAT () { 4 } # Array with request statistics for each record
1463             #sub MISS () { 5 } # Total number of cache misses
1464             #sub REQ () { 6 } # Total number of cache requests
1465 37     37   300 use strict 'vars';
  37         106  
  37         51625  
1466              
1467             sub new {
1468 2986     2986   6242 my ($pack, $max) = @_;
1469 2986         7830 local *_;
1470 2986 50       7038 croak "missing argument to ->new" unless defined $max;
1471 2986         5440 my $self = [];
1472 2986         5725 bless $self => $pack;
1473 2986         11719 @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1474 2986         8700 $self;
1475             }
1476              
1477             sub adj_limit {
1478 77     77   135 my ($self, $n) = @_;
1479 77         116 $self->[MAX] += $n;
1480             }
1481              
1482             sub set_limit {
1483 60     60   188 my ($self, $n) = @_;
1484 60         183 $self->[MAX] = $n;
1485             }
1486              
1487             # For internal use only
1488             # Will be called by the heap structure to notify us that a certain
1489             # piece of data has moved from one heap element to another.
1490             # $k is the hash key of the item
1491             # $n is the new index into the heap at which it is stored
1492             # If $n is undefined, the item has been removed from the heap.
1493             sub _heap_move {
1494 3501     3501   5498 my ($self, $k, $n) = @_;
1495 3501 100       5480 if (defined $n) {
1496 3071         5567 $self->[HASH]{$k} = $n;
1497             } else {
1498 430         820 delete $self->[HASH]{$k};
1499             }
1500             }
1501              
1502             sub insert {
1503 1177     1177   2565 my ($self, $key, $val) = @_;
1504 1177         2310 local *_;
1505 1177 50       2122 croak "missing argument to ->insert" unless defined $key;
1506 1177 50       2154 unless (defined $self->[MAX]) {
1507 0         0 confess "undefined max" ;
1508             }
1509 1177 50       1953 confess "undefined val" unless defined $val;
1510 1177 100       2172 return if length($val) > $self->[MAX];
1511              
1512             # if ($self->[STAT]) {
1513             # $self->[STAT][$key] = 1;
1514             # return;
1515             # }
1516              
1517 1094         1609 my $oldnode = $self->[HASH]{$key};
1518 1094 50       1947 if (defined $oldnode) {
1519 0         0 my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1520 0         0 $self->[BYTES] -= length($oldval);
1521             } else {
1522 1094         2223 $self->[HEAP]->insert($key, $val);
1523             }
1524 1094         1671 $self->[BYTES] += length($val);
1525 1094 100       2750 $self->flush if $self->[BYTES] > $self->[MAX];
1526             }
1527              
1528             sub expire {
1529 56     56   213 my $self = shift;
1530 56         88 my $old_data = $self->[HEAP]->popheap;
1531 56 100       108 return unless defined $old_data;
1532 50         65 $self->[BYTES] -= length $old_data;
1533 50         106 $old_data;
1534             }
1535              
1536             sub remove {
1537 434     434   767 my ($self, @keys) = @_;
1538 434         540 my @result;
1539              
1540             # if ($self->[STAT]) {
1541             # for my $key (@keys) {
1542             # $self->[STAT][$key] = 0;
1543             # }
1544             # return;
1545             # }
1546              
1547 434         698 for my $key (@keys) {
1548 435 100       1054 next unless exists $self->[HASH]{$key};
1549 336         708 my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1550 336         508 $self->[BYTES] -= length $old_data;
1551 336         649 push @result, $old_data;
1552             }
1553 434         915 @result;
1554             }
1555              
1556             sub lookup {
1557 1780     1780   2854 my ($self, $key) = @_;
1558 1780         3269 local *_;
1559 1780 50       3199 croak "missing argument to ->lookup" unless defined $key;
1560              
1561             # if ($self->[STAT]) {
1562             # $self->[MISS]++ if $self->[STAT][$key]++ == 0;
1563             # $self->[REQ]++;
1564             # my $hit_rate = 1 - $self->[MISS] / $self->[REQ];
1565             # # Do some testing to determine this threshhold
1566             # $#$self = STAT - 1 if $hit_rate > 0.20;
1567             # }
1568              
1569 1780 100       3348 if (exists $self->[HASH]{$key}) {
1570 621         1301 $self->[HEAP]->lookup($self->[HASH]{$key});
1571             } else {
1572 1159         2871 return;
1573             }
1574             }
1575              
1576             # For internal use only
1577             sub _produce {
1578 3204     3204   4839 my ($self, $key) = @_;
1579 3204         4675 my $loc = $self->[HASH]{$key};
1580 3204 100       5727 return unless defined $loc;
1581 1923         3706 $self->[HEAP][$loc][2];
1582             }
1583              
1584             # For internal use only
1585             sub _promote {
1586 5     5   21 my ($self, $key) = @_;
1587 5         13 $self->[HEAP]->promote($self->[HASH]{$key});
1588             }
1589              
1590             sub empty {
1591 86     86   2418 my ($self) = @_;
1592 86         143 %{$self->[HASH]} = ();
  86         269  
1593 86         169 $self->[BYTES] = 0;
1594 86         227 $self->[HEAP]->empty;
1595             # @{$self->[STAT]} = ();
1596             # $self->[MISS] = 0;
1597             # $self->[REQ] = 0;
1598             }
1599              
1600             sub is_empty {
1601 3     3   19 my ($self) = @_;
1602 3         4 keys %{$self->[HASH]} == 0;
  3         11  
1603             }
1604              
1605             sub update {
1606 461     461   936 my ($self, $key, $val) = @_;
1607 461         1053 local *_;
1608 461 50       1182 croak "missing argument to ->update" unless defined $key;
1609 461 100       1233 if (length($val) > $self->[MAX]) {
    100          
1610 21         47 my ($oldval) = $self->remove($key);
1611 21 50       40 $self->[BYTES] -= length($oldval) if defined $oldval;
1612             } elsif (exists $self->[HASH]{$key}) {
1613 405         996 my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1614 405         673 $self->[BYTES] += length($val);
1615 405 50       856 $self->[BYTES] -= length($oldval) if defined $oldval;
1616             } else {
1617 35         119 $self->[HEAP]->insert($key, $val);
1618 35         64 $self->[BYTES] += length($val);
1619             }
1620 461         852 $self->flush;
1621             }
1622              
1623             sub rekey {
1624 381     381   660 my ($self, $okeys, $nkeys) = @_;
1625 381         826 local *_;
1626 381         483 my %map;
1627 381         759 @map{@$okeys} = @$nkeys;
1628 381 50       743 croak "missing argument to ->rekey" unless defined $nkeys;
1629 381 50       711 croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1630 381         490 my %adjusted; # map new keys to heap indices
1631             # You should be able to cut this to one loop TODO XXX
1632 381         784 for (0 .. $#$okeys) {
1633 407         958 $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1634             }
1635 381         1512 while (my ($nk, $ix) = each %adjusted) {
1636             # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1637 407         838 $self->[HEAP]->rekey($ix, $nk);
1638 407         1199 $self->[HASH]{$nk} = $ix;
1639             }
1640             }
1641              
1642             sub ckeys {
1643 789     789   1428 my $self = shift;
1644 789         1013 my @a = keys %{$self->[HASH]};
  789         2131  
1645 789         2622 @a;
1646             }
1647              
1648             # Return total amount of cached data
1649             sub bytes {
1650 507     507   1014 my $self = shift;
1651 507         1069 $self->[BYTES];
1652             }
1653              
1654             # Expire oldest item from cache until cache size is smaller than $max
1655             sub reduce_size_to {
1656 1080     1080   1652 my ($self, $max) = @_;
1657 1080         2926 until ($self->[BYTES] <= $max) {
1658             # Note that Tie::File::Cache::expire has been inlined here
1659 44         90 my $old_data = $self->[HEAP]->popheap;
1660 44 50       82 return unless defined $old_data;
1661 44         211 $self->[BYTES] -= length $old_data;
1662             }
1663             }
1664              
1665             # Why not just $self->reduce_size_to($self->[MAX])?
1666             # Try this when things stabilize TODO XXX
1667             # If the cache is too full, expire the oldest records
1668             sub flush {
1669 482     482   660 my $self = shift;
1670 482 100       2074 $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX];
1671             }
1672              
1673             # For internal use only
1674             sub _produce_lru {
1675 1     1   10 my $self = shift;
1676 1         4 $self->[HEAP]->expire_order;
1677             }
1678              
1679 37     37   15936 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1680              
1681             sub _check_integrity { # For CACHE
1682 406     406   798 my $self = shift;
1683 406         531 my $good = 1;
1684              
1685             # Test HEAP
1686 406 50       948 $self->[HEAP]->_check_integrity or $good = 0;
1687              
1688             # Test HASH
1689 406         555 my $bytes = 0;
1690 406         537 for my $k (keys %{$self->[HASH]}) {
  406         944  
1691 1063 50 66     4299 if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) {
1692 0         0 $good = 0;
1693 0         0 _ci_warn "Cache hash key <$k> is non-numeric";
1694             }
1695              
1696 1063         1618 my $h = $self->[HASH]{$k};
1697 1063 50       2062 if (! defined $h) {
    50          
1698 0         0 $good = 0;
1699 0         0 _ci_warn "Heap index number for key $k is undefined";
1700             } elsif ($h == 0) {
1701 0         0 $good = 0;
1702 0         0 _ci_warn "Heap index number for key $k is zero";
1703             } else {
1704 1063         1368 my $j = $self->[HEAP][$h];
1705 1063 50       1455 if (! defined $j) {
1706 0         0 $good = 0;
1707 0         0 _ci_warn "Heap contents key $k (=> $h) are undefined";
1708             } else {
1709 1063         1371 $bytes += length($j->[2]);
1710 1063 50       2240 if ($k ne $j->[1]) {
1711 0         0 $good = 0;
1712 0         0 _ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k";
1713             }
1714             }
1715             }
1716             }
1717              
1718             # Test BYTES
1719 406 50       918 if ($bytes != $self->[BYTES]) {
1720 0         0 $good = 0;
1721 0         0 _ci_warn "Total data in cache is $bytes, expected $self->[BYTES]";
1722             }
1723              
1724             # Test MAX
1725 406 50       769 if ($bytes > $self->[MAX]) {
1726 0         0 $good = 0;
1727 0         0 _ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]";
1728             }
1729              
1730 406         779 return $good;
1731             }
1732              
1733             sub delink {
1734 2984     2984   4344 my $self = shift;
1735 2984         9595 $self->[HEAP] = undef; # Bye bye heap
1736             }
1737              
1738             ################################################################
1739             #
1740             # Tie::File::Heap
1741             #
1742             # Heap data structure for use by cache LRU routines
1743              
1744             package Tie::File::Heap;
1745 37     37   317 use Carp ':DEFAULT', 'confess';
  37         67  
  37         49867  
1746             $Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1747             sub SEQ () { 0 };
1748             sub KEY () { 1 };
1749             sub DAT () { 2 };
1750              
1751             sub new {
1752 2986     2986   5904 my ($pack, $cache) = @_;
1753             die "$pack: Parent cache object $cache does not support _heap_move method"
1754 2986 50       6218 unless eval { $cache->can('_heap_move') };
  2986         13124  
1755 2986         7906 my $self = [[0,$cache,0]];
1756 2986         10785 bless $self => $pack;
1757             }
1758              
1759             # Allocate a new sequence number, larger than all previously allocated numbers
1760             sub _nseq {
1761 2160     2160   2979 my $self = shift;
1762 2160         3918 $self->[0][0]++;
1763             }
1764              
1765             sub _cache {
1766 0     0   0 my $self = shift;
1767 0         0 $self->[0][1];
1768             }
1769              
1770             sub _nelts {
1771 0     0   0 my $self = shift;
1772 0         0 $self->[0][2];
1773             }
1774              
1775             sub _nelts_inc {
1776 1129     1129   1507 my $self = shift;
1777 1129         1836 ++$self->[0][2];
1778             }
1779              
1780             sub _nelts_dec {
1781 430     430   567 my $self = shift;
1782 430         599 --$self->[0][2];
1783             }
1784              
1785             sub is_empty {
1786 0     0   0 my $self = shift;
1787 0         0 $self->_nelts == 0;
1788             }
1789              
1790             sub empty {
1791 86     86   160 my $self = shift;
1792 86         467 $#$self = 0;
1793 86         160 $self->[0][2] = 0;
1794 86         183 $self->[0][0] = 0; # might as well reset the sequence numbers
1795             }
1796              
1797             # notify the parent cache object that we moved something
1798             sub _heap_move {
1799 0     0   0 my $self = shift;
1800 0         0 $self->_cache->_heap_move(@_);
1801             }
1802              
1803             # Insert a piece of data into the heap with the indicated sequence number.
1804             # The item with the smallest sequence number is always at the top.
1805             # If no sequence number is specified, allocate a new one and insert the
1806             # item at the bottom.
1807             sub insert {
1808 1129     1129   2143 my ($self, $key, $data, $seq) = @_;
1809 1129 50       2538 $seq = $self->_nseq unless defined $seq;
1810 1129         3283 $self->_insert_new([$seq, $key, $data]);
1811             }
1812              
1813             # Insert a new, fresh item at the bottom of the heap
1814             sub _insert_new {
1815 1129     1129   1718 my ($self, $item) = @_;
1816 1129         1597 my $i = @$self;
1817 1129         3197 $i = int($i/2) until defined $self->[$i/2];
1818 1129         1869 $self->[$i] = $item;
1819 1129         2560 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1820 1129         2042 $self->_nelts_inc;
1821             }
1822              
1823             # Insert [$data, $seq] pair at or below item $i in the heap.
1824             # If $i is omitted, default to 1 (the top element.)
1825             sub _insert {
1826 0     0   0 my ($self, $item, $i) = @_;
1827             # $self->_check_loc($i) if defined $i;
1828 0 0       0 $i = 1 unless defined $i;
1829 0         0 until (! defined $self->[$i]) {
1830 0 0       0 if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1831 0         0 ($self->[$i], $item) = ($item, $self->[$i]);
1832 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1833             }
1834             # If either is undefined, go that way. Otherwise, choose at random
1835 0         0 my $dir;
1836 0 0       0 $dir = 0 if !defined $self->[2*$i];
1837 0 0       0 $dir = 1 if !defined $self->[2*$i+1];
1838 0 0       0 $dir = int(rand(2)) unless defined $dir;
1839 0         0 $i = 2*$i + $dir;
1840             }
1841 0         0 $self->[$i] = $item;
1842 0         0 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1843 0         0 $self->_nelts_inc;
1844             }
1845              
1846             # Remove the item at node $i from the heap, moving child items upwards.
1847             # The item with the smallest sequence number is always at the top.
1848             # Moving items upwards maintains this condition.
1849             # Return the removed item. Return undef if there was no item at node $i.
1850             sub remove {
1851 436     436   747 my ($self, $i) = @_;
1852 436 50       758 $i = 1 unless defined $i;
1853 436         603 my $top = $self->[$i];
1854 436 100       730 return unless defined $top;
1855 430         512 while (1) {
1856 912         1058 my $ii;
1857 912         1436 my ($L, $R) = (2*$i, 2*$i+1);
1858              
1859             # If either is undefined, go the other way.
1860             # Otherwise, go towards the smallest.
1861 912 100 100     2517 last unless defined $self->[$L] || defined $self->[$R];
1862 482 100       791 $ii = $R if not defined $self->[$L];
1863 482 100       770 $ii = $L if not defined $self->[$R];
1864 482 100       758 unless (defined $ii) {
1865 158 100       297 $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1866             }
1867              
1868 482         632 $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1869 482         1012 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1870 482         595 $i = $ii; # Fill new vacated spot
1871             }
1872 430         1021 $self->[0][1]->_heap_move($top->[KEY], undef);
1873 430         589 undef $self->[$i];
1874 430         878 $self->_nelts_dec;
1875 430         964 return $top->[DAT];
1876             }
1877              
1878             sub popheap {
1879 100     100   116 my $self = shift;
1880 100         171 $self->remove(1);
1881             }
1882              
1883             # set the sequence number of the indicated item to a higher number
1884             # than any other item in the heap, and bubble the item down to the
1885             # bottom.
1886             sub promote {
1887 1031     1031   1503 my ($self, $n) = @_;
1888             # $self->_check_loc($n);
1889 1031         1827 $self->[$n][SEQ] = $self->_nseq;
1890 1031         1436 my $i = $n;
1891 1031         1299 while (1) {
1892 1761         2917 my ($L, $R) = (2*$i, 2*$i+1);
1893 1761         2047 my $dir;
1894 1761 100 100     5108 last unless defined $self->[$L] || defined $self->[$R];
1895 730 100       1206 $dir = $R unless defined $self->[$L];
1896 730 100       1264 $dir = $L unless defined $self->[$R];
1897 730 100       1220 unless (defined $dir) {
1898 453 100       898 $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1899             }
1900 730         931 @{$self}[$i, $dir] = @{$self}[$dir, $i];
  730         1136  
  730         1068  
1901 730         1554 for ($i, $dir) {
1902 1460 50       3242 $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1903             }
1904 730         945 $i = $dir;
1905             }
1906             }
1907              
1908             # Return item $n from the heap, promoting its LRU status
1909             sub lookup {
1910 621     621   982 my ($self, $n) = @_;
1911             # $self->_check_loc($n);
1912 621         1277 my $val = $self->[$n];
1913 621         1369 $self->promote($n);
1914 621         1562 $val->[DAT];
1915             }
1916              
1917              
1918             # Assign a new value for node $n, promoting it to the bottom of the heap
1919             sub set_val {
1920 405     405   768 my ($self, $n, $val) = @_;
1921             # $self->_check_loc($n);
1922 405         649 my $oval = $self->[$n][DAT];
1923 405         611 $self->[$n][DAT] = $val;
1924 405         909 $self->promote($n);
1925 405         793 return $oval;
1926             }
1927              
1928             # The hash key has changed for an item;
1929             # alter the heap's record of the hash key
1930             sub rekey {
1931 407     407   597 my ($self, $n, $new_key) = @_;
1932             # $self->_check_loc($n);
1933 407         629 $self->[$n][KEY] = $new_key;
1934             }
1935              
1936             sub _check_loc {
1937 0     0   0 my ($self, $n) = @_;
1938 0         0 unless (1 || defined $self->[$n]) {
1939             confess "_check_loc($n) failed";
1940             }
1941             }
1942              
1943 37     37   18095 BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1944              
1945             sub _check_integrity {
1946 406     406   534 my $self = shift;
1947 406         528 my $good = 1;
1948 406         493 my %seq;
1949              
1950 406 50       614 unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
  406         1657  
1951 0         0 _ci_warn "Element 0 of heap corrupt";
1952 0         0 $good = 0;
1953             }
1954 406 50       923 $good = 0 unless $self->_satisfies_heap_condition(1);
1955 406         581 for my $i (2 .. $#{$self}) {
  406         943  
1956 1933         2818 my $p = int($i/2); # index of parent node
1957 1933 50 66     4258 if (defined $self->[$i] && ! defined $self->[$p]) {
1958 0         0 _ci_warn "Element $i of heap defined, but parent $p isn't";
1959 0         0 $good = 0;
1960             }
1961              
1962 1933 100       3186 if (defined $self->[$i]) {
1963 824 50       1512 if ($seq{$self->[$i][SEQ]}) {
1964 0         0 my $seq = $self->[$i][SEQ];
1965 0         0 _ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq";
1966 0         0 $good = 0;
1967             } else {
1968 824         1624 $seq{$self->[$i][SEQ]} = $i;
1969             }
1970             }
1971             }
1972              
1973 406         1070 return $good;
1974             }
1975              
1976             sub _satisfies_heap_condition {
1977 1230     1230   1562 my $self = shift;
1978 1230   50     1987 my $n = shift || 1;
1979 1230         1475 my $good = 1;
1980 1230         1734 for (0, 1) {
1981 2460         3055 my $c = $n*2 + $_;
1982 2460 100       4323 next unless defined $self->[$c];
1983 824 50       1430 if ($self->[$n][SEQ] >= $self->[$c]) {
1984 0         0 _ci_warn "Node $n of heap does not predate node $c";
1985 0         0 $good = 0 ;
1986             }
1987 824 50       1350 $good = 0 unless $self->_satisfies_heap_condition($c);
1988             }
1989 1230         2339 return $good;
1990             }
1991              
1992             # Return a list of all the values, sorted by expiration order
1993             sub expire_order {
1994 1     1   2 my $self = shift;
1995 1         3 my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
  3         8  
1996 1         3 map { $_->[KEY] } @nodes;
  3         9  
1997             }
1998              
1999             sub _nodes {
2000 7     7   9 my $self = shift;
2001 7   100     15 my $i = shift || 1;
2002 7 100       24 return unless defined $self->[$i];
2003 3         22 ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
2004             }
2005              
2006             "Cogito, ergo sum."; # don't forget to return a true value from the file
2007              
2008             __END__