File Coverage

blib/lib/Tie/DictFile.pm
Criterion Covered Total %
statement 148 184 80.4
branch 52 94 55.3
condition 7 15 46.6
subroutine 18 20 90.0
pod n/a
total 225 313 71.8


line stmt bran cond sub pod time code
1             package Tie::DictFile;
2              
3 1     1   2018 use IO::File;
  1         10988  
  1         132  
4 1     1   7 use Carp;
  1         3  
  1         51  
5              
6             $VERSION="0.03";
7              
8 1     1   16 use strict;
  1         2  
  1         33  
9              
10 1     1   5 use vars qw($AUTOLOAD $CACHE_SIZE $DEBUG $DICTIONARY $MAX_WORD_LENGTH);
  1         2  
  1         2469  
11              
12             $CACHE_SIZE=1024;
13             $DEBUG=0;
14             $DICTIONARY="/usr/share/dict/words";
15             $MAX_WORD_LENGTH=62;
16              
17              
18             #######################################################
19             ################ PUBLIC METHODS #######################
20              
21             sub TIEHASH {
22 3     3   411 my $class = shift;
23 3 50       11 my $dictionary = @_ ? shift : $DICTIONARY;
24              
25 3 50       16 my $fh = new IO::File $dictionary, "r"
26             or croak "we can't open this file for reading: $dictionary, $!";
27 3         346 return bless { fh => $fh,
28             dictionary => $dictionary}, $class ;
29             }
30              
31              
32             sub EXISTS {
33              
34 12 50   12   732 croak "this module can not support strings longer than $MAX_WORD_LENGTH characters"
35             if(length($_[1]) > $MAX_WORD_LENGTH) ;
36            
37 12         25 return defined $_[0]->_fetch($_[1]);
38              
39             }
40              
41              
42             sub FIRSTKEY {
43              
44 4     4   27 my $self = shift;
45 4         22 $self->{fh}->seek(0,SEEK_SET);
46            
47 4         34 my $word;
48 4         6 do {
49 4         11 $word= $self->_fetch_next_line();
50             } while (exists $self->{delete_from_file}->{lc($word)});
51              
52 4         14 return $word;
53             }
54              
55              
56             sub NEXTKEY {
57 8     8   12 my $self = shift;
58 8         8 my $word;
59 8         8 do {
60 8         14 $word= $self->_fetch_next_line();
61             } while (exists $self->{delete_from_file}->{lc($word)});
62 8 100       19 if(! defined $word) {
63 4         5 my($k,$v)=each %{$self->{add_to_file}};
  4         11  
64 4 50       9 if(defined $k)
65 0         0 { return $v;}
66             }
67              
68 8         27 return $word;
69             }
70              
71              
72             sub FETCH {
73 2     2   18 return $_[0]->_fetch($_[1]);
74             }
75              
76              
77             sub STORE {
78 4     4   15 my($self,$word,$value)=@_;
79              
80 4 50       11 croak "this module can not support strings longer than $MAX_WORD_LENGTH characters"
81             if(length($word) > $MAX_WORD_LENGTH) ;
82            
83 4 50       10 croak "this module can not support strings containing line feed/carriage return characters"
84             if($word =~ /[\r\n]/s);
85            
86 4 50       9 croak "you cannot store empty strings with this module"
87             if(! length($word));
88            
89 4 100       10 if(! defined $value) {
    50          
90 2         5 $self->DELETE($word);
91             } elsif(! $self->EXISTS($word)) {
92             ## because we have no previous knowledge of this word,
93             ## let's queue it for addition by DESTROY method
94 2         6 $self->{add_to_file}->{lc($word)}=$word;
95             ## clean up side-effect of failed called to EXISTS
96 2         3 delete $self->{not_in_file}->{lc($word)};
97             }
98 4         14 return $value;
99              
100             }
101              
102              
103             sub DELETE {
104 2     2   4 my $self = shift;
105 2         3 my $word = shift;
106              
107 2         3 my $lcword=lc($word);
108 2         3 my $found_word=undef;
109             ## we looked this up earlier, and it did not
110             ## exist, we don't need to do anything
111 2 50       20 if(exists $self->{not_in_file}->{$lcword}) {
    50          
    50          
    50          
    0          
112              
113             ## we have already asked to delete this
114             ## word which DOES exist physically, do nothing
115             } elsif(exists $self->{delete_from_file}->{$lcword}) {
116            
117             ## this is a word which does NOT exist physically, and
118             ## was queued for addition (by DESTROY method), so let's
119             ## just remove it from the queue
120             } elsif(exists $self->{add_to_file}->{$lcword}) {
121 0         0 delete $self->{add_to_file}->{$lcword};
122              
123             ## this is a word which DOES exist physically,
124             ## let's queue it for removeal (by DESTROY method)
125             } elsif(exists $self->{in_file}->{$lcword}) {
126 2         11 $self->{delete_from_file}->{$lcword}=$self->{in_file}->{$lcword}[0];
127              
128             ## we don't know about thiw word, if we can look it up
129             ## let's queue it for removal
130             } elsif(defined ($found_word= $self->_exists_in_file($word))) {
131 0         0 $self->{delete_from_file}->{$lcword}=$found_word;
132             }
133 2         4 return undef;
134             }
135              
136              
137             sub UNTIE {
138 3     3   8 $_[0]->DESTROY();
139             }
140              
141             sub DESTROY {
142 6     6   7 my $self = shift;
143              
144 6 100 66     7 if(keys %{$self->{delete_from_file}}
  6         20  
  2         7  
145             or keys %{$self->{add_to_file}}) {
146              
147 4 50       67 croak "requested changes to dictionary, but do not have write permissions on: " . $self->{dictionary}
148             if(! -w $self->{dictionary});
149            
150 4         5 my @operations;
151 4         6 foreach my $word (sort(keys %{$self->{add_to_file}},
  4         9  
  4         17  
152             keys %{$self->{delete_from_file}})) {
153 8 100       16 if(exists $self->{add_to_file}->{$word}) {
154 4 50       11 croak "fatal error, we should have an insertion position for: $word"
155             if(! exists $self->{insert_pos}->{$word});
156 4         14 push(@operations,['insert_at',
157             $self->{insert_pos}->{$word},
158             $self->{add_to_file}->{$word}]);
159            
160             } else {
161 4 0 33     10 croak "we should be able to find this word: $word"
162             if(! exists $self->{in_file}->{$word} && ! $self->_exists_in_file($word)) ;
163              
164 4         12 push(@operations,['copy_until', $self->{in_file}->{$word}[1] ]);
165 4         13 push(@operations,['copy_from', $self->{in_file}->{$word}[2] ]);
166            
167             }
168            
169             }
170              
171 4         15 my $filename= "/tmp/" . __PACKAGE__ . ".$$";
172 4 50       24 my $fhout = new IO::File "> $filename"
173             or croak "can't write to temporary file: $filename";
174              
175 4         654 $self->{fh}->seek(0,SEEK_SET);
176 2         16 my $last_position=0;
177 2         17 my $size = (stat($self->{fh}))[7];
178              
179 2         7 while(@operations) {
180 6         9 my $operation=shift(@operations);
181              
182 6 100       19 if($operation->[0] eq "insert_at") {
    100          
183 2 50       5 if($operation->[1] > $last_position) {
184              
185 0         0 $self->_destroy_read_write($filename,$operation->[1],$last_position,\$fhout);
186             }
187 2         21 print $fhout $operation->[2],"\n";
188            
189             } elsif($operation->[0] eq "copy_until") {
190 2         13 $self->_destroy_read_write($filename,$operation->[1],$last_position,\$fhout);
191            
192             }
193              
194 6         18 $last_position=$operation->[1];
195             }
196              
197 2 50       7 if($last_position < $size ) {
198 2         5 $self->_destroy_read_write($filename,$size,$last_position,\$fhout,1);
199            
200             }
201            
202 2         3 undef $self->{fh};
203 2         24 undef $fhout;
204              
205 2         98 $fhout=new IO::File ">" . $self->{dictionary};
206 2         236 my $fhin=new IO::File "<$filename";
207              
208 2         136 while(<$fhin>) {
209 4         33 print $fhout $_;
210             }
211            
212 2         3 undef $fhout;
213 2         60 undef $fhin;
214 2         188 unlink $filename;
215            
216            
217            
218             }
219              
220 4 100       13 if($self->{fh}) {
221 1         2 undef $self->{fh};
222             }
223              
224 4         36 undef $_[0];
225              
226             }
227              
228             ######################################################
229             ############## PRIVATE METHODS #######################
230              
231             sub _fetch {
232 14     14   15 my $self = shift;
233 14         17 my $word= shift;
234            
235 14 50       27 croak "this module can not support strings longer than $MAX_WORD_LENGTH characters\n"
236             if(length($word) > $MAX_WORD_LENGTH);
237            
238 14         17 my $lcword=lc($word);
239              
240             ## we previously asked to add this word, return VALUE
241 14 100       66 if(exists $self->{add_to_file}->{$lcword} ) {
    100          
    100          
    100          
242 2         11 return $self->{add_to_file}->{$lcword};
243             ## we previously asked to delete this word, return UNDEF
244             } elsif(exists $self->{delete_from_file}->{$lcword} ) {
245 2         10 return undef;
246             ## it's was recently read from file, return VALUE
247             } elsif(exists $self->{in_file}->{$lcword} ) {
248 5         24 return $self->{in_file}->{$lcword}[0];
249             ## we looked it up before, and it didn't exist, return UNDEF
250             } elsif(exists $self->{not_in_file}->{$lcword} ) {
251 2         13 return undef;
252             ## ok, let's actually try and look it up in the file
253             } else {
254 3         7 return $self->_exists_in_file($word);
255             }
256              
257             }
258              
259             sub _exists_in_file {
260 3     3   3 my $self = shift;
261 3         5 my $word = shift;
262              
263 3         3 my $lcword=lc($word);
264              
265            
266 3         6 my $fh = $self->{fh};
267 3         8 $fh->seek(0,SEEK_SET);
268              
269 3 50       73 my(@stat) = stat($fh)
270             or croak "could not stat filehandle";
271            
272 3         6 my $size = $stat[7];
273 3         5 my $blksize=($MAX_WORD_LENGTH +2)*2;
274              
275             ## find the right block
276 3         8 my($min, $max) = (0, int($size / $blksize));
277              
278 3         10 while ($max - $min > 1) {
279 0         0 $self->_read_block_cache($lcword,\$min,\$max,$blksize);
280             }
281 3         2 $min *= $blksize;
282 3 50       8 $fh->seek($min,SEEK_SET)
283             or croak "could not seek to position $min when we previously could";
284            
285 3 50       30 <$fh> if $min;
286            
287 3         4 my $read_word;
288 3         4 my $result = undef;
289              
290 3         6 while($read_word=$self->_fetch_next_line()) {
291 4         6 my $lcread_word=lc($read_word);
292 4 100       11 next if($lcread_word lt $lcword);
293              
294 3 50       8 if($lcread_word eq $lcword) {
295 0         0 return $read_word;
296             } else {
297 3         6 $self->{not_in_file}->{$lcword}=$word;
298 3         9 $self->{insert_pos}->{$lcword}=$self->{last_tell};
299 3         20 return undef;
300             }
301             }
302              
303             }
304              
305             sub _fetch_next_line {
306 16     16   19 my $self = shift;
307              
308 16         19 my $line=undef;
309 16         48 $self->{last_tell}=$self->{fh}->tell();
310            
311 16 100 66     1052 if($self->{fh} && ($line= $self->{fh}->getline)) {
312 12         477 chomp $line;
313 12         24 $self->_cache_insert_removable($line);
314 12         108 return $line;
315             } else {
316 4         136 return undef;
317             }
318             }
319              
320              
321             sub _cache_insert_removable {
322 12     12   14 my $self = shift;
323 12         12 my $word = shift;
324 12         16 my $lcword=lc($word);
325              
326 12 50 66     31 if(exists $self->{in_file_a} && @{$self->{in_file_a}} == $CACHE_SIZE) {
  9         35  
327 0         0 my $old_word = shift(@{$self->{in_file_a}});
  0         0  
328 0         0 delete $self->{in_file}->{$old_word};
329             }
330              
331 12         14 push(@{$self->{in_file_a}},$lcword);
  12         22  
332 12         38 $self->{in_file}->{$lcword}=[$word,$self->{last_tell},$self->{fh}->tell()];
333              
334             }
335              
336              
337             sub _read_block_cache {
338 0     0   0 my $self = shift;
339 0         0 my ($word,$min,$max,$blksize)=@_;
340              
341             ## based on Jarkko Hietaniemi's Search::Dict lookup routine
342 0         0 my ($mid,$line_read);
343 0         0 my $fh = $self->{fh};
344 0         0 $mid = int(($$max + $$min) / 2);
345 0 0       0 if(exists $self->{block_cache}->{$mid}) {
346 0         0 $line_read =$self->{block_cache}->{$mid} ;
347             } else {
348              
349 0 0       0 $fh->seek($mid * $blksize, SEEK_SET)
350             or croak "could not seek to position " . $mid * $blksize ;
351              
352            
353 0 0       0 <$fh> if $mid; # probably a partial line
354 0         0 $line_read = lc(<$fh>);
355 0         0 chomp $line_read;
356 0         0 $self->{block_cache}->{$mid}=$line_read ;
357             }
358              
359 0 0 0     0 if (defined($line_read) && $line_read le $word) {
360 0         0 $$min = $mid;
361             } else {
362 0         0 $$max = $mid;
363             }
364             }
365              
366             ###################################################
367             ############### PRIVATE METHODS ###################
368              
369             sub _debug {
370              
371 0     0   0 my $level= shift;
372 0 0       0 if($level !~ /^\d+$/) {
373 0         0 unshift(@_,$level);
374 0         0 $level=1;
375             }
376 0 0       0 if($DEBUG >= $level) {
377 0         0 my($sub)=(caller(1))[3];
378 0         0 my $x=join("","Debugging $sub: " ,@_);
379 0 0       0 $x.="\n" if($x !~ /\n$/);
380 0         0 print STDERR $x;
381             }
382             }
383              
384              
385             sub _destroy_read_write {
386 4     4   8 my($self,$filename,$byte_mark, $last_position,$reffh,$last)=@_;
387 4         5 my $fhout = $$reffh;
388              
389 4         5 my $buffer;
390 4         18 $self->{fh}->sysseek($last_position,SEEK_SET);
391 4 100       33 my @args = $last ? ($byte_mark - $last_position) : ($byte_mark, $last_position);
392 4         17 my $bytes = $self->{fh}->sysread($buffer,@args);
393 4 50       49 if($bytes != ($byte_mark-$last_position)) {
394 0         0 undef $fhout;
395 0         0 unlink($filename);
396 0         0 croak "error reading dictionary, got unexpectedly short string";
397             }
398 4         10 print $fhout $buffer;
399              
400             }
401              
402              
403             ## TODO
404              
405             ## croak in DESTROY should work (and not need subsequent returns)
406             ## installer looks for canditate file locations
407             ## more efficient copy of temporary file during DESTROY
408              
409             1;
410              
411             __END__