File Coverage

blib/lib/File/RandomAccess.pm
Criterion Covered Total %
statement 105 208 50.4
branch 49 106 46.2
condition 8 32 25.0
subroutine 11 13 84.6
pod 9 11 81.8
total 182 370 49.1


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: RandomAccess.pm
3             #
4             # Description: Buffer to support random access reading of sequential file
5             #
6             # Revisions: 02/11/2004 - P. Harvey Created
7             # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
8             # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
9             # 01/02/2005 - P. Harvey Added DEBUG code
10             # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
11             # multi-character EOL sequences
12             # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
13             # generate "substr outside string" warning
14             # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
15             # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
16             # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
17             # scalar with a multi-character newline
18             # 01/24/2009 - PH Protect against reading too much at once
19             # 10/04/2018 - PH Added NoBuffer option
20             #
21             # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
22             # which case the file is buffered in memory to allow random access.
23             # SeekTest() is called automatically when the object is created
24             # unless specified.
25             #
26             # May also be used for string i/o (just pass a scalar reference)
27             #
28             # Legal: Copyright (c) 2003-2022 Phil Harvey (philharvey66 at gmail.com)
29             # This library is free software; you can redistribute it and/or
30             # modify it under the same terms as Perl itself.
31             #------------------------------------------------------------------------------
32              
33             package File::RandomAccess;
34              
35 104     104   556 use strict;
  104         158  
  104         4170  
36             require 5.002;
37             require Exporter;
38              
39 104     104   529 use vars qw($VERSION @ISA @EXPORT_OK);
  104         159  
  104         181024  
40             $VERSION = '1.11';
41             @ISA = qw(Exporter);
42              
43             sub Read($$$);
44              
45             # constants
46             my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
47             my $SKIP_SIZE = 65536; # size to skip when fast-forwarding over sequential data
48             my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
49              
50             #------------------------------------------------------------------------------
51             # Create new RandomAccess object
52             # Inputs: 0) reference to RandomAccess object or RandomAccess class name
53             # 1) file reference or scalar reference
54             # 2) flag set if file is already random access (disables automatic SeekTest)
55             sub new($$;$)
56             {
57 1490     1490 1 3885 my ($that, $filePt, $isRandom) = @_;
58 1490   33     5395 my $class = ref($that) || $that;
59 1490         2302 my $self;
60              
61 1490 100       3707 if (ref $filePt eq 'SCALAR') {
62             # string i/o
63 808         3321 $self = {
64             BUFF_PT => $filePt,
65             BASE => 0,
66             POS => 0,
67             LEN => length($$filePt),
68             TESTED => -1,
69             };
70 808         1481 bless $self, $class;
71             } else {
72             # file i/o
73 682         1358 my $buff = '';
74 682         3917 $self = {
75             FILE_PT => $filePt, # file pointer
76             BUFF_PT => \$buff, # reference to file data
77             BASE => 0, # location of start of buffer in file
78             POS => 0, # current position in buffer
79             LEN => 0, # length of data in buffer
80             TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
81             };
82 682         1543 bless $self, $class;
83 682 100       2511 $self->SeekTest() unless $isRandom;
84             }
85 1490         3838 return $self;
86             }
87              
88             #------------------------------------------------------------------------------
89             # Enable DEBUG code
90             # Inputs: 0) reference to RandomAccess object
91             sub Debug($)
92             {
93 0     0 0 0 my $self = shift;
94 0         0 $self->{DEBUG} = { };
95             }
96              
97             #------------------------------------------------------------------------------
98             # Perform seek test and turn on buffering if necessary
99             # Inputs: 0) reference to RandomAccess object
100             # Returns: 1 if seek test passed (ie. no buffering required)
101             # Notes: Must be done before any other i/o
102             sub SeekTest($)
103             {
104 708     708 1 1399 my $self = shift;
105 708 100       2742 unless ($self->{TESTED}) {
106 681         1347 my $fp = $self->{FILE_PT};
107 681 50 33     11053 if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
108 681         2261 $self->{TESTED} = 1; # test passed
109             } else {
110 0         0 $self->{TESTED} = -1; # test failed (requires buffering)
111             }
112             }
113 708 100       2371 return $self->{TESTED} == 1 ? 1 : 0;
114             }
115              
116             #------------------------------------------------------------------------------
117             # Get current position in file
118             # Inputs: 0) reference to RandomAccess object
119             # Returns: current position in file
120             sub Tell($)
121             {
122 5774     5774 1 8435 my $self = shift;
123 5774         7036 my $rtnVal;
124 5774 100       10787 if ($self->{TESTED} < 0) {
125 765         1160 $rtnVal = $self->{POS} + $self->{BASE};
126             } else {
127 5009         9700 $rtnVal = tell($self->{FILE_PT});
128             }
129 5774         13040 return $rtnVal;
130             }
131              
132             #------------------------------------------------------------------------------
133             # Seek to position in file
134             # Inputs: 0) reference to RandomAccess object
135             # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
136             # Returns: 1 on success
137             # Notes: When buffered, this doesn't quite behave like seek() since it will return
138             # success even if you seek outside the limits of the file. However if you
139             # do this, you will get an error on your next Read().
140             sub Seek($$;$)
141             {
142 7977     7977 1 14553 my ($self, $num, $whence) = @_;
143 7977 100       13675 $whence = 0 unless defined $whence;
144 7977         10448 my $rtnVal;
145 7977 100       14014 if ($self->{TESTED} < 0) {
146 1261         1553 my $newPos;
147 1261 100 33     2459 if ($whence == 0) {
    100          
    50          
148 894         1170 $newPos = $num - $self->{BASE}; # from start of file
149             } elsif ($whence == 1) {
150 275         429 $newPos = $num + $self->{POS}; # relative to current position
151             } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
152 0         0 $newPos = -1; # (can't seek relative to end if no buffering)
153             } else {
154 92         317 $self->Slurp(); # read whole file into buffer
155 92         148 $newPos = $num + $self->{LEN}; # relative to end of file
156             }
157 1261 100       2608 if ($newPos >= 0) {
158 1248         1579 $self->{POS} = $newPos;
159 1248         1613 $rtnVal = 1;
160             }
161             } else {
162 6716         65089 $rtnVal = seek($self->{FILE_PT}, $num, $whence);
163             }
164 7977         38433 return $rtnVal;
165             }
166              
167             #------------------------------------------------------------------------------
168             # Read from the file
169             # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
170             # Returns: Number of bytes read
171             sub Read($$$)
172             {
173 26550     26550 1 33847 my $self = shift;
174 26550         30373 my $len = $_[1];
175 26550         29444 my $rtnVal;
176              
177             # protect against reading too much at once
178             # (also from dying with a "Negative length" error)
179 26550 50       43787 if ($len & 0xf8000000) {
180 0 0       0 return 0 if $len < 0;
181             # read in smaller blocks because Windows attempts to pre-allocate
182             # memory for the full size, which can lead to an out-of-memory error
183 0         0 my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
184 0         0 my $num = Read($self, $_[0], $maxLen);
185 0 0       0 return $num if $num < $maxLen;
186 0         0 for (;;) {
187 0         0 $len -= $maxLen;
188 0 0       0 last if $len <= 0;
189 0 0       0 my $l = $len < $maxLen ? $len : $maxLen;
190 0         0 my $buff;
191 0         0 my $n = Read($self, $buff, $l);
192 0 0       0 last unless $n;
193 0         0 $_[0] .= $buff;
194 0         0 $num += $n;
195 0 0       0 last if $n < $l;
196             }
197 0         0 return $num;
198             }
199             # read through our buffer if necessary
200 26550 100       42392 if ($self->{TESTED} < 0) {
201             # purge old data before reading in NoBuffer mode
202 7649 50 0     12017 $self->Purge() or return 0 if $self->{NoBuffer};
203 7649         8001 my $buff;
204 7649         9752 my $newPos = $self->{POS} + $len;
205             # number of bytes to read from file
206 7649         9195 my $num = $newPos - $self->{LEN};
207 7649 50 66     12125 if ($num > 0 and $self->{FILE_PT}) {
208             # read data from file in multiples of $CHUNK_SIZE
209 0         0 $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
210 0         0 $num = read($self->{FILE_PT}, $buff, $num);
211 0 0       0 if ($num) {
212 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
213 0         0 $self->{LEN} += $num;
214             }
215             }
216             # number of bytes left in data buffer
217 7649         9688 $num = $self->{LEN} - $self->{POS};
218 7649 100       10981 if ($len <= $num) {
    100          
219 7190         7959 $rtnVal = $len;
220             } elsif ($num <= 0) {
221 361         528 $_[0] = '';
222 361         874 return 0;
223             } else {
224 98         137 $rtnVal = $num;
225             }
226             # return data from our buffer
227 7288         7644 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  7288         14940  
228 7288         9978 $self->{POS} += $rtnVal;
229             } else {
230             # read directly from file
231 18901 100       31032 $_[0] = '' unless defined $_[0];
232 18901   100     108655 $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
233             }
234 26189 50       46365 if ($self->{DEBUG}) {
235 0         0 my $pos = $self->Tell() - $rtnVal;
236 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
237 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
238             }
239             }
240 26189         69564 return $rtnVal;
241             }
242              
243             #------------------------------------------------------------------------------
244             # Read a line from file (end of line is $/)
245             # Inputs: 0) reference to RandomAccess object, 1) buffer
246             # Returns: Number of bytes read
247             sub ReadLine($$)
248             {
249 6601     6601 1 8870 my $self = shift;
250 6601         7557 my $rtnVal;
251 6601         9451 my $fp = $self->{FILE_PT};
252              
253 6601 100       11168 if ($self->{TESTED} < 0) {
254 123         178 my ($num, $buff);
255 123 50 0     298 $self->Purge() or return 0 if $self->{NoBuffer};
256 123         210 my $pos = $self->{POS};
257 123 50       218 if ($fp) {
258             # make sure we have some data after the current position
259 0         0 while ($self->{LEN} <= $pos) {
260 0         0 $num = read($fp, $buff, $CHUNK_SIZE);
261 0 0       0 return 0 unless $num;
262 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
263 0         0 $self->{LEN} += $num;
264             }
265             # scan and read until we find the EOL (or hit EOF)
266 0         0 for (;;) {
267 0         0 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  0         0  
268 0 0       0 if ($pos >= 0) {
269 0         0 $pos += length($/);
270 0         0 last;
271             }
272 0         0 $pos = $self->{LEN}; # have scanned to end of buffer
273 0 0       0 $num = read($fp, $buff, $CHUNK_SIZE) or last;
274 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
275 0         0 $self->{LEN} += $num;
276             }
277             } else {
278             # string i/o
279 123         194 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  123         369  
280 123 100       272 if ($pos < 0) {
281 18         30 $pos = $self->{LEN};
282 18 50       44 $self->{POS} = $pos if $self->{POS} > $pos;
283             } else {
284 105         169 $pos += length($/);
285             }
286             }
287             # read the line from our buffer
288 123         196 $rtnVal = $pos - $self->{POS};
289 123         156 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  123         274  
290 123         201 $self->{POS} = $pos;
291             } else {
292 6478         23145 $_[0] = <$fp>;
293 6478 100       10897 if (defined $_[0]) {
294 6458         9410 $rtnVal = length($_[0]);
295             } else {
296 20         41 $rtnVal = 0;
297             }
298             }
299 6601 50       11619 if ($self->{DEBUG}) {
300 0         0 my $pos = $self->Tell() - $rtnVal;
301 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
302 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
303             }
304             }
305 6601         16569 return $rtnVal;
306             }
307              
308             #------------------------------------------------------------------------------
309             # Read whole file into buffer (without changing read pointer)
310             # Inputs: 0) reference to RandomAccess object
311             sub Slurp($)
312             {
313 92     92 1 160 my $self = shift;
314 92   50     260 my $fp = $self->{FILE_PT} || return;
315             # read whole file into buffer (in large chunks)
316 0         0 my ($buff, $num);
317 0         0 while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
318 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
319 0         0 $self->{LEN} += $num;
320             }
321             }
322              
323             #------------------------------------------------------------------------------
324             # Purge internal buffer [internal use only]
325             # Inputs: 0) reference to RandomAccess object
326             # Returns: 1 on success, or 0 if current buffer position is negative
327             # Notes: This is called only in NoBuffer mode
328             sub Purge($)
329             {
330 0     0 0 0 my $self = shift;
331 0 0       0 return 1 unless $self->{FILE_PT};
332 0 0       0 return 0 if $self->{POS} < 0; # error if we can't read from here
333 0 0       0 if ($self->{POS} > $CHUNK_SIZE) {
334 0         0 my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
335 0 0       0 if ($purge >= $self->{LEN}) {
    0          
336             # read up to current position in 64k chunks, discarding as we go
337 0         0 while ($self->{POS} > $self->{LEN}) {
338 0         0 $self->{BASE} += $self->{LEN};
339 0         0 $self->{POS} -= $self->{LEN};
340 0         0 ${$self->{BUFF_PT}} = '';
  0         0  
341 0         0 $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
  0         0  
342 0 0       0 last if $self->{LEN} < $SKIP_SIZE;
343             }
344             } elsif ($purge > 0) {
345 0         0 ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
  0         0  
  0         0  
346 0         0 $self->{BASE} += $purge;
347 0         0 $self->{POS} -= $purge;
348 0         0 $self->{LEN} -= $purge;
349             }
350             }
351 0         0 return 1;
352             }
353              
354             #------------------------------------------------------------------------------
355             # Set binary mode
356             # Inputs: 0) reference to RandomAccess object
357             sub BinMode($)
358             {
359 744     744 1 1491 my $self = shift;
360 744 100       3805 binmode($self->{FILE_PT}) if $self->{FILE_PT};
361             }
362              
363             #------------------------------------------------------------------------------
364             # Close the file and free the buffer
365             # Inputs: 0) reference to RandomAccess object
366             sub Close($)
367             {
368 479     479 1 1892 my $self = shift;
369              
370 479 50       1968 if ($self->{DEBUG}) {
371 0         0 local $_;
372 0 0       0 if ($self->Seek(0,2)) {
373 0         0 $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
374 0         0 my $last;
375 0         0 my $tot = 0;
376 0         0 my $bad = 0;
377 0         0 foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
  0         0  
  0         0  
378 0         0 my $pos = $_;
379 0         0 my $len = $self->{DEBUG}->{$_};
380 0 0 0     0 if (defined $last and $last < $pos) {
381 0         0 my $bytes = $pos - $last;
382 0         0 $tot += $bytes;
383 0         0 $self->Seek($last);
384 0         0 my $buff;
385 0         0 $self->Read($buff, $bytes);
386 0         0 my $warn = '';
387 0 0       0 if ($buff =~ /[^\0]/) {
388 0         0 $bad += ($pos - $last);
389 0         0 $warn = ' - NON-ZERO!';
390             }
391 0         0 printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
392             }
393 0         0 my $cur = $pos + $len;
394 0 0 0     0 $last = $cur unless defined $last and $last > $cur;
395             }
396 0         0 print "$tot bytes missed";
397 0 0       0 $bad and print ", $bad non-zero!";
398 0         0 print "\n";
399             } else {
400 0         0 warn "File::RandomAccess DEBUG not working (file already closed?)\n";
401             }
402 0         0 delete $self->{DEBUG};
403             }
404             # close the file
405 479 100       1563 if ($self->{FILE_PT}) {
406 477         9555 close($self->{FILE_PT});
407 477         1861 delete $self->{FILE_PT};
408             }
409             # reset the buffer
410 479         1339 my $emptyBuff = '';
411 479         1375 $self->{BUFF_PT} = \$emptyBuff;
412 479         1032 $self->{BASE} = 0;
413 479         1064 $self->{LEN} = 0;
414 479         1441 $self->{POS} = 0;
415             }
416              
417             #------------------------------------------------------------------------------
418             1; # end