File Coverage

blib/lib/File/RandomAccess.pm
Criterion Covered Total %
statement 105 208 50.4
branch 49 106 46.2
condition 8 32 25.0
subroutine 11 13 84.6
pod 9 11 81.8
total 182 370 49.1


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: RandomAccess.pm
3             #
4             # Description: Buffer to support random access reading of sequential file
5             #
6             # Revisions: 02/11/2004 - P. Harvey Created
7             # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
8             # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
9             # 01/02/2005 - P. Harvey Added DEBUG code
10             # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
11             # multi-character EOL sequences
12             # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
13             # generate "substr outside string" warning
14             # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
15             # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
16             # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
17             # scalar with a multi-character newline
18             # 01/24/2009 - PH Protect against reading too much at once
19             # 10/04/2018 - PH Added NoBuffer option
20             #
21             # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
22             # which case the file is buffered in memory to allow random access.
23             # SeekTest() is called automatically when the object is created
24             # unless specified.
25             #
26             # May also be used for string i/o (just pass a scalar reference)
27             #
28             # Legal: Copyright (c) 2003-2023 Phil Harvey (philharvey66 at gmail.com)
29             # This library is free software; you can redistribute it and/or
30             # modify it under the same terms as Perl itself.
31             #------------------------------------------------------------------------------
32              
33             package File::RandomAccess;
34              
35 106     106   686 use strict;
  106         219  
  106         5339  
36             require 5.002;
37             require Exporter;
38              
39 106     106   668 use vars qw($VERSION @ISA @EXPORT_OK);
  106         224  
  106         227757  
40             $VERSION = '1.11';
41             @ISA = qw(Exporter);
42              
43             sub Read($$$);
44              
45             # constants
46             my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
47             my $SKIP_SIZE = 65536; # size to skip when fast-forwarding over sequential data
48             my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
49              
50             #------------------------------------------------------------------------------
51             # Create new RandomAccess object
52             # Inputs: 0) reference to RandomAccess object or RandomAccess class name
53             # 1) file reference or scalar reference
54             # 2) flag set if file is already random access (disables automatic SeekTest)
55             sub new($$;$)
56             {
57 1512     1512 1 4857 my ($that, $filePt, $isRandom) = @_;
58 1512   33     6450 my $class = ref($that) || $that;
59 1512         2720 my $self;
60              
61 1512 100       4940 if (ref $filePt eq 'SCALAR') {
62             # string i/o
63 809         4105 $self = {
64             BUFF_PT => $filePt,
65             BASE => 0,
66             POS => 0,
67             LEN => length($$filePt),
68             TESTED => -1,
69             };
70 809         1739 bless $self, $class;
71             } else {
72             # file i/o
73 703         1920 my $buff = '';
74 703         5133 $self = {
75             FILE_PT => $filePt, # file pointer
76             BUFF_PT => \$buff, # reference to file data
77             BASE => 0, # location of start of buffer in file
78             POS => 0, # current position in buffer
79             LEN => 0, # length of data in buffer
80             TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
81             };
82 703         1955 bless $self, $class;
83 703 100       3524 $self->SeekTest() unless $isRandom;
84             }
85 1512         5019 return $self;
86             }
87              
88             #------------------------------------------------------------------------------
89             # Enable DEBUG code
90             # Inputs: 0) reference to RandomAccess object
91             sub Debug($)
92             {
93 0     0 0 0 my $self = shift;
94 0         0 $self->{DEBUG} = { };
95             }
96              
97             #------------------------------------------------------------------------------
98             # Perform seek test and turn on buffering if necessary
99             # Inputs: 0) reference to RandomAccess object
100             # Returns: 1 if seek test passed (ie. no buffering required)
101             # Notes: Must be done before any other i/o
102             sub SeekTest($)
103             {
104 729     729 1 1777 my $self = shift;
105 729 100       3183 unless ($self->{TESTED}) {
106 702         1785 my $fp = $self->{FILE_PT};
107 702 50 33     15092 if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
108 702         2882 $self->{TESTED} = 1; # test passed
109             } else {
110 0         0 $self->{TESTED} = -1; # test failed (requires buffering)
111             }
112             }
113 729 100       3308 return $self->{TESTED} == 1 ? 1 : 0;
114             }
115              
116             #------------------------------------------------------------------------------
117             # Get current position in file
118             # Inputs: 0) reference to RandomAccess object
119             # Returns: current position in file
120             sub Tell($)
121             {
122 5888     5888 1 10541 my $self = shift;
123 5888         8790 my $rtnVal;
124 5888 100       13305 if ($self->{TESTED} < 0) {
125 765         1395 $rtnVal = $self->{POS} + $self->{BASE};
126             } else {
127 5123         10955 $rtnVal = tell($self->{FILE_PT});
128             }
129 5888         15453 return $rtnVal;
130             }
131              
132             #------------------------------------------------------------------------------
133             # Seek to position in file
134             # Inputs: 0) reference to RandomAccess object
135             # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
136             # Returns: 1 on success
137             # Notes: When buffered, this doesn't quite behave like seek() since it will return
138             # success even if you seek outside the limits of the file. However if you
139             # do this, you will get an error on your next Read().
140             sub Seek($$;$)
141             {
142 8056     8056 1 18100 my ($self, $num, $whence) = @_;
143 8056 100       16581 $whence = 0 unless defined $whence;
144 8056         11600 my $rtnVal;
145 8056 100       16930 if ($self->{TESTED} < 0) {
146 1264         2004 my $newPos;
147 1264 100 33     3294 if ($whence == 0) {
    100          
    50          
148 896         1506 $newPos = $num - $self->{BASE}; # from start of file
149             } elsif ($whence == 1) {
150 275         576 $newPos = $num + $self->{POS}; # relative to current position
151             } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
152 0         0 $newPos = -1; # (can't seek relative to end if no buffering)
153             } else {
154 93         441 $self->Slurp(); # read whole file into buffer
155 93         187 $newPos = $num + $self->{LEN}; # relative to end of file
156             }
157 1264 100       2704 if ($newPos >= 0) {
158 1251         1969 $self->{POS} = $newPos;
159 1251         1942 $rtnVal = 1;
160             }
161             } else {
162 6792         82540 $rtnVal = seek($self->{FILE_PT}, $num, $whence);
163             }
164 8056         53793 return $rtnVal;
165             }
166              
167             #------------------------------------------------------------------------------
168             # Read from the file
169             # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
170             # Returns: Number of bytes read
171             sub Read($$$)
172             {
173 26929     26929 1 41915 my $self = shift;
174 26929         37456 my $len = $_[1];
175 26929         35309 my $rtnVal;
176              
177             # protect against reading too much at once
178             # (also from dying with a "Negative length" error)
179 26929 50       53873 if ($len & 0xf8000000) {
180 0 0       0 return 0 if $len < 0;
181             # read in smaller blocks because Windows attempts to pre-allocate
182             # memory for the full size, which can lead to an out-of-memory error
183 0         0 my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
184 0         0 my $num = Read($self, $_[0], $maxLen);
185 0 0       0 return $num if $num < $maxLen;
186 0         0 for (;;) {
187 0         0 $len -= $maxLen;
188 0 0       0 last if $len <= 0;
189 0 0       0 my $l = $len < $maxLen ? $len : $maxLen;
190 0         0 my $buff;
191 0         0 my $n = Read($self, $buff, $l);
192 0 0       0 last unless $n;
193 0         0 $_[0] .= $buff;
194 0         0 $num += $n;
195 0 0       0 last if $n < $l;
196             }
197 0         0 return $num;
198             }
199             # read through our buffer if necessary
200 26929 100       51687 if ($self->{TESTED} < 0) {
201             # purge old data before reading in NoBuffer mode
202 7655 50 0     14630 $self->Purge() or return 0 if $self->{NoBuffer};
203 7655         9827 my $buff;
204 7655         11858 my $newPos = $self->{POS} + $len;
205             # number of bytes to read from file
206 7655         11181 my $num = $newPos - $self->{LEN};
207 7655 50 66     15148 if ($num > 0 and $self->{FILE_PT}) {
208             # read data from file in multiples of $CHUNK_SIZE
209 0         0 $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
210 0         0 $num = read($self->{FILE_PT}, $buff, $num);
211 0 0       0 if ($num) {
212 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
213 0         0 $self->{LEN} += $num;
214             }
215             }
216             # number of bytes left in data buffer
217 7655         10931 $num = $self->{LEN} - $self->{POS};
218 7655 100       13899 if ($len <= $num) {
    100          
219 7196         10110 $rtnVal = $len;
220             } elsif ($num <= 0) {
221 361         721 $_[0] = '';
222 361         1150 return 0;
223             } else {
224 98         174 $rtnVal = $num;
225             }
226             # return data from our buffer
227 7294         9571 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  7294         18634  
228 7294         12963 $self->{POS} += $rtnVal;
229             } else {
230             # read directly from file
231 19274 100       39001 $_[0] = '' unless defined $_[0];
232 19274   100     136724 $rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
233             }
234 26568 50       56808 if ($self->{DEBUG}) {
235 0         0 my $pos = $self->Tell() - $rtnVal;
236 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
237 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
238             }
239             }
240 26568         92357 return $rtnVal;
241             }
242              
243             #------------------------------------------------------------------------------
244             # Read a line from file (end of line is $/)
245             # Inputs: 0) reference to RandomAccess object, 1) buffer
246             # Returns: Number of bytes read
247             sub ReadLine($$)
248             {
249 6683     6683 1 11221 my $self = shift;
250 6683         9077 my $rtnVal;
251 6683         12020 my $fp = $self->{FILE_PT};
252              
253 6683 100       13949 if ($self->{TESTED} < 0) {
254 123         242 my ($num, $buff);
255 123 50 0     318 $self->Purge() or return 0 if $self->{NoBuffer};
256 123         245 my $pos = $self->{POS};
257 123 50       279 if ($fp) {
258             # make sure we have some data after the current position
259 0         0 while ($self->{LEN} <= $pos) {
260 0         0 $num = read($fp, $buff, $CHUNK_SIZE);
261 0 0       0 return 0 unless $num;
262 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
263 0         0 $self->{LEN} += $num;
264             }
265             # scan and read until we find the EOL (or hit EOF)
266 0         0 for (;;) {
267 0         0 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  0         0  
268 0 0       0 if ($pos >= 0) {
269 0         0 $pos += length($/);
270 0         0 last;
271             }
272 0         0 $pos = $self->{LEN}; # have scanned to end of buffer
273 0 0       0 $num = read($fp, $buff, $CHUNK_SIZE) or last;
274 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
275 0         0 $self->{LEN} += $num;
276             }
277             } else {
278             # string i/o
279 123         181 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  123         434  
280 123 100       316 if ($pos < 0) {
281 18         36 $pos = $self->{LEN};
282 18 50       51 $self->{POS} = $pos if $self->{POS} > $pos;
283             } else {
284 105         237 $pos += length($/);
285             }
286             }
287             # read the line from our buffer
288 123         229 $rtnVal = $pos - $self->{POS};
289 123         202 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  123         324  
290 123         240 $self->{POS} = $pos;
291             } else {
292 6560         28393 $_[0] = <$fp>;
293 6560 100       12970 if (defined $_[0]) {
294 6540         10806 $rtnVal = length($_[0]);
295             } else {
296 20         63 $rtnVal = 0;
297             }
298             }
299 6683 50       14285 if ($self->{DEBUG}) {
300 0         0 my $pos = $self->Tell() - $rtnVal;
301 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
302 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
303             }
304             }
305 6683         20965 return $rtnVal;
306             }
307              
308             #------------------------------------------------------------------------------
309             # Read whole file into buffer (without changing read pointer)
310             # Inputs: 0) reference to RandomAccess object
311             sub Slurp($)
312             {
313 93     93 1 218 my $self = shift;
314 93   50     327 my $fp = $self->{FILE_PT} || return;
315             # read whole file into buffer (in large chunks)
316 0         0 my ($buff, $num);
317 0         0 while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
318 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
319 0         0 $self->{LEN} += $num;
320             }
321             }
322              
323             #------------------------------------------------------------------------------
324             # Purge internal buffer [internal use only]
325             # Inputs: 0) reference to RandomAccess object
326             # Returns: 1 on success, or 0 if current buffer position is negative
327             # Notes: This is called only in NoBuffer mode
328             sub Purge($)
329             {
330 0     0 0 0 my $self = shift;
331 0 0       0 return 1 unless $self->{FILE_PT};
332 0 0       0 return 0 if $self->{POS} < 0; # error if we can't read from here
333 0 0       0 if ($self->{POS} > $CHUNK_SIZE) {
334 0         0 my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
335 0 0       0 if ($purge >= $self->{LEN}) {
    0          
336             # read up to current position in 64k chunks, discarding as we go
337 0         0 while ($self->{POS} > $self->{LEN}) {
338 0         0 $self->{BASE} += $self->{LEN};
339 0         0 $self->{POS} -= $self->{LEN};
340 0         0 ${$self->{BUFF_PT}} = '';
  0         0  
341 0         0 $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
  0         0  
342 0 0       0 last if $self->{LEN} < $SKIP_SIZE;
343             }
344             } elsif ($purge > 0) {
345 0         0 ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
  0         0  
  0         0  
346 0         0 $self->{BASE} += $purge;
347 0         0 $self->{POS} -= $purge;
348 0         0 $self->{LEN} -= $purge;
349             }
350             }
351 0         0 return 1;
352             }
353              
354             #------------------------------------------------------------------------------
355             # Set binary mode
356             # Inputs: 0) reference to RandomAccess object
357             sub BinMode($)
358             {
359 765     765 1 1900 my $self = shift;
360 765 100       5058 binmode($self->{FILE_PT}) if $self->{FILE_PT};
361             }
362              
363             #------------------------------------------------------------------------------
364             # Close the file and free the buffer
365             # Inputs: 0) reference to RandomAccess object
366             sub Close($)
367             {
368 496     496 1 1455 my $self = shift;
369              
370 496 50       2485 if ($self->{DEBUG}) {
371 0         0 local $_;
372 0 0       0 if ($self->Seek(0,2)) {
373 0         0 $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
374 0         0 my $last;
375 0         0 my $tot = 0;
376 0         0 my $bad = 0;
377 0         0 foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
  0         0  
  0         0  
378 0         0 my $pos = $_;
379 0         0 my $len = $self->{DEBUG}->{$_};
380 0 0 0     0 if (defined $last and $last < $pos) {
381 0         0 my $bytes = $pos - $last;
382 0         0 $tot += $bytes;
383 0         0 $self->Seek($last);
384 0         0 my $buff;
385 0         0 $self->Read($buff, $bytes);
386 0         0 my $warn = '';
387 0 0       0 if ($buff =~ /[^\0]/) {
388 0         0 $bad += ($pos - $last);
389 0         0 $warn = ' - NON-ZERO!';
390             }
391 0         0 printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
392             }
393 0         0 my $cur = $pos + $len;
394 0 0 0     0 $last = $cur unless defined $last and $last > $cur;
395             }
396 0         0 print "$tot bytes missed";
397 0 0       0 $bad and print ", $bad non-zero!";
398 0         0 print "\n";
399             } else {
400 0         0 warn "File::RandomAccess DEBUG not working (file already closed?)\n";
401             }
402 0         0 delete $self->{DEBUG};
403             }
404             # close the file
405 496 100       1965 if ($self->{FILE_PT}) {
406 494         10567 close($self->{FILE_PT});
407 494         2496 delete $self->{FILE_PT};
408             }
409             # reset the buffer
410 496         1736 my $emptyBuff = '';
411 496         1804 $self->{BUFF_PT} = \$emptyBuff;
412 496         1384 $self->{BASE} = 0;
413 496         1378 $self->{LEN} = 0;
414 496         1881 $self->{POS} = 0;
415             }
416              
417             #------------------------------------------------------------------------------
418             1; # end