File Coverage

blib/lib/PDL/DiskCache.pm
Criterion Covered Total %
statement 66 123 53.6
branch 23 72 31.9
condition 4 9 44.4
subroutine 9 12 75.0
pod 2 3 66.6
total 104 219 47.4


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PDL::DiskCache -- Non-memory-resident array object
4              
5             =head1 SYNOPSIS
6              
7             NON-OO:
8              
9             use PDL::DiskCache;
10             tie @a,'PDL::DiskCache', \@files, \%options;
11             imag $a[3];
12              
13             OO:
14              
15             use PDL::DiskCache;
16             $x = diskcache(\@files,\%options);
17             imag $x->[3];
18              
19             or
20              
21             use PDL::DiskCache;
22             $x = new PDL::DiskCache(\@files,\%options);
23             imag $x->[4];
24              
25             =over 3
26              
27             =item \@files
28              
29             an array ref containing a list of file names
30              
31             =item \%options
32              
33             a hash ref containing options for the PDL::DiskCache object (see "TIEARRAY"
34             below for details)
35              
36             =back
37              
38             =head1 DESCRIPTION
39              
40             A PDL::DiskCache object is a perl L<"tied array"|perltie> that is useful
41             for operations where you have to look at a large collection of PDLs one
42             or a few at a time (such as tracking features through an image sequence).
43             You can write prototype code that uses a perl list of a few PDLs, then
44             scale up to to millions of PDLs simply by handing the prototype code
45             a DiskCache tied array instead of a native perl array. The individual
46             PDLs are stored on disk and a few of them are swapped into memory on a
47             FIFO basis. You can set whether the data are read-only or writeable.
48              
49             By default, PDL::DiskCache uses FITS files to represent the PDLs, but
50             you can use any sort of file at all -- the read/write routines are the
51             only place where it examines the underlying data, and you can specify
52             the routines to use at construction time (or, of course, subclass
53             PDL::DiskCache).
54              
55             Items are swapped out on a FIFO basis, so if you have 10 slots
56             and an expression with 10 items in it then you're OK (but you probably
57             want more slots than that); but if you use more items in an expression than
58             there are slots, thrashing will occur!
59              
60             The hash ref interface is kept for historical reasons; you can access
61             the sync() and purge() method calls directly from the returned array ref.
62              
63             =head1 Shortcomings & caveats
64              
65             There's no file locking, so you could really hose yourself by having two of
66             these things going at once on the same files.
67              
68             Since this is a tied array, things like Dumper traverse it transparently.
69             That is sort-of good but also sort-of dangerous. You wouldn't want to
70             PDL::Dumper::sdump() a large PDL::DiskCache, for example -- that would defeat
71             the purpose of using a PDL::DiskCache in the first place.
72              
73              
74              
75             =head1 Author, license, no warranty
76              
77             Copyright 2001, Craig DeForest
78              
79             This code may be distributed under the same terms as Perl itself
80             (license available at L). Copying, reverse engineering,
81             distribution, and modification are explicitly allowed so long as this notice
82             is preserved intact and modified versions are clearly marked as such.
83              
84             If you modify the code and it's useful, please send a copy of the modified
85             version to cdeforest@solar.stanford.edu.
86              
87             This package comes with NO WARRANTY.
88              
89             =head1 FUNCTIONS
90              
91             =cut
92              
93             ######################################################################
94             # Package initialization
95             $PDL::DiskCache::VERSION = 1.1;
96            
97 1     1   70835 use strict;
  1         3  
  1         30  
98 1     1   7 use Carp;
  1         2  
  1         1802  
99              
100             =head2 diskcache
101              
102             Object constructor.
103              
104             =for usage
105              
106             $x = diskcache(\@f,\%options);
107              
108             Options
109              
110             =over 3
111              
112             =item
113              
114             See the TIEARRAY options,below.
115              
116             =back
117              
118             =cut
119              
120             sub diskcache {
121 2     2   2607 my($f,$opt) = @_;
122 2         6 return PDL::DiskCache::new('PDL::DiskCache',$f,$opt);
123             }
124              
125             sub PDL::DiskCache::new {
126 2     2 0 6 my($class,$f,$opt) = @_;
127 2         4 my($x)=[];
128              
129 2         4 my($y) = tie @{$x},$class,$f,$opt;
  2         14  
130 2 50       8 if($opt->{bless}) {
131 0         0 $x = bless($x,$class);
132             }
133              
134 2 50       5 if(wantarray) {
135 2         10 return ($x,bless($y,$class));
136             } else {
137 0         0 return $x;
138             }
139             }
140              
141             *PDL::DiskCache::diskcache = *diskcache;
142              
143             =head2 TIEARRAY
144              
145             =for ref
146              
147             Tied-array constructor; invoked by perl during object construction.
148              
149             =for usage
150              
151             TIEARRAY(class,\@f,\%options)
152              
153             Options
154              
155             =over 3
156              
157             =item ro (default 0)
158              
159             If set, treat the files as read-only (modifications
160             to the tied array will only persist until the changed elements are
161             swapped out)
162              
163             =item rw (default 1)
164              
165             If set, allow reading and writing to the files.
166             Because there's currently no way to determine reliably whether a PDL
167             has been modified, rw files are always written to disk when they're
168             swapped out -- this causes a slight performance hit.
169              
170             =item mem (default 20)
171              
172             Number of files to be cached in memory at once.
173              
174             =item read (default \&rfits)
175              
176             A function ref pointing to code that will read
177             list objects from disk. The function must have the same syntax as
178             rfits: $object = rfits(filename).
179              
180             =item write (default \&wfits)
181              
182             A function ref pointing to code that will
183             write list objects to disk. The function must have the same syntax as
184             wfits: func(object,filename).
185              
186             =item bless (default 0)
187              
188             If set to a nonzero value, then the array ref gets
189             blessed into the DiskCache class for for easier access to the "purge"
190             and "sync" methods. This means that you can say C<< $x->sync >> instead
191             of the more complex C<< (%{tied @$x})->sync >>, but C will return
192             "PDL::DiskCache" instead of "ARRAY", which could break some code.
193              
194             =item verbose (default 0)
195              
196             Get chatty.
197              
198             =back
199              
200             =cut
201              
202             sub PDL::DiskCache::TIEARRAY {
203 2     2   5 my($class,$f,$opt) = @_;
204              
205 2 50       9 croak "PDL::DiskCache needs array ref as 2nd arg (did you pass an array instead?)\n"
206             if(ref $f ne 'ARRAY');
207             my($new) = {files => $f # File list
208 2         40 , n => scalar(@{$f}) # no. of el.
209             , write => $opt->{write} || \&main::wfits # Write routine
210             , read => $opt->{read} || \&main::rfits # Read routine
211             , mem => $opt->{mem} || 20 # No. of mem slots
212 2   50     5 , rw => (!($opt->{ro})) # rw or ro
      50        
      50        
213             , fdex => [] # Current file stored in each slot, by slot
214             , slot => [] # Current slot in which each file is stored
215             , cache => [] # Actual cached stuff gets held here
216             , opt => {} # Options stashed here for later reference
217             , cache_next => 0 # Next cache slot to be used
218             };
219 2         4 foreach $_(keys %{$opt}) {
  2         7  
220 2         7 $new->{opt}->{$_} = $opt->{$_};
221             }
222              
223 2         8 return bless($new,$class);
224             }
225              
226             =head2 purge
227              
228             Remove an item from the oldest slot in the cache, writing to disk as necessary.
229             You also send in how many slots to purge (default 1; sending in -1 purges
230             everything.)
231              
232             For most uses, a nice MODIFIED flag in the data structure could save
233             some hassle here. But PDLs can get modified out from under us
234             with slicing and .= -- so for now we always assume everything is tainted
235             and must be written to disk.
236              
237             =cut
238              
239             sub PDL::DiskCache::purge {
240 0     0 1 0 my($me,$n) = @_,1;
241 0 0       0 $me = (tied @{$me}) if("$me" =~ m/^PDL\:\:DiskCache\=ARRAY/);
  0         0  
242              
243 0 0       0 $n = $me->{mem} if($n<0);
244            
245 0 0       0 print "purging $n items..." if($me->{opt}->{verbose});
246              
247            
248 0         0 my($dex) = $me->{cache_next};
249              
250 0         0 local($_);
251 0         0 for(1..$n) {
252 0 0       0 if($me->{rw}) {
253             print "writing $me->{files}->[$me->{fdex}->[$dex]]: "
254 0 0       0 if($me->{opt}->{verbose});
255              
256 0         0 eval {&{$me->{write}}($me->{cache}->[$dex],
  0         0  
257 0         0 $me->{files}->[$me->{fdex}->[$dex]]);
258             };
259 0 0       0 print "WARNING: PDL::DiskCache::purge: problems with write of ".$me->{files}->[$me->{fdex}->[$dex]].", item $me->{fdex}->[$dex] from slot $dex: `$@' (".$me->{opt}->{varname}.") \n" if($@);
260 0         0 $@ = 0;
261              
262 0 0       0 print "ok.\n" if($me->{opt}->{verbose});
263             }
264            
265              
266 0 0       0 print "Purging item $dex (file $me->{fdex}->[$dex])...\n" if($me->{opt}->{verbose});
267 0         0 undef $me->{slot}->[$me->{fdex}->[$dex]]; # Purge from slot location list
268 0         0 undef $me->{fdex}->[$dex]; # Purge from slot fdex list
269 0         0 undef $me->{cache}->[$dex]; # Purge from memory
270              
271 0         0 $dex++;
272 0         0 $dex %= $me->{mem};
273             }
274 0 0       0 print "...done with purge.\n" if($me->{opt}->{verbose});
275             }
276              
277             sub PDL::DiskCache::FETCH {
278 2     2   435 my($me,$i) = @_;
279              
280 2 50 33     15 if($i < 0 || $i >= $me->{n}) {
281 0         0 carp("PDL::DiskCache: Element $i is outside range of 0-",$me->{n}-1,"\n");
282 0         0 return undef;
283             }
284              
285 2 50       7 if(defined $me->{slot}->[$i]) {
286 0 0       0 print "Item $i is in the cache...\n" if ($me->{opt}->{verbose});
287 0         0 return ($me->{cache}->[$me->{slot}->[$i]]);
288             }
289            
290             ### Got here -- we have to get the item from disk.
291              
292             print "Item $i ($me->{files}->[$i]) not present. Retrieving..."
293 2 50       6 if($me->{opt}->{verbose});
294            
295 2 50       8 if(defined($me->{fdex}->[$me->{cache_next}])) {
296 0 0       0 print "cache full..." if($me->{opt}->{verbose});
297              
298 0         0 $me->purge(1);
299             }
300            
301 2         4 my($x) = $me->{cache_next};
302 2         3 $me->{cache}->[$x] = eval {
303 2         5 &{$me->{read}}($me->{files}->[$i])
  2         7  
304             } ;
305 2         5 undef $@; # Keep this from hanging anything else.
306             print "result is ",(defined $me->{cache}->[$x] ? "" : "un")."defined.\n"
307 2 0       8 if($me->{opt}->{verbose});
    50          
308              
309 2         5 $me->{slot}->[$i] = $me->{cache_next};
310 2         4 $me->{fdex}->[$me->{cache_next}] = $i;
311 2         4 $me->{cache_next}++;
312 2         5 $me->{cache_next} %= $me->{mem};
313 2         13 $me->{cache}->[$x];
314             }
315              
316             sub PDL::DiskCache::STORE {
317 3     3   11 my($me, $i, $val) = @_;
318              
319 3 50       13 if( $me->{slot}->[$i] ) {
320 0 0       0 print "Storing index $i, in cache\n" if($me->{opt}->{verbose});
321 0         0 $me->sync($i);
322 0         0 return $me->{cache}->[$me->{slot}->[$i]] = $val;
323             } else {
324 3 50       588 print "Storing index $i, not in cache\n" if($me->{opt}->{verbose});
325 3 50       20 if(defined ($me->{fdex}->[$me->{cache_next}])) {
326 0 0       0 print "cache full..." if($me->{opt}->{verbose});
327 0         0 $me->purge(1);
328             }
329            
330 3         7 my($x) = $me->{cache_next};
331 3         9 $me->{slot}->[$i] = $x;
332 3         7 $me->{fdex}->[$x] = $i;
333 3         5 $me->{cache_next}++;
334 3         5 $me->{cache_next} %= $me->{mem};
335 3         13 $me->sync($i);
336 3         16 return $me->{cache}->[$x] = $val;
337             }
338              
339 0         0 croak("This never happens");
340              
341             }
342            
343             sub PDL::DiskCache::FETCHSIZE {
344 0     0   0 my($me) = shift;
345              
346 0         0 $me->{n};
347             }
348              
349             sub PDL::DiskCache::STORESIZE {
350 0     0   0 my($me,$newsize) = @_;
351              
352 0 0       0 if($newsize > $me->{n}) {
353 0         0 croak("PDL::DiskCache: Can't augment array size (yet)!\n");
354             }
355            
356 0         0 for( my($i) = $newsize-1; $i<$me->{n}; $i++ ) {
357 0 0       0 if(defined $me->{slot}->[$i]) {
358 0 0       0 if($me->{rw}) {
359             print "Writing $me->{files}->[$me->{fdex}->[$i]]\n"
360 0 0       0 if($me->{opt}->{verbose});
361 0         0 eval {&{$me->{write}}($me->{cache}->[$me->{slot}->[$i]],
  0         0  
362 0         0 $me->{files}->[$i]);
363             };
364 0         0 $@ = 0; # Keep from hanging anything else
365             }
366 0         0 undef $me->{fdex}->[$me->{slot}->[$i]];
367             }
368             }
369 0         0 $#{$me->{slot}} = $newsize-1;
  0         0  
370 0         0 $#{$me->{files}} = $newsize-1;
  0         0  
371 0         0 $me->{n} = $newsize;
372             }
373              
374             =head2 sync
375              
376             In a rw cache, flush items out to disk but retain them in the cache.
377              
378             Accepts a single scalar argument, which is the index number of a
379             single item that should be written to disk. Passing (-1), or no
380             argument, writes all items to disk, similar to purge(-1).
381              
382             For ro caches, this is a not-too-slow (but safe) no-op.
383              
384             =cut
385              
386             sub PDL::DiskCache::sync {
387 5     5 1 9 my($me) = shift;
388 5 50       24 $me = (tied @{$me}) if("$me" =~ m/^PDL\:\:DiskCache\=ARRAY/);
  0         0  
389 5         10 my($syncn) = shift;
390 5 100       14 $syncn = -1 unless defined $syncn;
391 5 100       338 print "PDL::DiskCache::sync\n" if($me->{opt}->{verbose});
392            
393 5 100       35 my @list = $syncn==-1 ? (0..$me->{mem}-1) : ($syncn);
394              
395 5 100       174 if($me->{rw}) {
396 4         9 for(@list) {
397 23 100       59 if(defined $me->{fdex}->[$_]) {
398              
399             print " writing $me->{files}->[$me->{fdex}->[$_]]...\n"
400 6 50       569 if($me->{opt}->{verbose});
401              
402 6         25 eval {&{$me->{write}}($me->{cache}->[$_],
  6         26  
403 6         21 $me->{files}->[$me->{fdex}->[$_]]);
404             };
405 6         2340 $@ = 0; # keep from hanging anything else
406             }
407             }
408             }
409             }
410              
411             =head2 DESTROY
412              
413             This is the perl hook for object destruction. It just makes a call to
414             "sync", to flush the cache out to disk. Destructor calls from perl don't
415             happen at a guaranteed time, so be sure to call "sync" if you need to
416             ensure that the files get flushed out, e.g. to use 'em somewhere else.
417              
418             =cut
419              
420             sub PDL::DiskCache::DESTROY {
421 2     2   473 my($me) = shift;
422              
423 2         6 $me->sync;
424             }
425              
426             # return true
427             1;