File Coverage

blib/lib/DB/DataStore.pm
Criterion Covered Total %
statement 178 208 85.5
branch 31 52 59.6
condition 6 15 40.0
subroutine 34 42 80.9
pod 7 7 100.0
total 256 324 79.0


line stmt bran cond sub pod time code
1             package DB::DataStore;
2              
3             =head1 NAME
4              
5             DB::DataStore - Simple and fast record based data store
6              
7             =head1 SYNPOSIS
8              
9             use DB::DataStore;
10              
11              
12             my $store = DB::DataStore->open( $directory );
13              
14             my $data = "TEXT DATA OR BYTES";
15             my $id = $store->stow( $data, $optionalID );
16              
17             my $val = $store->fetch( $id );
18              
19             $store->recycle( $id );
20              
21             my $new_id = $store->next_id; # $new_id == $id
22              
23             $store->stow( "MORE DATA", $new_id );
24              
25             =head1 DESCRIPTION
26              
27             A simple and fast way to store arbitrary text or byte data.
28             It is written entirely in perl with no non-core dependencies. It is designed to be
29             both easy to set up and easy to use.
30              
31             =head1 LIMITATIONS
32              
33             DB::DataStore is not meant to store huge amounts of data.
34             It will fail if it tries to create a file size greater than the
35             max allowed by the filesystem. This limitation will be removed in
36             subsequent versions. This limitation is most important when working
37             with sets of data that approach the max file size of the system
38             in question.
39              
40             This is not written with thread safety in mind, so unexpected behavior
41             can occur when multiple DB::DataStore objects open the same directory.
42              
43             =cut
44              
45 1     1   427 use strict;
  1         1  
  1         22  
46 1     1   3 use warnings;
  1         1  
  1         24  
47              
48 1     1   3 use File::Path qw(make_path);
  1         3  
  1         47  
49 1     1   538 use Data::Dumper;
  1         6759  
  1         49  
50              
51 1     1   5 use vars qw($VERSION);
  1         1  
  1         205  
52              
53             $VERSION = '1.05';
54              
55             =head1 METHODS
56              
57             =head2 open( directory )
58              
59             Takes a single argument - a directory, and constructs the data store in it.
60             The directory must be writeable or creatible. If a DataStore already exists
61             there, it opens it, otherwise it creates a new one.
62              
63             =cut
64             sub open {
65 1     1 1 750 my( $pkg, $directory ) = @_;
66              
67 1         129 make_path( "$directory/stores", { error => \my $err } );
68 1 50       5 if( @$err ) {
69 0         0 my( $err ) = values %{ $err->[0] };
  0         0  
70 0         0 die $err;
71             }
72 1         2 my $filename = "$directory/STORE_INDEX";
73              
74 1   33     7 bless {
75             DIRECTORY => $directory,
76             OBJ_INDEX => DB::DataStore::FixedRecycleStore->open( "IL", "$directory/OBJ_INDEX" ),
77             STORE_IDX => DB::DataStore::FixedStore->open( "I", $filename ),
78             STORES => [],
79             }, ref( $pkg ) || $pkg;
80            
81             } #open
82              
83             =head2 entry_count
84              
85             Returns how many entries are in this store. Recycling ids does
86             _not_ decrement this entry_count.
87              
88             =cut
89             sub entry_count {
90 0     0 1 0 shift->{OBJ_INDEX}->entry_count;
91             }
92              
93             =head2 ensure_entry_count( min_count )
94              
95             This makes sure there there are at least min_count
96             entries in this datastore. This creates empty
97             records if needed.
98              
99             =cut
100             sub ensure_entry_count {
101 0     0 1 0 shift->{OBJ_INDEX}->ensure_entry_count( shift );
102             }
103              
104             =head2 next_id
105              
106             This sets up a new empty record and returns the
107             id for it.
108              
109             =cut
110             sub next_id {
111 0     0 1 0 my $self = shift;
112 0         0 $self->{OBJ_INDEX}->next_id;
113             }
114              
115             =head2 stow( data, optionalID )
116              
117             This saves the text or byte data to the datastore.
118             If an id is passed in, this saves the data to the record
119             for that id, overwriting what was there.
120             If an id is not passed in, it creates a new datastore.
121              
122             Returns the id of the record written to.
123              
124             =cut
125             sub stow {
126 4     4 1 344 my( $self, $data, $id ) = @_;
127              
128 4   33     20 $id //= $self->{OBJ_INDEX}->next_id;
129              
130 1     1   5 my $save_size = do { use bytes; length( $data ); };
  1         0  
  1         4  
  4         4  
  4         3  
131              
132 4         2 my( $current_store_id, $current_idx_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  4         12  
133              
134             #
135             # Check if this record had been saved before, and that the
136             # store is was in has a large enough record size.
137             #
138 4 100       10 if( $current_store_id ) {
139 1         3 my $old_store = $self->_get_store( $current_store_id );
140              
141 1 50       3 warn "object '$id' references store '$current_store_id' which does not exist" unless $old_store;
142              
143 1 50       2 if( $old_store->{RECORD_SIZE} >= $save_size ) {
144 1         3 $old_store->put_record( $current_idx_in_store, [$data] );
145 1         3 return $id;
146             }
147            
148             # the old store was not big enough (or missing), so remove its record from
149             # there.
150 0 0       0 $old_store->recycle( $current_idx_in_store, 1 ) if $old_store;
151             }
152              
153 3         6 my( $store_id, $store ) = $self->_best_store_for_size( $save_size );
154 3         6 my $index_in_store = $store->next_id;
155              
156 3         9 $self->{OBJ_INDEX}->put_record( $id, [ $store_id, $index_in_store ] );
157 3         8 $store->put_record( $index_in_store, [ $data ] );
158              
159 3         6 $id;
160             } #stow
161              
162             =head2 fetch( id )
163              
164             Returns the record associated with the ID. If the ID has no
165             record associated with it, undef is returned.
166              
167             =cut
168             sub fetch {
169 2     2 1 352 my( $self, $id ) = @_;
170 2         2 my( $store_id, $id_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  2         5  
171 2 50       7 return undef unless $store_id;
172              
173 2         5 my $store = $self->_get_store( $store_id );
174 2         2 my( $data ) = @{ $store->get_record( $id_in_store ) };
  2         4  
175 2         7 $data;
176             } #fetch
177              
178             =head2 recycle( $id )
179              
180             This marks that the record associated with the id may be reused.
181             Calling this does not decrement the number of entries reported
182             by the datastore.
183              
184             =cut
185             sub recycle {
186 1     1 1 2 my( $self, $id ) = @_;
187 1         1 my( $store_id, $id_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  1         2  
188 1 50       2 return undef unless defined $store_id;
189            
190 1         2 my $store = $self->_get_store( $store_id );
191 1         2 $store->recycle( $id_in_store );
192 1         2 $self->{OBJ_INDEX}->recycle( $id_in_store );
193              
194             } #recycle
195              
196             sub _best_store_for_size {
197 3     3   3 my( $self, $record_size ) = @_;
198 3         2 my( $best_idx, $best_size, $best_store ); #without going over.
199              
200             # using the written record rather than the array of stores to
201             # determine how many there are.
202 3         6 for my $idx ( 1 .. $self->{STORE_IDX}->entry_count ) {
203 2         4 my $store = $self->_get_store( $idx );
204 2         2 my $store_size = $store->{RECORD_SIZE};
205 2 50       4 if( $store_size >= $record_size ) {
206 2 50 33     5 if( ! defined( $best_size ) || $store_size < $best_size ) {
207 2         2 $best_idx = $idx;
208 2         2 $best_size = $store_size;
209 2         2 $best_store = $store;
210             }
211             }
212             } #each store
213            
214 3 100       6 if( $best_store ) {
215 2         2 return $best_idx, $best_store;
216             }
217              
218             # Have to create a new store.
219             # Make one that is thrice the size of the record
220 1         2 my $store_size = 3 * $record_size;
221 1         3 my $store_id = $self->{STORE_IDX}->next_id;
222              
223             # first, make an entry in the store index, giving it that size, then
224             # fetch it?
225 1         3 $self->{STORE_IDX}->put_record( $store_id, [$store_size] );
226              
227 1         3 my $store = $self->_get_store( $store_id );
228              
229 1         2 $store_id, $store;
230              
231             } #_best_store_for_size
232              
233             sub _get_recycled_ids {
234 0     0   0 shift->{OBJ_INDEX}->get_recycled_ids;
235             }
236              
237             sub _get_store {
238 7     7   6 my( $self, $store_index ) = @_;
239              
240 7 100       15 if( $self->{STORES}[ $store_index ] ) {
241 6         10 return $self->{STORES}[ $store_index ];
242             }
243              
244 1         1 my( $store_size ) = @{ $self->{ STORE_IDX }->get_record( $store_index ) };
  1         2  
245              
246             # since we are not using a pack template with a definite size, the size comes from the record
247              
248 1         7 my $store = DB::DataStore::FixedRecycleStore->open( "A*", "$self->{DIRECTORY}/${store_index}_OBJSTORE", $store_size );
249 1         1 $self->{STORES}[ $store_index ] = $store;
250 1         2 $store;
251             } #_get_store
252              
253             # ----------- end DB::DataStore
254             =head1 HELPER PACKAGES
255              
256             DB::DataStore relies on two helper packages that are useful in
257             their own right and are documented here.
258              
259             =head1 HELPER PACKAGE
260              
261             DB::DataStore::FixedStore
262              
263             =head1 DESCRIPTION
264              
265             A fixed record store that uses perl pack and unpack templates to store
266             identically sized sets of data and uses a single file to do so.
267              
268             =head1 SYNOPSIS
269              
270             my $template = "LII"; # perl pack template. See perl pack/unpack.
271              
272             my $size; #required if the template does not have a definite size, like A*
273              
274             my $store = DB::DataStore::FixedStore->open( $template, $filename, $size );
275              
276             my $new_id = $store->next_id;
277              
278             $store->put_record( $id, [ 321421424243, 12, 345 ] );
279              
280             my $more_data = $store->get_record( $other_id );
281              
282             my $removed_last = $store->pop;
283              
284             my $last_id = $store->push( $data_at_the_end );
285              
286             my $entries = $store->entry_count;
287              
288             if( $entries < $min ) {
289              
290             $store->ensure_entry_count( $min );
291              
292             }
293              
294             $store->emtpy;
295              
296             $store->unlink_store;
297              
298             =head1 METHODS
299              
300             =cut
301             package DB::DataStore::FixedStore;
302              
303 1     1   462 use strict;
  1         1  
  1         20  
304 1     1   3 use warnings;
  1         1  
  1         26  
305 1     1   3 no warnings 'uninitialized';
  1         1  
  1         26  
306              
307 1     1   3 use Fcntl qw( SEEK_SET LOCK_EX LOCK_UN );
  1         1  
  1         39  
308 1     1   456 use File::Copy;
  1         1939  
  1         127  
309              
310             =head2 open( template, filename, size )
311              
312             Opens or creates the file given as a fixed record
313             length data store. If a size is not given,
314             it calculates the size from the template, if it can.
315             This will die if a zero byte record size is determined.
316              
317             =cut
318             sub open {
319 6     6   264 my( $pkg, $template, $filename, $size ) = @_;
320 6   33     19 my $class = ref( $pkg ) || $pkg;
321 6         6 my $FH;
322 1   66 1   7 my $useSize = $size || do { use bytes; length( pack( $template ) ) };
  1         0  
  1         5  
  6         10  
323 6 50       8 die "Cannot open a zero record sized fixed store" unless $useSize;
324 6 50       116 unless( -e $filename ) {
325 6         233 CORE::open $FH, ">$filename";
326 6         10 print $FH "";
327 6         25 close $FH;
328             }
329 6 50       99 CORE::open $FH, "+<$filename" or die "$@ $!";
330 6         64 bless { TMPL => $template,
331             RECORD_SIZE => $useSize,
332             FILENAME => $filename,
333             }, $class;
334             } #open
335              
336             =head2 empty
337              
338             This empties out the database, setting it to zero records.
339              
340             =cut
341             sub empty {
342 0     0   0 my $self = shift;
343 0         0 my $fh = $self->_filehandle;
344 0         0 truncate $self->{FILENAME}, 0;
345 0         0 undef;
346             } #empty
347              
348             =head2 ensure_entry_count( count )
349              
350             Makes sure the data store has at least as many entries
351             as the count given. This creates empty records if needed
352             to rearch the target record count.
353              
354             =cut
355             sub ensure_entry_count {
356 0     0   0 my( $self, $count ) = @_;
357 0         0 my $fh = $self->_filehandle;
358              
359 0         0 my $entries = $self->entry_count;
360 0 0       0 if( $count > $entries ) {
361 0         0 for( (1+$entries)..$count ) {
362 0         0 $self->put_record( $_, [] );
363             }
364             }
365             } #ensure_entry_count
366              
367             =head2
368              
369             Returns the number of entries in this store.
370             This is the same as the size of the file divided
371             by the record size.
372              
373             =cut
374             sub entry_count {
375             # return how many entries this index has
376 59     59   49 my $self = shift;
377 59         60 my $fh = $self->_filehandle;
378 59         357 my $filesize = -s $self->{FILENAME};
379 59         292 int( $filesize / $self->{RECORD_SIZE} );
380             }
381              
382              
383             #
384             # _ _ The subs below are commented out pending a need _ _
385             # v v
386             #
387             # Why wait for a need? Because the need might not be quite
388             # what you were expecting; once you have a need, you are much
389             # more familiar with it.
390              
391             # =head2 get_records( startIDX, number )
392             # =cut
393             # sub get_records {
394             # my( $self, $startIDX, $number ) = @_;
395              
396             # my $fh = $self->_filehandle;
397             # my $size = $self->{RECORD_SIZE};
398             # my $tmpl = $self->{TMPL};
399             # sysseek $fh, $size * ($startIDX-1), SEEK_SET or die "Could not seek ($self->{RECORD_SIZE} * ($startIDX-1)) : $@ $!";
400             # my $srv = sysread $fh, my $data, $number * $size;
401             # # TODO : check $srv response
402             # my( @res );
403             # for( 0..($number-1) ) {
404             # my $part = substr( $data, $_ * $size, ($_ + 1 ) * ($size) );
405             # push @res, [ unpack( $tmpl, $part )];
406             # }
407             # \@res;
408             # } #get_records
409              
410             # sub splice_records {
411             # my( $self, $start, $numberToRemove, @listToAdd ) = @_;
412              
413             # # TODO - check for maximum chunk size
414             # CORE::open( my $fh, "+<$self->{FILENAME}.splicer" );
415             # my $size = $self->{RECORD_SIZE};
416              
417             # my $orig_fh = $self->_filehandle;
418              
419             # my $splice_action = sub {
420             # # put the first part of this file to the new file
421             # if( $start > 1 ) {
422             # sysread $orig_fh, my $data, $start * $size;
423             # syswrite( $fh, $data );
424             # }
425              
426             # # add the list of things
427             # my $to_write = '';
428             # for my $adder (@listToAdd) {
429             # my $part = pack( $self->{TMPL}, @$adder );
430             # my $part_length = do { use bytes; length( $part ); };
431             # if( $part_length < $size ) {
432             # my $delt = $size - $part_length;
433             # $part .= "\0" x $delt;
434             # }
435             # $to_write .= $part;
436             # }
437              
438             # my $to_write_length = do { use bytes; length( $to_write ); };
439             # sysseek( $fh, $self->{RECORD_SIZE} * ($start+$numberToRemove-1), SEEK_SET ) && ( my $swv = syswrite( $fh, $to_write ) );
440             # # then add the last part
441             # my $endRecords = $self->entry_count - ( $start + $numberToRemove );
442             # if( $endRecords > 0 ) {
443             # sysseek( $orig_fh, $self->{RECORD_SIZE} * ($start+$numberToRemove-1), SEEK_SET );
444             # my $srv = sysread $orig_fh, my $data, $size * $endRecords;
445             # }
446             # 1;
447             # }; #splice_action
448             # &$splice_action() && move( $self->{FILENAME}, "$self->{FILENAME}.bak" ) && move( "$self->{FILENAME}.splicer", $self->{FILENAME} );
449            
450             # } #splice_records
451              
452             =head2 get_record( idx )
453              
454             Returns an arrayref representing the record with the given id.
455             The array in question is the unpacked template.
456              
457             =cut
458             sub get_record {
459 15     15   18 my( $self, $idx ) = @_;
460              
461 15         18 my $fh = $self->_filehandle;
462              
463             # how about an ensure_entry_count right here?
464             # also a has_record
465 15 50       44 sysseek $fh, $self->{RECORD_SIZE} * ($idx-1), SEEK_SET or die "Could not seek ($self->{RECORD_SIZE} * ($idx-1)) : $@ $!";
466 15         46 my $srv = sysread $fh, my $data, $self->{RECORD_SIZE};
467 15 50       24 defined( $srv ) or die "Could not read : $@ $!";
468 15         106 [unpack( $self->{TMPL}, $data )];
469             } #get_record
470              
471             =head2 next_id
472              
473             adds an empty record and returns its id, starting with 1
474              
475             =cut
476             sub next_id {
477 6     6   4 my( $self ) = @_;
478 6         10 my $fh = $self->_filehandle;
479 6         11 my $next_id = 1 + $self->entry_count;
480 6         13 $self->put_record( $next_id, [] );
481 6         24 $next_id;
482             } #next_id
483              
484              
485             =head2 pop
486              
487             Remove the last record and return it.
488              
489             =cut
490             sub pop {
491 7     7   5 my( $self ) = @_;
492              
493 7         10 my $entries = $self->entry_count;
494 7 100       28 return undef unless $entries;
495 2         5 my $ret = $self->get_record( $entries );
496 2         4 truncate $self->_filehandle, ($entries-1) * $self->{RECORD_SIZE};
497 2         14 $ret;
498             } #pop
499              
500             =head2 push( data )
501              
502             Add a record to the end of this store. Returns the id assigned
503             to that record. The data must be a scalar or list reference.
504             If a list reference, it should conform to the pack template
505             assigned to this store.
506              
507             =cut
508             sub push {
509 5     5   15 my( $self, $data ) = @_;
510 5         6 my $fh = $self->_filehandle;
511 5         7 my $next_id = 1 + $self->entry_count;
512 5         9 $self->put_record( $next_id, $data );
513 5         19 $next_id;
514             } #push
515              
516             =head2 push( idx, data )
517              
518             Saves the data to the record and the record to the filesystem.
519             The data must be a scalar or list reference.
520             If a list reference, it should conform to the pack template
521             assigned to this store.
522              
523             =cut
524             sub put_record {
525 19     19   20 my( $self, $idx, $data ) = @_;
526 19         19 my $fh = $self->_filehandle;
527 19 50       66 my $to_write = pack ( $self->{TMPL}, ref $data ? @$data : $data );
528              
529             # allows the put_record to grow the data store by no more than one entry
530 1 50   1   474 use Carp 'longmess'; print STDERR Data::Dumper->Dump([longmess]) if $idx > (1+$self->entry_count);
  1         1  
  1         79  
  19         20  
531 19 50       27 die "Index out of bounds" if $idx > (1+$self->entry_count);
532              
533 1     1   11 my $to_write_length = do { use bytes; length( $to_write ); };
  1         2  
  1         3  
  19         19  
  19         16  
534 19 100       26 if( $to_write_length < $self->{RECORD_SIZE} ) {
535 6         5 my $del = $self->{RECORD_SIZE} - $to_write_length;
536 6         9 $to_write .= "\0" x $del;
537 1     1   48 $to_write_length = do { use bytes; length( $to_write ); };
  1         1  
  1         3  
  6         5  
  6         4  
538             }
539 19 50       26 die "$to_write_length vs $self->{RECORD_SIZE}" unless $to_write_length == $self->{RECORD_SIZE};
540              
541             # how about an ensure_entry_count right here?
542              
543 19 50       217 sysseek( $fh, $self->{RECORD_SIZE} * ($idx-1), SEEK_SET ) && ( my $swv = syswrite( $fh, $to_write ) );
544 19         73 1;
545             } #put_record
546              
547             =head2 unlink_store
548              
549             Removes the file for this record store entirely from the file system.
550              
551             =cut
552             sub unlink_store {
553             # TODO : more checks
554 0     0   0 my $self = shift;
555 0         0 close $self->_filehandle;
556 0         0 unlink $self->{FILENAME};
557             }
558              
559             sub _filehandle {
560 106     106   62 my $self = shift;
561 106         1535 CORE::open( my $fh, "+<$self->{FILENAME}" );
562 106         202 $fh;
563             }
564              
565              
566             # ----------- end DB::DataStore::FixedStore
567              
568              
569              
570             =head1 HELPER PACKAGE
571              
572             DB::DataStore::FixedRecycleStore
573              
574             =head1 SYNOPSIS
575              
576             A subclass DB::DataStore::FixedRecycleStore. This allows
577             indexes to be recycled and their record space reclaimed.
578              
579             my $store = DB::DataStore::FixedRecycleStore->open( $template, $filename, $size );
580              
581             my $id = $store->next_id;
582              
583             $store->put_record( $id, ["SOMEDATA","FOR","PACK" ] );
584              
585             my $id2 = $store->next_id; # == 2
586              
587             $store->recycle( $id );
588              
589             my $avail_ids = $store->get_recycled_ids; # [ 1 ]
590              
591             my $id3 = $store->next_id;
592             $id3 == $id;
593              
594             =cut
595             package DB::DataStore::FixedRecycleStore;
596              
597 1     1   138 use strict;
  1         1  
  1         17  
598 1     1   3 use warnings;
  1         1  
  1         199  
599              
600             our @ISA='DB::DataStore::FixedStore';
601              
602             sub open {
603 2     2   3 my( $pkg, $template, $filename, $size ) = @_;
604 2         9 my $self = DB::DataStore::FixedStore->open( $template, $filename, $size );
605 2         8 $self->{RECYCLER} = DB::DataStore::FixedStore->open( "L", "${filename}.recycle" );
606 2         8 bless $self, $pkg;
607             } #open
608              
609             =head1 METHODS
610              
611             =head2 recycle( $idx )
612              
613             Recycles the given id and reclaims its space.
614              
615             =cut
616             sub recycle {
617 2     2   3 my( $self, $idx ) = @_;
618 2         5 $self->{RECYCLER}->push( [$idx] );
619             } #recycle
620              
621             =head2 get_recycled_ids
622              
623             Returns a list reference of ids that are available
624             to be reused.
625              
626             =cut
627             sub get_recycled_ids {
628 0     0   0 my $self = shift;
629 0         0 my $R = $self->{RECYCLER};
630 0         0 my $max = $R->entry_count;
631 0         0 my @ids;
632 0         0 for( 1 .. $max ) {
633 0         0 push @ids, @{ $R->get_record( $_ ) };
  0         0  
634             }
635 0         0 \@ids;
636             } #get_recycled_ids
637              
638             sub next_id {
639 7     7   4 my $self = shift;
640              
641 7 100       4 my( $recycled_id ) = @{ $self->{RECYCLER}->pop || []};
  7         13  
642 7 100       22 $recycled_id = $recycled_id ? $recycled_id : $self->SUPER::next_id;
643             } #next_id
644              
645             # ----------- end package DB::DataStore::FixedRecycleStore;
646              
647             1;
648              
649             __END__