File Coverage

blib/lib/FlatFile/DataStore/Initialize.pm
Criterion Covered Total %
statement 134 137 97.8
branch 39 44 88.6
condition 7 9 77.7
subroutine 18 18 100.0
pod 1 7 14.2
total 199 215 92.5


line stmt bran cond sub pod time code
1             #---------------------------------------------------------------------
2             package FlatFile::DataStore; # not FlatFile::DataStore::Initialize
3             #---------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             FlatFile::DataStore::Initialize - Provides routines that are used
8             only when initializing a datastore
9              
10             =head1 SYNOPSYS
11              
12             require FlatFile::DataStore::Initialize;
13              
14             (But this is done only in FlatFile/DataStore.pm)
15              
16             =head1 DESCRIPTION
17              
18             FlatFile::DataStore::Initialize provides the routines that
19             are used only when a datastore is initialized. It isn't a
20             "true" module; it's intended for loading more methods in the
21             FlatFile::DataStore class.
22              
23             =head1 VERSION
24              
25             FlatFile::DataStore::Initialize version 1.03
26              
27             =cut
28              
29             our $VERSION = '1.03';
30              
31 23     23   689 use 5.008003;
  23         109  
  23         1659  
32 23     23   139 use strict;
  23         50  
  23         892  
33 23     23   140 use warnings;
  23         59  
  23         957  
34              
35 23     23   34612 use URI;
  23         77471  
  23         761  
36 23     23   185 use URI::Escape;
  23         52  
  23         2173  
37 23     23   136 use Digest::MD5 qw(md5_hex);
  23         48  
  23         1530  
38 23     23   134 use Data::Dumper;
  23         51  
  23         1088  
39 23     23   127 use Carp;
  23         46  
  23         1342  
40              
41 23     23   304 use Math::Int2Base qw( base_chars int2base base2int );
  23         48  
  23         1814  
42 23     23   161 use Data::Omap qw( :ALL );
  23         45  
  23         61261  
43              
44             #---------------------------------------------------------------------
45             # burst_query(), called by init() to parse the datastore's uri
46             # Takes a hash ref to the %Preamble attribute hash, so it can
47             # know which parts of the uri belong in the preamble.
48             # Then it gets the uri from the datastore object and parses it.
49             # It loads all of the values it gets (and generates) in to a
50             # hash ref which it returns.
51             #
52             # Private method.
53              
54             sub burst_query {
55 51     51 0 127 my( $self, $Preamble ) = @_;
56              
57 51         227 my $uri = $self->uri;
58 51         429 my $query = URI->new( $uri )->query();
59              
60 51         279808 my @pairs = split /[;&]/, $query;
61 51         234 my $omap = []; # psuedo-new(), ordered hash
62 51         120 my $pos = 0;
63 51         100 my %parms;
64             my $load_parms = sub {
65 779     779   2133 my( $name, $val ) = split /=/, $_[0], 2;
66              
67 779         2189 $name = uri_unescape( $name );
68 779         6634 $val = uri_unescape( $val );
69              
70 779 100       19366 croak qq/Parm duplicated in uri: $name/ if $parms{ $name };
71              
72 778         2234 $parms{ $name } = $val;
73 778 100       2388 if( $Preamble->{ $name } ) {
74 598         1469 my( $len, $parm ) = split /-/, $val, 2;
75 598 100 66     3047 croak qq/Value must be format 'length-parm': $name=$val/
76             unless defined $len and defined $parm;
77 597         2730 omap_add( $omap, $name => [ $pos, 0+$len, $parm ] );
78 597         28064 $pos += $len;
79             }
80 51         463 };
81 51         165 for( @pairs ) {
82 597 100       1763 if( /^defaults=(.*)/ ) {
83 18         98 $load_parms->( $_ ) for defaults( $1 );
84 17         60 next;
85             }
86 579         1751 $load_parms->( $_ );
87             }
88              
89             # some attributes are generated here:
90 48         141 $parms{'specs'} = $omap;
91 48         132 $parms{'preamblelen'} = $pos;
92              
93 48         636 return \%parms;
94             }
95              
96             #---------------------------------------------------------------------
97             # defaults()
98             # This routine provides some default values for the datastore
99             # configuration uri. It takes the default you want, one of
100             # xsmall xsmall_nohist
101             # small small_nohist
102             # medium medium_nohist
103             # large large_nohist
104             # xlarge xlarge_nohist
105             # and it returns the default values as an array of key/value
106             # strings, ready to include in a uri.
107             #
108             # Private method.
109              
110             sub defaults {
111 18     18 1 61 my( $want ) = @_;
112              
113 18         794 my $ind = uri_escape( "1-+#=*-" );
114              
115 18         813 my @xsmall_nohist = (
116             "indicator=$ind",
117             "transind=$ind",
118             "date=7-yymdttt",
119             "transnum=2-62", # 3,843 transactions
120             "keynum=2-62", # 3,843 records
121             "reclen=2-62", # 3,843 bytes/record
122             "thisfnum=1-36", # 35 data files
123             "thisseek=4-62", # 14,776,335 bytes/file
124             );
125 18         89 my @xsmall = (
126             @xsmall_nohist,
127             "prevfnum=1-36",
128             "prevseek=4-62",
129             "nextfnum=1-36",
130             "nextseek=4-62",
131             );
132              
133 18         109 my @small_nohist = (
134             "indicator=$ind",
135             "transind=$ind",
136             "date=7-yymdttt",
137             "transnum=3-62", # 238,327 transactions
138             "keynum=3-62", # 238,327 records
139             "reclen=3-62", # 238,327 bytes/record
140             "thisfnum=1-36", # 35 data files
141             "thisseek=5-62", # 916,132,831 bytes/file
142             );
143 18         122 my @small = (
144             @small_nohist,
145             "prevfnum=1-36",
146             "prevseek=5-62",
147             "nextfnum=1-36",
148             "nextseek=5-62",
149             );
150              
151 18         121 my @medium_nohist = (
152             "indicator=$ind",
153             "transind=$ind",
154             "date=7-yymdttt",
155             "transnum=4-62", # 14,776,335 transactions
156             "keynum=4-62", # 14,776,335 records
157             "reclen=4-62", # 14,776,335 bytes/record
158             "thisfnum=2-36", # 1,295 data files
159             "thisseek=5-62", # 916,132,831 bytes/file
160             );
161 18         240 my @medium = (
162             @medium_nohist,
163             "prevfnum=2-36",
164             "prevseek=5-62",
165             "nextfnum=2-36",
166             "nextseek=5-62",
167             );
168              
169 18         130 my @large_nohist = (
170             "datamax=1.9G",
171             "dirmax=300",
172             "keymax=100_000",
173             "indicator=$ind",
174             "transind=$ind",
175             "date=7-yymdttt",
176             "transnum=5-62", # 916,132,831 transactions
177             "keynum=5-62", # 916,132,831 records
178             "reclen=5-62", # 916,132,831 bytes/record
179             "thisfnum=3-36", # 46,655 data files
180             "thisseek=6-62", # 56G per file (but see datamax)
181             );
182 18         90 my @large = (
183             @large_nohist,
184             "prevfnum=3-36",
185             "prevseek=6-62",
186             "nextfnum=3-36",
187             "nextseek=6-62",
188             );
189              
190 18         113 my @xlarge_nohist = (
191             "datamax=1.9G",
192             "dirmax=300",
193             "dirlev=2",
194             "keymax=100_000",
195             "tocmax=100_000",
196             "indicator=$ind",
197             "transind=$ind",
198             "date=7-yymdttt",
199             "transnum=6-62", # 56B transactions
200             "keynum=6-62", # 56B records
201             "reclen=6-62", # 56G per record
202             "thisfnum=4-36", # 1,679,615 data files
203             "thisseek=6-62", # 56G per file (but see datamax)
204             );
205 18         79 my @xlarge = (
206             @xlarge_nohist,
207             "prevfnum=4-36",
208             "prevseek=6-62",
209             "nextfnum=4-36",
210             "nextseek=6-62",
211             );
212              
213 18         331 my $ret = {
214             xsmall => \@xsmall,
215             xsmall_nohist => \@xsmall_nohist,
216             small => \@small,
217             small_nohist => \@small_nohist,
218             medium => \@medium,
219             medium_nohist => \@medium_nohist,
220             large => \@large,
221             large_nohist => \@large_nohist,
222             xlarge => \@xlarge,
223             xlarge_nohist => \@xlarge_nohist,
224             }->{ $want };
225              
226 18 100       394 croak qq/Unrecognized defaults: $want/ unless $ret;
227 17         222 @$ret; # returned
228             }
229              
230             #---------------------------------------------------------------------
231             # make_preamble_regx(), called by init() to construct a regular
232             # expression that should match any record's preamble.
233             # This regx should capture each field's value.
234             #
235             # Private method.
236              
237             sub make_preamble_regx {
238 45     45 0 96 my( $self ) = @_;
239              
240 45         98 my $regx = "";
241 45         163 for( $self->specs ) { # specs() returns an array of hashrefs
242 540         1607 my( $key, $aref ) = %$_;
243 540         938 my( $pos, $len, $parm ) = @$aref;
244              
245 540         1013 for( $key ) {
246 540 100 100     3382 if( /indicator/ or /transind/ ) {
    100          
    100          
247 86 100       811 $regx .= ($len == 1 ? "([\Q$parm\E])" : "([\Q$parm\E]{$len})");
248             }
249             elsif( /user/ ) {
250             # XXX should only allow $Ascii_chars, not checked here
251             # (metachars in $parm should already be escaped as needed)
252 42 100       309 $regx .= ($len == 1 ? "([$parm])" : "([$parm]{$len})");
253             }
254             elsif( /date/ ) {
255             # XXX regx makes no attempt to insure an actual valid date
256             # XXX this code needs to barf on, e.g., yyyyyyyy ...
257             # e.g., yyyymmdd(8) yyyymmddtttttt(14) yymd(4) yymdttt(7)
258 45 100       835 croak qq/Invalid date length: $len/
259             unless $len =~ /^(?:4|7|8|14)$/;
260 44 100       571 croak qq/Date length doesn't match format: $len-$parm/
261             unless $len == length $parm;
262 43 100       316 $regx .= ($len < 8 ? "([0-9A-Za-z]{$len})" : "([0-9]{$len})");
263             }
264             else {
265 367         1185 my $chars = base_chars( $parm );
266 367         3923 $chars =~ s/([0-9])[0-9]+([0-9])/$1-$2/; # compress
267 367         1035 $chars =~ s/([A-Z])[A-Z]+([A-Z])/$1-$2/;
268 367         758 $chars =~ s/([a-z])[a-z]+([a-z])/$1-$2/;
269             # '-' is 'null' character:
270 367 100       1916 $regx .= ($len == 1 ? "([-$chars])" : "([-$chars]{$len})");
271             }
272             }
273             }
274 43         2408 return qr/$regx/;
275             }
276              
277             #---------------------------------------------------------------------
278             # make_crud(), called by init() to construct a hash of CRUD indicators
279             # (CRUD: Create, Retrieve, Update, Delete)
280             # the following are suggested, but configurable in the uri
281             # + Create
282             # # Old Update (old record flagged as updated)
283             # = Update
284             # * Old Delete (old record flagged as deleted)
285             # - Delete
286             # (no indicator for Retrieve, n/a--but didn't want to say CUD)
287             # Note that a reverse set is included, e.g., '+' => 'create' as
288             # well as create => '+'.
289             #
290             # Private method.
291              
292             sub make_crud {
293 43     43 0 97 my( $self ) = @_;
294              
295 43         162 my( $len, $chars ) = split /-/, $self->indicator, 2;
296 43 100       432 croak qq/Only single-character indicators supported/ if $len != 1;
297              
298 42         234 my @c = split //, $chars;
299 42         547 my %c = map { $_ => 1 } @c;
  211         578  
300 42         214 my @n = keys %c;
301 42 100 66     611 croak qq/Need five unique indicator characters/ if @n != 5 or @c != 5;
302              
303 41         87 my %crud;
304 41         280 @crud{ qw( create oldupd update olddel delete ) } = @c;
305 41         221 @crud{ @c } = qw( create oldupd update olddel delete );
306 41         339 return \%crud;
307             }
308              
309             #---------------------------------------------------------------------
310             # convert_max(), called by init() to convert user-supplied max values
311             # (datamax, keymax, etc.) into an integer.
312             # One can say, "500_000_000", "500M", or ".5G" to mean
313             # 500,000,000 bytes
314             #
315             # Private method.
316              
317             sub convert_max {
318 20     20 0 39 my( $max ) = @_;
319              
320             # ignoring M/G ambiguities and using round numbers:
321 20         72 my %sizes = ( M => 10**6, G => 10**9 );
322              
323 20         58 $max =~ s/_//g;
324 20 100       110 if( $max =~ /^([.0-9]+)([MG])/ ) {
325 4         21 my( $n, $s ) = ( $1, $2 );
326 4         20 $max = $n * $sizes{ $s };
327             }
328              
329 20         120 return 0+$max;
330             }
331              
332             #---------------------------------------------------------------------
333             # initialize(), called by init() when datastore is first used
334             # adds a serialized object to the uri file to bypass uri
335             # parsing from then on
336             #
337             # Private method.
338              
339             sub initialize {
340 39     39 0 86 my( $self ) = @_;
341              
342             # can't initialize after data has been added
343              
344 39         164 my $fnum = int2base 1, $self->fnumbase, $self->fnumlen;
345 39         1148 my $datafile = $self->which_datafile( $fnum );
346 39 100       1291 croak qq/Can't initialize database (data files exist): $datafile/
347             if -e $datafile;
348              
349             # make object a one-liner
350 38         105 local $Data::Dumper::Quotekeys = 0;
351 38         103 local $Data::Dumper::Pair = '=>';
352 38         89 local $Data::Dumper::Useqq = 1;
353 38         72 local $Data::Dumper::Terse = 1;
354 38         95 local $Data::Dumper::Indent = 0;
355              
356             # delete dir, don't want it in obj file
357 38         163 my $savedir = $self->dir;
358 38         174 $self->dir("");
359              
360 38         176 my $uri_file = "$savedir/" . $self->name . ".uri";
361 38         158 my $uri = $self->uri;
362 38         255 my $obj = Dumper $self;
363 38         22186 my $uri_md5 = md5_hex( $uri );
364 38         361 my $obj_md5 = md5_hex( $obj );
365 38         427 my $contents = <<_end_;
366             $uri
367             $obj
368             $uri_md5
369             $obj_md5
370             _end_
371 38         192 $self->write_file( $uri_file, \$contents );
372              
373             # restore dir
374 38         250 $self->dir( $savedir );
375              
376             }
377              
378             #---------------------------------------------------------------------
379             # write_file(), dump contents to file
380             # Takes a file name and some "contents", locks it for writing,
381             # and writes the contents to the file. The $contents parameter
382             # is expected to be a string, a scalar reference, or an array
383             # reference. The lines in this array should already end with
384             # newline, if they're expected to be that way in the file.
385             #
386             # Private method.
387              
388             sub write_file {
389 38     38 0 95 my( $self, $file, $contents ) = @_;
390              
391 38         231 my $fh = $self->locked_for_write( $file );
392 38         109 my $type = ref $contents;
393 38 50       124 if( $type ) {
394 38 50       129 if ( $type eq 'SCALAR' ) { print $fh $$contents }
  38 0       2829  
395 0         0 elsif( $type eq 'ARRAY' ) { print $fh join "", @$contents }
396 0         0 else { croak qq/Unrecognized type: $type/ }
397             }
398 0         0 else { print $fh $contents }
399 38 50       1277 close $fh or die "Can't close $file: $!";
400             }
401              
402             1; # returned
403              
404             __END__