File Coverage

blib/lib/MCDB_File.pm
Criterion Covered Total %
statement 29 29 100.0
branch 1 2 50.0
condition n/a
subroutine 11 11 100.0
pod n/a
total 41 42 97.6


line stmt bran cond sub pod time code
1             package MCDB_File;
2              
3 2     2   145715 use strict;
  2         10  
  2         49  
4 2     2   8 use warnings;
  2         2  
  2         38  
5 2     2   6 use Carp;
  2         3  
  2         102  
6 2     2   10 use vars qw($VERSION @ISA);
  2         3  
  2         99  
7              
8 2     2   11 use DynaLoader ();
  2         2  
  2         29  
9 2     2   8 use Exporter ();
  2         2  
  2         202  
10              
11             @ISA = qw(Exporter DynaLoader);
12              
13             $VERSION = '0.0108';
14              
15             =head1 NAME
16              
17             MCDB_File - Perl extension for access to mcdb constant databases
18              
19             =head1 SYNOPSIS
20              
21             use MCDB_File ();
22             tie %mcdb, 'MCDB_File', 'file.mcdb' or die "tie failed: $!\n";
23             $value = $mcdb{$key};
24             $num_records = scalar $mcdb;
25             untie %mcdb;
26              
27             use MCDB_File ();
28             eval {
29             my $mcdb_make = new MCDB_File::Make('t.mcdb')
30             or die "create t.mcdb failed: $!\n";
31             $mcdb_make->insert('key1', 'value1');
32             $mcdb_make->insert('key2' => 'value2', 'key3' => 'value3');
33             $mcdb_make->insert(%t);
34             $mcdb_make->finish;
35             } or ($@ ne "" and warn "$@");
36              
37             use MCDB_File ();
38             eval { MCDB_File::Make::create $file, %t; }
39             or ($@ ne "" and warn "$@");
40              
41             =head1 DESCRIPTION
42              
43             B<MCDB_File> is a module which provides a Perl interface to B<mcdb>.
44             mcdb is originally based on Dan Bernstein's B<cdb> package.
45              
46             mcdb - fast, reliable, simple code to create, read constant databases
47              
48             =head2 Reading from an mcdb constant database
49              
50             After the C<tie> shown above, accesses to C<%h> will refer
51             to the B<mcdb> file C<file.mcdb>, as described in L<perlfunc/tie>.
52              
53             C<keys>, C<values>, and C<each> can be used to iterate through records.
54             Note that only one iteration loop can be in progress at any one time.
55             Performing multiple iterations at the same time (i.e. in nested loops)
56             will not have independent iterators and therefore should be avoided.
57             Note that it is safe to use the find('key') method while iterating.
58             See PERFORMANCE section below for sample usage.
59              
60             =head2 Creating an mcdb constant database
61              
62             An B<mcdb> file is created in three steps. First call
63             C<new MCDB_File::Make($fname)>, where C<$fname> is the name of the
64             database file to be created. Secondly, call the C<insert> method
65             once for each (I<key>, I<value>) pair. Finally, call the C<finish>
66             method to complete the creation. A temporary file is used during
67             mcdb creation and atomically renamed to C<$fname> when the C<finish>
68             method is successful.
69              
70             Alternatively, call the C<insert> method with multiple key/value
71             pairs. This can be significantly faster because there is less crossing
72             over the bridge from perl to C code. One simple way to do this is to pass
73             in an entire hash, as in: C<< $mcdb_make->insert(%hash); >>.
74              
75             A simpler interface to B<mcdb> file creation is provided by
76             C<MCDB_File::Make::create $fname, %t>. This creates an B<mcdb> file named
77             C<$fname> containing the contents of C<%t>.
78              
79             =head1 EXAMPLES
80              
81             These are all complete programs.
82              
83             1. Use $mcdb->find('key') method to look up a 'key' in an mcdb.
84              
85             use MCDB_File ();
86             $mcdb = tie %h, MCDB_File, "$file.mcdb" or die ...;
87             $value = $mcdb->find('key'); # slightly faster than $value = $h{key};
88             undef $mcdb;
89             untie %h;
90              
91             2. Convert a Berkeley DB (B-tree) database to B<mcdb> format.
92              
93             use MCDB_File ();
94             use DB_File;
95              
96             tie %h, DB_File, $ARGV[0], O_RDONLY, undef, $DB_BTREE
97             or die "$0: can't tie to $ARGV[0]: $!\n";
98              
99             MCDB_File::Make::create $ARGV[1], %h; # croak()s if error
100              
101             3. Convert a flat file to B<mcdb> format. In this example, the flat
102             file consists of one key per line, separated by a colon from the value.
103             Blank lines and lines beginning with B<#> are skipped.
104              
105             use MCDB_File;
106              
107             eval {
108             my $mcdb = new MCDB_File::Make("data.mcdb")
109             or die "$0: new MCDB_File::Make failed: $!\n";
110             while (<>) {
111             next if /^$/ or /^#/;
112             chomp;
113             ($k, $v) = split /:/, $_, 2;
114             if (defined $v) {
115             $mcdb->insert($k, $v);
116             } else {
117             warn "bogus line: $_\n";
118             }
119             }
120             $mcdb->finish;
121             } or ($@ ne "" and die "$@");
122              
123             4. Perl version of B<mcdbctl dump>.
124              
125             use MCDB_File ();
126              
127             tie %data, 'MCDB_File', $ARGV[0]
128             or die "$0: can't tie to $ARGV[0]: $!\n";
129             while (($k, $v) = each %data) {
130             print '+', length $k, ',', length $v, ":$k->$v\n";
131             }
132             print "\n";
133              
134             5. Although an B<mcdb> file is constant, you can simulate updating it
135             in Perl. This is an expensive operation, as you have to create a
136             new database, and copy into it everything that is unchanged from the
137             old database. (As compensation, the update does not affect database
138             readers. The old database is available for them, up until the moment
139             the new one is C<finish>ed.)
140              
141             use MCDB_File ();
142              
143             $file = 'data.cdb';
144             tie %old, 'MCDB_File', $file
145             or die "$0: can't tie to $file: $!\n";
146             $new = new MCDB_File::Make($file)
147             or die "$0: new MCDB_File::Make failed: $!\n";
148              
149             eval {
150             # Add the new values; remember which keys we've seen.
151             while (<>) {
152             chomp;
153             ($k, $v) = split;
154             $new->insert($k, $v);
155             $seen{$k} = 1;
156             }
157              
158             # Add any old values that haven't been replaced.
159             while (($k, $v) = each %old) {
160             $new->insert($k, $v) unless $seen{$k};
161             }
162              
163             $new->finish;
164             } or ($@ ne "" and die "$@");
165              
166             =head1 REPEATED KEYS
167              
168             Most users can ignore this section.
169              
170             An B<mcdb> file can contain repeated keys. If the C<insert> method is
171             called more than once with the same key during the creation of an B<mcdb>
172             file, that key will be repeated.
173              
174             Here's an example.
175              
176             $mcdb = new MCDB_File::Make("$file.mcdb") or die ...;
177             $mcdb->insert('cat', 'gato');
178             $mcdb->insert('cat', 'chat');
179             $mcdb->finish;
180              
181             Normally, any attempt to access a key retrieves the first value
182             stored under that key. This code snippet always prints B<gato>.
183              
184             $catref = tie %catalogue, MCDB_File, "$file.mcdb" or die ...;
185             print "$catalogue{cat}";
186              
187             However, all the usual ways of iterating over a hash---C<keys>,
188             C<values>, and C<each>---do the Right Thing, even in the presence of
189             repeated keys. This code snippet prints B<cat cat gato chat>.
190              
191             print join(' ', keys %catalogue, values %catalogue);
192              
193             And these two both print B<cat:gato cat:chat>, although the second is
194             more efficient.
195              
196             foreach $key (keys %catalogue) {
197             print "$key:$catalogue{$key} ";
198             }
199              
200             while (($key, $val) = each %catalogue) {
201             print "$key:$val ";
202             }
203              
204             The C<multi_get> method retrieves all the values associated with a key.
205             It returns a reference to an array containing all the values. This code
206             prints B<gato chat>.
207              
208             print "@{$catref->multi_get('cat')}";
209              
210             C<multi_get> always returns an array reference. If the key was not
211             found in the database, it will be a reference to an empty array. To
212             test whether the key was found, you must test the array, and not the
213             reference.
214              
215             $x = $catref->multi_get($key);
216             warn "$key not found\n" unless $x; # WRONG; message never printed
217             warn "$key not found\n" unless @$x; # Correct
218              
219             Any extra references to the C<MCDB_File> object (like C<$catref> in the
220             examples above) must be released with C<undef> or must have gone out of
221             scope before calling C<untie> on the hash. This ensures that the object's
222             C<DESTROY> method is called. Note that C<perl -w> will check this for
223             you; see L<perltie> for further details.
224              
225             use MCDB_File ();
226             $catref = tie %catalogue, MCDB_File, "$file.mcdb" or die ...;
227             print "@{$catref->multi_get('cat')}";
228             undef $catref;
229             untie %catalogue;
230              
231             =head1 RETURN VALUES
232              
233             The routines C<tie> and C<new> return B<undef> if the attempted
234             operation failed; C<$!> contains the reason for failure.
235             C<insert> and C<finish> call C<croak> if the attempted operation
236             fails.
237              
238             =head1 DIAGNOSTICS
239              
240             The following fatal errors may occur.
241             (See L<perlfunc/eval> if you want to trap them.)
242              
243             =over 4
244              
245             =item Modification of an MCDB_File attempted
246              
247             You attempted to modify a hash tied to a B<MCDB_File>.
248              
249             =item MCDB_File::Make::<insert|finish>:<error string>
250              
251             An OS level problem occurred, such as permission denied writing
252             to filesystem, or you have run out of disk space.
253              
254             =back
255              
256             =head1 PERFORMANCE
257              
258             The MCDB_File C<madvise> method is a thin wrapper around the C library
259             C<posix_madvise> and MCDB_File provides constants: C<MADV_NORMAL>
260             C<MADV_RANDOM> C<MADV_SEQUENTIAL> C<MADV_WILLNEED> and C<MADV_DONTNEED>.
261              
262             For very large B<mcdb> files on which more than a few queries will be made,
263             it is recommended that C<madvise> with C<MCDB_File::MADV_RANDOM> be called
264             once on the object returned by C<tie>.
265              
266             my $mcdb = tie %h, MCDB_File, "$file.mcdb" or die ...;
267             $mcdb->madvise(MCDB_File::MADV_RANDOM);
268             $value = $mcdb->find('key'); # slightly faster than $value = $h{key};
269             # ... (lots more queries)
270             undef $mcdb;
271             untie %h;
272              
273             For iterating over very large B<mcdb> files, it is recommended that
274             C<madvise> with C<MCDB_File::MADV_SEQUENTIAL> be called once on the
275             object returned by C<tie>.
276              
277             Sometimes you need to get the most performance possible out of a
278             library. Rumour has it that perl's tie() interface is slow. In order
279             to get around that you can use MCDB_File in an object oriented
280             fashion, rather than via tie().
281              
282             my $mcdb = MCDB_File->TIEHASH('/path/to/mcdbfile.mcdb');
283             if ($mcdb->EXISTS('key')) {
284             print "Key: 'key'; Value: ", $mcdb->FETCH('key'), "\n";
285             }
286             undef $mcdb;
287              
288             For more information on the methods available on tied hashes see L<perltie>.
289              
290             Due to the internal Perl reuse of FETCH method to support queries,
291             as well as each() and values(), it will be slightly more efficient
292             to call the $mcdb->find('key') method than to call $mcdb->FETCH('key').
293              
294             =head1 ACKNOWLEDGEMENTS
295              
296             mcdb is based on cdb, created by Dan Bernstein .
297             MCDB_File is based on CDB_File, created by Tim Goodwin,
298             and currently maintained by Todd Rinaldo https://github.com/toddr/CDB_File/
299              
300             =head1 AUTHOR
301              
302             gstrauss
303              
304             =cut
305              
306             bootstrap MCDB_File $VERSION; # load the compiled part of the module (DynaLoader is in @ISA)
307              
308             # Advice values for the madvise method, exposed as MCDB_File::MADV_*.
                # The trailing comment on each entry names the MCDB_MADV_* value it
                # stands for.
                # (not worth the mess of doing this in .xs; define the values here)
309             use constant {
310 2         499 MADV_NORMAL => 0, # MCDB_MADV_NORMAL
311             MADV_RANDOM => 1, # MCDB_MADV_RANDOM
312             MADV_SEQUENTIAL => 2, # MCDB_MADV_SEQUENTIAL
313             MADV_WILLNEED => 3, # MCDB_MADV_WILLNEED
314             MADV_DONTNEED => 4 # MCDB_MADV_DONTNEED
315 2     2   10 };
  2         4  
316              
317             sub CLEAR {
                # Tied-hash CLEAR hook. The database is constant, so any attempt to
                # empty the hash croaks. DELETE and STORE in this package call this
                # sub too, so all three raise the same message.
318 2     2   256 croak "Modification of an MCDB_File attempted"
319             }
320              
321             sub DELETE {
                # Tied-hash DELETE hook. Removing a key counts as a modification.
                # The bare &CLEAR form (no parentheses) calls CLEAR with the current
                # @_, and CLEAR croaks.
322 1     1   532 &CLEAR
323             }
324              
325             sub STORE {
                # Tied-hash STORE hook. Assigning a value counts as a modification.
                # The bare &CLEAR form (no parentheses) calls CLEAR with the current
                # @_, and CLEAR croaks.
326 1     1   4002 &CLEAR
327             }
328              
329             # Must be preloaded for the prototype.
330              
331             package MCDB_File::Make;
332              
333             sub create($\%) {
                # Build a complete database file from a hash in one call.
                # The ($\%) prototype lets callers write `create $file, %hash`; the
                # hash arrives here as a reference ($RHdata), not as a flattened list.
                # Returns 1 on success, or undef if the maker object cannot be created.
334 3     3   31426 my($fn, $RHdata) = @_;
335              
                # Constructor failure is reported to the caller as undef. The branch
                # column on this row (50) shows only one outcome of the `or` was
                # exercised in this coverage run.
336 3 50       1847 my $mcdb = new MCDB_File::Make($fn) or return undef;
                # Pass every key/value pair of the hash to insert in a single call.
337 3         171 $mcdb->insert(%$RHdata);
338 3         15863 $mcdb->finish;
339 3         49 return 1;
340             }
341              
342             1;