File Coverage

blib/lib/DBIx/FileStore.pm
Criterion Covered Total %
statement 20 170 11.7
branch 0 72 0.0
condition 0 18 0.0
subroutine 7 24 29.1
pod 12 12 100.0
total 39 296 13.1


line stmt bran cond sub pod time code
1             package DBIx::FileStore;
2 2     2   104695 use strict;
  2         11  
  2         47  
3              
4             # this reads and writes files from the db.
5 2     2   2433 use DBI;
  2         27685  
  2         106  
6 2     2   13 use Digest::MD5 qw( md5_base64 );
  2         4  
  2         96  
7 2     2   878 use File::Copy;
  2         7041  
  2         95  
8              
9 2     2   694 use DBIx::FileStore::ConfigFile;
  2         5  
  2         82  
10              
11             our $VERSION = '0.33'; # version also mentioned in POD below.
12 2     2   13 use Mouse;
  2         4  
  2         6  
13              
14             has 'dbuser' => ( is=>'rw', isa=>'Str', default=>build_value_sub('dbuser', 'root') );
15             has 'dbpasswd' => ( is=>'rw', isa=>'Str', default=>build_value_sub('dbpasswd', '') );
16             has 'dbhost' => ( is=>'rw', isa=>'Str', default=>build_value_sub('dbhost', '') );
17             has 'dbname' => ( is=>'rw', isa=>'Str', default=>build_value_sub('dbname', '') );
18             has 'dbport' => ( is=>'rw', isa=>'Str', default=>build_value_sub('dbport', 3306) );
19             has 'filetable' => ( is=>'rw', isa=>'Str', default=>build_value_sub('filetable', 'files') );
20             has 'blockstable' => ( is=>'rw', isa=>'Str', default=>build_value_sub('blockstable', 'fileblocks') );
21             has 'blocksize' => ( is=>'rw', isa=>'Int', default=>build_value_sub('blocksize', 500*1024) );
22             has 'verbose' => ( is=>'rw', isa=>'Bool', default=>build_value_sub('verbose', 0) );
23             has 'uselocks' => ( is=>'rw', isa=>'Bool', default=>0 );
24             has 'config_file' => ( is=>'rw', isa=>'Maybe[Str]', default=>undef );
25             has 'config_hash' => ( is=>'rw', isa=>'HashRef', lazy=>1, builder=>'build_config_hash' );
26             has 'dbh' => ( is=>'rw', lazy=>1, builder=>'build_dbh' );
27              
28             sub build_value_sub {
29 18     18 1 32 my ($name, $default) = @_;
30             return sub {
31 0     0     my ($self) = @_;
32             #warn "$0: in build_value_sub( sub( $name, $default ));\n";
33 0 0         my $v = exists($self->config_hash->{$name}) ? $self->config_hash->{$name} : scalar($default);
34             #warn "$0: builder for $name: returning $v\n";
35 0           return $v;
36 18         86 };
37             };
38              
39             sub build_config_hash {
40 0     0 1   my $self = shift;
41 0           my $config_reader = new DBIx::FileStore::ConfigFile();
42 0           my $config_file = $self->config_file();
43 0 0         my $hash = $config_reader->read_config_file( $config_file ? ($config_file) : () );
44 0           return $hash;
45             }
46              
47             sub build_dbh {
48 0     0 1   my ($self) = @_;
49              
50 0           my $dsn = "DBI:mysql:" . $self->dbname . ";host=" . $self->dbhost . ";port=" . $self->dbport;
51             #print "$0: DSN is $dsn\n";
52              
53 0           my %attr = ( RaiseError => 1, PrintError => 1, AutoCommit => 1 ); # for mysql
54              
55             #warn "$0: dbpasswd is " . $self->dbpasswd . "\n";
56 0           my $dbh = DBI->connect_cached(
57             $dsn, $self->dbuser, $self->dbpasswd, \%attr
58             );
59 0           $dbh->{mysql_auto_reconnect} = 1; # auto reconnect
60              
61 0           return $dbh;
62             }
63            
64             sub get_all_filenames {
65 0     0 1   my ($self) = @_;
66 0           my $files = $self->dbh->selectall_arrayref( # lasttime + 0 gives us an int back
67             "select name, c_len, c_md5, lasttime+0 from " . $self->filetable .
68             " where b_num=0 order by name");
69 0           return $files;
70             }
71              
72             sub get_filenames_matching_prefix {
73 0     0 1   my ($self, $name) = @_;
74 0           my $pattern = $name . "%";
75 0           my $files = $self->dbh->selectall_arrayref( # lasttime + 0 gives us an int back
76             "select name, c_len, c_md5, lasttime+0 from " . $self->filetable .
77             " where name like ? and b_num=0 order by name", {}, $pattern);
78 0           return $files;
79             }
80              
81             sub rename_file {
82 0     0 1   my ($self, $from, $to) = @_;
83 0 0         die "$0: name not ok: $from" unless name_ok($from);
84 0 0         die "$0: name not ok: $to" unless name_ok($to);
85             # renames the rows in the filetable and the blockstable
86 0           my $dbh = $self->dbh;
87 0           $self->_lock_tables();
88              
89 0           for my $table ( ( $self->filetable, $self->blockstable ) ) {
90 0           my $sql = "select name from $table where name like ?";
91 0 0         $sql .= " order by b_num" if $table eq $self->filetable;
92              
93 0           my $files = $dbh->selectall_arrayref( $sql, {}, $from . " %");
94 0           for my $f (@$files) {
95 0           (my $num = $f->[0]) =~ s/.* //;
96 0 0         print "$0: Moving $table:$f->[0], (num $num) to '$to $num'...\n" if $self->verbose;
97 0           $dbh->do("update $table set name=? where name=?", {}, "$to $num", $f->[0]);
98             }
99             }
100              
101 0           $self->_unlock_tables();
102 0           return 1;
103             }
104              
105             sub delete_file {
106 0     0 1   my ($self, $name) = @_;
107 0 0         die "$0: name not ok: $name" unless name_ok($name);
108              
109 0           my $dbh = $self->dbh;
110 0           my $filetable = $self->filetable; # probably "files"
111 0           my $blockstable = $self->blockstable; # probably "fileblocks"
112 0           for my $table ( ( $filetable, $blockstable ) ) {
113 0           my $rv = int($dbh->do( "delete from $table where name like ?", {}, "$name %" ));
114 0 0         if($rv) {
115 0 0         print "$0: $table: deleted $name ($rv blocks)\n" if $self->verbose;
116             } else {
117 0 0         warn "$0: no blocks to delete for $table:$name\n" if $self->verbose;
118             }
119             }
120 0           return 1;
121             }
122              
123              
124             sub copy_blocks_from_db_to_filehandle {
125 0     0 1   my ($self, $fdbname, $filehandle) = @_;
126 0 0         die "$0: name not ok: $fdbname" unless name_ok($fdbname);
127             my $print_to_filehandle_callback = sub {
128             # this is a closure, so $fh comes from the surrounding context
129 0     0     my $content = shift;
130 0           print $filehandle $content;
131 0           };
132             # read all our blocks, calling our callback for each one
133 0           my $ret = $self->_read_blocks_from_db( $print_to_filehandle_callback, $fdbname );
134 0           return $ret;
135             }
136              
137              
138             # reads the content into $pathname, returns the length of the data read.
139             sub read_from_db {
140 0     0 1   my ($self, $pathname, $fdbname) = @_;
141              
142 0 0         die "$0: name not ok: $fdbname" unless name_ok($fdbname);
143              
144 0 0         open( my $fh, ">", $pathname) || die "$0: can't open for output: $pathname\n";
145              
146             # this is a function used as a callback and called with each chunk of the data
147             # into a temporary file. $fh stay in context for the function (closure) below.
148             my $print_to_file_callback = sub {
149             # this is a closure, so $fh comes from the surrounding context
150 0     0     my $content = shift;
151 0           print $fh $content;
152 0           };
153              
154             # read all our blocks, calling our callback for each one
155 0           my $ret = $self->_read_blocks_from_db( $print_to_file_callback, $fdbname );
156              
157             # if we fetched *something* into our scoped $fh to the temp file,
158             # then copy it to the destination they asked for, and delete
159             # the temp file.
160 0 0         if (!$fh) {
161 0           warn "$0: not found in FileDB: $fdbname\n";
162             }
163             # clear our fh member
164 0 0         close($fh) if defined($fh); # this 'close' should cause the associated file to be deleted
165 0           $fh = undef;
166              
167             # return number of bytes read.
168 0           return $ret;
169             }
170              
171              
172             # my $bytes_written = $self->write_to_db( $localpathname, $filestorename );
173             sub write_to_db {
174 0     0 1   my ($self, $pathname, $fdbname) = @_;
175              
176 0 0         die "$0: name not ok: $fdbname" unless name_ok($fdbname);
177              
178 0 0         open(my $fh, "<" , $pathname)
179             || die "$0: Couldn't open: $pathname\n";
180            
181              
182 0           my $total_length = -s $pathname; # get the length
183 0 0         if ($total_length == 0) { warn "$0: warning: writing 0 bytes for $pathname\n"; }
  0            
184              
185 0           my $bytecount = $self->write_from_filehandle_to_db( $fh, $fdbname );
186              
187 0 0         die "$0: file length didn't match written data length for $pathname: $bytecount != $total_length\n"
188             if ($bytecount != $total_length);
189              
190 0 0         close ($fh) || die "$0: couldn't close: $pathname\n";
191 0           return $total_length;
192             }
193              
194             sub write_from_filehandle_to_db {
195 0     0 1   my ($self, $fh, $fdbname) = @_;
196              
197 0           my $ctx = Digest::MD5->new;
198 0           my $dbh = $self->dbh;
199 0           my $filetable = $self->filetable;
200 0           my $blockstable = $self->blockstable;
201 0           my $verbose = $self->verbose;
202 0           my $size = 0;
203              
204 0           $self->_lock_tables();
205 0           for( my ($bytes,$part,$block) = (0,0,""); # init our three vars
206             $bytes = read($fh, $block, $self->blocksize); # test that we read something
207             $part++, $block="" ) { # increment $part and reset $block
208 0           $ctx->add( $block );
209 0           $size += length( $block );
210 0           my $b_md5 = md5_base64( $block );
211              
212 0 0 0       printf("saving from filehandle into '%s $part'\n", $fdbname )
213             if($verbose && $part%25==0);
214              
215 0           my $name = sprintf("%s %05d", $fdbname, $part);
216 0           $dbh->do("replace into $filetable set name=?, c_md5=?, b_md5=?, c_len=?, b_num=?", {},
217             $name, "?", $b_md5, 0, $part);
218 0           $dbh->do("replace into $blockstable set name=?, block=?", {},
219             $name, $block);
220             }
221 0 0         print "\n" if $verbose;
222 0           $dbh->do( "update $filetable set c_md5=?, c_len=? where name like ?", {}, $ctx->b64digest, $size, "$fdbname %" );
223 0           $self->_unlock_tables();
224              
225 0           return $size;
226             }
227              
228              
229             # From here below is utility code and implementation details
230             # returns the length of the data read,
231             # calls &$callback( $block ) for each block read.
232             sub _read_blocks_from_db {
233 0     0     my ($self, $callback, $fdbname) = @_;
234             # callback is called on each block, like &$callback( $block )
235 0 0         die "$0: name not ok: $fdbname" unless name_ok($fdbname);
236 0           my $dbh = $self->dbh;
237 0           my $verbose = $self->verbose;
238 0           my $ctx = Digest::MD5->new();
239            
240 0 0         warn "$0: Fetching rows $fdbname" if $verbose;
241 0           $self->_lock_tables();
242 0           my $cmd = "select name, b_md5, c_md5, b_num, c_len from " . $self->filetable . " where name like ? order by b_num";
243 0           my @params = ( $fdbname . ' %' );
244 0           my $sth = $dbh->prepare($cmd);
245 0           my $rv = $sth->execute( @params );
246 0           my $rownum = 0;
247 0           my $c_len = 0;
248 0           my $orig_c_md5;
249 0           while( my $row = $sth->fetchrow_arrayref() ) {
250 0   0       $c_len ||= $row->[5];
251 0 0 0       unless ($row && defined($row->[0])) {
252 0           warn "$0: Error: bad row returned?";
253 0           next;
254             }
255 0 0         print "$row->[0]\n" if $verbose;
256 0 0         unless ($row->[0] =~ /\s+(\d+)$/) {
257 0           warn "$0: Skipping block that doesn't match ' [0-9]+\$': $row->[0]";
258 0           next;
259             }
260 0           my $name_num = $1;
261 0   0       $orig_c_md5 ||= $row->[2];
262              
263             # check the MD5...
264 0 0         if ($row->[2] ne $orig_c_md5) { die "$0: Error: Is DB being updated? Bad content md5sum for $row->[0]\n"; }
  0            
265 0 0         die "$0: Error: our count is row num $rownum, but file says $name_num"
266             unless $rownum == $name_num;
267            
268 0           my ($block) = $dbh->selectrow_array("select block from fileblocks where name=?", {}, $row->[0]);
269 0 0         die "$0: Bad MD5 checksum for $row->[0] ($row->[1] != " . md5_base64( $block )
270             unless ($row->[1] eq md5_base64( $block ));
271 0           $ctx->add($block);
272              
273 0           &$callback( $block ); # call the callback, and pass it the block!
274              
275 0           $rownum++;
276             }
277 0           $self->_unlock_tables();
278              
279 0           my $retrieved_md5 = $ctx->b64digest();
280 0 0         die "$0: Bad MD5 checksum for $fdbname ($retrieved_md5 != $orig_c_md5)"
281             unless ($retrieved_md5 eq $orig_c_md5);
282            
283 0           return $c_len; # for your inspection
284             }
285              
286              
287             sub _lock_tables {
288 0     0     my $self = shift;
289 0 0         if ($self->uselocks) {
290 0           $self->dbh->do("lock tables " . $self->filetable . " write, " . $self->blockstable . " write");
291             }
292             }
293             sub _unlock_tables {
294 0     0     my $self = shift;
295 0 0         if ($self->uselocks) {
296 0           $self->dbh->do("unlock tables");
297             }
298             }
299              
300              
301             # warns and returns 0 if passed filename ending with like '/tmp/file 1'
302             # else returns 1, ie, that the name is OK
303             # Note that this is a FUNCTION, not a METHOD
304             sub name_ok {
305 0     0 1   my $file = shift;
306 0 0 0       if (!defined($file) || $file eq "" ) {
307 0           warn "$0: Can't use empty filename\n";
308 0           return 0;
309             }
310 0 0 0       if ($file && $file =~ /\s/) {
311 0           warn "$0: Can't use filedbname containing whitespace\n";
312 0           return 0;
313             }
314 0 0         if (length($file) > 75) {
315 0           warn "$0: Can't use filedbname longer than 75 chars\n";
316 0           return 0;
317             }
318 0           return 1;
319             }
320              
321             1;
322              
323             =pod
324              
325             =head1 NAME
326              
327             DBIx::FileStore - Module to store files in a DBI backend
328              
329             =head1 VERSION
330              
331             Version 0.33
332              
333             =head1 SYNOPSIS
334              
335             Ever wanted to store files in a database?
336              
337             This code helps you do that.
338              
339             All the fdb tools in script/ use this library to
340             get at file names and contents in the database.
341              
342             To get started, see the README file (which includes a QUICKSTART
343             guide) from the DBIx-FileStore distribution.
344              
345             This document details the DBIx::FileStore module implementation.
346              
347             =head1 FILENAME NOTES
348              
349             The name of the file in the filestore cannot contain spaces.
350              
351             The maximum length of the name of a file in the filestore
352             is 75 characters.
353              
354             You can store files under any name you wish in the filestore.
355             The name need not correspond to the original name on the filesystem.
356              
357             All filenames in the filestore are in one flat address space.
358             You can use / in filenames, but it does not represent an actual
359             directory. (Although fdbls has some support for viewing files in the
360             filestore as if they were in folders. See the docs on 'fdbls'
361             for details.)
362              
363              
364             =head1 METHODS
365              
366             =head2 new DBIx::FileStore()
367              
368             my $filestore = new DBIx::FileStore();
369              
370             returns a new DBIx::FileStore object
371              
372             =head2 build_config_hash()
373              
374             my $hash = $fs->build_config_hash()
375            
376             reads $fs->config_file (or /etc/fdbrc.conf or ~/.fdbrc) as config file.
377              
378             =head2 build_dbh()
379              
380             returns a dbh for the $self->dbh
381              
382             =head2 build_value_sub( 'member_name', 'default_value' )
383              
384             uses $self->config_hash to return the config value for the member_name,
385             or the default_value.
386              
387              
388              
389              
390             =head2 get_all_filenames()
391              
392             my $fileinfo_ref = $filestore->get_all_filenames()
393              
394             Returns a list of references to data about all the files in the
395             filestore.
396              
397             Each row consist of the following columns:
398             name, c_len, c_md5, lasttime_as_int
399              
400             =head2 get_filenames_matching_prefix( $prefix )
401              
402             my $fileinfo_ref = get_filenames_matching_prefix( $prefix );
403              
404             Returns a list of references to data about the files in the
405             filestore whose name matches the prefix $prefix.
406              
407             Returns a list of references in the same format as get_all_filenames().
408              
409             =head2 read_from_db( $filesystem_name, $storage_name);
410              
411             my $bytecount = $filestore->read_from_db( "filesystemname.txt", "filestorename.txt" );
412              
413             Copies the file 'filestorename.txt' from the filestore to the file filesystemname.txt
414             on the local filesystem.
415              
416             =head2 rename_file( $from, $to );
417              
418             my $ok = $self->rename_file( $from, $to );
419              
420             Renames the file in the database from $from to $to.
421             Returns 1;
422              
423             =head2 delete_file( $fdbname );
424              
425             my $ok = $self->delete_file( $fdbname );
426              
427             Removes data named $filename from the filestore.
428              
429             =head2 copy_blocks_from_db_to_filehandle()
430              
431             my $bytecount = $filestore->copy_blocks_from_db_to_filehandle( $fdbname, $fh );
432              
433             copies blocks from the filehandle $fh into the fdb at the name $fdbname
434              
435             =head2 _read_blocks_from_db( $callback_function, $fdbname );
436              
437             my $bytecount = $filestore->_read_blocks_from_db( $callback_function, $fdbname );
438              
439             ** Intended for internal use by this module. **
440              
441             Fetches the blocks from the database for the file stored under $fdbname,
442             and calls the $callback_function on the data from each one after it is read.
443              
444             It also confirms that the base64 md5 checksum for each block and the file contents
445             as a whole are correct. Die()'s with an error if a checksum doesn't match.
446              
447             If uselocks is set, lock the relevant tables while data is extracted.
448              
449             =head2 write_to_db( $localpathname, $filestorename );
450              
451             my $bytecount = $self->write_to_db( $localpathname, $filestorename );
452              
453             Copies the file $localpathname from the filesystem to the name
454             $filestorename in the filestore.
455              
456             Locks the relevant tables while data is extracted. Locking should probably
457             be configurable by the caller.
458              
459             Returns the number of bytes written. Dies with a message if the source
460             file could not be read.
461              
462             Note that it currently reads the file twice: once to compute the md5 checksum
463             before insterting it, and a second time to insert the blocks.
464              
465             =head2 write_from_filehandle_to_db ($fh, $fdbname)
466              
467             Reads blocks of the appropriate block size from $fb and writes them
468             into the fdb under the name $fdbname.
469             Returns the number of bytes written into the filestore.
470              
471             =head1 FUNCTIONS
472              
473             =head2 name_ok( $fdbname )
474              
475             my $filename_ok = DBIx::FileStore::name_ok( $fdbname )
476              
477             Checks that the name $fdbname is acceptable for using as a name
478             in the filestore. Must not contain spaces or be over 75 chars.
479              
480             =head1 IMPLEMENTATION
481              
482             The data is stored in the database using two tables: 'files' and
483             'fileblocks'. All meta-data is stored in the 'files' table,
484             and the file contents are stored in the 'fileblocks' table.
485              
486             =head2 fileblocks table
487              
488             The fileblocks table has only three fields:
489              
490             =head3 name
491              
492             The name of the block, exactly as used in the fileblocks table.
493             Always looks like "filename.txt ",
494             for example "filestorename.txt 00000".
495              
496             =head3 block
497              
498             The contents of the named block. Each block is currently set
499             to be 512K. Care must be taken to use blocks that are
500             not larger than mysql buffers can handle (in particular,
501             max_allowed_packet).
502              
503             =head3 lasttime
504              
505             The timestamp of when this block was inserted into the DB or updated.
506              
507             =head2 files table
508              
509             The files table has several fields. There is one row in the files table
510             for each row in the fileblocks table-- not one per file (see IMPLEMENTATION
511             CAVEATS, below). The fields in the files table are:
512              
513             =head3 c_len
514              
515             Content length. The content length of the complete file (sum of length of all the file's blocks).
516              
517             =head3 b_num
518              
519             Block number. The number of the block this row represents. The b_num is repeated as a five
520             (or more) digit number at the end of the name field (see above). We denormalize
521             the data like this so we can quickly and easily find blocks by name or block number.
522              
523             =head3 b_md5
524              
525             Block md5. The md5 checksum for the block (b is for 'block') represented by this row.
526             We use base64 encoding (which uses 0-9, a-z, A-Z, and a few other characters)
527             to represent md5's because it's a little shorter than the hex
528             representation. (22 vs. 32 characters)
529              
530             =head3 c_md5
531              
532             Content md5. The base64 md5 checksum for the whole file (c is for 'content') represented by this row.
533              
534             =head3 lasttime
535              
536             The timestamp of when this row was inserted into the DB or updated.
537              
538             =head2 See the file 'table-definitions.sql' for more details about
539             the db schema used.
540              
541             =head1 IMPLEMENTATION CAVEATS
542              
543             DBIx::FileStore is what I would consider production-grade code,
544             but the overall wisdom of storing files in blobs in a mysql database
545             may be questionable (for good reason).
546              
547             That having been said, if you have a good reason to do so, as long
548             as you understand the repercussions of storing files in
549             your mysql database, then this toolkit offers a stable and
550             flexible backend for binary data storage, and it works quite nicely.
551              
552             If we were to redesign the system, in particular we might reconsider
553             having one row in the 'files' table for each block stored in the
554             'fileblocks' table. Perhaps instead, we'd have one entry in
555             the 'files' table per file.
556              
557             In concrete terms, though, the storage overhead of doing it this way
558             (which only affects files larger than the block size, which defaults
559             to 512K) is about 100 bytes per block. Assuming files larger than
560             512K, and with a conservative average block size of 256K, the extra
561             storage overhead of doing it this way is still only about 0.039%
562              
563             =head1 AUTHOR
564              
565             Josh Rabinowitz, C<< >>
566              
567             =head1 SUPPORT
568              
569             You should probably read the documentation for the various filestore command-line
570             tools:
571              
572             L, L, L, L, L, L, L, L, and L.
573              
574             =over 4
575              
576             =item * Search CPAN
577              
578             You can also read the documentation at:
579              
580             L
581              
582             =back
583              
584             =head1 LICENSE AND COPYRIGHT
585              
586             Copyright 2010-2017 Josh Rabinowitz.
587              
588             This program is free software; you can redistribute it and/or modify it
589             under the terms of either: the GNU General Public License as published
590             by the Free Software Foundation; or the Artistic License.
591              
592             See http://dev.perl.org/licenses/ for more information.
593              
594             =cut
595              
596             1; # End of DBIx::FileStore
597