File Coverage

blib/lib/DataStore/CAS.pm
Criterion Covered Total %
statement 137 198 69.1
branch 47 110 42.7
condition 31 83 37.3
subroutine 37 64 57.8
pod 26 26 100.0
total 278 481 57.8


line stmt bran cond sub pod time code
1             package DataStore::CAS;
2 5     5   12591 use 5.008;
  5         19  
3 5     5   33 use Carp;
  5         10  
  5         280  
4 5     5   531 use Try::Tiny;
  5         1990  
  5         389  
5             require Scalar::Util;
6             require Symbol;
7 5     5   1894 use Moo::Role;
  5         58504  
  5         32  
8              
9             our $VERSION= '0.07';
10             our @CARP_NOT= qw( DataStore::CAS::File DataStore::CAS::VirtualHandle );
11              
12             # ABSTRACT: Abstract base class for Content Addressable Storage
13              
14              
15             requires 'digest';
16              
17             has hash_of_null => ( is => 'lazy' );
18              
19             sub _build_hash_of_null {
20 6     6   111 return shift->calculate_hash('');
21             }
22              
23              
24             requires 'get';
25              
26              
27             sub _thing_stringifies_to_filename {
28 18     18   41 my $ref= ref $_[0];
29 18 100 33     396 !$ref? defined $_[0] && length $_[0]
      66        
30             : $ref->isa('Path::Class::File')
31             || $ref->isa('Path::Tiny')
32             || $ref->isa('File::Temp')
33             || -e "$_[0]"
34             }
35             sub _describe_unputtable {
36 0 0   0   0 !defined $_[0]? 'undef'
    0          
    0          
37             : !ref $_[0]? '"'.$_[0].'"'
38             : !Scalar::Util::blessed($_[0])? ref($_[0]).' ref'
39             : 'object of '.ref($_[0]).' (stringifies to "'.$_[0].'")'
40             }
41              
42             sub put {
43 23     23 1 8185 my $ref= ref $_[1];
44 23 100 66     154 goto $_[0]->can('put_scalar')
45             if !$ref || $ref eq 'SCALAR';
46 5 100 100     84 goto $_[0]->can('put_file')
47             if $ref->isa('DataStore::CAS::File')
48             or _thing_stringifies_to_filename($_[1]);
49 1 50 33     23 goto $_[0]->can('put_handle')
50             if $ref->isa('IO::Handle')
51             or Scalar::Util::reftype($_[1]) eq 'GLOB';
52 0         0 croak("Unhandled argument to ->put : "._describe_unputtable($_[1]));
53             }
54              
55              
56             sub put_scalar {
57 17     17 1 46 my ($self, undef, $flags)= @_;
58 17 50       50 my $ref= ref $_[1] eq 'SCALAR'? $_[1] : \$_[1];
59              
60             # Force to plain string if it is an object
61 17 50       41 if (ref $$ref) {
62             # TODO: croak unless object has stringify magic
63 0         0 $ref= \"$$ref";
64             }
65              
66             # Can only 'put' octets, not wide-character unicode strings.
67 17 50       54 utf8::downgrade($$ref, 1)
68             or croak "scalar must be byte string (octets). If storing unicode,"
69             ." you must reduce to a byte encoding first.";
70              
71             my $hash= $flags && $flags->{known_hashes} && $flags->{known_hashes}{$self->digest}
72 17 50 33     69 ? $flags->{known_hashes}{$self->digest}
73             : $self->calculate_hash($ref);
74 17 100       66 if ($self->get($hash)) {
75             # Already have it
76             $flags->{stats}{dup_file_count}++
77 1 50       5 if $flags->{stats};
78 1         8 return $hash;
79             } else {
80 16 100       85 $flags= { ($flags? %$flags : ()), known_hashes => { $self->digest => $hash } };
81 16         59 my $handle= $self->new_write_handle($flags);
82 16         49 $handle->_write_all($$ref);
83 16         54 return $self->commit_write_handle($handle);
84             }
85             }
86              
87              
88             sub put_file {
89 6     6 1 17 my ($self, $file, $flags)= @_;
90 6         11 my $ref= ref $file;
91 6   100     30 my $is_cas_file= $ref && $ref->isa('DataStore::CAS::File');
92 6         13 my $is_filename= _thing_stringifies_to_filename($file);
93 6 50 66     24 croak "Unhandled argument to ->put_file : "._describe_unputtable($file)
94             unless $is_cas_file || $is_filename;
95              
96 6 50       19 my %known_hashes= $flags->{known_hashes}? %{$flags->{known_hashes}} : ();
  0         0  
97             # Apply reuse_hash feature, if requested
98 6 50 66     21 if ($is_cas_file && $flags->{reuse_hash}) {
99 0         0 $known_hashes{$file->store->digest}= $file->hash;
100 0         0 $flags= { %$flags, known_hashes => \%known_hashes };
101             }
102             # It is probably better to read a file twice than to write one that
103             # doesn't need to be written.
104             # ...but can't do better than ->put_handle unless the file is a real file.
105 6 50 33     46 my $fname= $is_filename? "$file"
    100          
106             : $is_cas_file && $file->can('local_file')? $file->local_file
107             : undef;
108 6 50 33     135 if ($known_hashes{$self->digest} || (defined $fname && -f $fname)) {
      33        
109             # Calculate the hash if it wasn't given.
110 6   33     35 my $hash= ($known_hashes{$self->digest} ||= $self->calculate_file_hash($fname));
111             # Avoid unnecessary work
112 6 100       687 if ($self->get($hash)) {
113             $flags->{stats}{dup_file_count}++
114 5 50       16 if $flags->{stats};
115             $self->_unlink_source_file($file, $flags)
116 5 50 33     16 if $flags->{move} && defined $fname;
117 5         22 return $hash;
118             }
119             # Save hash for next step
120 1         8 $flags= { %$flags, known_hashes => \%known_hashes };
121             }
122 1         4 my $fh;
123 1 50 0     6 if ($is_cas_file) {
    0          
    0          
124 1 50       4 $fh= $file->open or croak "Can't open '$file': $!";
125             }
126             elsif ($ref && $ref->can('openr')) {
127 0 0       0 $fh= $file->openr or croak "Can't open '$file': $!";
128             }
129             elsif ($is_filename) {
130 0 0       0 open($fh, '<', $fname) or croak "Can't open '$fname': $!";
131             }
132             else {
133 0         0 croak "Don't know how to open '$file'";
134             }
135 1         6 my $hash= $self->put_handle($fh, $flags);
136             $self->_unlink_source_file($file, $flags)
137 1 50 33     103 if $hash && $flags->{move};
138 0         0 return $hash;
139             }
140              
141             sub _unlink_source_file {
142 1     1   3 my ($self, $file, $flags)= @_;
143 1 50       5 return if $flags->{dry_run};
144 1   33     13 my $is_cas_file= ref $file && ref($file)->isa('DataStore::CAS::File');
145 1 50       5 if ($is_cas_file) {
146 1         277 croak "Refusing to delete origin CAS File (this can damage a CAS)\n"
147             ."If you really want to do this, pass \$file->local_name and then"
148             ." delete the cas entry yourself.";
149             } else {
150 0 0 0     0 if (ref $file && ref($file)->isa('File::Temp')) {
151             # The Simple backend closes File::Temp files to ensure they don't
152             # get written to any more. so match that behavior here.
153 0         0 $file->close;
154             }
155 0 0       0 unlink "$file" or croak "unlink($file): $!"
156             }
157             }
158              
159              
160             sub put_handle {
161 2     2 1 7 my ($self, $h_in, $flags)= @_;
162 2         7 binmode $h_in;
163 2         8 my $h_out= $self->new_write_handle($flags);
164 2   50     14 my $buf_size= $flags->{buffer_size} || 1024*1024;
165 2         4 my $buf;
166 2         4 while(1) {
167 4         59 my $got= read($h_in, $buf, $buf_size);
168 4 100       20 if ($got) {
    50          
169 2 50       8 $h_out->_write_all($buf) or croak "write: $!";
170             } elsif (!defined $got) {
171 0 0 0     0 next if ($!{EINTR} || $!{EAGAIN});
172 0         0 croak "read: $!";
173             } else {
174 2         7 last;
175             }
176             }
177 2         7 return $self->commit_write_handle($h_out);
178             }
179              
180              
181             # This implementation probably needs overridden by subclasses.
182             sub new_write_handle {
183 0     0 1 0 my ($self, $flags)= @_;
184 0         0 return DataStore::CAS::FileCreatorHandle->new($self, { flags => $flags });
185             }
186              
187             # This must be implemented by subclasses
188             requires 'commit_write_handle';
189              
190              
191             sub calculate_hash {
192 23     23 1 36 my $self= shift;
193 23 100       82 Digest->new($self->digest)->add(ref $_[0]? ${$_[0]} : $_[0])->hexdigest;
  17         690  
194             }
195              
196             sub calculate_file_hash {
197 9     9 1 22 my ($self, $file)= @_;
198 9 50       410 open my $fh, '<', $file or croak "open($file): $!";
199 9         36 binmode $fh;
200 9         51 Digest->new($self->digest)->addfile($fh)->hexdigest;
201             }
202              
203              
204             sub validate {
205 0     0 1 0 my ($self, $hash, $flags)= @_;
206              
207 0         0 my $file= $self->get($hash);
208 0 0       0 return undef unless defined $file;
209              
210             # Exceptions during 'put' will most likely come from reading $file,
211             # which means that validation fails, and we return false.
212 0         0 my $new_hash;
213             try {
214             # We don't pass flags directly through to get/put, because flags for validate
215             # are not the same as flags for get or put. But, 'stats' is a standard thing.
216 0     0   0 my %args= ( dry_run => 1 );
217 0 0       0 $args{stats}= $flags->{stats} if $flags->{stats};
218 0         0 $new_hash= $self->put_handle($file, \%args);
219             }
220       0     catch {
221 0         0 };
222 0 0 0     0 return (defined $new_hash and $new_hash eq $hash)? 1 : 0;
223             }
224              
225              
226             requires 'delete';
227              
228              
229             requires 'iterator';
230              
231              
232             requires 'open_file';
233              
234             # File and Handle objects have DESTROY methods that call these methods of
235             # their associated CAS. The CAS should implement these for cleanup of
236             # temporary files, or etc.
237       17     sub _file_destroy {}
238       18     sub _handle_destroy {}
239              
240             package DataStore::CAS::File;
241 5     5   10388 use strict;
  5         15  
  5         127  
242 5     5   34 use warnings;
  5         19  
  5         1928  
243              
244             our $VERSION= '0.07';
245              
246 4     4 1 612 sub store { $_[0]{store} }
247 5     5 1 50 sub hash { $_[0]{hash} }
248 3     3 1 20 sub size { $_[0]{size} }
249              
250             sub open {
251 4     4 1 11 my $self= shift;
252 4 100       33 return $self->{store}->open_file($self)
253             if @_ == 0;
254 1 50       4 return $self->{store}->open_file($self, { @_ })
255             if @_ > 1;
256 1 50 33     14 return $self->{store}->open_file($self, { layer => $_[0] })
257             if @_ == 1 and !ref $_[0];
258 0         0 Carp::croak "Wrong arguments to 'open'";
259             };
260              
261             sub DESTROY {
262 18     18   4822 $_[0]{store}->_file_destroy(@_);
263             }
264              
265             our $AUTOLOAD;
266             sub AUTOLOAD {
267 4     4   1181 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
268 4 100       21 return $_[0]{$attr} if exists $_[0]{$attr};
269 2         7 unshift @_, $_[0]{store};
270             goto (
271 2   66     171 $_[0]->can("_file_$attr")
272             or Carp::croak "Can't locate object method \"_file_$attr\" via package \"".ref($_[0]).'"'
273             );
274             }
275              
276             package DataStore::CAS::VirtualHandle;
277 5     5   53 use strict;
  5         12  
  5         160  
278 5     5   34 use warnings;
  5         11  
  5         4691  
279              
280             our $VERSION= '0.07';
281              
282             sub new {
283 19     19 1 664 my ($class, $cas, $fields)= @_;
284 19         60 my $glob= bless Symbol::gensym(), $class;
285 19         300 ${*$glob}= $cas;
  19         61  
286 19 50       32 %{*$glob}= %{$fields||{}};
  19         73  
  19         80  
287 19         110 tie *$glob, $glob;
288 19         70 $glob;
289             }
290 19     19   55 sub TIEHANDLE { return $_[0]; }
291              
292 19     19   477 sub _cas { ${*${$_[0]}} } # the scalar view of the symbol points to the CAS object
  19         32  
  19         103  
293 39     39   66 sub _data { \%{*${$_[0]}} } # the hashref view of the symbol holds the fields of the handle
  39         44  
  39         152  
294              
295 19     19   694 sub DESTROY { unshift @_, ${*{$_[0]}}; goto $_[0]->can('_handle_destroy') }
  19         30  
  19         83  
  19         117  
296              
297             # By default, any method not defined will call to C<$cas->_handle_$method( $handle, @args );>
298             our $AUTOLOAD;
299             sub AUTOLOAD {
300 4     4   1720 unshift @_, ${*${$_[0]}}; # unshift @_, $self->_cas
  4         6  
  4         17  
301 4         15 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
302             goto (
303 4   66     175 $_[0]->can("_handle_$attr")
304             or Carp::croak "Can't locate object method \"_handle_$attr\" via package \"".ref($_[0]).'"'
305             );
306             }
307              
308             #
309             # Tied filehandle API
310             #
311              
312 1     1   1067 sub READ { (shift)->read(@_) }
313 0 0   0   0 sub READLINE { wantarray? (shift)->getlines : (shift)->getline }
314 0     0   0 sub GETC { $_[0]->getc }
315 0     0   0 sub EOF { $_[0]->eof }
316              
317 0     0   0 sub WRITE { (shift)->write(@_) }
318 0     0   0 sub PRINT { (shift)->print(@_) }
319 0     0   0 sub PRINTF { (shift)->printf(@_) }
320              
321 0     0   0 sub SEEK { (shift)->seek(@_) }
322 0     0   0 sub TELL { (shift)->tell(@_) }
323              
324 0     0   0 sub FILENO { $_[0]->fileno }
325 0     0   0 sub CLOSE { $_[0]->close }
326              
327             #
328             # The following are some default implementations to make subclassing less cumbersome.
329             #
330              
331             sub getlines {
332 0     0 1 0 my $self= shift;
333 0 0 0     0 wantarray or !defined wantarray or Carp::croak "getlines called in scalar context";
334 0         0 my (@ret, $line);
335 0         0 push @ret, $line
336             while defined ($line= $self->getline);
337 0         0 @ret;
338             }
339              
340             # I'm not sure why anyone would ever want this function, but I'm adding
341             # it for completeness.
342             sub getc {
343 0     0 1 0 my $c;
344 0 0       0 $_[0]->read($c, 1)? $c : undef;
345             }
346              
347             # 'write' does not guarantee that all bytes get written in one shot.
348             # Needs to be called in a loop to accomplish "print" semantics.
349             sub _write_all {
350 18     18   43 my ($self, $str)= @_;
351 18         31 while (1) {
352 18         49 my $wrote= $self->write($str);
353 18 50 33     114 return 1 if defined $wrote and ($wrote eq length $str);
354 0 0 0     0 return undef unless defined $wrote or $!{EINTR} or $!{EAGAIN};
      0        
355 0         0 substr($str, 0, $wrote)= '';
356             }
357             }
358              
359             # easy to forget that 'print' API involves "$," and "$\"
360             sub print {
361 0     0 1 0 my $self= shift;
362 0 0       0 my $str= join( (defined $, ? $, : ""), @_ );
363 0 0       0 $str .= $\ if defined $\;
364 0         0 $self->_write_all($str);
365             }
366              
367             # as if anyone would want to write their own printf implementation...
368             sub printf {
369 0     0 1 0 my $self= shift;
370 0         0 my $str= sprintf($_[0], $_[1..$#_]);
371 0         0 $self->_write_all($str);
372             }
373              
374             # virtual handles are unlikely to have one, and if they did, they wouldn't
375             # be using this class
376 0     0 1 0 sub fileno { undef; }
377              
378             package DataStore::CAS::FileCreatorHandle;
379 5     5   40 use strict;
  5         12  
  5         161  
380 5     5   30 use warnings;
  5         10  
  5         202  
381 5     5   1531 use parent -norequire => 'DataStore::CAS::VirtualHandle';
  5         883  
  5         36  
382              
383             our $VERSION= '0.07';
384              
385             # For write-handles, commit data to the CAS and return the digest hash for it.
386 0     0 1 0 sub commit { $_[0]->_cas->commit_write_handle(@_) }
387              
388             # These would happen anyway via the AUTOLOAD, but we enumerate them so that
389             # they officially appear as methods of this class.
390 0     0 1 0 sub close { $_[0]->_cas->_handle_close(@_) }
391 0     0 1 0 sub seek { $_[0]->_cas->_handle_seek(@_) }
392 0     0 1 0 sub tell { $_[0]->_cas->_handle_tell(@_) }
393 18     18 1 45 sub write { $_[0]->_cas->_handle_write(@_) }
394              
395             # This is a write-only handle
396 0     0 1   sub eof { return 1; }
397 0     0 1   sub read { return 0; }
398 0     0 1   sub readline { return undef; }
399              
400              
401             1;
402              
403             __END__