File Coverage

blib/lib/Cantella/Store/UUID.pm
Criterion Covered Total %
statement 27 77 35.0
branch 0 20 0.0
condition 0 3 0.0
subroutine 9 18 50.0
pod 6 6 100.0
total 42 124 33.8


line stmt bran cond sub pod time code
1             package Cantella::Store::UUID;
2              
3 1     1   1315 use Moose;
  1         2  
  1         7  
4 1     1   8035 use Try::Tiny;
  1         3  
  1         79  
5 1     1   7 use Class::MOP;
  1         2  
  1         35  
6 1     1   7 use Data::GUID;
  1         2  
  1         12  
7 1     1   176 use File::Copy qw();
  1         2  
  1         24  
8 1     1   7 use Path::Class qw();
  1         2  
  1         20  
9 1     1   6 use Cantella::Store::UUID::Util '_mkdirs';
  1         2  
  1         9  
10 1     1   255 use MooseX::Types::Path::Class qw/Dir/;
  1         2  
  1         15  
11              
12 1     1   1198 use namespace::autoclean;
  1         2  
  1         9  
13              
14             our $VERSION = '0.003003';
15             $VERSION = eval $VERSION;
16              
17             has nest_levels => (
18             is => 'ro',
19             isa => 'Int',
20             required => 1,
21             );
22              
23             has root_dir => (
24             is => 'ro',
25             isa => Dir,
26             coerce => 1,
27             required => 1
28             );
29              
30             has file_class => (
31             is => 'ro',
32             isa => 'ClassName',
33             required => 1,
34             default => sub {
35             Class::MOP::load_class('Cantella::Store::UUID::File');
36             return 'Cantella::Store::UUID::File';
37             }
38             );
39              
40             # File::Copy 2.10 introduced 'sub _eq' in lieu of a simple "$from eq $to" check
41             # to enable checking of whether strings _or_ refs were identical. However, this
42             # resulted in
43             #
44             # Argument "...." isn't numeric in numeric eq (==) at /usr/share/perl/5.10/File/Copy.pm line 70.
45             #
46             # this hack will implement File::Copy 2.13's version of 'sub _eq'
47              
48             # 5.8.8 File::Copy 2.09 -- ok
49             # 5.9.5 File::Copy 2.10 -- broken
50             # 5.8.9 File::Copy 2.13 -- ok
51             # 5.10.0 File::Copy 2.11 -- broken
52             # 5.10.1 File::Copy 2.14 -- ok
53              
54             if ($File::Copy::VERSION >= 2.10 && $File::Copy::VERSION <= 2.12) {
55             Class::MOP::Package->initialize('File::Copy')->add_package_symbol('&_eq' => sub {
56             my $Scalar_Util_loaded = eval q{ require Scalar::Util; require overload; 1 };
57             my ($from, $to) = map {
58             $Scalar_Util_loaded && Scalar::Util::blessed($_)
59             && overload::Method($_, q{""})
60             ? "$_"
61             : $_
62             } (@_);
63             return '' if ( (ref $from) xor (ref $to) );
64             return $from == $to if ref $from;
65             return $from eq $to;
66             });
67             }
68              
69             sub from_uuid {
70 0     0 1   my ($self, $uuid) = @_;
71 0           return $self->file_class->new(
72             uuid => $uuid,
73             dir => $self->_get_dir_for_uuid($uuid),
74             _document_store => $self,
75             );
76             }
77              
78             sub new_uuid {
79 0     0 1   Data::GUID->new;
80             }
81              
82             sub create_file {
83 0     0 1   my( $self, $source_file, $uuid, $metadata) = @_;
84 0 0         $source_file = Path::Class::file($source_file) unless blessed $source_file;
85 0 0         my %meta = %{ $metadata || {} };
  0            
86 0           $meta{original_name} = $source_file->basename;
87              
88 0           my $new_file = $self->from_uuid( $uuid );
89 0           $new_file->metadata( \%meta );
90 0 0         return $new_file if File::Copy::copy($source_file, $new_file->path);
91              
92 0           my $new_path = $new_file->path;
93 0           die("File copy from ${source_file} to ${new_path} failed: $!");
94             }
95              
96             sub deploy {
97 0     0 1   my $self = shift;
98              
99 0           my $root = $self->root_dir;
100 0 0 0       unless( -d $root || $root->mkpath ){
101 0           die("Failed to create ${root}");
102             }
103 0           _mkdirs($root, $self->nest_levels);
104 0           return 1;
105             }
106              
107             sub _get_dir_for_uuid {
108 0     0     my ($self, $uuid) = @_;
109 0 0         $uuid = Data::GUID->from_any_string($uuid) unless blessed $uuid;
110 0           my $target = $self->root_dir;
111 0           my @dirs = split('', uc(substr($uuid->as_hex, 2, $self->nest_levels)));
112              
113 0           return $target->subdir( @dirs );
114             }
115              
116             sub grep_files {
117 0     0 1   my($self, $test) = @_;
118 0           my @result;
119              
120             my $callback = sub {
121 0     0     my $node = shift;
122 0 0         return if $node->is_dir;
123 0 0         return unless $node->basename =~ /^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/;
124 0           my $uuid;
125             try {
126 0           $uuid = Data::GUID->from_string($node->basename);
127             } catch {
128 0           warn("Invalid object in file storage at: ${node}");
129 0           };
130 0 0         push(@result, $uuid) if $test->( $self->from_uuid($uuid) );
131 0           };
132              
133 0           $self->root_dir->recurse(callback => $callback, depthfirst => 1, preorder => 0);
134 0           return @result;
135             }
136              
137             sub map_files {
138 0     0 1   my($self, $block) = @_;
139 0           my @result;
140              
141             my $callback = sub {
142 0     0     my $node = shift;
143 0 0         return if $node->is_dir;
144 0 0         return unless $node->basename =~ /^[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}$/;
145 0           my $uuid;
146             try {
147 0           $uuid = Data::GUID->from_string($node->basename);
148             } catch {
149 0           warn("Invalid object in file storage at: ${node}");
150 0           };
151 0           push(@result, $block->( $self->from_uuid($uuid) ));
152 0           };
153              
154 0           $self->root_dir->recurse(callback => $callback, depthfirst => 1, preorder => 0);
155 0           return @result;
156             }
157              
158              
159             __PACKAGE__->meta->make_immutable;
160              
161             1;
162              
163             __END__;
164              
165             =head1 NAME
166              
167             Cantella::Store::UUID - UUID based file storage
168              
169             =head1 DESCRIPTION
170              
171             L<Cantella::Store::UUID> stores documents in a deterministic location based on
172             a UUID. Depending on the number of files to be stored, a store may use 1
173             or more levels. A level is composed of 16 directories (0-9 and A-F) nested to
174             C<n> depth. For Example, if a store has 3 levels, the path to file represented
175             by UUID C<A5D45AF2-73D1-11DD-AA18-4B321EADD46B> would be
176             C<A/5/D/A5D45AF2-73D1-11DD-AA18-4B321EADD46B>.
177              
178             The goal is to provide a simple way to spread the storage of a large number of
179             files over many directories to prevent any single directory from storing too-many
180             files. Optionally, lower level tools can then be utilized to spread the
181             underlying storage points accross different physical devices if necessary.
182              
183             The number of final storage points available can be calculated by raising 16 to
184             the nth power, where n is the number of C<nest levels>.
185              
186             B<Caution:> The number of directories generated is actually larger than the
187             number of final storage points because all directories in the hierarchy must
188             be counted, thus the number of directories a store contains is
189             C<(16^n) + (16^(n-1)) .. (16^1) + (16^0)> and a 5 level deep hierarchy for
190             all three storage points would create 3,355,443 directories. For this reason,
191             any number larger than 4 is cautioned against.
192              
193             =head1 SYNOPSYS
194              
195             use Path::Class qw(file);
196             use Cantella::Store::UUID;
197              
198             my $store = Cantella::Store::UUID->new(
199             root_dir => './test-cantella-store-uuid',
200             nest_levels => 3,
201             );
202             $store->deploy; #create the storage dirs (should only be done once)
203              
204             my $new_uuid = $store->new_uuid;
205             {
206             my $source_file = file './some-file';
207             my $stored_file = $store->create_file($source_file, $new_uuid, {foo => 'bar'});
208             $source_file->remove; #it was copied into the storage
209             }
210              
211             #this object is identical to the one returned by ->create_file
212             my $stored_file = $store->from_uuid($new_uuid);
213             print $stored_file->metadata->{foo}; #prints 'bar'
214              
215             # $grep_results[0] eq $new_uuid#
216             my @grep_results = $store->grep_files(sub { exists shift->metadata->{foo}});
217              
218             # $map_results[0] eq 'bar'
219             my @map_results = $store->grep_files(sub { $_->metadata->{foo}});
220              
221             =head1 ATTRIBUTES
222              
223             C<Cantella::Store::UUID> is a subclass of L<Moose::Object>. It inherits the
224             C<new> object provided by L<Moose>. All attributes can be set using the C<new>
225             constructor method, or their respecitive writer method, if applicable.
226              
227             =head2 nest_levels
228              
229             Required, read-only integer representing how many levels of depth to use in
230             the directory structure.
231              
232             The following methods are associated with this attribute:
233              
234             =over 4
235              
236             =item B<nest_levels> - reader
237              
238             =back
239              
240             =head2 root_dir
241              
242             =over 4
243              
244             =item B<root_dir> - reader
245              
246             =back
247              
248             Required, read-only directory location for the root of the hierarchy.
249              
250             =head2 file_class
251              
252             =over 4
253              
254             =item B<file_class> - reader
255              
256             =back
257              
258             Required, read-only class name. The class to use for stored file objects.
259             Defaults to L<Cantella::Store::UUID::File>.
260              
261             =head1 METHODS
262              
263             =head2 new
264              
265             =over 4
266              
267             =item B<arguments:> C<\%arguments>
268              
269             =item B<return value:> C<$object_instance>
270              
271             =back
272              
273             Constructor.
274              
275             =head2 from_uuid
276              
277             =over 4
278              
279             =item B<arguments:> C<$uuid>
280              
281             =item B<return value:> C<$file_object>
282              
283             =back
284              
285             Return the apropriate file object for C<$uuid>. Please note that this
286             particular file does not neccesarily exist and its presence is not checked for.
287             See L<exists|Cantella::Store::UUID::File/exists>.
288              
289             =head2 new_uuid
290              
291             =over 4
292              
293             =item B<arguments:> none
294              
295             =item B<return value:> C<$uuid>
296              
297             =back
298              
299             Returns a new UUID object suitable for use with this module. By default, it
300             currently uses L<Data::GUID>.
301              
302             =head2 create_file
303              
304             =over 4
305              
306             =item B<arguments:> C<$original, $uuid, $metadata>
307              
308             =item B<return value:> C<$file_object>
309              
310             =back
311              
312             Will copy the C<$original> file into the the UUID storage and return the
313             file object representing it. The key C<original_name> will be automatically
314             set on the metadata with the base name of the original file.
315              
316             =head2 deploy
317              
318             =over 4
319              
320             =item B<arguments:> none
321              
322             =item B<return value:> none
323              
324             =back
325              
326             Create directory hierarchy, starting with C<root_dir>. A call to deploy may
327             take a couple of minutes or even hours depending on the value of C<nest_levels>
328             and the speed of the storage being utilized.
329              
330             =head2 grep_files
331              
332             =over 4
333              
334             =item B<arguments:> C<$code_ref>
335              
336             =item B<return value:> C<@matching_uuids>
337              
338             =back
339              
340             Recurse the storage testing every file against C<$code_ref>. Return all of the
341             UUIDs where C<$code_ref> returns a true value. The only argument given to
342             C<$code_ref> is a file object. The order in which files are tested and
343             subsequently returned is undefined behavior and may change. Be aware that,
344             depending on the number of @matching_ids and the number of documents stored,
345             this method could take a very, very long time to finish and use considerable
346             amounts of memory.
347              
348             =head2 map_files
349              
350             =over 4
351              
352             =item B<arguments:> C<$code_ref>
353              
354             =item B<return value:> C<@return_values>
355              
356             =back
357              
358             Recurse the storage executing C<$code_ref> on every file. Return all of the
359             values returned by C<$code_ref>. The only argument given to C<$code_ref> is
360             a file object. The order in which files are tested and subsequently returned
361             is undefined behavior and may change. Be aware that, depending on the result
362             values and number of documents stored, this method could take a very, very long
363             time to finish and use considerable amounts of memory.
364              
365             =head2 _get_dir_for_uuid
366              
367             =over 4
368              
369             =item B<arguments:> C<$uuid>
370              
371             =item B<return value:> C<Path::Class::Dir $dir>
372              
373             =back
374              
375             Given a UUID, it returns the apropriate directory;
376              
377             =head1 SEE ALSO
378              
379             L<Cantella::Store::UUID::File>
380              
381             =head1 AUTHOR
382              
383             Guillermo Roditi (groditi) E<lt>groditi@cpan.orgE<gt>
384              
385             =head1 COPYRIGHT AND LICENSE
386              
387             This software is copyright (c) 2009, 2010 by Guillermo Roditi.
388              
389             This is free software; you can redistribute it and/or modify it under
390             the same terms as the Perl 5 programming language system itself.
391              
392             =cut