File Coverage

blib/lib/Yars/Tools.pm
Criterion Covered Total %
statement 245 305 80.3
branch 56 100 56.0
condition 22 44 50.0
subroutine 47 53 88.6
pod 27 27 100.0
total 397 529 75.0


line stmt bran cond sub pod time code
1             package Yars::Tools;
2              
3 24     24   10171 use strict;
  24         51  
  24         672  
4 24     24   124 use warnings;
  24         40  
  24         670  
5 24     24   128 use Clustericious::Config;
  24         45  
  24         575  
6 24     24   171 use List::Util qw/ shuffle uniq /;
  24         44  
  24         1540  
7 24     24   775 use Hash::MoreUtils qw/safe_reverse/;
  24         32330  
  24         1340  
8 24     24   442 use Clustericious::Log;
  24         5639  
  24         190  
9 24     24   17520 use File::Find::Rule;
  24         135597  
  24         195  
10 24     24   1275 use File::Basename qw/dirname/;
  24         68  
  24         1131  
11 24     24   137 use File::Path qw/mkpath/;
  24         41  
  24         842  
12 24     24   121 use File::Temp;
  24         43  
  24         1553  
13 24     24   941 use File::Compare;
  24         24983  
  24         1070  
14 24     24   149 use JSON::MaybeXS ();
  24         43  
  24         418  
15             # TODO: rm dep on stat
16 24     24   347 use File::stat qw/stat/;
  24         5450  
  24         175  
17 24     24   1396 use Mojo::ByteStream qw/b/;
  24         49  
  24         811  
18 24     24   120 use File::Spec;
  24         35  
  24         393  
19 24     24   470 use Mojo::UserAgent;
  24         162792  
  24         207  
20 24     24   467 use File::Spec;
  24         43  
  24         403  
21 24     24   552 use Yars::Util qw( format_tx_error );
  24         43  
  24         927  
22 24     24   139 use File::Glob qw( bsd_glob );
  24         52  
  24         24805  
23              
24             # ABSTRACT: various utility functions dealing with servers, hosts, etc
25             our $VERSION = '1.31'; # VERSION
26              
27              
28             sub new
29             {
30 32     32 1 478 my($class, $config) = @_;
31 32 100       73 WARN "No url found in config file" unless eval { $config->url };
  32         130  
32 32         679 my $self = bless {
33             bucket_to_url => { }, # map buckets to server urls
34             bucket_to_root => { }, # map buckets to disk roots
35             disk_is_local => { }, # our disk roots (values are just 1)
36             servers => { }, # all servers
37             our_url => '', # our server url
38             state_file => '', # name of file with disk states
39             ua => '', # UserAgent
40             server_status_cache => {},
41             server_status_cache_lifetime => 3,
42             default_dir => '',
43             }, $class;
44 32         158 $self->refresh_config($config);
45 32         132 $self;
46             }
47              
48             sub _set_ua
49             {
50 14     14   36 my($self, $ua) = @_;
51 14         34 $self->{ua} = $ua;
52 14         147 return;
53             }
54              
55             sub _ua
56             {
57 279     279   617 my($self) = @_;
58 279 100       1242 my $ua = $self->{ua} ? $self->{ua}->() : Mojo::UserAgent->new;
59 279         493771 $ua->max_redirects(30);
60 279         2884 $ua;
61             }
62              
63              
64             sub refresh_config {
65 1111     1111 1 12624 my $self = shift;
66 1111         1976 my $config = shift;
67 1111 100 66     4541 return 1 if defined($self->{our_url}) && keys %{ $self->{bucket_to_root} } > 0 && keys %{ $self->{bucket_to_url} } > 0;
  1111   66     6320  
  1077         4969  
68 34   66     126 $config ||= Clustericious::Config->new("Yars");
69 34 50 66     405 $self->{our_url} ||= $config->url or WARN "No url found in config file";
70 34         590 TRACE "Our url is " . $self->{our_url};
71 34         169368 for my $server ($config->servers) {
72 59         1302 $self->{servers}->{$server->{url}} = 1;
73 59         97 for my $disk (@{ $server->{disks} }) {
  59         143  
74 102         143 for my $bucket (@{ $disk->{buckets} }) {
  102         215  
75 528         1022 $self->{bucket_to_url}->{$bucket} = $server->{url};
76 528 100       900 next unless $server->{url} eq $self->{our_url};
77 352         533 $self->{bucket_to_root}->{$bucket} = $disk->{root};
78 352 50       547 LOGDIE "Disk root not given" unless defined($disk->{root});
79 352         584 $self->{disk_is_local}->{$disk->{root}} = 1;
80             }
81             }
82             }
83 34         1331 my $default_dir = $self->{default_dir} = bsd_glob("~/var/run/yars");
84            
85 34         328 my $state_file = $self->{state_file} = $config->state_file(default => "$default_dir/state.txt");
86 34 100       1653 -e $state_file or do {
87 32         229 INFO "Writing new state file ($state_file)";
88 32         28733 my %disks = map { ($_ => "up") } keys %{ $self->{disk_is_local} };
  60         202  
  32         149  
89 32         201 $self->_write_state({disks => \%disks});
90             };
91 34 50       1593 -e $state_file or LOGDIE "Could not write state file $state_file";
92             #TRACE "bucket2url : ".Dumper($self->{bucket_to_url});
93             }
94              
95             sub _dir_is_empty {
96             # stolen from File::Find::Rule::DirectoryEmpty
97 341     341   562 my $dir = shift;
98 341 50       4589 opendir( DIR, $dir ) or return;
99 341         2167 while ( $_ = readdir DIR ) {
100 669 100       2743 if ( !/^\.\.?$/ ) {
101 13         64 closedir DIR;
102 13         61 return 0;
103             }
104             }
105 328         1408 closedir DIR;
106 328         684 return 1;
107             }
108              
109              
110             sub disk_for {
111 940     940 1 1848 my $self = shift;
112 940         1589 my $digest = shift;
113 940 50       1394 unless (keys %{ $self->{bucket_to_root} }) {
  940         3068  
114 0         0 $self->refresh_config;
115 0 0       0 LOGDIE "No config data" unless keys %{ $self->{bucket_to_root} } > 0;
  0         0  
116             }
117 940         1723 my ($bucket) = grep { $digest =~ /^$_/i } keys %{ $self->{bucket_to_root} };
  8596         61766  
  940         4416  
118 940 100       3351 TRACE "no local disk for $digest in ".(join ' ', keys %{ $self->{bucket_to_root} }) unless defined($bucket);
  87         672  
119 940 100       49976 return unless defined($bucket);
120 853         3773 return $self->{bucket_to_root}->{$bucket};
121             }
122              
123              
124             sub local_buckets {
125 0     0 1 0 my($self) = @_;
126 0 0       0 $self->refresh_config unless keys %{ $self->{bucket_to_root} };
  0         0  
127 0         0 my %r = safe_reverse $self->{bucket_to_root};
128 0 0       0 do {$_ = [ $_ ] unless ref $_} for values %r;
  0         0  
129 0         0 return %r;
130             }
131              
132             sub _state {
133 324     324   586 my $self = shift;
134 324 50 33     3504 $self->refresh_config() unless $self->{state_file} && -e $self->{state_file};
135             # TODO: rm dep on File::stat
136 324 100 66     2704 return $self->{_state}->{cached} if $self->{_state}->{mod_time} && $self->{_state}->{mod_time} == stat($self->{state_file})->mtime;
137 24   33     88 our $j ||= JSON::MaybeXS->new;
138 24 50       166 -e $self->{state_file} or LOGDIE "Missing state file " . $self->{state_file};
139 24         213 $self->{_state}->{cached} = $j->decode(Mojo::Asset::File->new(path => $self->{state_file})->slurp);
140             # TODO: rm dep on File::stat
141 24         5009 $self->{_state}->{mod_time} = stat($self->{state_file})->mtime;
142 24         5870 return $self->{_state}->{cached};
143             }
144              
145             sub _write_state {
146 35     35   113 my $self = shift;
147 35         77 my $state = shift;
148 35         1803 my $dir = dirname($self->{state_file});
149 35   66     404 our $j ||= JSON::MaybeXS->new;
150 35         2330 mkpath $dir;
151 35         355 my $temp = File::Temp->new(DIR => $dir, UNLINK => 0);
152 35         16689 print $temp $j->encode($state);
153 35         299 $temp->close;
154 35 50       1461 rename "$temp", $self->{state_file} or return 0;
155 35         2118 return 1;
156             }
157              
158              
159             sub disk_is_up {
160 321     321 1 673 my $class = shift;
161 321         558 my $root = shift;
162 321 50 66     6042 return 0 if -d $root && ! -w $root;
163 321 100 50     1299 return 1 if ($class->_state->{disks}{$root} || 'up') eq 'up';
164 29         5190 return 0;
165             }
166              
167              
168             sub disk_is_up_verified
169             {
170 12     12 1 20 my($self, $root) = @_;
171 12 100       31 return unless $self->disk_is_up($root);
172 10         1060 my $tmpdir = File::Spec->catdir($root, 'tmp');
173 10         24 my $temp;
174 10         62 eval {
175 24     24   191 use autodie;
  24         48  
  24         263  
176 10 100       116 unless(-d $tmpdir)
177             {
178 7         1013 mkpath $tmpdir;
179 7         45 chmod 0777, $tmpdir;
180             };
181 10         1164 $temp = File::Temp->new("disk_is_up_verifiedXXXXX", DIR => $tmpdir, SUFFIX => '.txt');
182 10         3270 print $temp "test";
183 10         40 close $temp;
184 10 50       2437 die "file has zero size" if -z $temp->filename;
185 10         197 unlink $temp->filename;
186             };
187 10 50       1229 if(my $error = $@)
188             {
189 0         0 INFO "Create temp file in $tmpdir FAILED: $error";
190 0         0 return;
191             }
192             else
193             {
194 10         29 INFO "created temp file to test status: " . $temp->filename;
195 10         10259 return 1;
196             }
197             }
198              
199              
200             sub disk_is_down {
201 3     3 1 15 return not shift->disk_is_up(@_);
202             }
203              
204              
205             sub disk_is_local {
206 3     3 1 7 my $self = shift;
207 3         7 my $root = shift;
208 3         15 return $self->{disk_is_local}->{$root};
209             }
210              
211              
212             sub server_is_up {
213             # TODO use state file for this
214 0     0 1 0 my $self = shift;
215 0         0 my $server_url = shift;
216 0 0 0     0 if (exists($self->{server_status_cache}->{$server_url}) && $self->{server_status_cache}->{$server_url}{checked} > time - $self->{server_status_cache_lifetime}) {
217 0         0 return $self->{server_status_cache}->{$server_url}{result};
218             }
219 0         0 TRACE "Checking $server_url/status";
220 0         0 my $tx = $self->_ua->get( "$server_url/status" );
221 0         0 $self->{server_status_cache}->{$server_url}{checked} = time;
222 0 0       0 if (my $res = $tx->success) {
223 0         0 my $got = $res->json;
224 0 0 0     0 if (defined($got->{server_version}) && length($got->{server_version})) {
225 0         0 return ($self->{server_status_cache}->{$server_url}{result} = 1);
226             }
227 0         0 TRACE "/status did not return version, got : ". JSON::MaybeXS::encode_json($got);
228 0         0 return ($self->{server_status_cache}->{$server_url}{result} = 0);
229             }
230 0         0 TRACE "Server $server_url is not up : response was ".format_tx_error($tx->error);
231 0         0 return ($self->{server_status_cache}->{$server_url}{result} = 0);
232             }
233             sub server_is_down {
234 0     0 1 0 return not shift->server_is_up(@_);
235             }
236              
237             sub _touch {
238 0     0   0 my $path = shift;
239 0         0 my $dir = dirname($path);
240 0 0       0 -d $dir or do {
241 0         0 my $ok;
242 0         0 eval { mkpath($dir); $ok = 1; };
  0         0  
  0         0  
243 0 0       0 if($@) { WARN "mkpath $dir failed : $@;"; $ok = 0; };
  0         0  
  0         0  
244 0 0       0 return 0 unless $ok;
245             };
246 0 0       0 open my $fp, ">>$path" or return 0;
247 0         0 close $fp;
248 0         0 return 1;
249             }
250              
251              
252             sub mark_disk_down {
253 3     3 1 7 my $class = shift;
254 3         6 my $root = shift;
255 3 50       14 return 1 if $class->disk_is_down($root);
256 3         571 my $state = $class->_state;
257 3         346 INFO "Marking disk $root down";
258 3 50       3013 exists($state->{disks}{$root}) or WARN "$root not present in state file";
259 3         12 $state->{disks}{$root} = 'down';
260 3 50       11 $class->_write_state($state) and return 1;
261 0         0 ERROR "Could not mark disk $root down";
262 0         0 return 0;
263             }
264              
265             sub mark_disk_up {
266 0     0 1 0 my $class = shift;
267 0         0 my $root = shift;
268 0 0       0 return 1 if $class->disk_is_up($root);
269 0         0 my $state = $class->_state;
270 0         0 INFO "Marking disk $root up";
271 0         0 $state->{disks}{$root} = 'up';
272 0 0       0 $class->_write_state($state) and return 1;
273 0         0 ERROR "Could not mark disk up";
274 0         0 return 0;
275             }
276              
277              
278             sub server_for {
279 1101     1101 1 2271 my $self = shift;
280 1101         1863 my $digest = shift;
281 1101         1614 my $found;
282 1101 50       1584 $self->refresh_config unless keys %{ $self->{bucket_to_url} } > 0;
  1101         4005  
283 1101         4213 for my $i (0..length($digest)) {
284 2202 100       7830 last if $found = $self->{bucket_to_url}->{ uc substr($digest,0,$i) };
285 1172 100       4084 last if $found = $self->{bucket_to_url}->{ lc substr($digest,0,$i) };
286             }
287 1101         3152 return $found;
288             }
289              
290              
291             sub bucket_map {
292 9     9 1 42 return shift->{bucket_to_url};
293             }
294              
295              
296             sub storage_path {
297 1228     1228 1 2470 my $class = shift;
298 1228         2193 my $digest = shift;
299 1228   33     4692 my $root = shift || $class->disk_for($digest) || LOGDIE "No local disk for $digest";
300 1228         24955 return join "/", $root, ( grep length, split /(..)/, $digest );
301             }
302              
303              
304             sub remote_stashed_server {
305 69     69 1 153 my $self = shift;
306 69         165 my ($filename,$digest) = @_;
307              
308 69         173 my $assigned_server = $self->server_for($digest);
309             # TODO broadcast these requests all at once
310 69         144 for my $server (shuffle(keys %{ $self->{servers} })) {
  69         308  
311 120 100       1194 next if $server eq $self->{our_url};
312 53 50       124 next if $server eq $assigned_server;
313 53         280 DEBUG "Checking remote $server for $filename";
314 53         49953 my $tx = $self->_ua->head( "$server/file/$filename/$digest", { "X-Yars-Check-Stash" => 1, "Connection" => "Close" } );
315 53 100       289397 if (my $res = $tx->success) {
316             # Found it!
317 6         251 return $server;
318             }
319             }
320 63         1361 return '';
321             }
322              
323              
324             sub local_stashed_dir {
325 291     291 1 622 my $self = shift;
326 291         720 my ($filename,$md5) = @_;
327 291         560 for my $root ( shuffle(keys %{ $self->{disk_is_local} })) {
  291         1556  
328 451         1515 my $dir = $self->storage_path($md5,$root);
329 451         3665 TRACE "Checking for $dir/$filename";
330 451 100       271793 return $dir if -r "$dir/$filename";
331             }
332 236         1671 return '';
333             }
334              
335              
336             sub server_exists {
337 1     1 1 2 my $self = shift;
338 1         2 my $server_url = shift;
339 1 50       6 return exists($self->{servers}->{$server_url}) ? 1 : 0;
340             }
341              
342              
343             sub server_url {
344 1378     1378 1 5991 return shift->{our_url};
345             }
346              
347              
348             sub disk_roots {
349 67     67 1 153 return keys %{ shift->{disk_is_local} };
  67         535  
350             }
351              
352              
353             sub server_urls {
354 3     3 1 6 return keys %{ shift->{servers} }
  3         18  
355             }
356              
357              
358             sub cleanup_tree {
359 21     21 1 39 my $self = shift;
360 21         60 my ($dir) = @_;
361 21         70 while (_dir_is_empty($dir)) {
362 328 100       836 last if $self->{disk_is_local}->{$dir};
363 320 50       8878 rmdir $dir or do { warn "cannot rmdir $dir : $!"; last; };
  0         0  
  0         0  
364 320         3054 $dir =~ s[/[^/]+$][];
365             }
366             }
367              
368              
369             sub count_files {
370 12     12 1 18 my $class = shift;
371 12         20 my $dir = shift;
372 12 50       136 -d $dir or return 0;
373 12         375 my @list = File::Find::Rule->file->in($dir);
374 12         47508 return scalar @list;
375             }
376              
377              
378             sub human_size {
379 36     36 1 73 my $class = shift;
380 36         42 my $val = shift;
381 36         75 my @units = qw/B K M G T P/;
382 36         44 my $unit = shift @units;
383 36   66     46 do {
384 108         120 $unit = shift @units;
385 108         278 $val /= 1024;
386             } until $val < 1024 || !@units;
387 36         282 return sprintf( "%.0f%s", $val + 0.5, $unit );
388             }
389              
390              
391             sub content_is_same {
392 2     2 1 5 my $class = shift;
393 2         6 my ($filename,$asset) = @_;
394 2         4 my $check;
395 2 50       19 if ($asset->isa("Mojo::Asset::File")) {
396 0         0 $asset->handle->flush;
397 0         0 $check = ( compare($filename,$asset->path) == 0 );
398             } else {
399             # Memory asset. Assume that if one can fit in memory, two can, too.
400 2         8 my $existing = Mojo::Asset::File->new(path => $filename);
401 2   66     21 $check = ( $existing->size == $asset->size && $asset->slurp eq $existing->slurp );
402             }
403 2         418 return $check;
404             }
405              
406              
407             sub hex2b64 {
408 529     529 1 1193 my $class = shift;
409 529         1110 my $hex = shift;
410 529         5378 my $b64 = b(pack 'H*', $hex)->b64_encode;
411 529         13497 local $/="\n";
412 529         2009 chomp $b64;
413 529         6178 return $b64;
414             }
415              
416             sub b642hex {
417 0     0 1   my $class = shift;
418 0           my $b64 = shift;
419 0           return unpack 'H*', b($b64)->b64_decode;
420             }
421              
422              
423             1;
424              
425             __END__
426              
427             =pod
428              
429             =encoding UTF-8
430              
431             =head1 NAME
432              
433             Yars::Tools - various utility functions dealing with servers, hosts, etc
434              
435             =head1 VERSION
436              
437             version 1.31
438              
439             =head1 DESCRIPTION
440              
441             This module is largely used internally by L<Yars>. Documentation for
442             some of its capabilities are provided here for the understanding of how
443             the rest of the L<Yars> server works, but they should not be considered
444             to be a public interface and they may change in the future, though
445             probably not for a good reason.
446              
447             =head1 FUNCTIONS
448              
449             =head2 new
450              
451             Create a new instance of Yars::Tools
452              
453             =head2 refresh_config
454              
455             Refresh the configuration data cached in memory.
456              
457             =head2 disk_for
458              
459             Given an md5 digest, calculate the root directory of this file. Undef is
460             returned if this file does not belong on the current host.
461              
462             =head2 local_buckets
463              
464             Get a hash from disk to list of buckets for this server.
465              
466             =head2 disk_is_up
467              
468             Given a disk root, return true unless the disk is marked down. A disk is
469             down if the state file indicates it, or if it exists but is unwriteable.
470              
471             =head2 disk_is_up_verified
472              
473             This is the same as disk_is_up, but doesn't trust the operating system,
474             and tries to write a file to the disk's temp directory and verify that
475             the file is not of zero size.
476              
477             =head2 disk_is_down
478              
479             Disk is not up.
480              
481             =head2 disk_is_local
482              
483             Return true if the disk is on this server.
484              
485             =head2 server_is_up, server_is_down
486              
487             Check to see if a remote server is up or down.
488              
489             =head2 mark_disk_down, mark_disk_up
490              
491             Mark a disk as up or down.
492              
493             =head2 server_for
494              
495             Given an md5, return the url for the server for this file.
496              
497             =head2 bucket_map
498              
499             Return a map from bucket prefix to server url.
500              
501             =head2 storage_path
502              
503             Calculate the directory of an md5 on disk. Optionally pass a second
504             parameter to force it onto a particular disk.
505              
506             =head2 remote_stashed_server
507              
508             Find a server which is stashing this file, if one exists.
509             Parameters :
510             $c - controller
511             $filename - filename
512             $digest - digest
513              
514             =head2 local_stashed_dir
515              
516             Find a local directory stashing this file, if one exists.
517             Parameters :
518             $filename - filename
519             $digest - digest
520             Returns :
521             The directory or false.
522              
523             =head2 server_exists
524              
525             Does this server exist?
526              
527             =head2 server_url
528              
529             Returns the url of the current server.
530              
531             =head2 disk_roots
532              
533             Return all the local directory roots, in a random order.
534              
535             =head2 server_urls
536              
537             Return all the other urls, in a random order.
538              
539             =head2 cleanup_tree
540              
541             Given a directory, traverse upwards until encountering a local disk root
542             or a non-empty directory, and remove all empty directories.
543              
544             =head2 count_files
545              
546             Count the number of files in a directory tree.
547              
548             =head2 human_size
549              
550             Given a size, format it like df -kh
551              
552             =head2 content_is_same
553              
554             Given a filename and an Asset, return true if the content is the same
555             for both.
556              
557             =head2 hex2b64, b642hex
558              
559             Convert from hex to base 64.
560              
561             =head1 SEE ALSO
562              
563             L<Yars>, L<Yars::Client>
564              
565             =head1 AUTHOR
566              
567             Original author: Marty Brandon
568              
569             Current maintainer: Graham Ollis E<lt>plicease@cpan.orgE<gt>
570              
571             Contributors:
572              
573             Brian Duggan
574              
575             Curt Tilmes
576              
577             =head1 COPYRIGHT AND LICENSE
578              
579             This software is copyright (c) 2013 by NASA GSFC.
580              
581             This is free software; you can redistribute it and/or modify it under
582             the same terms as the Perl 5 programming language system itself.
583              
584             =cut