File Coverage

blib/lib/File/Rsync/Mirror/Recent.pm
Criterion Covered Total %
statement 390 490 79.5
branch 113 216 52.3
condition 13 31 41.9
subroutine 44 48 91.6
pod 10 10 100.0
total 570 795 71.7


line stmt bran cond sub pod time code
1             package File::Rsync::Mirror::Recent;
2              
3             # use warnings;
4 7     7   203102 use strict;
  7         13  
  7         162  
5 7     7   3284 use File::Rsync::Mirror::Recentfile;
  7         21  
  7         392  
6              
7             =encoding utf-8
8              
9             =head1 NAME
10              
11             File::Rsync::Mirror::Recent - mirroring via rsync made efficient
12              
13             =cut
14              
15             package File::Rsync::Mirror::Recent;
16              
17 7     7   82 use File::Basename qw(basename dirname fileparse);
  7         13  
  7         517  
18 7     7   42 use File::Copy qw(cp);
  7         13  
  7         409  
19 7     7   48 use File::Path qw(mkpath);
  7         14  
  7         374  
20 7     7   42 use File::Rsync;
  7         8  
  7         186  
21 7     7   3941 use File::Rsync::Mirror::Recentfile::Done (); # at least needed by thaw()
  7         26  
  7         200  
22 7     7   43 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
  7         8  
  7         1124  
23 7     7   49 use File::Temp;
  7         13  
  7         664  
24 7     7   4119 use List::Pairwise qw(mapp grepp);
  7         21703  
  7         515  
25 7     7   49 use List::Util qw(first max);
  7         13  
  7         515  
26 7     7   43 use Scalar::Util qw(blessed reftype);
  7         19  
  7         399  
27 7     7   42 use Storable;
  7         13  
  7         420  
28 7     7   42 use Time::HiRes qw();
  7         13  
  7         130  
29 7     7   35 use YAML::Syck;
  7         13  
  7         505  
30              
31 7     7   41 use version; our $VERSION = qv('0.4.3');
  7         13  
  7         29  
32              
33             =head1 SYNOPSIS
34              
35             The documentation in here is normally not needed because the code is
36             considered to be run from several standalone programs. For a quick
37             overview, see the file README.mirrorcpan and the bin/ directory of the
38             distribution. For the architectural ideas see the section THE
39             ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
40              
41             File::Rsync::Mirror::Recent establishes a view on a collection of
42             File::Rsync::Mirror::Recentfile objects and provides abstractions
43             spanning multiple time intervals associated with those.
44              
45             =head1 EXPORT
46              
47             No exports.
48              
49             =head1 CONSTRUCTORS
50              
51             =head2 my $obj = CLASS->new(%hash)
52              
53             Constructor. On every argument pair the key is a method name and the
54             value is an argument to that method name.
55              
56             =cut
57              
58             sub new {
59 670     670 1 27263 my($class, @args) = @_;
60 670         1990 my $self = bless {}, $class;
61 670         2165 while (@args) {
62 745         2397 my($method,$arg) = splice @args, 0, 2;
63 745         3578 $self->$method($arg);
64             }
65 670         4352 return $self;
66             }
67              
68             =head2 my $obj = CLASS->thaw($statusfile)
69              
70             Constructor from a statusfile left over from a previous
71             rmirror run. See also C<runstatusfile>.
72              
73             =cut
74              
75             sub _thaw_without_pathdb {
76 35     35   149 my($self,$file) = @_;
77 35 50       1854 open my $fh, $file or die "Can't open '$file': $!";
78 35         393 local $/ = "\n";
79 35         88 my $in_pathdb = 0;
80 35         500 my $tfile = File::Temp->new
81             (
82             TEMPLATE => "Recent-thaw-XXXX",
83             TMPDIR => 1,
84             UNLINK => 0,
85             CLEANUP => 0,
86             SUFFIX => '.dat',
87             );
88 35         322237796 my $template_for_eop;
89 35         6627 while (<$fh>) {
90 14260 100       19014 if ($in_pathdb) {
    100          
91 5580 100       11685 if (/$template_for_eop/) {
92 35         64 $in_pathdb = 0;
93             }
94             } elsif (/(\s+)-\s*__pathdb\s*:/) {
95 35         72 $in_pathdb = 1;
96 35         301 my $next_attr = sprintf "^%s\\S", " ?" x length($1);
97 35         561 $template_for_eop = qr{$next_attr};
98             }
99 14260 100       28327 print $tfile $_ unless $in_pathdb;
100             }
101 35 50       1483 close $tfile or die "Could not close: $!";
102 35         216 my $return = $self->thaw($tfile->filename);
103 35         166 $return->_havelostpathdb(1);
104 35         202 unlink $tfile->filename;
105 35         2443 return $return;
106             }
107             sub thaw {
108 40     40 1 506 my($self, $file) = @_;
109 40 50       224 die "thaw called without statusfile argument" unless defined $file;
110 40 50       824 unless (-e $file){
111 0         0 require Carp;
112 0         0 Carp::confess("Alert: statusfile '$file' not found");
113             }
114 40         642 require YAML::Syck;
115 40         125 my $start = time;
116 40         109 my $sleeptime = 0.02;
117 40         3384 while (not mkdir "$file.lock") {
118 0         0 my $err = $!;
119 0         0 Time::HiRes::sleep $sleeptime;
120 0         0 my $waiting = time - $start;
121 0 0       0 if ($waiting >= 3){
122 0         0 warn "*** waiting ($waiting) for lock ($err) ***";
123 0         0 $sleeptime = 1;
124             }
125             }
126 40         464 my $size = -s $file;
127 40         431 my $serialized = YAML::Syck::LoadFile($file);
128 40 50       62525 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
129 40         133 my $charged_self = $serialized->{reduced_self};
130 40         383 my $class = blessed $self;
131 40         336 bless $charged_self, $class;
132 40         67 my $rfs = $serialized->{reduced_rfs};
133 40         111 my $rfclass = $class . "file"; # "Recent" . "file"
134 40         306 my $pathdb = $charged_self->_pathdb;
135 40         244 for my $rf (@$rfs) {
136 200         790 bless $rf, $rfclass;
137 200         487 $rf->_pathdb($pathdb);
138             }
139 40         247 $charged_self->_recentfiles($rfs);
140 40         270 $charged_self->_principal_recentfile($rfs->[0]);
141             # die "FIXME: thaw all recentfiles from reduced_rfs into _recentfiles as well, watch out for pathdb and rsync";
142 40         257 return $charged_self;
143             }
144              
145             =head1 ACCESSORS
146              
147             =cut
148              
149             my @accessors;
150              
151             BEGIN {
152 7     7   6429 @accessors =
153             (
154             "__pathdb",
155             "_dirtymark", # keeps track of the dirtymark of the recentfiles
156             "_havelostpathdb", # boolean
157             "_have_written_statusfile", # boolean
158             "_logfilefordone", # turns on _logfile on all DONE
159             # systems (disk intensive)
160             "_max_one_state", # when we have no time left but want
161             # at least get one file per
162             # iteration to avoid procrastination
163             "_principal_recentfile",
164             "_recentfiles",
165             "_rsync",
166             "_runstatusfile", # occasionally dumps all rfs
167             "_verbose", # internal variable for verbose setter/getter
168             "_verboselog", # internal variable for verboselog setter/getter
169             );
170              
171 7         117 my @pod_lines =
172 7         15 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
  357         840  
173              
174             =over 4
175              
176             =item ignore_link_stat_errors
177              
178             as in F:R:M:Recentfile
179              
180             =item local
181              
182             Option to specify the local principal file for operations with a local
183             collection of recentfiles.
184              
185             =item localroot
186              
187             as in F:R:M:Recentfile
188              
189             =item max_files_per_connection
190              
191             as in F:R:M:Recentfile
192              
193             =item remote
194              
195             The remote principal recentfile in rsync notation. E.g.
196              
197             pause.perl.org::authors/RECENT.recent
198              
199             =item remoteroot
200              
201             as in F:R:M:Recentfile
202              
203             =item remote_recentfile
204              
205             Rsync address of the remote C<RECENT.recent> symlink or whichever name
206             the principal remote recentfile has.
207              
208             =item rsync_options
209              
210             Things like compress, links, times or checksums. Passed in to the
211             File::Rsync object used to run the mirror. Can be a hashref or an
212             arrayref. Depending on the version of File::Rsync it is passed on as a
213             hashref or as a flat list.
214              
215             =item tempdir
216              
217             as in F:R:M:Recentfile
218              
219             =item ttl
220              
221             Minimum time before fetching the principal recentfile again.
222              
223             =back
224              
225             =cut
226              
227 7     7   43 use accessors @accessors;
  7         13  
  7         49  
228              
229             =head1 METHODS
230              
231             =head2 $arrayref = $obj->news ( %options )
232              
233             Test this with:
234              
235             perl -Ilib bin/rrr-news \
236             -after 1217200539 \
237             -max 12 \
238             -local /home/ftp/pub/PAUSE/authors/RECENT.recent
239              
240             perl -Ilib bin/rrr-news \
241             -after 1217200539 \
242             -rsync=compress=1 \
243             -rsync=links=1 \
244             -localroot /home/ftp/pub/PAUSE/authors/ \
245             -remote pause.perl.org::authors/RECENT.recent
246             -verbose
247              
248             All parameters that can be passed to
249             File::Rsync::Mirror::Recentfile::recent_events() can also be specified
250             here.
251              
252             One additional option is supported. If C<$Options{callback}> is
253             specified, it must be a subref. This sub is called whenever one chunk
254             of events is found. The first argument to the callback is a reference
255             to the currently accumulated array of events.
256              
257             Note: all data are kept in memory.
258              
259             =cut
260              
261             sub news {
262 600     600 1 4890 my($self, %opt) = @_;
263 600         1782 my $local = $self->local;
264 600 50       2934 unless ($local) {
265 0 0       0 if (my $remote = $self->remote) {
266 0         0 my $localroot;
267 0 0       0 if ($localroot = $self->localroot) {
268             # nice, they know what they are doing
269             } else {
270 0         0 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
271             }
272             } else {
273 0         0 die "Alert: neither local nor remote specified, cannot continue";
274             }
275             }
276 600         2592 my $rfs = $self->recentfiles;
277 600         1278 my $ret = [];
278 600         1110 my $before;
279 600         1248 for my $rf (@$rfs) {
280 3000         5496 my %locopt = %opt;
281 3000         5940 $locopt{before} = $before;
282 3000 50       5658 if ($opt{max}) {
283 0         0 $locopt{max} -= scalar @$ret;
284 0 0       0 last if $locopt{max} <= 0;
285             }
286 3000         5196 $locopt{info} = {};
287 3000         9522 my $res = $rf->recent_events(%locopt);
288 3000 50       7926 if (@$res){
289 3000         54888 push @$ret, @$res;
290             }
291 3000 50 33     8904 if ($opt{max} && scalar @$ret > $opt{max}) {
292 0         0 last;
293             }
294 3000 50       5880 if ($opt{after}){
295 0 0 0     0 if ( $locopt{info}{last} && _bigfloatlt($locopt{info}{last}{epoch},$opt{after}) ) {
296 0         0 last;
297             }
298 0 0       0 if ( _bigfloatgt($opt{after},$locopt{info}{first}{epoch}) ) {
299 0         0 last;
300             }
301             }
302 3000 50       6114 if (!@$res){
303 0         0 next;
304             }
305 3000         4518 $before = $res->[-1]{epoch};
306 3000 50 33     6384 $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
307 3000 50       18558 if (my $sub = $opt{callback}) {
308 0         0 $sub->($ret);
309             }
310             }
311 600         2436 $ret;
312             }
313              
314             =head2 overview ( %options )
315              
316             returns a small table that summarizes the state of all recentfiles
317             collected in this Recent object.
318              
319             $options{verbose}=1 increases the number of columns displayed.
320              
321             Here is an example output:
322              
323             Ival Cnt Max Min Span Util Cloud
324             1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
325             6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
326             1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
327             1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
328             1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
329             1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
330             1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
331             Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
332              
333             I<Ival> is the name of the interval.
334              
335             I<Cnt> is the number of entries in this recentfile.
336              
337             I<Max> is the highest (first) epoch in this recentfile, rounded.
338              
339             I<Min> is the lowest (last) epoch in this recentfile, rounded.
340              
341             I<Span> is the timespan currently covered, rounded.
342              
343             I<Util> is I<Span> divided by the designated timespan of this
344             recentfile.
345              
346             I<Cloud> is ASCII art illustrating the sequence of the Max and Min
347             timestamps.
348              
349             =cut
350             sub overview {
351 12     12 1 78 my($self,%options) = @_;
352 12         48 my $rfs = $self->recentfiles;
353 12         18 my(@s,%rank);
354 12         36 RECENTFILE: for my $rf (@$rfs) {
355 72         198 my $re=$rf->recent_events;
356 72         102 my $rfsummary;
357 72 50       126 if (@$re) {
358 72         300 my $span = $re->[0]{epoch}-$re->[-1]{epoch};
359 72         162 my $merged = $rf->merged;
360             $rfsummary =
361             [
362             "Ival",
363             $rf->interval,
364             "Cnt",
365             scalar @$re,
366             "Dirtymark",
367             $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
368             "Produced",
369             sprintf ("%.2f", $rf->{ORIG}{Producers}{time}||0),
370             "Merged",
371             ($rf->interval eq "Z"
372             ?
373             "-"
374             :
375             sprintf ("%.2f", $merged->{epoch} || 0)),
376             "Max",
377             sprintf ("%.2f", $re->[0]{epoch}),
378             "Min",
379 72 50 50     168 sprintf ("%.2f", $re->[-1]{epoch}),
    100 50        
    100          
380             "Span",
381             sprintf ("%.2f", $span),
382             "Util", # u9n:)
383             ($rf->interval eq "Z"
384             ?
385             "-"
386             :
387             sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
388             ),
389             ];
390 72     228   534 @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
  144         1794  
  648         4962  
391             } else {
392 0         0 next RECENTFILE;
393             }
394 72         882 push @s, $rfsummary;
395             }
396 12         78 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
  132         198  
397 12         66 my $maxrank = max values %rank;
398 12         36 for my $rfsummary (@s) {
399 72         114 my $string = " " x $maxrank;
400 72         60 my @borders;
401 72         78 for my $ele (qw(Max Min)) {
402 144     1104   624 my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
  144         2286  
  1296         6126  
403 144         528 push @borders, $rank{$r}-1;
404             }
405 72         114 for ($borders[0],$borders[1]) {
406 144         216 substr($string,$_,1) = "^";
407             }
408 72         264 push @$rfsummary, "Cloud", $string;
409             }
410 12 50       48 unless ($options{verbose}) {
411 12         36 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
  84         144  
412 12         30 for (@s) {
413 72     576   342 $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
  504         4368  
  720         4572  
414             }
415             }
416 12         24 my @sprintf;
417 12         30 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
  96         192  
418 84         78 my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
  504         630  
419 84         186 push @sprintf, "%" . $maxlength . "s";
420             }
421 12         48 my $sprintf = join " ", @sprintf;
422 12         30 $sprintf .= "\n";
423 12     84   42 my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
  84         456  
  12         36  
424 12     504   42 join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
  72         222  
  504         3336  
425             }
426              
427             =head2 _pathdb
428              
429             Keeping track of already handled files. Currently it is a hash, will
430             probably become a database with its own accessors.
431              
432             =cut
433              
434             sub _pathdb {
435 703     703   1638 my($self, $set) = @_;
436 703 50       2252 if ($set) {
437 0         0 $self->__pathdb ($set);
438             }
439 703         2562 my $pathdb = $self->__pathdb;
440 703 100       3534 unless (defined $pathdb) {
441 694         1658 $self->__pathdb(+{});
442             }
443 703         3266 return $self->__pathdb;
444             }
445              
446             =head2 $recentfile = $obj->principal_recentfile ()
447              
448             returns the principal recentfile object of this tree.
449              
450             =cut
451             # mirrors the recentfile and instantiates the recentfile object
452             sub _principal_recentfile_fromremote {
453 10     10   28 my($self) = @_;
454             # get the remote recentfile
455 10 50       38 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
456 10         164 my $splitter = qr{(.+)/([^/]*)};
457 10         122 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
458 10         70 $self->remoteroot($remoteroot);
459 10         58 my($abslfile, $fh);
460 10 50 66     150 if (!defined $rfilename) {
    100          
461 0         0 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
462             } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
463 4         28 ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
464             }
465 10         98 my @need_args =
466             (
467             "ignore_link_stat_errors",
468             "localroot",
469             "max_files_per_connection",
470             "remoteroot",
471             "rsync_options",
472             "tempdir",
473             "ttl",
474             "verbose",
475             "verboselog",
476             );
477 10         32 my $rf0;
478 10 100       52 unless ($abslfile) {
479 6         30 $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
  54         342  
480 6         66 $rf0->split_rfilename($rfilename);
481 6         30 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
482             }
483 10         778 $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
484 10         70 $rf0->_current_tempfile ( $abslfile );
485 10         92 $rf0->_current_tempfile_fh ( $fh );
486 10         68 $rf0->_use_tempfile (1);
487 10         54 for my $override (@need_args) {
488 90         632 $rf0->$override ( $self->$override );
489             }
490 10         104 $rf0->is_slave (1);
491 10         104 return $rf0;
492             }
493             sub principal_recentfile {
494 694     694 1 1326 my($self) = @_;
495 694         2409 my $rf0 = $self->_principal_recentfile;
496 694 100       3532 return $rf0 if defined $rf0;
497 659         1449 my $local = $self->local;
498 659 100       2392 if ($local) {
499 649         2865 $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ($local);
500             } else {
501 10 50       38 if (my $remote = $self->remote) {
502 10         94 my $localroot;
503 10 50       38 if ($localroot = $self->localroot) {
504             # nice, they know what they are doing
505             } else {
506 0         0 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
507             }
508 10         126 $rf0 = $self->_principal_recentfile_fromremote;
509             } else {
510 0         0 die "Alert: neither local nor remote specified, cannot continue";
511             }
512             }
513 659         2114 $self->_principal_recentfile($rf0);
514 659         3097 return $rf0;
515             }
516              
517             =head2 $recentfiles_arrayref = $obj->recentfiles ()
518              
519             returns a reference to the complete list of recentfile objects that
520             describe this tree. No guarantee is given that the represented
521             recentfiles exist or have been read. They are just bare objects.
522              
523             =cut
524              
525             sub recentfiles {
526 776     776 1 1985 my($self) = @_;
527 776         2873 my $rfs = $self->_recentfiles;
528 776 100       4236 return $rfs if defined $rfs;
529 659         1970 my $rf0 = $self->principal_recentfile;
530 659         2283 my $pathdb = $self->_pathdb;
531 659         3362 $rf0->_pathdb ($pathdb);
532 659         2627 my $aggregator = $rf0->aggregator;
533 659         2748 my @rf = $rf0;
534 659         1596 for my $agg (@$aggregator) {
535 2684         5056 my $nrf = $rf0->_sparse_clone;
536 2684         5333 $nrf->interval ( $agg );
537 2684         4906 $nrf->have_mirrored ( 0 );
538 2684         8322 $nrf->_pathdb ( $pathdb );
539 2684         7414 push @rf, $nrf;
540             }
541 659         2226 $self->_recentfiles(\@rf);
542 659         2521 return \@rf;
543             }
544              
545             =head2 $success = $obj->rmirror ( %options )
546              
547             Mirrors all recentfiles of the I<remote> address working through all
548             of them, mirroring their contents.
549              
550             Test this with:
551              
552             use File::Rsync::Mirror::Recent;
553             my $rrr = File::Rsync::Mirror::Recent->new(
554             ignore_link_stat_errors => 1,
555             localroot => "/home/ftp/pub/PAUSE/authors",
556             remote => "pause.perl.org::authors/RECENT.recent",
557             max_files_per_connection => 5000,
558             rsync_options => {
559             compress => 1,
560             links => 1,
561             times => 1,
562             checksum => 0,
563             },
564             verbose => 1,
565             _runstatusfile => "recent-rmirror-state.yml",
566             _logfilefordone => "recent-rmirror-donelog.log",
567             );
568             $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
569              
570             Or try without the loop parameter and write the loop yourself:
571              
572             use File::Rsync::Mirror::Recent;
573             my @rrr;
574             for my $t ("authors","modules"){
575             my $rrr = File::Rsync::Mirror::Recent->new(
576             ignore_link_stat_errors => 1,
577             localroot => "/home/ftp/pub/PAUSE/$t",
578             remote => "pause.perl.org::$t/RECENT.recent",
579             max_files_per_connection => 512,
580             rsync_options => {
581             compress => 1,
582             links => 1,
583             times => 1,
584             checksum => 0,
585             },
586             verbose => 1,
587             _runstatusfile => "recent-rmirror-state-$t.yml",
588             _logfilefordone => "recent-rmirror-donelog-$t.log",
589             ttl => 5,
590             );
591             push @rrr, $rrr;
592             }
593             while (){
594             for my $rrr (@rrr){
595             $rrr->rmirror ( "skip-deletes" => 1 );
596             }
597             warn "sleeping 23\n"; sleep 23;
598             }
599              
600              
601             =cut
602             # _alluptodate is unused but at least it worked last time I needed it,
603             # so let us keep it around
604             sub _alluptodate {
605 0     0   0 my($self) = @_;
606 0         0 my $sdm = $self->_dirtymark;
607 0 0       0 return unless defined $sdm;
608 0         0 for my $rf (@{$self->recentfiles}) {
  0         0  
609 0 0       0 return if $rf->seeded;
610 0         0 my $rfdm = $rf->dirtymark;
611 0 0       0 return unless defined $rfdm;
612 0 0       0 return unless $rfdm eq $sdm;
613 0         0 my $done = $rf->done;
614 0 0       0 return unless defined $done;
615 0         0 my $done_intervals = $done->_intervals;
616 0 0       0 return if !defined $done_intervals;
617             # nonono, may be more than one, only covered it must be:
618             # return if @$done_intervals > 1;
619 0         0 my $minmax = $rf->minmax;
620 0 0       0 return unless defined $minmax;
621 0 0       0 return unless $done->covered(@$minmax{qw(max min)});
622             }
623             # $DB::single++;
624 0         0 return 1;
625             }
626             sub _fullseed {
627 1     1   3 my($self) = @_;
628 1         3 for ( @{$self->recentfiles} ) { $_->seed(1) }
  1         14  
  5         16  
629             }
630             sub rmirror {
631 15     15 1 250 my($self, %options) = @_;
632              
633 15         88 my $rfs = $self->recentfiles;
634              
635 15         77 $self->principal_recentfile->seed;
636       0     my $_sigint = sub {
637             # XXX exit gracefully (reminder)
638 15         121 };
639              
640             # XXX needs accessor: warning, if set too low, we do nothing but
641             # mirror the principal!
642 15         47 my $minimum_time_per_loop = 20;
643              
644 15 50       87 if (my $logfile = $self->_logfilefordone) {
645 0         0 for my $i (0..$#$rfs) {
646 0         0 $rfs->[$i]->done->_logfile($logfile);
647             }
648             }
649 15 50       132 if (my $dirtymark = $self->principal_recentfile->dirtymark) {
650 15         125 my $mydm = $self->_dirtymark;
651 15 100       123 if (!defined $mydm){
    50          
652 10         30 $self->_dirtymark($dirtymark);
653             } elsif ($dirtymark ne $mydm) {
654 0 0       0 if ($self->verbose) {
655 0         0 my $fh;
656 0 0       0 if (my $vl = $self->verboselog) {
657 0 0       0 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
658             } else {
659 0         0 $fh = \*STDERR;
660             }
661 0         0 print $fh "NewDirtymark: old[$mydm] new[$dirtymark]\n";
662             }
663 0         0 $self->_dirtymark($dirtymark);
664             }
665             }
666 15         123 my $rstfile = $self->runstatusfile;
667 15 100       93 unless ($self->_have_written_statusfile) {
668 10         98 $self->_rmirror_runstatusfile_write ($rstfile, \%options);
669 10         40 $self->_have_written_statusfile(1);
670             }
671 15         166 $self->_rmirror_loop($minimum_time_per_loop,\%options);
672             }
673              
674             sub _rmirror_loop {
675 15     15   37 my($self,$minimum_time_per_loop,$options) = @_;
676 15         24 LOOP: while () {
677 20         53 my $ttleave = time + $minimum_time_per_loop;
678 20         81 my $rstfile = $self->runstatusfile;
679 20         109 my $otherproc = $self->_thaw_without_pathdb ($rstfile);
680 20         31291 my $pid = fork;
681 20 50       1077 if (! defined $pid) {
    100          
682 0         0 warn "Contention: $!";
683 0         0 sleep 0.25;
684 0         0 next LOOP;
685             } elsif ($pid) {
686 15         194956864 waitpid($pid,0);
687             } else {
688 5         531 $self = $self->thaw ($rstfile);
689 5         151 my $rfs = $self->recentfiles;
690 5         48 $self->principal_recentfile->seed;
691 5         99 RECENTFILE: for my $i (0..$#$rfs) {
692 24         78 my $rf = $rfs->[$i];
693 24 100       141 if (time > $ttleave) {
694             # Must make sure that one file can get fetched in any case
695 1         5 $self->_max_one_state(1);
696             }
697 24 100       151 if ($rf->seeded) {
    100          
698 9         103 $self->_rmirror_mirror ($i, $options);
699             } elsif ($rf->uptodate) {
700 6 100       20 if ($i < $#$rfs) {
701 5         19 $rfs->[$i+1]->done->merge($rf->done);
702             }
703             # no further seed necessary because "periodic" does it
704 6         16 next RECENTFILE;
705             }
706 18         96 WORKUNIT: while (time < $ttleave) {
707 27 100       159 if ($rf->uptodate) {
708 16         138 $self->_rmirror_sleep_per_connection ($i);
709 16         91 next RECENTFILE;
710             } else {
711 11         73 $self->_rmirror_mirror ($i, $options);
712             }
713             }
714 2 100       14 if ($self->_max_one_state) {
715 1         6 last RECENTFILE;
716             }
717             }
718 5         482 $self->_max_one_state(0);
719 5         39 my $exit = 0;
720 5 100       45 if ($rfs->[-1]->uptodate) {
721 4         69 $self->_rmirror_cleanup;
722             }
723 5 50       20 unless ($options->{loop}) {
724 5         10 $exit = 1;
725             }
726 5         280 $self->_rmirror_runstatusfile_write ($rstfile, $options);
727 5 50       120 exit if $exit;
728 0         0 last LOOP;
729             }
730              
731 15         358 $otherproc = $self->_thaw_without_pathdb ($rstfile);
732 15 100 33     954 if (!$options->{loop} && $otherproc && $otherproc->recentfiles->[-1]->uptodate) {
      66        
733 10         243 last LOOP;
734             }
735 5         10 my $sleep = $ttleave - time;
736 5 50       40 if ($sleep > 0.01) {
737 0         0 $self->_rmirror_endofloop_sleep ($sleep);
738             } else {
739             # negative time not invented yet:)
740             }
741             }
742             }
743              
744             sub _rmirror_mirror {
                # Mirror the recentfile at index $i of $self->recentfiles.
                # $options is a hashref; a copy of it is passed to the
                # recentfile's mirror() method, so the caller's hash is not
                # modified by the max/piecemeal settings below.
745 20     20   104 my($self, $i, $options) = @_;
746 20         247 my $rfs = $self->recentfiles;
747 20         72 my $rf = $rfs->[$i];
748 20         125 my %locopt = %$options;
                # while _max_one_state is true, pass max => 1 to mirror()
749 20 50       98 if ($self->_max_one_state) {
750 0         0 $locopt{max} = 1;
751             }
                # piecemeal is always forced on for this call
752 20         189 $locopt{piecemeal} = 1;
753 20         238 $rf->mirror (%locopt);
754 20 100       257 if ($i==0) {
755             # we limit to 0 for the case that upstream is broken and has
756             # more than one timestamp (happened on PAUSE 200903)
757 5 50       66 if (my $dirtymark = $rf->dirtymark) {
758 5         85 my $mydm = $self->_dirtymark;
                # our stored dirtymark is missing or differs (string
                # comparison): remember the new one and call _fullseed
759 5 100 66     145 if (!defined $mydm or $dirtymark ne $mydm) {
760 1         6 $self->_dirtymark($dirtymark);
761 1         10 $self->_fullseed;
762             }
763             }
764             }
765             }
766              
767             sub _rmirror_sleep_per_connection {
                # Sleep for the sleep_per_connection value of the recentfile
                # at index $i, then hand its "done" object on to the next
                # recentfile (if there is one).
768 16     16   43 my($self, $i) = @_;
769 16         123 my $rfs = $self->recentfiles;
770 16         38 my $rf = $rfs->[$i];
771 16         82 my $sleep = $rf->sleep_per_connection;
                # fall back to 0.42 seconds when the recentfile defines no value
772 16 50       118 $sleep = 0.42 unless defined $sleep;
773 16         6722565 Time::HiRes::sleep $sleep;
                # skipped for the last recentfile, which has no successor
774 16 100       504 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
775             }
776              
777             sub _rmirror_cleanup {
                # Empty the hash returned by _pathdb and reseed recentfiles
                # whose data no longer reaches their predecessor's merged epoch.
778 4     4   17 my($self) = @_;
779 4         31 my $pathdb = $self->_pathdb();
                # delete key by key, so the same hash reference stays in use
780 4         811 for my $k (keys %$pathdb) {
781 552         1036 delete $pathdb->{$k};
782             }
783 4         183 my $rfs = $self->recentfiles;
                # walk every pair of adjacent recentfiles (this, next)
784 4         34 for my $i (0..$#$rfs-1) {
785 16         58 my $thismerged = $rfs->[$i]->merged;
786 16         28 my $next = $rfs->[$i+1];
787 16         72 my $nextminmax = $next->minmax;
                # seed the next recentfile when this one has no merged epoch,
                # or when _bigfloatlt reports next's max against that epoch
                # (presumably a "less than" comparison -- confirm in FakeBigFloat)
788 16 50 33     326 if (not defined $thismerged->{epoch} or _bigfloatlt($nextminmax->{max},$thismerged->{epoch})){
789 0         0 $next->seed;
790             }
791             }
792             }
793              
794             =head2 $file = $obj->runstatusfile ($set)
795              
796             Getter/setter for C<_runstatusfile> attribute. Defaults to a temporary
797             file created by C<File::Temp>. A status file is required for
798             C<rmirror> working. Since it may be interesting for debugging
799             purposes, you may want to specify a permanent file for this.
800              
801             =cut
802             sub runstatusfile {
                # Getter/setter for the _runstatusfile attribute. A defined
                # $set is stored first; the return value is always the
                # attribute as it stands at the end of the call.
803 35     35 1 41 my($self,$set) = @_;
804 35 50       102 if (defined $set) {
805 0         0 $self->_runstatusfile ($set);
806             }
807 35         125 my $x = $self->_runstatusfile;
                # nothing configured yet: create a File::Temp file and
                # store its filename in the attribute
808 35 100       171 unless (defined $x) {
809 6         36 require File::Temp;
                # UNLINK => 0 asks File::Temp not to remove the file.
                # NOTE(review): CLEANUP looks like a tempdir()/newdir()
                # option rather than one of new() -- confirm it is intended.
810 6         66 my $tfile = File::Temp->new
811             (
812             TEMPLATE => "Recent-XXXX",
813             TMPDIR => 1,
814             UNLINK => 0,
815             CLEANUP => 0,
816             SUFFIX => '.dat',
817             );
818 6         2712 $self->_runstatusfile($tfile->filename);
819             }
820 35         361 return $self->_runstatusfile;
821             }
822              
823             # unused code.... it was an oops, discovered the thaw() method too
824             # late, and starting writing this here....
825             sub _rmirror_runstatusfile_read {
                # Load the YAML status file $file while holding the
                # "$file.lock" directory lock. The loaded data is only
                # copied into two local variables; nothing is stored in
                # $self and no explicit return value is given.
826 0     0   0 my($self, $file) = @_;
827              
828 0         0 require YAML::Syck;
829 0         0 my $start = time;
830             # XXX is locking useful here?
                # mkdir serves as the lock: retry every 0.2 seconds and warn
                # on each retry once 3 or more seconds have passed
831 0         0 while (not mkdir "$file.lock") {
832 0         0 Time::HiRes::sleep 0.2;
833 0 0       0 warn "*** waiting for lock ***" if time - $start >= 3;
834             }
835 0         0 my $yml = YAML::Syck::LoadFile $file;
836 0 0       0 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
                # the keys read here are the ones _rmirror_runstatusfile_write
                # puts into the dumped hash
837 0         0 my $rself = $yml->{reduced_self};
838 0         0 my $rfs = $yml->{reduced_rfs};
839             # XXX bring them into self
840             }
841              
842             sub _rmirror_runstatusfile_write {
                # Dump a reduced copy of $self and of every recentfile,
                # together with $options and the current time, as YAML into
                # $file. The write goes to "$file.new" first and is renamed
                # into place while the "$file.lock" directory lock is held.
843 15     15   32 my($self, $file, $options) = @_;
844 15         24 my $rself;
                # copy all attributes of $self except the two holding
                # recentfile objects (values are copied as is, not cloned)
845 15         95 while (my($k,$v) = each %$self) {
846 185 100       503 next if $k =~ /^-(_principal_recentfile|_recentfiles)$/;
847 155         440 $rself->{$k} = $v;
848             }
849 15         51 my $rfs = $self->recentfiles;
850 15         24 my $rrfs;
                # same reduction per recentfile, leaving out the
                # _current_tempfile_fh, _pathdb and _rsync attributes
851 15         78 for my $i (0..$#$rfs) {
852 75         77 my $rf = $rfs->[$i];
853 75         296 while (my($k,$v) = each %$rf) {
854 1727 100       2484 next if $k =~ /^-(_current_tempfile_fh|_pathdb|_rsync)$/;
855 1615         3633 $rrfs->[$i]{$k} = $rfs->[$i]{$k};
856             }
857             }
858 15         134 require YAML::Syck;
859 15         27 my $start = time;
                # mkdir serves as the lock: retry every 0.15 seconds and warn
                # on each retry once 3 or more seconds have passed
860 15         1342 while (not mkdir "$file.lock") {
861 0         0 Time::HiRes::sleep 0.15;
862 0 0       0 warn "*** waiting for lock directory '$file.lock' ***" if time - $start >= 3;
863             }
864             YAML::Syck::DumpFile
865             (
866 15         200 "$file.new",
867             {
868             options => $options,
869             time => time,
870             reduced_rfs => $rrfs,
871             reduced_self => $rself,
872             });
                # replace the old status file, then release the lock; a
                # failure of either step is fatal
873 15 50       11434 rename "$file.new", $file or die "Could not rename: $!";
874 15 50       1288 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
875             }
876              
877             sub _rmirror_endofloop_sleep {
                # Sleep for $sleep seconds. When verbose is on, first write a
                # "Dorm" line with the current time and the sleep duration.
878 0     0   0 my($self, $sleep) = @_;
879 0 0       0 if ($self->verbose) {
880 0         0 my $fh;
                # append to the verboselog file if one is set, else use STDERR
881 0 0       0 if (my $vl = $self->verboselog) {
882 0 0       0 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
883             } else {
884 0         0 $fh = \*STDERR;
885             }
886 0         0 printf $fh
887             (
888             "Dorm %d (%s secs)\n",
889             time,
890             $sleep,
891             );
892             }
                # NOTE(review): this is the builtin sleep, while the other
                # sleeps in this file call Time::HiRes::sleep; confirm that
                # fractional values of $sleep are handled as intended here.
893 0         0 sleep $sleep;
894             }
895              
896             # it returns three things: abslfile, rfilename and the filehandle.
897             # But the abslfile (and the filehandle) is undef unless the
898             # rfilename ends in .recent. A weird interface, my friend.
899             sub _principal_recentfile_fromremote_resosymlink {
                # Fetch the remote file $rfilename and, as long as the
                # fetched file is a symlink, install that symlink under
                # localroot and fetch its target instead.
                # Returns the list ($abslfile, $rfilename, $fh); $abslfile
                # and $fh are only set when $rfilename ends in ".recent".
900 4     4   8 my($self, $rfilename) = @_;
                # an empty name defaults to "RECENT.recent"
901 4 50       24 $rfilename = "RECENT.recent" unless length $rfilename;
902 4         16 my $abslfile = undef;
903 4         8 my $fh;
904 4 50       100 if ($rfilename =~ /\.recent$/) {
905             # may be a file *or* a symlink,
906 4         20 ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
907 4         144 while (-l $abslfile) {
908 4         56 my $symlink = readlink $abslfile;
                # only symlink targets without a directory part are accepted
909 4 50       60 if ($symlink =~ m|/|) {
910 0         0 die "FIXME: filenames containing '/' not supported, got '$symlink'";
911             }
912 4         84 my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
913 4 50       304 if (-e $localrfile) {
914 0         0 my $old_symlink = readlink $localrfile;
                # local symlink already has this target: drop the fetched
                # copy; otherwise replace the local one with it
915 0 0       0 if ($old_symlink eq $symlink) {
916 0 0       0 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
917             } else {
918 0         0 unlink $localrfile; # may fail
919 0 0       0 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
920             }
921             } else {
922 4 50       328 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
923             }
                # follow the link: fetch the target; the loop repeats if
                # that is a symlink again
924 4         52 ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
925             }
926             }
927 4         500 return ($abslfile, $rfilename, $fh);
928             }
929              
930             # takes a basename, returns an absolute name, does not delete the
931             # file, throws the $fh away. Caller must rename or unlink
932              
933             # XXX needs to activate the fh in the rf0 so that it is able to unlink
934             # the file. I would like that the file is used immediately by $rf0
935             sub _fetch_as_tempfile {
                # Rsync "$rfile" from remoteroot into a new File::Temp file
                # and return the list ($dst, $fh): the temp file's name and
                # the File::Temp object. Dies when the rsync call fails.
936 8     8   28 my($self, $rfile) = @_;
                # reuse the last ".ext" of $rfile as the temp file's suffix
                # (empty string when $rfile contains no dot)
937 8         88 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
938 8 50       56 $suffix = "" unless defined $suffix;
                # the temp file is created in tempdir, or in localroot when
                # tempdir is false; UNLINK => 0 asks File::Temp not to remove it
939 8   33     84 my $fh = File::Temp->new
940             (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
941             $rfile,
942             ),
943             DIR => $self->tempdir || $self->localroot,
944             SUFFIX => $suffix,
945             UNLINK => 0,
946             );
947 8         3304 my $rsync;
948             my @rsync_options;
                # rsync_options may be a hashref or an arrayref; either is
                # flattened into a plain list
949 8 50       40 if (my $rso = $self->rsync_options) {
950 8 50       72 if (ref $rso eq "HASH") {
    0          
951 8         52 @rsync_options = %$rso;
952             } elsif (ref $rso eq "ARRAY") {
953 0         0 @rsync_options = @$rso;
954             }
955             } else {
956 0         0 @rsync_options = ();
957             }
                # File::Rsync up to version 0.45 gets the options as one
                # hashref, later versions as a flat list
958 8 50       48 if ($File::Rsync::VERSION <= 0.45) {
959 0         0 $rsync = File::Rsync->new({@rsync_options});
960             } else {
961 8         96 $rsync = File::Rsync->new(@rsync_options);
962             }
                # no object: abort with a stack trace and the dumped options
963 8 50       4176 unless ($rsync) {
964 0         0 require Carp;
965 0         0 Carp::confess(YAML::Syck::Dump($self->rsync_options));
966             }
967 8         36 my $dst = $fh->filename;
                # LANG is set to "C" until this sub returns, so it is in
                # effect for the rsync call below
968 8         124 local($ENV{LANG}) = "C";
969 8 50       40 $rsync->exec
970             (
971             src => join("/",$self->remoteroot,$rfile),
972             dst => $dst,
973             ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
                # regular files get mode 0644; a fetched symlink is left alone
974 8 100       380880 unless (-l $dst) {
975 4         44 my $mode = 0644;
976 4 50       144 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
977             }
978 8         2296 return($dst,$fh);
979             }
980              
981             =head2 $verbose = $obj->verbose ( $set )
982              
983             Getter/setter method to set verbosity for this F:R:M:Recent object and
984             all associated Recentfile objects.
985              
986             =cut
987             sub verbose {
                # Getter/setter for the _verbose attribute; returns its value,
                # which is initialized to 0 on first read when undefined.
988 20     20 1 46 my($self,$set) = @_;
989 20 50       74 if (defined $set) {
                # pass the new value on to every recentfile before storing it
990 0         0 for ( @{$self->recentfiles} ) { $_->verbose($set) }
  0         0  
  0         0  
991 0         0 $self->_verbose ($set);
992             }
993 20         88 my $x = $self->_verbose;
994 20 100       134 unless (defined $x) {
995 10         22 $x = 0;
996 10         50 $self->_verbose ($x);
997             }
998 20         94 return $x;
999            
1000             }
1001              
1002             =head2 my $vl = $obj->verboselog ( $set )
1003              
1004             Getter/setter method for the path to the logfile to write verbose
1005             progress information to.
1006              
1007             Note: This is a primitive stop gap solution to get simple verbose
1008             logging working. The program still sends error messages to STDERR.
1009             Switching to Log4perl or similar is probably the way to go. TBD.
1010              
1011             =cut
1012             sub verboselog {
                 # Getter/setter for the _verboselog attribute; returns its
                 # value, which is initialized to 0 on first read when undefined.
1013 16     16 1 48 my($self,$set) = @_;
1014 16 50       56 if (defined $set) {
                 # pass the new value on to every recentfile before storing it
1015 0         0 for ( @{$self->recentfiles} ) { $_->verboselog($set) }
  0         0  
  0         0  
1016 0         0 $self->_verboselog ($set);
1017             }
1018 16         76 my $x = $self->_verboselog;
                 # 0 is false, so callers testing the return value for truth
                 # treat an unset log path as "no log file"
1019 16 100       100 unless (defined $x) {
1020 10         28 $x = 0;
1021 10         60 $self->_verboselog ($x);
1022             }
1023 16         204 return $x;
1024             }
1025              
1026             =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
1027              
1028             The idea is that we want to have a short file that records really
1029             recent changes. So that a fresh mirror can be kept fresh as long as
1030             the connectivity is given. Then we want longer files that record the
1031             history before. So when the mirror falls behind the update period
1032             reflected in the shortest file, it can complement the list of recent
1033             file events with the next one. And if this is not long enough we want
1034             another one, again a bit longer. And we want one that completes the
1035             history back to the oldest file. The index files together do contain
1036             the complete list of current files. The longer a period covered by an
1037             index file is gone the less often the index file is updated. For
1038             practical reasons adjacent files will often overlap a bit but this is
1039             neither necessary nor enforced. Enforced is only that there must not
1040             ever be a gap between two adjacent index files that would have to
1041             contain a file reference. That's the basic idea. The following example
1042             represents a tree that has a few updates every day:
1043              
1044             RECENT.recent -> RECENT-1h.yaml
1045             RECENT-1h.yaml
1046             RECENT-6h.yaml
1047             RECENT-1d.yaml
1048             RECENT-1M.yaml
1049             RECENT-1W.yaml
1050             RECENT-1Q.yaml
1051             RECENT-1Y.yaml
1052             RECENT-Z.yaml
1053              
1054             Each of these files represents a contract to hold a record for every
1055             filesystem event within the period indicated in the filename.
1056              
1057             The first file is the principal file, in so far it is the one that is
1058             written first after a filesystem change. Usually a symlink links to it
1059             with a filename that has the same filenameroot and the suffix
1060             C<.recent>. On systems that do not support symlinks there is a plain
1061             copy maintained instead.
1062              
1063             The last file, the Z file, contains the complementary files that are
1064             in none of the other files. It may contain C<delete> events but often
1065             C<delete> events are discarded at the transition to the Z file.
1066              
1067             =head2 SITE SEEING TOUR
1068              
1069             This section illustrates the operation of a server-client couple in a
1070             fictitious installation that has to deal with a long time of inactivity.
1071             I think such an edge case installation demonstrates the economic
1072             behaviour of our model of overlapping time slices best.
1073              
1074             The sleeping beauty (http://en.wikipedia.org/wiki/Sleeping_Beauty) is
1075             a classic fairytale of a princess sleeping for a hundred years. The
1076             story inspired the test case 02-aurora.t.
1077              
1078             Given an upstream server where the people stop feeding new files for
1079             one hundred years. That upstream server has no driving energy to do
1080             major changes to its RECENT files. Cronjobs will continue to shift
1081             things towards the Z file but soon will stop doing so since all of
1082             them have to keep their promise to record files covering a certain
1083             period. Soon all RECENT files will cover exactly their native period.
1084              
1085             Downstream servers will stubbornly ask their question to the rsync
1086             server whether there is a newer RECENT.recent. As soon as the smallest
1087             RECENT file has reached the state of maximum possible merge with the
1088             second smallest RECENT file, the answer of the rsync server will
1089             always be: nothing new. And downstream servers that were uptodate on
1090             the previous request will be satisfied and do nothing. Never will they
1091             request a download. The answer that there is no change is sufficient
1092             to determine that there is no change in the whole tree.
1093              
1094             Let's presume the smallest RECENT file on this castle is a 1h file and
1095             downstream decides to ask every 30 minutes. Now the hundred years are
1096             over and upstream starts producing files again. One file every minute.
1097             After one minute it will move old files over to the, say, 1d file. In
1098             the next sixty minutes it will not be allowed to move any other file
1099             over to the 1d file. At some point in time downstream will ask the
1100             obligatory question "anything new?" and it will get the current 1h
1101             file. It will recognize in the meta part of the current file which
1102             timestamps have been moved to the 1d file, it will recognize that it
1103             has all those. It will have no need to download the 1d file, it will
1104             download the missing files and be done. No second RECENT file needs to
1105             be downloaded.
1106              
1107             Downstream only decides to download another RECENT file when not doing
1108             so would result in a gap between two recent files. Such that
1109             consistency checks would become impossible. Or for potentially
1110             interested third parties, like down-down-stream servers.
1111              
1112             Downloads of RECENT files are subject to rsync optimizations in that
1113             rsync does some level of blockwise checksumming that is considered
1114             efficient to avoid copying blocks of data that have not changed. Our
1115             format is that of an ordered array, so that large blocks stay constant
1116             when elements are prepended to the array. This means we usually do not
1117             have to rsync full RECENT files. Only if they are really small, the
1118             rsync algorithm will not come into play but that's OK for small files.
1119              
1120             Upstream servers are extremely lazy in writing the larger files. See
1121             File::Rsync::Mirror::Recentfile::aggregate() for the specs. Long
1122             before the one hundred years are over, the upstream server will stop
1123             changing files. Slowly everything that existed before upstream fell
1124             asleep trickles into the Z file. Say, the second-largest RECENT file
1125             is a 1Y file and the third-largest RECENT file is a 1Q file, then it
1126             will take at least one quarter of a year that the 1Y file will be
1127             merged into the Z file. From that point in time everything will have
1128             been merged into the Z file and the server's job to call C<aggregate>
1129             regularly will become a noop. Consequently downstream will never again
1130             download anything. Just the obligatory question: anything new?
1131              
1132             =head2 THE INDIVIDUAL RECENTFILE
1133              
1134             A I<recentfile> consists of a hash that has two keys: C<meta> and
1135             C<recent>. The C<meta> part has metadata and the C<recent> part has a
1136             list of fileobjects.
1137              
1138             =head2 THE META PART
1139              
1140             Here we find things that are pretty much self explaining: all
1141             lowercase attributes are accessors and as such explained in the
1142             manpages. The uppercase attribute C<Producers> contains version
1143             information about involved software components.
1144              
1145             Even though the lowercase attributes are documented in the
1146             F:R:M:Recentfile manpage, let's focus on the important stuff to make
1147             sure nothing goes by unnoticed: meta contains the aggregator levels in
1148             use in this installation, in other words the names of the RECENT
1149             files, eg:
1150              
1151             aggregator:
1152             - 3s
1153             - 8s
1154             - 21s
1155             - 55s
1156             - Z
1157              
1158             It contains a dirtymark telling us the timestamp of the last protocol
1159             violation of the upstream server:
1160              
1161             dirtymark: '1325093856.49272'
1162              
1163             Plus a few things convenient in a situation where we need to do some
1164             debugging.
1165              
1166             And it contains information about which timestamp is the maximum
1167             timestamp in the neighboring file. This is probably the most important
1168             data in meta:
1169              
1170             merged:
1171             epoch: 1307159461.94575
1172              
1173             This keeps track of the highest epoch we would find if we looked into
1174             the next RECENT file.
1175              
1176             Another entry is the minmax, eg:
1177              
1178             minmax:
1179             max: 1307161441.97444
1180             min: 1307140103.70322
1181              
1182             The merged/epoch and minmax examples above illustrate one case of an
1183             overlap (130715... is between 130716... and 130714...). The syncing
1184             strategy for the client is in general the imperative: if the interval
1185             covered by a recentfile (minmax) and the interval covered by the next
1186             higher recentfile (merged/epoch) do not overlap anymore, then it is
1187             time to refresh the next recentfile.
1188              
1189             =head2 THE RECENT PART
1190              
1191             This is the interesting part. Every entry refers to some filesystem
1192             change (with path, epoch, type).
1193              
1194             The I<epoch> value is the point in time when some change was
1195             I<registered> but can be set to arbitrary values. Do not be tempted to
1196             believe that the entry has a direct relation to something like
1197             modification time or change time on the filesystem level. They are not
1198             reflecting release dates. (If you want exact release dates: Barbie is
1199             providing a database of them. See
1200             http://use.perl.org/~barbie/journal/37907).
1201              
1202             All these entries can be divided into two types (denoted by the
1203             I<type> attribute): C<new>s and C<delete>s. Changes and creations are
1204             C<new>s. Deletes are C<delete>s.
1205              
1206             Besides an I<epoch> and a I<type> attribute we find a third one:
1207             I<path>. This path is relative to the directory we find the
1208             I<recentfile> in.
1209              
1210             The order of the entries in the I<recentfile> is by decreasing epoch
1211             attribute. These are unique floating point numbers. When the server
1212             has ntp running correctly, then the timestamps are usually reflecting
1213             a real epoch. If time is running backwards, we trump the system epoch
1214             with strictly monotonically increasing floating point timestamps and
1215             guarantee they are unique.
1216              
1217             =head1 CORRUPTION AND RECOVERY
1218              
1219             If the origin host breaks the promise to deliver consistent and
1220             complete I<recentfiles> then it must update its C<dirtymark> and all
1221             slaves must discard what they consider the truth.
1222              
1223             In the worst case that something goes wrong despite the dirtymark
1224             mechanism the way back to sanity can be achieved through traditional
1225             rsyncing between the hosts. But please be wary doing that: mixing
1226             traditional rsync and the F:R:M:R technique can lead to gratuitous
1227             extra errors. If you're the last host in a chain, there's nobody you
1228             can disturb, but if you have downstream clients, it is possible that
1229             rsync copies a RECENT file before the contained files are actually
1230             available.
1231              
1232             =head1 BACKGROUND
1233              
1234             This is about speeding up rsync operation on large trees. Uses a small
1235             metadata cocktail and pull technology.
1236              
1237             rersyncrecent solves this problem with a couple of (usually 2-10)
1238             lightweight index files which cover different overlapping time
1239             intervals. The master writes these files and the clients/slaves can
1240             construct the full tree from the information contained in them. The
1241             most recent index file usually covers the last seconds or minutes or
1242             hours of the tree and depending on the needs, slaves can rsync every
1243             few seconds or minutes and then bring their trees in full sync.
1244              
1245             The rersyncrecent model was developed for CPAN but as it is both
1246             convenient and economic it is also a general purpose solution. I'm
1247             looking forward to see a CPAN backbone that is only a few seconds
1248             behind PAUSE.
1249              
1250             =head2 NON-COMPETITORS
1251              
1252             File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
1253             Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
1254             Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
1255             Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz dito
1256             Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz dito
1257             Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
1258              
1259             rsnapshot www.rsnapshot.org focus on backup
1260             csync www.csync.org more like unison
1261             multi-rsync sourceforge 167893 lan push to many
1262             chasm chasmd.org per-directory manifests
1263              
1264             =head2 COMPETITORS
1265              
1266             The problem to solve which clusters and ftp mirrors and otherwise
1267             replicated datasets like CPAN share: how to transfer only a minimum
1268             amount of data to determine the diff between two hosts.
1269              
1270             Normally it takes a long time to determine the diff itself before it
1271             can be transferred. Known solutions at the time of this writing are
1272             csync2, and rsync 3 batch mode.
1273              
1274             For many years the best solution was B<csync2> which solves the
1275             problem by maintaining a sqlite database on both ends and talking a
1276             highly sophisticated protocol to quickly determine which files to send
1277             and which to delete at any given point in time. Csync2 is often
1278             inconvenient because it is push technology and the act of syncing
1279             demands quite an intimate relationship between the sender and the
1280             receiver. This is hard to achieve in an environment of loosely coupled
1281             sites where the number of sites is large or connections are unreliable
1282             or network topology is changing.
1283              
1284             B<Rsync 3 batch mode> works around these problems by providing
1285             rsync-able batch files which allow receiving nodes to replay the
1286             history of the other nodes. This reduces the need to have an
1287             incestuous relation but it has the disadvantage that these batch files
1288             replicate the contents of the involved files. This seems inappropriate
1289             when the nodes already have a means of communicating over rsync.
1290              
1291             =head2 HONORABLE MENTION
1292              
1293             B<instantmirror> at https://fedorahosted.org/InstantMirror/ is an
1294             ambitious project that tries to combine various technologies (squid,
1295             bittorrent) to overcome the current slowness with the main focus on
1296             fedora. It's been founded in 2009-03 and at the time of this writing
1297             it is still a bit early to comment on.
1298              
1299             =head1 LIMITATIONS
1300              
1301             If the tree of the master server is changing faster than the bandwidth
1302             permits to mirror then additional protocols may need to be deployed.
1303             Certainly p2p/bittorrent can help in such situations because
1304             downloading sites help each other and bittorrent chunks large files
1305             into pieces.
1306              
1307             =head1 INOTIFY
1308              
1309             Currently the origin server has two options. The traditional one is to
1310             strictly keep track of injected and removed files through all involved
1311             processes and call C<update> on every file system event. The other
1312             option is to let data come in and use the assistance of inotify. PAUSE
1313             is running the former, the cpan master site is running the latter.
1314             Both work equally well for CPAN because CPAN has not yet had any
1315             problem with upload storms. On installations that have to deal with
1316             more uploaded data than inotify+rrr can handle it's better to use the
1317             traditional method such that the relevant processes can build up some
1318             backpressure to throttle writing processes until we're ready to accept
1319             the next data chunk.
1320              
1321             =head1 FUTURE DIRECTIONS
1322              
1323             Convince other users outside the CPAN like
1324             http://fedoraproject.org/wiki/Infrastructure/Mirroring
1325              
1326             =head1 SEE ALSO
1327              
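1328             L<File::Rsync::Mirror::Recentfile>,
1329             L<File::Rsync::Mirror::Recentfile::Done>,
1330             L<File::Rsync::Mirror::Recentfile::FakeBigFloat>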
1331              
1332             =head1 BUGS
1333              
1334             Please report any bugs or feature requests through the web interface
1335             at
1336             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
1337             I will be notified, and then you'll automatically be notified of
1338             progress on your bug as I make changes.
1339              
1340             =head1 SUPPORT
1341              
1342             You can find documentation for this module with the perldoc command.
1343              
1344             perldoc File::Rsync::Mirror::Recent
1345              
1346             You can also look for information at:
1347              
1348             =over 4
1349              
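1350             =item * RT: CPAN's request tracker
1351              
1352             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
1353              
1354             =item * AnnoCPAN: Annotated CPAN documentation
1355              
1356             L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
1357              
1358             =item * CPAN Ratings
1359              
1360             L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
1361              
1362             =item * Search CPAN
1363              
1364             L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>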
1365              
1366             =back
1367              
1368              
1369             =head1 ACKNOWLEDGEMENTS
1370              
1371             Thanks to RJBS for module-starter.
1372              
1373             =head1 AUTHOR
1374              
1375             Andreas König
1376              
1377             =head1 COPYRIGHT & LICENSE
1378              
1379             Copyright 2008, 2009 Andreas König.
1380              
1381             This program is free software; you can redistribute it and/or modify it
1382             under the same terms as Perl itself.
1383              
1384              
1385             =cut
1386              
1387             1; # End of File::Rsync::Mirror::Recent