File Coverage

blib/lib/File/Rsync/Mirror/Recent.pm
Criterion Covered Total %
statement 385 490 78.5
branch 107 216 49.5
condition 12 31 38.7
subroutine 44 48 91.6
pod 10 10 100.0
total 558 795 70.1


line stmt bran cond sub pod time code
1             package File::Rsync::Mirror::Recent;
2              
3             # use warnings;
4 6     6   538691 use strict;
  6         57  
  6         192  
5 6     6   3732 use File::Rsync::Mirror::Recentfile;
  6         19  
  6         315  
6              
7             =encoding utf-8
8              
9             =head1 NAME
10              
11             File::Rsync::Mirror::Recent - mirroring via rsync made efficient
12              
13             =cut
14              
15             package File::Rsync::Mirror::Recent;
16              
17 6     6   49 use File::Basename qw(basename dirname fileparse);
  6         13  
  6         368  
18 6     6   42 use File::Copy qw(cp);
  6         12  
  6         261  
19 6     6   30 use File::Path qw(mkpath);
  6         12  
  6         237  
20 6     6   36 use File::Rsync;
  6         12  
  6         127  
21 6     6   3179 use File::Rsync::Mirror::Recentfile::Done (); # at least needed by thaw()
  6         18  
  6         178  
22 6     6   38 use File::Rsync::Mirror::Recentfile::FakeBigFloat qw(:all);
  6         17  
  6         768  
23 6     6   42 use File::Temp;
  6         12  
  6         545  
24 6     6   3093 use List::Pairwise qw(mapp grepp);
  6         14017  
  6         388  
25 6     6   49 use List::Util qw(first max);
  6         12  
  6         390  
26 6     6   41 use Scalar::Util qw(blessed reftype);
  6         13  
  6         281  
27 6     6   32 use Storable;
  6         17  
  6         268  
28 6     6   35 use Time::HiRes qw();
  6         12  
  6         84  
29 6     6   30 use YAML::Syck;
  6         12  
  6         289  
30              
31 6     6   31 use version; our $VERSION = qv('0.4.6');
  6         12  
  6         35  
32              
33             =head1 SYNOPSIS
34              
35             The documentation in here is normally not needed because the code is
36             considered to be run from several standalone programs. For a quick
37             overview, see the file README.mirrorcpan and the bin/ directory of the
38             distribution. For the architectural ideas see the section THE
39             ARCHITECTURE OF A COLLECTION OF RECENTFILES below.
40              
41             File::Rsync::Mirror::Recent establishes a view on a collection of
42             File::Rsync::Mirror::Recentfile objects and provides abstractions
43             spanning multiple time intervals associated with those.
44              
45             =head1 EXPORT
46              
47             No exports.
48              
49             =head1 CONSTRUCTORS
50              
51             =head2 my $obj = CLASS->new(%hash)
52              
53             Constructor. On every argument pair the key is a method name and the
54             value is an argument to that method name.
55              
56             =cut
57              
58             sub new {
59 560     560 1 31940 my($class, @args) = @_;
60 560         1969 my $self = bless {}, $class;
61 560         2699 while (@args) {
62 626         2992 my($method,$arg) = splice @args, 0, 2;
63 626         3552 $self->$method($arg);
64             }
65 560         5057 return $self;
66             }
67              
68             =head2 my $obj = CLASS->thaw($statusfile)
69              
70             Constructor from a statusfile left over from a previous
71             rmirror run. See also C<runstatusfile>.
72              
73             =cut
74              
75             sub _thaw_without_pathdb {
76 24     24   281 my($self,$file) = @_;
77 24 50       2880 open my $fh, $file or die "Can't open '$file': $!";
78 24         643 local $/ = "\n";
79 24         115 my $in_pathdb = 0;
80 24         902 my $tfile = File::Temp->new
81             (
82             TEMPLATE => "Recent-thaw-XXXX",
83             TMPDIR => 1,
84             UNLINK => 0,
85             CLEANUP => 0,
86             SUFFIX => '.dat',
87             );
88 24         23176 my $template_for_eop;
89 24         12267 while (<$fh>) {
90 6060 100       11108 if ($in_pathdb) {
    100          
91 48 100       729 if (/$template_for_eop/) {
92 24         67 $in_pathdb = 0;
93             }
94             } elsif (/(\s+)-\s*__pathdb\s*:/) {
95 24         89 $in_pathdb = 1;
96 24         384 my $next_attr = sprintf "^%s\\S", " ?" x length($1);
97 24         502 $template_for_eop = qr{$next_attr};
98             }
99 6060 100       13054 print $tfile $_ unless $in_pathdb;
100             }
101 24 50       1219 close $tfile or die "Could not close: $!";
102 24         302 my $return = $self->thaw($tfile->filename);
103 24         148 $return->_havelostpathdb(1);
104 24         241 unlink $tfile->filename;
105 24         1728 return $return;
106             }
107             sub thaw {
108 28     28 1 666 my($self, $file) = @_;
109 28 50       245 die "thaw called without statusfile argument" unless defined $file;
110 28 50       878 unless (-e $file){
111 0         0 require Carp;
112 0         0 Carp::confess("Alert: statusfile '$file' not found");
113             }
114 28         936 require YAML::Syck;
115 28         192 my $start = time;
116 28         138 my $sleeptime = 0.02;
117 28         2742 while (not mkdir "$file.lock") {
118 0         0 my $err = $!;
119 0         0 Time::HiRes::sleep $sleeptime;
120 0         0 my $waiting = time - $start;
121 0 0       0 if ($waiting >= 3){
122 0         0 warn "*** waiting ($waiting) for lock ($err) ***";
123 0         0 $sleeptime = 1;
124             }
125             }
126 28         540 my $size = -s $file;
127 28         746 my $serialized = YAML::Syck::LoadFile($file);
128 28 50       65454 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
129 28         155 my $charged_self = $serialized->{reduced_self};
130 28         467 my $class = blessed $self;
131 28         527 bless $charged_self, $class;
132 28         90 my $rfs = $serialized->{reduced_rfs};
133 28         188 my $rfclass = $class . "file"; # "Recent" . "file"
134 28         475 my $pathdb = $charged_self->_pathdb;
135 28         180 for my $rf (@$rfs) {
136 140         942 bless $rf, $rfclass;
137 140         457 $rf->_pathdb($pathdb);
138             }
139 28         306 $charged_self->_recentfiles($rfs);
140 28         409 $charged_self->_principal_recentfile($rfs->[0]);
141             # die "FIXME: thaw all recentfiles from reduced_rfs into _recentfiles as well, watch out for pathdb and rsync";
142 28         324 return $charged_self;
143             }
144              
145             =head1 ACCESSORS
146              
147             =cut
148              
149             my @accessors;
150              
151             BEGIN {
152 6     6   5145 @accessors =
153             (
154             "__pathdb",
155             "_dirtymark", # keeps track of the dirtymark of the recentfiles
156             "_havelostpathdb", # boolean
157             "_have_written_statusfile", # boolean
158             "_logfilefordone", # turns on _logfile on all DONE
159             # systems (disk intensive)
160             "_max_one_state", # when we have no time left but want
161             # at least get one file per
162             # iteration to avoid procrastination
163             "_principal_recentfile",
164             "_recentfiles",
165             "_rsync",
166             "_runstatusfile", # occasionally dumps all rfs
167             "_verbose", # internal variable for verbose setter/getter
168             "_verboselog", # internal variable for verboselog setter/getter
169             );
170              
171 6         94 my @pod_lines =
172 6         13 split /\n/, <<'=cut'; push @accessors, grep {s/^=item\s+//} @pod_lines; }
  306         850  
173              
174             =over 4
175              
176             =item ignore_link_stat_errors
177              
178             as in F:R:M:Recentfile
179              
180             =item local
181              
182             Option to specify the local principal file for operations with a local
183             collection of recentfiles.
184              
185             =item localroot
186              
187             as in F:R:M:Recentfile
188              
189             =item max_files_per_connection
190              
191             as in F:R:M:Recentfile
192              
193             =item remote
194              
195             The remote principal recentfile in rsync notation. E.g.
196              
197             pause.perl.org::authors/RECENT.recent
198              
199             =item remoteroot
200              
201             as in F:R:M:Recentfile
202              
203             =item remote_recentfile
204              
205             Rsync address of the remote C<RECENT.recent> symlink or whichever name
206             the principal remote recentfile has.
207              
208             =item rsync_options
209              
210             Things like compress, links, times or checksums. Passed in to the
211             File::Rsync object used to run the mirror. Can be a hashref or an
212             arrayref. Depending on the version of File::Rsync it is passed on as a
213             hashref or as a flat list.
214              
215             =item tempdir
216              
217             as in F:R:M:Recentfile
218              
219             =item ttl
220              
221             Minimum time before fetching the principal recentfile again.
222              
223             =back
224              
225             =cut
226              
227 6     6   42 use accessors @accessors;
  6         12  
  6         51  
228              
229             =head1 METHODS
230              
231             =head2 $arrayref = $obj->news ( %options )
232              
233             Test this with:
234              
235             perl -Ilib bin/rrr-news \
236             -after 1217200539 \
237             -max 12 \
238             -local /home/ftp/pub/PAUSE/authors/RECENT.recent
239              
240             perl -Ilib bin/rrr-news \
241             -after 1217200539 \
242             -rsync=compress=1 \
243             -rsync=links=1 \
244             -localroot /home/ftp/pub/PAUSE/authors/ \
245             -remote pause.perl.org::authors/RECENT.recent \
246             -verbose
247              
248             All parameters that can be passed to
249             File::Rsync::Mirror::Recentfile::recent_events() can also be specified
250             here.
251              
252             One additional option is supported. If C<$Options{callback}> is
253             specified, it must be a subref. This sub is called whenever one chunk
254             of events is found. The first argument to the callback is a reference
255             to the currently accumulated array of events.
256              
257             Note: all data are kept in memory.
258              
259             =cut
260              
261             sub news {
262 500     500 1 3835 my($self, %opt) = @_;
263 500         1905 my $local = $self->local;
264 500 50       3855 unless ($local) {
265 0 0       0 if (my $remote = $self->remote) {
266 0         0 my $localroot;
267 0 0       0 if ($localroot = $self->localroot) {
268             # nice, they know what they are doing
269             } else {
270 0         0 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
271             }
272             } else {
273 0         0 die "Alert: neither local nor remote specified, cannot continue";
274             }
275             }
276 500         2875 my $rfs = $self->recentfiles;
277 500         1325 my $ret = [];
278 500         1725 my $before;
279 500         1520 for my $rf (@$rfs) {
280 2500         5350 my %locopt = %opt;
281 2500         5520 $locopt{before} = $before;
282 2500 50       6465 if ($opt{max}) {
283 0         0 $locopt{max} -= scalar @$ret;
284 0 0       0 last if $locopt{max} <= 0;
285             }
286 2500         4960 $locopt{info} = {};
287 2500         8570 my $res = $rf->recent_events(%locopt);
288 2500 50       8990 if (@$res){
289 2500         46955 push @$ret, @$res;
290             }
291 2500 50 33     8840 if ($opt{max} && scalar @$ret > $opt{max}) {
292 0         0 last;
293             }
294 2500 50       6505 if ($opt{after}){
295 0 0 0     0 if ( $locopt{info}{last} && _bigfloatlt($locopt{info}{last}{epoch},$opt{after}) ) {
296 0         0 last;
297             }
298 0 0       0 if ( _bigfloatgt($opt{after},$locopt{info}{first}{epoch}) ) {
299 0         0 last;
300             }
301             }
302 2500 50       6515 if (!@$res){
303 0         0 next;
304             }
305 2500         5030 $before = $res->[-1]{epoch};
306 2500 50 33     6540 $before = $opt{before} if $opt{before} && _bigfloatlt($opt{before},$before);
307 2500 50       17005 if (my $sub = $opt{callback}) {
308 0         0 $sub->($ret);
309             }
310             }
311 500         2505 $ret;
312             }
313              
314             =head2 overview ( %options )
315              
316             returns a small table that summarizes the state of all recentfiles
317             collected in this Recent object.
318              
319             $options{verbose}=1 increases the number of columns displayed.
320              
321             Here is an example output:
322              
323             Ival Cnt Max Min Span Util Cloud
324             1h 47 1225053014.38 1225049650.91 3363.47 93.4% ^ ^
325             6h 324 1225052939.66 1225033394.84 19544.82 90.5% ^ ^
326             1d 437 1225049651.53 1224966402.53 83248.99 96.4% ^ ^
327             1W 1585 1225039015.75 1224435339.46 603676.29 99.8% ^ ^
328             1M 5855 1225017376.65 1222428503.57 2588873.08 99.9% ^ ^
329             1Q 17066 1224578930.40 1216803512.90 7775417.50 100.0% ^ ^
330             1Y 15901 1223966162.56 1216766820.67 7199341.89 22.8% ^ ^
331             Z 9909 1223966162.56 1216766820.67 7199341.89 - ^ ^
332              
333             I<Ival> is the name of the interval.
334              
335             I<Cnt> is the number of entries in this recentfile.
336              
337             I<Max> is the highest(first) epoch in this recentfile, rounded.
338              
339             I<Min> is the lowest(last) epoch in this recentfile, rounded.
340              
341             I<Span> is the timespan currently covered, rounded.
342              
343             I<Util> is I<Span> divided by the designated timespan of this
344             recentfile.
345              
346             I<Cloud> is ascii art illustrating the sequence of the Max and Min
347             timestamps.
348              
349             =cut
350             sub overview {
351 10     10 1 75 my($self,%options) = @_;
352 10         50 my $rfs = $self->recentfiles;
353 10         20 my(@s,%rank);
354 10         25 RECENTFILE: for my $rf (@$rfs) {
355 60         135 my $re=$rf->recent_events;
356 60         100 my $rfsummary;
357 60 50       140 if (@$re) {
358 60         270 my $span = $re->[0]{epoch}-$re->[-1]{epoch};
359 60         150 my $merged = $rf->merged;
360             $rfsummary =
361             [
362             "Ival",
363             $rf->interval,
364             "Cnt",
365             scalar @$re,
366             "Dirtymark",
367             $rf->dirtymark ? sprintf("%.2f",$rf->dirtymark) : "-",
368             "Produced",
369             sprintf ("%.2f", $rf->{ORIG}{Producers}{time}||0),
370             "Merged",
371             ($rf->interval eq "Z"
372             ?
373             "-"
374             :
375             sprintf ("%.2f", $merged->{epoch} || 0)),
376             "Max",
377             sprintf ("%.2f", $re->[0]{epoch}),
378             "Min",
379 60 50 50     130 sprintf ("%.2f", $re->[-1]{epoch}),
    100 50        
    100          
380             "Span",
381             sprintf ("%.2f", $span),
382             "Util", # u9n:)
383             ($rf->interval eq "Z"
384             ?
385             "-"
386             :
387             sprintf ("%5.1f%%", 100 * $span / $rf->interval_secs)
388             ),
389             ];
390 60     288   555 @rank{mapp {$b} grepp {$a =~ /^(Max|Min)$/} @$rfsummary} = ();
  120         2010  
  540         4885  
391             } else {
392 0         0 next RECENTFILE;
393             }
394 60         760 push @s, $rfsummary;
395             }
396 10         80 @rank{sort {$b <=> $a} keys %rank} = 1..keys %rank;
  120         200  
397 10         55 my $maxrank = max values %rank;
398 10         30 for my $rfsummary (@s) {
399 60         115 my $string = " " x $maxrank;
400 60         70 my @borders;
401 60         85 for my $ele (qw(Max Min)) {
402 120     888   455 my($r) = mapp {$b} grepp {$a eq $ele} @$rfsummary;
  120         2810  
  1080         6840  
403 120         405 push @borders, $rank{$r}-1;
404             }
405 60         105 for ($borders[0],$borders[1]) {
406 120         210 substr($string,$_,1) = "^";
407             }
408 60         170 push @$rfsummary, "Cloud", $string;
409             }
410 10 50       35 unless ($options{verbose}) {
411 10         30 my %filter = map {($_=>1)} qw(Ival Cnt Max Min Span Util Cloud);
  70         145  
412 10         30 for (@s) {
413 60     564   245 $_ = [mapp {($a,$b)} grepp {!!$filter{$a}} @$_];
  420         3205  
  600         3935  
414             }
415             }
416 10         20 my @sprintf;
417 10         25 for (my $i = 0; $i <= $#{$s[0]}; $i+=2) {
  80         165  
418 70         100 my $maxlength = max ((map { length $_->[$i+1] } @s), length $s[0][$i]);
  420         595  
419 70         170 push @sprintf, "%" . $maxlength . "s";
420             }
421 10         35 my $sprintf = join " ", @sprintf;
422 10         20 $sprintf .= "\n";
423 10     70   30 my $headline = sprintf $sprintf, mapp {$a} @{$s[0]};
  70         505  
  10         40  
424 10     420   45 join "", $headline, map { sprintf $sprintf, mapp {$b} @$_ } @s;
  60         210  
  420         3060  
425             }
426              
427             =head2 _pathdb
428              
429             Keeping track of already handled files. Currently it is a hash, will
430             probably become a database with its own accessors.
431              
432             =cut
433              
434             sub _pathdb {
435 582     582   1789 my($self, $set) = @_;
436 582 50       1904 if ($set) {
437 0         0 $self->__pathdb ($set);
438             }
439 582         2784 my $pathdb = $self->__pathdb;
440 582 100       4425 unless (defined $pathdb) {
441 574         2040 $self->__pathdb(+{});
442             }
443 582         3046 return $self->__pathdb;
444             }
445              
446             =head2 $recentfile = $obj->principal_recentfile ()
447              
448             returns the principal recentfile object of this tree.
449              
450             =cut
451             # mirrors the recentfile and instantiates the recentfile object
452             sub _principal_recentfile_fromremote {
453 9     9   63 my($self) = @_;
454             # get the remote recentfile
455 9 50       42 my $rrfile = $self->remote or die "Alert: cannot construct a recentfile object without the 'remote' attribute";
456 9         165 my $splitter = qr{(.+)/([^/]*)};
457 9         212 my($remoteroot,$rfilename) = $rrfile =~ $splitter;
458 9         92 $self->remoteroot($remoteroot);
459 9         93 my($abslfile, $fh);
460 9 50 66     277 if (!defined $rfilename) {
    100          
461 0         0 die "Alert: Cannot resolve '$rrfile', does not match $splitter";
462             } elsif (not length $rfilename or $rfilename eq "RECENT.recent") {
463 4         100 ($abslfile,$rfilename,$fh) = $self->_principal_recentfile_fromremote_resosymlink($rfilename);
464             }
465 9         153 my @need_args =
466             (
467             "ignore_link_stat_errors",
468             "localroot",
469             "max_files_per_connection",
470             "remoteroot",
471             "rsync_options",
472             "tempdir",
473             "ttl",
474             "verbose",
475             "verboselog",
476             );
477 9         27 my $rf0;
478 9 100       110 unless ($abslfile) {
479 5         20 $rf0 = File::Rsync::Mirror::Recentfile->new (map {($_ => $self->$_)} @need_args);
  45         250  
480 5         90 $rf0->split_rfilename($rfilename);
481 5         25 $abslfile = $rf0->get_remote_recentfile_as_tempfile ();
482             }
483 9         1483 $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ( $abslfile );
484 9         213 $rf0->_current_tempfile ( $abslfile );
485 9         164 $rf0->_current_tempfile_fh ( $fh );
486 9         232 $rf0->_use_tempfile (1);
487 9         138 for my $override (@need_args) {
488 81         1535 $rf0->$override ( $self->$override );
489             }
490 9         179 $rf0->is_slave (1);
491 9         248 return $rf0;
492             }
493             sub principal_recentfile {
494 582     582 1 1572 my($self) = @_;
495 582         2450 my $rf0 = $self->_principal_recentfile;
496 582 100       3370 return $rf0 if defined $rf0;
497 550         1445 my $local = $self->local;
498 550 100       2720 if ($local) {
499 541         2799 $rf0 = File::Rsync::Mirror::Recentfile->new_from_file ($local);
500             } else {
501 9 50       118 if (my $remote = $self->remote) {
502 9         139 my $localroot;
503 9 50       77 if ($localroot = $self->localroot) {
504             # nice, they know what they are doing
505             } else {
506 0         0 die "FIXME: remote called without localroot should trigger File::Temp.... TBD, sorry";
507             }
508 9         177 $rf0 = $self->_principal_recentfile_fromremote;
509             } else {
510 0         0 die "Alert: neither local nor remote specified, cannot continue";
511             }
512             }
513 550         2935 $self->_principal_recentfile($rf0);
514 550         2941 return $rf0;
515             }
516              
517             =head2 $recentfiles_arrayref = $obj->recentfiles ()
518              
519             returns a reference to the complete list of recentfile objects that
520             describe this tree. No guarantee is given that the represented
521             recentfiles exist or have been read. They are just bare objects.
522              
523             =cut
524              
525             sub recentfiles {
526 655     655 1 2893 my($self) = @_;
527 655         3501 my $rfs = $self->_recentfiles;
528 655 100       4590 return $rfs if defined $rfs;
529 550         2281 my $rf0 = $self->principal_recentfile;
530 550         2286 my $pathdb = $self->_pathdb;
531 550         3804 $rf0->_pathdb ($pathdb);
532 550         2914 my $aggregator = $rf0->aggregator;
533 550         2935 my @rf = $rf0;
534 550         1845 for my $agg (@$aggregator) {
535 2240         4957 my $nrf = $rf0->_sparse_clone;
536 2240         5670 $nrf->interval ( $agg );
537 2240         5842 $nrf->have_mirrored ( 0 );
538 2240         9763 $nrf->_pathdb ( $pathdb );
539 2240         8337 push @rf, $nrf;
540             }
541 550         2577 $self->_recentfiles(\@rf);
542 550         3246 return \@rf;
543             }
544              
545             =head2 $success = $obj->rmirror ( %options )
546              
547             Mirrors all recentfiles of the I address working through all
548             of them, mirroring their contents.
549              
550             Test this with:
551              
552             use File::Rsync::Mirror::Recent;
553             my $rrr = File::Rsync::Mirror::Recent->new(
554             ignore_link_stat_errors => 1,
555             localroot => "/home/ftp/pub/PAUSE/authors",
556             remote => "pause.perl.org::authors/RECENT.recent",
557             max_files_per_connection => 5000,
558             rsync_options => {
559             compress => 1,
560             links => 1,
561             times => 1,
562             checksum => 0,
563             },
564             verbose => 1,
565             _runstatusfile => "recent-rmirror-state.yml",
566             _logfilefordone => "recent-rmirror-donelog.log",
567             );
568             $rrr->rmirror ( "skip-deletes" => 1, loop => 1 );
569              
570             Or try without the loop parameter and write the loop yourself:
571              
572             use File::Rsync::Mirror::Recent;
573             my @rrr;
574             for my $t ("authors","modules"){
575             my $rrr = File::Rsync::Mirror::Recent->new(
576             ignore_link_stat_errors => 1,
577             localroot => "/home/ftp/pub/PAUSE/$t",
578             remote => "pause.perl.org::$t/RECENT.recent",
579             max_files_per_connection => 512,
580             rsync_options => {
581             compress => 1,
582             links => 1,
583             times => 1,
584             checksum => 0,
585             },
586             verbose => 1,
587             _runstatusfile => "recent-rmirror-state-$t.yml",
588             _logfilefordone => "recent-rmirror-donelog-$t.log",
589             ttl => 5,
590             );
591             push @rrr, $rrr;
592             }
593             while (){
594             for my $rrr (@rrr){
595             $rrr->rmirror ( "skip-deletes" => 1 );
596             }
597             warn "sleeping 23\n"; sleep 23;
598             }
599              
600              
601             =cut
602             # _alluptodate is unused but at least it worked last time I needed it,
603             # so let us keep it around
604             sub _alluptodate {
605 0     0   0 my($self) = @_;
606 0         0 my $sdm = $self->_dirtymark;
607 0 0       0 return unless defined $sdm;
608 0         0 for my $rf (@{$self->recentfiles}) {
  0         0  
609 0 0       0 return if $rf->seeded;
610 0         0 my $rfdm = $rf->dirtymark;
611 0 0       0 return unless defined $rfdm;
612 0 0       0 return unless $rfdm eq $sdm;
613 0         0 my $done = $rf->done;
614 0 0       0 return unless defined $done;
615 0         0 my $done_intervals = $done->_intervals;
616 0 0       0 return if !defined $done_intervals;
617             # nonono, may be more than one, only covered it must be:
618             # return if @$done_intervals > 1;
619 0         0 my $minmax = $rf->minmax;
620 0 0       0 return unless defined $minmax;
621 0 0       0 return unless $done->covered(@$minmax{qw(max min)});
622             }
623             # $DB::single++;
624 0         0 return 1;
625             }
626             sub _fullseed {
627 1     1   10 my($self) = @_;
628 1         16 for ( @{$self->recentfiles} ) { $_->seed(1) }
  1         26  
  5         485  
629             }
630             sub rmirror {
631 14     14 1 340 my($self, %options) = @_;
632              
633 14         125 my $rfs = $self->recentfiles;
634              
635 14         162 $self->principal_recentfile->seed;
636       0     my $_sigint = sub {
637             # XXX exit gracefully (reminder)
638 14         260 };
639              
640             # XXX needs accessor: warning, if set too low, we do nothing but
641             # mirror the principal!
642 14         46 my $minimum_time_per_loop = 20;
643              
644 14 50       125 if (my $logfile = $self->_logfilefordone) {
645 0         0 for my $i (0..$#$rfs) {
646 0         0 $rfs->[$i]->done->_logfile($logfile);
647             }
648             }
649 14 50       223 if (my $dirtymark = $self->principal_recentfile->dirtymark) {
650 14         237 my $mydm = $self->_dirtymark;
651 14 100       251 if (!defined $mydm){
    50          
652 9         41 $self->_dirtymark($dirtymark);
653             } elsif ($dirtymark ne $mydm) {
654 0 0       0 if ($self->verbose) {
655 0         0 my $fh;
656 0 0       0 if (my $vl = $self->verboselog) {
657 0 0       0 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
658             } else {
659 0         0 $fh = \*STDERR;
660             }
661 0         0 print $fh "NewDirtymark: old[$mydm] new[$dirtymark]\n";
662             }
663 0         0 $self->_dirtymark($dirtymark);
664             }
665             }
666 14         226 my $rstfile = $self->runstatusfile;
667 14 100       256 unless ($self->_have_written_statusfile) {
668 9         209 $self->_rmirror_runstatusfile_write ($rstfile, \%options);
669 9         137 $self->_have_written_statusfile(1);
670             }
671 14         303 $self->_rmirror_loop($minimum_time_per_loop,\%options);
672             }
673              
674             sub _rmirror_loop {
675 14     14   71 my($self,$minimum_time_per_loop,$options) = @_;
676 14         33 LOOP: while () {
677 14         44 my $ttleave = time + $minimum_time_per_loop;
678 14         91 my $rstfile = $self->runstatusfile;
679 14         154 my $otherproc = $self->_thaw_without_pathdb ($rstfile);
680 14         21868 my $pid = fork;
681 14 50       1907 if (! defined $pid) {
    100          
682 0         0 warn "Contention: $!";
683 0         0 sleep 0.25;
684 0         0 next LOOP;
685             } elsif ($pid) {
686 10         42811895 waitpid($pid,0);
687             } else {
688 4         802 $self = $self->thaw ($rstfile);
689 4         209 my $rfs = $self->recentfiles;
690 4         222 $self->principal_recentfile->seed;
691 4         142 RECENTFILE: for my $i (0..$#$rfs) {
692 20         135 my $rf = $rfs->[$i];
693 20 50       294 if (time > $ttleave) {
694             # Must make sure that one file can get fetched in any case
695 0         0 $self->_max_one_state(1);
696             }
697 20 100       275 if ($rf->seeded) {
    100          
698 8         168 $self->_rmirror_mirror ($i, $options);
699             } elsif ($rf->uptodate) {
700 4 100       15 if ($i < $#$rfs) {
701 3         16 $rfs->[$i+1]->done->merge($rf->done);
702             }
703             # no further seed necessary because "periodic" does it
704 4         16 next RECENTFILE;
705             }
706 16         195 WORKUNIT: while (time < $ttleave) {
707 30 100       474 if ($rf->uptodate) {
708 16         343 $self->_rmirror_sleep_per_connection ($i);
709 16         189 next RECENTFILE;
710             } else {
711 14         197 $self->_rmirror_mirror ($i, $options);
712             }
713             }
714 0 0       0 if ($self->_max_one_state) {
715 0         0 last RECENTFILE;
716             }
717             }
718 4         230 $self->_max_one_state(0);
719 4         118 my $exit = 0;
720 4 50       149 if ($rfs->[-1]->uptodate) {
721 4         142 $self->_rmirror_cleanup;
722             }
723 4 50       40 unless ($options->{loop}) {
724 4         25 $exit = 1;
725             }
726 4         130 $self->_rmirror_runstatusfile_write ($rstfile, $options);
727 4 50       187 exit if $exit;
728 0         0 last LOOP;
729             }
730              
731 10         723 $otherproc = $self->_thaw_without_pathdb ($rstfile);
732 10 50 33     580 if (!$options->{loop} && $otherproc && $otherproc->recentfiles->[-1]->uptodate) {
      33        
733 10         588 last LOOP;
734             }
735 0         0 my $sleep = $ttleave - time;
736 0 0       0 if ($sleep > 0.01) {
737 0         0 $self->_rmirror_endofloop_sleep ($sleep);
738             } else {
739             # negative time not invented yet:)
740             }
741             }
742             }
743              
744             sub _rmirror_mirror {
745 22     22   168 my($self, $i, $options) = @_;
746 22         219 my $rfs = $self->recentfiles;
747 22         167 my $rf = $rfs->[$i];
748 22         247 my %locopt = %$options;
749 22 50       405 if ($self->_max_one_state) {
750 0         0 $locopt{max} = 1;
751             }
752 22         440 $locopt{piecemeal} = 1;
753 22         469 $rf->mirror (%locopt);
754 22 100       868 if ($i==0) {
755             # we limit to 0 for the case that upstream is broken and has
756             # more than one timestamp (happened on PAUSE 200903)
757 4 50       116 if (my $dirtymark = $rf->dirtymark) {
758 4         172 my $mydm = $self->_dirtymark;
759 4 100 66     242 if (!defined $mydm or $dirtymark ne $mydm) {
760 1         20 $self->_dirtymark($dirtymark);
761 1         27 $self->_fullseed;
762             }
763             }
764             }
765             }
766              
767             sub _rmirror_sleep_per_connection {
768 16     16   84 my($self, $i) = @_;
769 16         323 my $rfs = $self->recentfiles;
770 16         162 my $rf = $rfs->[$i];
771 16         188 my $sleep = $rf->sleep_per_connection;
772 16 50       254 $sleep = 0.42 unless defined $sleep;
773 16         6723255 Time::HiRes::sleep $sleep;
774 16 100       1038 $rfs->[$i+1]->done->merge($rf->done) if $i < $#$rfs;
775             }
776              
777             sub _rmirror_cleanup {
778 4     4   31 my($self) = @_;
779 4         90 my $pathdb = $self->_pathdb();
780 4         2077 for my $k (keys %$pathdb) {
781 555         2774 delete $pathdb->{$k};
782             }
783 4         447 my $rfs = $self->recentfiles;
784 4         41 for my $i (0..$#$rfs-1) {
785 16         544 my $thismerged = $rfs->[$i]->merged;
786 16         47 my $next = $rfs->[$i+1];
787 16         107 my $nextminmax = $next->minmax;
788 16 50 33     232 if (not defined $thismerged->{epoch} or _bigfloatlt($nextminmax->{max},$thismerged->{epoch})){
789 0         0 $next->seed;
790             }
791             }
792             }
793              
794             =head2 $file = $obj->runstatusfile ($set)
795              
796             Getter/setter for C<_runstatusfile> attribute. Defaults to a temporary
797             file created by C<File::Temp>. A status file is required for
798             C<rmirror> to work. Since it may be interesting for debugging
799             purposes, you may want to specify a permanent file for this.
800              
801             =cut
802             sub runstatusfile {
803 28     28 1 93 my($self,$set) = @_;
804 28 50       139 if (defined $set) {
805 0         0 $self->_runstatusfile ($set);
806             }
807 28         130 my $x = $self->_runstatusfile;
808 28 100       221 unless (defined $x) {
809 5         40 require File::Temp;
810 5         170 my $tfile = File::Temp->new
811             (
812             TEMPLATE => "Recent-XXXX",
813             TMPDIR => 1,
814             UNLINK => 0,
815             CLEANUP => 0,
816             SUFFIX => '.dat',
817             );
818 5         4155 $self->_runstatusfile($tfile->filename);
819             }
820 28         463 return $self->_runstatusfile;
821             }
822              
823             # unused code.... it was an oops, discovered the thaw() method too
824             # late, and started writing this here....
825             sub _rmirror_runstatusfile_read {
826 0     0   0 my($self, $file) = @_;
827              
828 0         0 require YAML::Syck;
829 0         0 my $start = time;
830             # XXX is locking useful here?
831 0         0 while (not mkdir "$file.lock") {
832 0         0 Time::HiRes::sleep 0.2;
833 0 0       0 warn "*** waiting for lock ***" if time - $start >= 3;
834             }
835 0         0 my $yml = YAML::Syck::LoadFile $file;
836 0 0       0 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
837 0         0 my $rself = $yml->{reduced_self};
838 0         0 my $rfs = $yml->{reduced_rfs};
839             # XXX bring them into self
840             }
841              
842             sub _rmirror_runstatusfile_write {
843 13     13   87 my($self, $file, $options) = @_;
844 13         70 my $rself;
845 13         137 while (my($k,$v) = each %$self) {
846 160 100       809 next if $k =~ /^-(_principal_recentfile|_recentfiles)$/;
847 134         701 $rself->{$k} = $v;
848             }
849 13         73 my $rfs = $self->recentfiles;
850 13         42 my $rrfs;
851 13         162 for my $i (0..$#$rfs) {
852 65         175 my $rf = $rfs->[$i];
853 65         581 while (my($k,$v) = each %$rf) {
854 1500 100       3354 next if $k =~ /^-(_current_tempfile_fh|_pathdb|_rsync)$/;
855 1400         5253 $rrfs->[$i]{$k} = $rfs->[$i]{$k};
856             }
857             }
858 13         138 require YAML::Syck;
859 13         46 my $start = time;
860 13         1837 while (not mkdir "$file.lock") {
861 0         0 Time::HiRes::sleep 0.15;
862 0 0       0 warn "*** waiting for lock directory '$file.lock' ***" if time - $start >= 3;
863             }
864             YAML::Syck::DumpFile
865             (
866 13         459 "$file.new",
867             {
868             options => $options,
869             time => time,
870             reduced_rfs => $rrfs,
871             reduced_self => $rself,
872             });
873 13 50       13187 rename "$file.new", $file or die "Could not rename: $!";
874 13 50       1019 rmdir "$file.lock" or die "Could not rmdir lockfile: $!";
875             }
876              
877             sub _rmirror_endofloop_sleep {
878 0     0   0 my($self, $sleep) = @_;
879 0 0       0 if ($self->verbose) {
880 0         0 my $fh;
881 0 0       0 if (my $vl = $self->verboselog) {
882 0 0       0 open $fh, ">>", $vl or die "Could not open >> '$vl': $!";
883             } else {
884 0         0 $fh = \*STDERR;
885             }
886 0         0 printf $fh
887             (
888             "Dorm %d (%s secs)\n",
889             time,
890             $sleep,
891             );
892             }
893 0         0 sleep $sleep;
894             }
895              
896             # it returns three things: abslfile, rfilename and fh. But abslfile
897             # and fh are undef unless the rfilename ends in .recent. A weird
898             # interface, my friend.
899             sub _principal_recentfile_fromremote_resosymlink {
900 4     4   20 my($self, $rfilename) = @_;
901 4 50       48 $rfilename = "RECENT.recent" unless length $rfilename;
902 4         44 my $abslfile = undef;
903 4         12 my $fh;
904 4 50       48 if ($rfilename =~ /\.recent$/) {
905             # may be a file *or* a symlink,
906 4         64 ($abslfile,$fh) = $self->_fetch_as_tempfile ($rfilename);
907 4         208 while (-l $abslfile) {
908 4         104 my $symlink = readlink $abslfile;
909 4 50       140 if ($symlink =~ m|/|) {
910 0         0 die "FIXME: filenames containing '/' not supported, got '$symlink'";
911             }
912 4         160 my $localrfile = File::Spec->catfile($self->localroot, $rfilename);
913 4 50       408 if (-e $localrfile) {
914 0         0 my $old_symlink = readlink $localrfile;
915 0 0       0 if ($old_symlink eq $symlink) {
916 0 0       0 unlink $abslfile or die "Cannot unlink '$abslfile': $!";
917             } else {
918 0         0 unlink $localrfile; # may fail
919 0 0       0 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
920             }
921             } else {
922 4 50       304 rename $abslfile, $localrfile or die "Cannot rename to '$localrfile': $!";
923             }
924 4         84 ($abslfile,$fh) = $self->_fetch_as_tempfile ($symlink);
925             }
926             }
927 4         1164 return ($abslfile, $rfilename, $fh);
928             }
929              
930             # takes a basename, returns the name of the fetched tempfile plus its
931             # File::Temp $fh, does not delete the file. Caller must rename or unlink
932              
933             # XXX needs to activate the fh in the rf0 so that it is able to unlink
934             # the file. I would like that the file is used immediately by $rf0
935             sub _fetch_as_tempfile {
936 8     8   56 my($self, $rfile) = @_;
937 8         148 my($suffix) = $rfile =~ /(\.[^\.]+)$/;
938 8 50       108 $suffix = "" unless defined $suffix;
939 8   33     188 my $fh = File::Temp->new
940             (TEMPLATE => sprintf(".FRMRecent-%s-XXXX",
941             $rfile,
942             ),
943             DIR => $self->tempdir || $self->localroot,
944             SUFFIX => $suffix,
945             UNLINK => 0,
946             );
947 8         5356 my $rsync;
948             my @rsync_options;
949 8 50       116 if (my $rso = $self->rsync_options) {
950 8 50       116 if (ref $rso eq "HASH") {
    0          
951 8         68 @rsync_options = %$rso;
952             } elsif (ref $rso eq "ARRAY") {
953 0         0 @rsync_options = @$rso;
954             }
955             } else {
956 0         0 @rsync_options = ();
957             }
958 8 50       116 if ($File::Rsync::VERSION <= 0.45) {
959 0         0 $rsync = File::Rsync->new({@rsync_options});
960             } else {
961 8         204 $rsync = File::Rsync->new(@rsync_options);
962             }
963 8 50       10292 unless ($rsync) {
964 0         0 require Carp;
965 0         0 Carp::confess(YAML::Syck::Dump($self->rsync_options));
966             }
967 8         56 my $dst = $fh->filename;
968 8         272 local($ENV{LANG}) = "C";
969 8 50       76 $rsync->exec
970             (
971             src => join("/",$self->remoteroot,$rfile),
972             dst => $dst,
973             ) or die "Could not mirror '$rfile' to $fh\: ".join(" ",$rsync->err);
974 8 100       414348 unless (-l $dst) {
975 4         140 my $mode = 0644;
976 4 50       320 chmod $mode, $dst or die "Could not chmod $mode '$dst': $!";
977             }
978 8         7160 return($dst,$fh);
979             }
980              
981             =head2 $verbose = $obj->verbose ( $set )
982              
983             Getter/setter method to set verbosity for this F:R:M:Recent object and
984             all associated Recentfile objects.
985              
986             =cut
987             sub verbose {
988 18     18 1 80 my($self,$set) = @_;
989 18 50       92 if (defined $set) {
990 0         0 for ( @{$self->recentfiles} ) { $_->verbose($set) }
  0         0  
  0         0  
991 0         0 $self->_verbose ($set);
992             }
993 18         110 my $x = $self->_verbose;
994 18 100       131 unless (defined $x) {
995 9         27 $x = 0;
996 9         23 $self->_verbose ($x);
997             }
998 18         216 return $x;
999            
1000             }
1001              
1002             =head2 my $vl = $obj->verboselog ( $set )
1003              
1004             Getter/setter method for the path to the logfile to write verbose
1005             progress information to.
1006              
1007             Note: This is a primitive stop gap solution to get simple verbose
1008             logging working. The program still sends error messages to STDERR.
1009             Switching to Log4perl or similar is probably the way to go. TBD.
1010              
1011             =cut
1012             sub verboselog {
1013 14     14 1 47 my($self,$set) = @_;
1014 14 50       96 if (defined $set) {
1015 0         0 for ( @{$self->recentfiles} ) { $_->verboselog($set) }
  0         0  
  0         0  
1016 0         0 $self->_verboselog ($set);
1017             }
1018 14         65 my $x = $self->_verboselog;
1019 14 100       187 unless (defined $x) {
1020 9         23 $x = 0;
1021 9         32 $self->_verboselog ($x);
1022             }
1023 14         227 return $x;
1024             }
1025              
1026             =head1 THE ARCHITECTURE OF A COLLECTION OF RECENTFILES
1027              
1028             The idea is that we want to have a short file that records really
1029             recent changes. So that a fresh mirror can be kept fresh as long as
1030             the connectivity is given. Then we want longer files that record the
1031             history before. So when the mirror falls behind the update period
1032             reflected in the shortest file, it can complement the list of recent
1033             file events with the next one. And if this is not long enough we want
1034             another one, again a bit longer. And we want one that completes the
1035             history back to the oldest file. The index files together do contain
1036             the complete list of current files. The longer a period covered by an
1037             index file is gone the less often the index file is updated. For
1038             practical reasons adjacent files will often overlap a bit but this is
1039             neither necessary nor enforced. Enforced is only that there must not
1040             ever be a gap between two adjacent index files that would have to
1041             contain a file reference. That's the basic idea. The following example
1042             represents a tree that has a few updates every day:
1043              
1044             RECENT.recent -> RECENT-1h.yaml
1045             RECENT-1h.yaml
1046             RECENT-6h.yaml
1047             RECENT-1d.yaml
1048             RECENT-1M.yaml
1049             RECENT-1W.yaml
1050             RECENT-1Q.yaml
1051             RECENT-1Y.yaml
1052             RECENT-Z.yaml
1053              
1054             Each of these files represents a contract to hold a record for every
1055             filesystem event within the period indicated in the filename.
1056              
1057             The first file is the principal file, in so far it is the one that is
1058             written first after a filesystem change. Usually a symlink links to it
1059             with a filename that has the same filenameroot and the suffix
1060             C<.recent>. On systems that do not support symlinks there is a plain
1061             copy maintained instead.
1062              
1063             The last file, the Z file, contains the complementary files that are
1064             in none of the other files. It may contain C<delete> events but often
1065             C<delete> events are discarded at the transition to the Z file.
1066              
1067             =head2 SITE SEEING TOUR
1068              
1069             This section illustrates the operation of a server-client couple in a
1070             fictitious installation that has to deal with a long time of inactivity.
1071             I think such an edge case installation demonstrates the economic
1072             behaviour of our model of overlapping time slices best.
1073              
1074             The sleeping beauty (http://en.wikipedia.org/wiki/Sleeping_Beauty) is
1075             a classic fairytale of a princess sleeping for a hundred years. The
1076             story inspired the test case 02-aurora.t.
1077              
1078             Given an upstream server where the people stop feeding new files for
1079             one hundred years. That upstream server has no driving energy to do
1080             major changes to its RECENT files. Cronjobs will continue to shift
1081             things towards the Z file but soon will stop doing so since all of
1082             them have to keep their promise to record files covering a certain
1083             period. Soon all RECENT files will cover exactly their native period.
1084              
1085             Downstream servers will stubbornly ask their question to the rsync
1086             server whether there is a newer RECENT.recent. As soon as the smallest
1087             RECENT file has reached the state of maximum possible merge with the
1088             second smallest RECENT file, the answer of the rsync server will
1089             always be: nothing new. And downstream servers that were uptodate on
1090             the previous request will be satisfied and do nothing. Never will they
1091             request a download. The answer that there is no change is sufficient
1092             to determine that there is no change in the whole tree.
1093              
1094             Let's presume the smallest RECENT file on this castle is a 1h file and
1095             downstream decides to ask every 30 minutes. Now the hundred years are
1096             over and upstream starts producing files again. One file every minute.
1097             After one minute it will move old files over to the, say, 1d file. In
1098             the next sixty minutes it will not be allowed to move any other file
1099             over to the 1d file. At some point in time downstream will ask the
1100             obligatory question "anything new?" and it will get the current 1h
1101             file. It will recognize in the meta part of the current file which
1102             timestamps have been moved to the 1d file, it will recognize that it
1103             has all those. It will have no need to download the 1d file, it will
1104             download the missing files and be done. No second RECENT file needs to
1105             be downloaded.
1106              
1107             Downstream only decides to download another RECENT file when not doing
1108             so would result in a gap between two recent files. Such that
1109             consistency checks would become impossible. Or for potentially
1110             interested third parties, like down-down-stream servers.
1111              
1112             Downloads of RECENT files are subject to rsync optimizations in that
1113             rsync does some level of blockwise checksumming that is considered
1114             efficient to avoid copying blocks of data that have not changed. Our
1115             format is that of an ordered array, so that large blocks stay constant
1116             when elements are prepended to the array. This means we usually do not
1117             have to rsync full RECENT files. Only if they are really small, the
1118             rsync algorithm will not come into play but that's OK for small files.
1119              
1120             Upstream servers are extremely lazy in writing the larger files. See
1121             File::Rsync::Mirror::Recentfile::aggregate() for the specs. Long
1122             before the one hundred years are over, the upstream server will stop
1123             changing files. Slowly everything that existed before upstream fell
1124             asleep trickles into the Z file. Say, the second-largest RECENT file
1125             is a 1Y file and the third-largest RECENT file is a 1Q file, then it
1126             will take at least one quarter of a year that the 1Y file will be
1127             merged into the Z file. From that point in time everything will have
1128             been merged into the Z file and the server's job to call C<aggregate>
1129             regularly will become a noop. Consequently downstream will never again
1130             download anything. Just the obligatory question: anything new?
1131              
1132             =head2 THE INDIVIDUAL RECENTFILE
1133              
1134             A I<recentfile> consists of a hash that has two keys: C<meta> and
1135             C<recent>. The C<meta> part has metadata and the C<recent> part has a
1136             list of fileobjects.
1137              
1138             =head2 THE META PART
1139              
1140             Here we find things that are pretty much self explaining: all
1141             lowercase attributes are accessors and as such explained in the
1142             manpages. The uppercase attribute C<Producers> contains version
1143             information about involved software components.
1144              
1145             Even though the lowercase attributes are documented in the
1146             F:R:M:Recentfile manpage, let's focus on the important stuff to make
1147             sure nothing goes by unnoticed: meta contains the aggregator levels in
1148             use in this installation, in other words the names of the RECENT
1149             files, eg:
1150              
1151             aggregator:
1152             - 3s
1153             - 8s
1154             - 21s
1155             - 55s
1156             - Z
1157              
1158             It contains a dirtymark telling us the timestamp of the last protocol
1159             violation of the upstream server:
1160              
1161             dirtymark: '1325093856.49272'
1162              
1163             Plus a few things convenient in a situation where we need to do some
1164             debugging.
1165              
1166             And it contains information about which timestamp is the maximum
1167             timestamp in the neighboring file. This is probably the most important
1168             data in meta:
1169              
1170             merged:
1171             epoch: 1307159461.94575
1172              
1173             This keeps track of the highest epoch we would find if we looked into
1174             the next RECENT file.
1175              
1176             Another entry is the minmax, eg:
1177              
1178             minmax:
1179             max: 1307161441.97444
1180             min: 1307140103.70322
1181              
1182             The merged/epoch and minmax examples above illustrate one case of an
1183             overlap (130715... is between 130716... and 130714...). The syncing
1184             strategy for the client is in general the imperative: if the interval
1185             covered by a recentfile (minmax) and the interval covered by the next
1186             higher recentfile (merged/epoch) do not overlap anymore, then it is
1187             time to refresh the next recentfile.
1188              
1189             =head2 THE RECENT PART
1190              
1191             This is the interesting part. Every entry refers to some filesystem
1192             change (with path, epoch, type).
1193              
1194             The I<epoch> value is the point in time when some change was
1195             I<registered> but can be set to arbitrary values. Do not be tempted to
1196             believe that the entry has a direct relation to something like
1197             modification time or change time on the filesystem level. They are not
1198             reflecting release dates. (If you want exact release dates: Barbie is
1199             providing a database of them. See
1200             http://use.perl.org/~barbie/journal/37907).
1201              
1202             All these entries can be divided into two types (denoted by the
1203             I<type> attribute): C<new>s and C<delete>s. Changes and creations are
1204             C<new>s. Deletes are C<delete>s.
1205              
1206             Besides an I<epoch> and a I<type> attribute we find a third one:
1207             I<path>. This path is relative to the directory we find the
1208             I<recentfile> in.
1209              
1210             The order of the entries in the I<recentfile> is by decreasing epoch
1211             attribute. These are unique floating point numbers. When the server
1212             has ntp running correctly, then the timestamps are usually reflecting
1213             a real epoch. If time is running backwards, we trump the system epoch
1214             with strictly monotonically increasing floating point timestamps and
1215             guarantee they are unique.
1216              
1217             =head1 CORRUPTION AND RECOVERY
1218              
1219             If the origin host breaks the promise to deliver consistent and
1220             complete I<recentfiles> then it must update its C<dirtymark> and all
1221             slaves must discard what they consider the truth.
1222              
1223             In the worst case that something goes wrong despite the dirtymark
1224             mechanism the way back to sanity can be achieved through traditional
1225             rsyncing between the hosts. But please be wary doing that: mixing
1226             traditional rsync and the F:R:M:R technique can lead to gratuitous
1227             extra errors. If you're the last host in a chain, there's nobody you
1228             can disturb, but if you have downstream clients, it is possible that
1229             rsync copies a RECENT file before the contained files are actually
1230             available.
1231              
1232             =head1 BACKGROUND
1233              
1234             This is about speeding up rsync operation on large trees. Uses a small
1235             metadata cocktail and pull technology.
1236              
1237             rersyncrecent solves this problem with a couple of (usually 2-10)
1238             lightweight index files which cover different overlapping time
1239             intervals. The master writes these files and the clients/slaves can
1240             construct the full tree from the information contained in them. The
1241             most recent index file usually covers the last seconds or minutes or
1242             hours of the tree and depending on the needs, slaves can rsync every
1243             few seconds or minutes and then bring their trees in full sync.
1244              
1245             The rersyncrecent model was developed for CPAN but as it is both
1246             convenient and economic it is also a general purpose solution. I'm
1247             looking forward to see a CPAN backbone that is only a few seconds
1248             behind PAUSE.
1249              
1250             =head2 NON-COMPETITORS
1251              
1252             File::Mirror JWU/File-Mirror/File-Mirror-0.10.tar.gz only local trees
1253             Mirror::YAML ADAMK/Mirror-YAML-0.03.tar.gz some sort of inner circle
1254             Net::DownloadMirror KNORR/Net-DownloadMirror-0.04.tar.gz FTP sites and stuff
1255             Net::MirrorDir KNORR/Net-MirrorDir-0.05.tar.gz dito
1256             Net::UploadMirror KNORR/Net-UploadMirror-0.06.tar.gz dito
1257             Pushmi::Mirror CLKAO/Pushmi-v1.0.0.tar.gz something SVK
1258              
1259             rsnapshot www.rsnapshot.org focus on backup
1260             csync www.csync.org more like unison
1261             multi-rsync sourceforge 167893 lan push to many
1262             chasm chasmd.org per-directory manifests
1263              
1264             =head2 COMPETITORS
1265              
1266             The problem to solve which clusters and ftp mirrors and otherwise
1267             replicated datasets like CPAN share: how to transfer only a minimum
1268             amount of data to determine the diff between two hosts.
1269              
1270             Normally it takes a long time to determine the diff itself before it
1271             can be transferred. Known solutions at the time of this writing are
1272             csync2, and rsync 3 batch mode.
1273              
1274             For many years the best solution was B<csync2> which solves the
1275             problem by maintaining a sqlite database on both ends and talking a
1276             highly sophisticated protocol to quickly determine which files to send
1277             and which to delete at any given point in time. Csync2 is often
1278             inconvenient because it is push technology and the act of syncing
1279             demands quite an intimate relationship between the sender and the
1280             receiver. This is hard to achieve in an environment of loosely coupled
1281             sites where the number of sites is large or connections are unreliable
1282             or network topology is changing.
1283              
1284             B<rsync 3 batch mode> works around these problems by providing
1285             rsync-able batch files which allow receiving nodes to replay the
1286             history of the other nodes. This reduces the need to have an
1287             incestuous relation but it has the disadvantage that these batch files
1288             replicate the contents of the involved files. This seems inappropriate
1289             when the nodes already have a means of communicating over rsync.
1290              
1291             =head2 HONORABLE MENTION
1292              
1293             B<InstantMirror> at https://fedorahosted.org/InstantMirror/ is an
1294             ambitious project that tries to combine various technologies (squid,
1295             bittorrent) to overcome the current slowness with the main focus on
1296             fedora. It's been founded in 2009-03 and at the time of this writing
1297             it is still a bit early to comment on.
1298              
1299             =head1 LIMITATIONS
1300              
1301             If the tree of the master server is changing faster than the bandwidth
1302             permits to mirror then additional protocols may need to be deployed.
1303             Certainly p2p/bittorrent can help in such situations because
1304             downloading sites help each other and bittorrent chunks large files
1305             into pieces.
1306              
1307             =head1 INOTIFY
1308              
1309             Currently the origin server has two options. The traditional one is to
1310             strictly keep track of injected and removed files through all involved
1311             processes and call C<update> on every file system event. The other
1312             option is to let data come in and use the assistance of inotify. PAUSE
1313             is running the former, the cpan master site is running the latter.
1314             Both work equally well for CPAN because CPAN has not yet had any
1315             problem with upload storms. On installations that have to deal with
1316             more uploaded data than inotify+rrr can handle it's better to use the
1317             traditional method such that the relevant processes can build up some
1318             backpressure to throttle writing processes until we're ready to accept
1319             the next data chunk.
1320              
1321             =head1 FUTURE DIRECTIONS
1322              
1323             Convince other users outside the CPAN like
1324             http://fedoraproject.org/wiki/Infrastructure/Mirroring
1325              
1326             =head1 SEE ALSO
1327              
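1328             L<File::Rsync::Mirror::Recentfile>,
1329             L<File::Rsync::Mirror::Recentfile::Done>,
1330             L<File::Rsync::Mirror::Recentfile::FakeBigFloat>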
1331              
1332             =head1 BUGS
1333              
1334             Please report any bugs or feature requests through the web interface
1335             at
1336             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Rsync-Mirror-Recent>.
1337             I will be notified, and then you'll automatically be notified of
1338             progress on your bug as I make changes.
1339              
1340             =head1 SUPPORT
1341              
1342             You can find documentation for this module with the perldoc command.
1343              
1344             perldoc File::Rsync::Mirror::Recent
1345              
1346             You can also look for information at:
1347              
1348             =over 4
1349              
1350             =item * RT: CPAN's request tracker
1351              
1352             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=File-Rsync-Mirror-Recent>
1353              
1354             =item * AnnoCPAN: Annotated CPAN documentation
1355              
1356             L<http://annocpan.org/dist/File-Rsync-Mirror-Recent>
1357              
1358             =item * CPAN Ratings
1359              
1360             L<http://cpanratings.perl.org/d/File-Rsync-Mirror-Recent>
1361              
1362             =item * Search CPAN
1363              
1364             L<http://search.cpan.org/dist/File-Rsync-Mirror-Recent>
1365              
1366             =back
1367              
1368              
1369             =head1 ACKNOWLEDGEMENTS
1370              
1371             Thanks to RJBS for module-starter.
1372              
1373             =head1 AUTHOR
1374              
1375             Andreas König
1376              
1377             =head1 COPYRIGHT & LICENSE
1378              
1379             Copyright 2008, 2009 Andreas König.
1380              
1381             This program is free software; you can redistribute it and/or modify it
1382             under the same terms as Perl itself.
1383              
1384              
1385             =cut
1386              
1387             1; # End of File::Rsync::Mirror::Recent