package MogileFS::Worker::Reaper;
# deletes files

use strict;
use base 'MogileFS::Worker';
use MogileFS::Server;
use MogileFS::Util qw(error debug);
use MogileFS::Config qw(DEVICE_SUMMARY_CACHE_TIMEOUT);
use constant REAP_INTERVAL => 5;
use constant REAP_BACKOFF_MIN => 60;

# completely forget about devices we've reaped after 2 hours of idleness
use constant REAP_BACKOFF_MAX => 7200;

sub new {
    my ($class, $psock) = @_;
    my $self = fields::new($class);
    $self->SUPER::new($psock);

    return $self;
}

sub watchdog_timeout {
    return 240;
}
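
# Note: watchdog_timeout is how long the parent will let this worker go
# silent before presuming it hung; reap_dev() below calls still_alive()
# while sweeping large fid lists so a slow reap pass does not trip it.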

# order is important here:
#
# first, add fid to file_to_replicate table. it
# shouldn't matter if the replicator gets to this
# before the subsequent 'forget_about' method, as the
# replicator will treat dead file_on devices as
# non-existent anyway. however, it is important that
# we enqueue it for replication first, before we
# forget about that file_on row, otherwise a failure
# after/during 'forget_about' could leave a stranded
# file on a dead device and we'd never fix it.
sub reap_fid {
    my ($self, $fid, $dev) = @_;

    $fid->enqueue_for_replication(in => 1);
    $dev->forget_about($fid);
}

# this returns 1000 by default
sub reaper_inject_limit {
    my ($self) = @_;

    my $sto = Mgd::get_store();
    my $max = MogileFS::Config->server_setting_cached('queue_size_for_reaper');
    my $limit = MogileFS::Config->server_setting_cached('queue_rate_for_reaper') || 1000;

    # max defaults to zero, meaning we inject $limit every wakeup
    if ($max) {
        # if a queue size limit is configured for reaper, prevent too many
        # files from entering the repl queue:
        my $len = $sto->deferred_repl_queue_length;
        my $space_left = $max - $len;

        $limit = $space_left if ($limit > $space_left);

        # limit may end up being negative here since other processes
        # can inject into the deferred replication queue, reaper is
        # the only one which can respect this queue size
        $limit = 0 if $limit < 0;
    }

    return $limit;
}
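
# Worked example (hypothetical numbers): with queue_size_for_reaper=2000,
# queue_rate_for_reaper unset (so $limit starts at 1000) and 1800 rows
# already queued, $space_left = 2000 - 1800 = 200, so only 200 fids are
# injected this wakeup; at 2000+ rows queued the limit clamps to 0 and
# reap_dev() simply reschedules itself.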

# we pass the $devid here (instead of a Device object) to avoid
# potential memory leaks since this sub reschedules itself to run
# forever. $delay is the current delay we were scheduled at
sub reap_dev {
    my ($self, $devid, $delay) = @_;

    # ensure the master DB is up, retry in REAP_INTERVAL if down
    unless ($self->validate_dbh) {
        $delay = REAP_INTERVAL;
        Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) });
        return;
    }

    my $limit = $self->reaper_inject_limit;

    # just in case a user mistakenly nuked a devid from the device table:
    my $dev = Mgd::device_factory()->get_by_id($devid);
    unless ($dev) {
        error("No device row for dev$devid, cannot reap");
        $delay = undef;
    }

    # limit == 0 if we hit the queue size limit, we'll just reschedule
    if ($limit && $dev) {
        my $sto = Mgd::get_store();
        my $lock = "mgfs:reaper";
        my $lock_timeout = $self->watchdog_timeout / 4;
        my @fids;

        if ($sto->get_lock($lock, $lock_timeout)) {
            @fids = $dev->fid_list(limit => $limit);
            if (@fids) {
                $self->still_alive;
                foreach my $fid (@fids) {
                    $self->reap_fid($fid, $dev);
                }
            }
            $sto->release_lock($lock);

            # if we've found any FIDs (perhaps even while backing off)
            # ensure we try to find more soon:
            if (@fids) {
                $delay = REAP_INTERVAL;
            } else {
                $delay = $self->reap_dev_backoff_delay($delay);
            }
        } else {
            # No lock after a long lock_timeout? Try again soon.
            # We should never get here under MySQL, and rarely for other DBs.
            debug("get_lock($lock, $lock_timeout) failed");
            $delay = REAP_INTERVAL;
        }
    }

    return unless defined $delay;

    # schedule another update, delay could be REAP_BACKOFF_MAX
    Danga::Socket->AddTimer($delay, sub { $self->reap_dev($devid, $delay) });
}
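
# Note: the "mgfs:reaper" lock is taken via the store, so (assuming the
# usual shared database) it serializes reapers across all tracker hosts;
# only one reaper at a time scans fid lists and injects into the
# replication queue. With the default watchdog_timeout of 240s the lock
# timeout works out to 60s, comfortably inside the watchdog window.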

# called when we're hopefully all done with a device, but reschedule
# into the future in case the replicator had an out-of-date cache and the
# "dead" device was actually writable.
sub reap_dev_backoff_delay {
    my ($self, $delay) = @_;

    return REAP_BACKOFF_MIN if ($delay < REAP_BACKOFF_MIN);

    $delay *= 2;
    return $delay > REAP_BACKOFF_MAX ? undef : $delay;
}
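
# Backoff schedule for an idle device: the initial 5s (REAP_INTERVAL)
# jumps to 60s, then doubles: 120, 240, 480, 960, 1920, 3840, 7680.
# Since 7680 exceeds REAP_BACKOFF_MAX (7200), the sub returns undef at
# that point and reap_dev stops rescheduling the device entirely,
# matching the "2 hours of idleness" comment on the constant above.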

# looks for dead devices
sub work {
    my $self = shift;

    # ensure we get monitor updates
    Danga::Socket->AddOtherFds($self->psock_fd, sub { $self->read_from_parent });

    my %devid_seen;
    my $reap_check;
    $reap_check = sub {
        # get db and note we're starting a run
        $self->parent_ping;
        debug("Reaper running; looking for dead devices");

        foreach my $dev (grep { $_->dstate->is_perm_dead }
                         Mgd::device_factory()->get_all)
        {
            next if $devid_seen{$dev->id};

            # delay the initial device reap in case any replicator cache
            # thinks the device is still alive
            Danga::Socket->AddTimer(DEVICE_SUMMARY_CACHE_TIMEOUT + 1, sub {
                $self->reap_dev($dev->id, REAP_INTERVAL);
            });

            # once we've seen a device, reap_dev will take over scheduling
            # reaping for the given device.
            $devid_seen{$dev->id} = 1;
        }

        Danga::Socket->AddTimer(REAP_INTERVAL, $reap_check);
    };

    # kick off the reaper and loop forever
    $reap_check->();
    Danga::Socket->EventLoop;
}
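
# Flow summary: $reap_check wakes every REAP_INTERVAL (5s) and scans all
# devices for ones marked permanently dead. Each newly seen dead device
# gets a one-shot timer at DEVICE_SUMMARY_CACHE_TIMEOUT + 1 seconds (in
# case a replicator's cache still believes the device is alive), after
# which reap_dev() reschedules itself per device using the backoff above.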

1;

# Local Variables:
# mode: perl
# c-basic-indent: 4
# indent-tabs-mode: nil
# End: