File Coverage

blib/lib/Sys/FreezeThaw.pm
Criterion Covered Total %
statement 3 77 3.9
branch 0 26 0.0
condition 0 6 0.0
subroutine 1 14 7.1
pod 3 6 50.0
total 7 129 5.4


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Sys::FreezeThaw - stop and start all user processes on a machine
4              
5             =head1 SYNOPSIS
6              
7             use Sys::FreezeThaw;
8              
9             Sys::FreezeThaw::freezethaw {
10             # run code while system is frozen
11             };
12              
13             my $token = Sys::FreezeThaw::freeze;
14             ... do something ...
15             Sys::FreezeThaw::thaw $token;
16            
17             =head1 DESCRIPTION
18              
19             Operating Systems/Kernels current supported: Linux-2.6/3.0 with F.
20              
21             This module implements a very specific feature: stopping(freezing and
22             thawing/continuing all userspace processes on the machine. It works by
23             sending SIGSTOP to all processes, parent-process first, so that the wait
24             syscall will not trigger on stopped children. Restarting is done in
25             reverse order.
26              
27             Using the combined function Sys::FreezeThaw::freezethaw is recommended as
28             it will catch runtime errors, but stopping and restarting can be dine via
29             separate function calls.
30              
31             =head2 What could it possibly be sueful for??
32              
33             Possible uses include: doing atomic file system operations (such as
34             replacing files while they are guaranteed not to be in use), or quieting
35             down a system to investigate suspicious behaviour.
36              
37             =over 4
38              
39             =cut
40              
41             package Sys::FreezeThaw;
42              
43 1     1   852 use Carp;
  1         2  
  1         1199  
44              
45             $VERSION = '0.02';
46             $PARTIAL_OK = 0;
47              
48             =item Sys::FreezeThaw::freezethaw { BLOCK }
49              
50             First tries to stop all processes. If successful, runs the given code block
51             (or code reference), then restarts all processes again. As the system is
52             basically frozen during the code block execution, it should be as fast as
53             possible.
54              
55             Runtime errors will be caught with C. If an exception occurs it will
56             be re-thrown after processes are restarted. If processes cannot be frozen
57             or restarted, this function will throw an exception.
58              
59             Signal handlers for SIGINT, SIGTERM, SIGPIPE, SIGHUP, SIGALRM, SIGUSR1 and
60             SIGUSR2 will be installed temporarily, so if you want to catch these, you
61             have to do so yourself within the executed code block.
62              
63             Try to do as few things as possible. For example, outputting text might
64             cause a deadlock, as the terminal emulator on the other side of STDOUT
65             might be stopped, logging to syslog might not work and so on.
66              
67             The return value of the code block is ignored right now, and the function
68             doesn't yet return anything sensible.
69              
70             =item $token = Sys::FreezeThaw::freeze
71              
72             Send SIGSTOP to all processes, and return a token that allows them to be
73             thawed again.
74              
75             If an error occurs, an exception will be thrown and all stopped processes
76             will automatically be thawed.
77              
78             =item Sys::FreezeThaw::thaw $token
79              
80             Take a token returned by Sys::FreezeThaw::freeze and send all processes
81             a C signal, in the order required for them not to receive child STOP
82             notifications.
83              
84             =item $Sys::FreezeThaw::PARTIAL_OK
85              
86             A boolean that tells C whether it is an error if a process cannot
87             be stopped. If false (the default), then C will fail if there is
88             an unstoppable process. If it is true, then C will pretend it the
89             process stopped.
90              
91             =cut
92              
93             # this is laughably broken, but...
94             sub yield {
95 0     0 0   select undef, undef, undef, 1/1000;
96             }
97              
98             # the maximum number of iterations per stop/cont etc. loop
99             # used to shield against catastrophic events (or bugs :)
100             # on current linux systems it can take an enourmous amount of
101             # time for some processes to stop, but usually it only takes
102             # one or two iterations.
103             sub MAX_WAIT() { 10 }
104              
105             # return a list o fall pid's in the system,
106             # topologically sorted parent-first
107             # skips, keys %$exclude_pid, zombies and stopped processes
108             sub enum_pids($) {
109 0     0 0   my ($exclude_pid) = @_;
110              
111 0 0         opendir my $proc, "/proc"
112             or die "/proc: $!";
113 0           my @pid = sort { $b <=> $a }
  0            
114             grep /^\d+/,
115             readdir $proc;
116 0           closedir $proc;
117              
118 0           my %ppid;
119 0           for (@pid) {
120 0 0         next if exists $exclude_pid->{$_};
121              
122 0 0         open my $stat, "<", "/proc/$_/stat"
123             or next;
124 0           my ($state, $ppid, $vsize, $rss) = (split /\s+/, scalar <$stat>)[2,3,22,23];
125              
126 0 0         next if $state =~ /^[TZX]/i; # stopped, zombies, dead
127 0 0 0       next unless $vsize || $rss; # skip kernel threads or other nasties
128              
129 0           $ppid{$_} = $ppid;
130             }
131              
132             # now topologically sort by parent-id
133 0           my @res;
134 0           while (scalar %ppid) {
135 0           my @pass;
136              
137 0           for my $pid (keys %ppid) {
138 0 0         if (!exists $ppid{$ppid{$pid}}) {
139 0           push @pass, $pid;
140             }
141             }
142              
143 0           delete $ppid{$_} for @pass;
144              
145 0           push @res, \@pass;
146             }
147              
148             \@res
149 0           }
150              
151             sub process_stopped($) {
152 0 0   0 0   open my $stat, "
153             or return 1;
154              
155 0           return +(split /\s+/, <$stat>)[2] =~ /^[TZX]/i;
156             }
157              
158             sub thaw($) {
159 0     0 1   local $@;
160              
161 0           my $token = shift;
162              
163 0           for (reverse @$token) {
164 0           my @pids = @$_;
165 0           kill CONT => @pids;
166              
167             # now wait till processes actually run again before the next round
168 0           for (1..MAX_WAIT) {
169 0           @pids = grep process_stopped $_, @pids;
170 0 0         last unless @pids;
171              
172 0           yield;
173             }
174             }
175             }
176              
177             sub freeze(;$) {
178 0     0 1   local $@;
179              
180 0           my $procs;
181              
182 0           eval {
183 0           for (1..MAX_WAIT) {
184 0           my $passes = enum_pids { 1 => 1, $$ => 1 };
185 0 0         last unless @$passes;
186              
187 0           for (@$passes) {
188 0           my @pids = @$_;
189 0           push @procs, $_;
190 0           kill STOP => @pids;
191              
192 0           for (1..MAX_WAIT) {
193 0           @pids = grep !process_stopped $_, @pids;
194 0 0         last unless @pids;
195              
196             # wait till processes are really stopped
197 0           yield;
198             }
199              
200 0 0 0       die "unable to stop some processes: @pids" if @pids && !$PARTIAL_OK;
201             }
202             }
203             };
204              
205 0 0         if ($@) {
206 0           thaw \@procs;
207 0           die $@;
208             }
209              
210             \@procs
211 0           }
212              
213             sub freezethaw(&) {
214 0     0 1   my ($code) = @_;
215              
216 0           my $token = freeze;
217              
218 0           eval {
219 0     0     local $SIG{HUP} = sub { die "ERROR: caught SIGHUP while system frozen" };
  0            
220 0     0     local $SIG{INT} = sub { die "ERROR: caught SIGINT while system frozen" };
  0            
221 0     0     local $SIG{TERM} = sub { die "ERROR: caught SIGTERM while system frozen" };
  0            
222 0     0     local $SIG{PIPE} = sub { die "ERROR: caught SIGPIPE while system frozen" };
  0            
223 0     0     local $SIG{ALRM} = sub { die "ERROR: caught SIGALRM while system frozen" };
  0            
224 0     0     local $SIG{USR1} = sub { die "ERROR: caught SIGUSR1 while system frozen" };
  0            
225 0     0     local $SIG{USR2} = sub { die "ERROR: caught SIGUSR2 while system frozen" };
  0            
226              
227 0           $code->();
228             };
229              
230 0           thaw $token;
231              
232 0 0         die $@ if $@;
233              
234             ()
235 0           }
236              
237             1;
238              
239             =back
240              
241             =head1 BUGS
242              
243             SIGCONT is not unnoticed by processes. Some programs (such as irssi-text)
244             respond by flickering (IMHO a bug in irssi-text). Other programs might
245             have other problems, but actual problems should be rare. However, one
246             shouldn't overuse this module.
247              
248             =head1 AUTHOR
249              
250             Marc Lehmann
251             http://home.schmorp.de/
252              
253             =cut
254