File Coverage

lib/Ubic/Watchdog.pm
Criterion Covered Total %
statement 27 116 23.2
branch 0 36 0.0
condition n/a
subroutine 9 18 50.0
pod 5 5 100.0
total 41 175 23.4


line stmt bran cond sub pod time code
1             package Ubic::Watchdog;
2             $Ubic::Watchdog::VERSION = '1.60';
3 5     5   3745 use strict;
  5         13  
  5         159  
4 5     5   66 use warnings;
  5         5  
  5         228  
5              
6             # ABSTRACT: watchdog code
7              
8              
9 5     5   30 use POSIX;
  5         6  
  5         35  
10 5     5   8007 use IO::Handle;
  5         6  
  5         191  
11 5     5   20 use Params::Validate qw(:all);
  5         6  
  5         695  
12 5     5   26 use Try::Tiny;
  5         6  
  5         294  
13 5     5   19 use List::MoreUtils qw(any);
  5         6  
  5         47  
14 5     5   1763 use Ubic;
  5         5  
  5         84  
15              
16 5     5   1141 use Ubic::Logger;
  5         62  
  5         5416  
17              
18             sub run {
19 0     0 1   my $class = shift;
20 0           my $options = validate(@_, {
21             glob_filter => { type => ARRAYREF, default => [] },
22             compile_timeout => { type => SCALAR, regex => qr/^\d+$/ },
23             verbose => { type => SCALAR|UNDEF },
24             });
25              
26 0           my @filter;
27             {
28 0           for my $arg (@{ $options->{glob_filter} }) {
  0            
  0            
29 0 0         $arg =~ /^[*\w.-]+$/ or die "Invalid argument '$arg', expected service name or shell-style glob";
30 0           $arg =~ s/\./\\./g;
31 0           $arg =~ s/\*/.*/g;
32 0           push @filter, qr/^$arg$/;
33             }
34             }
35 0 0         $options->{filter} = \@filter if @filter;
36 0           delete $options->{glob_filter};
37              
38 0           my $self = bless $options => $class;
39              
40 0           my @services = $self->load_services(Ubic->root_service);
41 0           $self->check_all(@services);
42             }
43              
44             sub match($$) {
45 0     0 1   my ($name, $filter) = @_;
46 0           do {
47 0 0         return 1 if $name =~ $filter;
48             } while ($name =~ s/\.[^.]+$//);
49 0           return;
50             }
51              
52             sub load_services {
53 0     0 1   my $self = shift;
54 0           my ($parent) = @_;
55 0           alarm($self->{compile_timeout});
56             $SIG{ALRM} = sub {
57 0     0     die "Couldn't compile $parent services in $self->{compile_timeout} seconds";
58 0           };
59 0           my @services = $parent->services;
60 0           alarm(0);
61 0           return @services;
62             }
63              
64             sub check_all {
65 0     0 1   my $self = shift;
66 0           my @services = @_;
67 0           for my $service (@services) {
68 0           my $name = $service->full_name;
69 0 0         if ($service->isa('Ubic::Multiservice')) {
70 0 0         INFO("$name is multiservice, checking subservices") if $self->{verbose};
71 0           $self->check_all($self->load_services($service));
72 0           next;
73             }
74 0 0         if ($self->{filter}) {
75 0 0   0     next unless any { match($name, $_) } @{ $self->{filter} };
  0            
  0            
76             }
77              
78             # trying to get logs a little bit more ordered
79 0           STDOUT->flush;
80 0           STDERR->flush;
81              
82 0           my $child = fork;
83 0 0         unless (defined $child) {
84 0           die "fork failed";
85             }
86 0 0         unless ($child) {
87 0           POSIX::setsid; # so we could kill this watchdog and its children safely later
88 0           $self->check($service);
89 0           exit;
90             }
91             }
92 0           1 while wait() > 0;
93 0           return;
94             }
95              
96             sub check($) {
97 0     0 1   my $self = shift;
98 0           my $service = shift;
99 0           my $name = $service->full_name;
100 0 0         if ($self->{verbose}) {
101 0           INFO("Checking $name");
102             }
103 0           $0 = "ubic-watchdog $name";
104              
105             try {
106 0     0     alarm($service->check_timeout);
107              
108             # TODO - do additional fork, so that if service code overrides SIG{ALRM} or resets alarm(), watchdog still will finish in time
109             $SIG{ALRM} = sub {
110 0           ERROR("$name check_timeout exceeded");
111 0           STDOUT->flush;
112 0           STDERR->flush;
113 0           kill -9 => $$; # suicide
114 0           ERROR "kill sent, still alive"; # should never happen, we called setsid earlier
115 0           };
116              
117             # permanently use service credentials
118             # this line optimizes the number of fork calls - future status/restart calls would perform forked_call() otherwise
119 0           Ubic::Credentials->new( service => $service )->set;
120              
121             # so we don't need access guard for this lock
122 0           my $watchdog_lock = Ubic::SingletonLock->new(Ubic->get_data_dir()."/watchdog/lock/".$name, { blocking => 0 });
123              
124 0 0         unless ($watchdog_lock) {
125 0 0         if ($self->{verbose}) {
126 0           INFO "$name is locked by another watchdog process";
127             }
128 0           return;
129             }
130              
131 0           my $lock = Ubic->lock($name);
132 0 0         unless (Ubic->is_enabled($name)) {
133 0 0         INFO("$name disabled") if $self->{verbose};
134 0           return;
135             }
136              
137 0           my $cached_status = Ubic->cached_status($name);
138 0           my $status = Ubic->status($name);
139 0 0         unless ($status->status eq 'running') {
140             # following code can throw an exception, so we want to cache invalid status immediately
141 0           Ubic->set_cached_status($name, $status);
142              
143 0 0         if ($cached_status eq "autostarting") {
144 0           INFO("$name is autostarting");
145             }
146             else {
147 0           ERROR("$name status is '$status', restarting");
148             }
149              
150 0           Ubic->restart($name);
151              
152             # This is a precaution against services with wrong start/status logic.
153 0           $status = Ubic->status($name);
154 0 0         if ($status->status ne 'running') {
155 0           INFO("$name started, but status is still '$status'");
156             }
157             }
158              
159 0           alarm(0);
160 0           Ubic->set_cached_status($name, $status); # if service's start implementation is invalid, ubic-watchdog will restart it every minute, so be careful
161             }
162             catch {
163 0     0     ERROR("Failed to revive $name: $_");
164 0           };
165              
166 0 0         INFO("$name checked") if $self->{verbose};
167             }
168              
169              
170             1;
171              
172             __END__