mirror of
https://git.proxmox.com/git/pve-manager
synced 2025-08-15 09:22:03 +00:00
pvescheduler: fix potential stall on full shutdown
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
parent
8f3b5bbb91
commit
9c1943935c
@ -19,6 +19,12 @@ my $daemon = __PACKAGE__->new('pvescheduler', $cmdline, %daemon_options);
|
|||||||
|
|
||||||
my @JOB_TYPES = qw(replication jobs);
|
my @JOB_TYPES = qw(replication jobs);
|
||||||
|
|
||||||
|
# Gather the PIDs of every job worker currently tracked in $self->{jobs}.
#
# $self->{jobs} is walked as a two-level hash: the outer values are per-job-type
# hashes, and the keys of each inner hash are taken as worker PIDs (the callers
# pass the result straight to kill()).
#
# Returns an array reference holding all PIDs, or undef if none is tracked, so
# the result can be used directly as a loop or branch condition.
my sub running_job_pids : prototype($) {
    my ($self) = @_;

    my @worker_pids;
    for my $workers_of_type (values $self->{jobs}->%*) {
        push @worker_pids, keys $workers_of_type->%*;
    }

    # explicit undef (not an empty array ref, which would be true) signals "nothing left"
    return undef if !@worker_pids;
    return \@worker_pids;
}
|
||||||
|
|
||||||
my $finish_jobs = sub {
|
my $finish_jobs = sub {
|
||||||
my ($self) = @_;
|
my ($self) = @_;
|
||||||
for my $type (@JOB_TYPES) {
|
for my $type (@JOB_TYPES) {
|
||||||
@ -134,21 +140,19 @@ sub run {
|
|||||||
|
|
||||||
# NOTE: we only get here on shutdown_request, so we already sent a TERM to all job-types
|
# NOTE: we only get here on shutdown_request, so we already sent a TERM to all job-types
|
||||||
my $timeout = 0;
|
my $timeout = 0;
|
||||||
while (scalar(keys $jobs->%*)) {
|
while(my $pids = running_job_pids($self)) {
|
||||||
for my $type (keys $jobs->%*) {
|
kill 'TERM', $pids->@*; # send TERM to all workers at once, possible thundering herd - FIXME?
|
||||||
next if !scalar(keys $jobs->{$type}->%*);
|
|
||||||
kill 'TERM', keys $jobs->{$type}->%*;
|
$finish_jobs->($self);
|
||||||
}
|
|
||||||
$finish_jobs->($self); # doesn't hurt
|
|
||||||
# some jobs have a lock timeout of 60s, wait a bit more for graceful termination
|
# some jobs have a lock timeout of 60s, wait a bit more for graceful termination
|
||||||
last if $timeout > 75;
|
last if $timeout > 75;
|
||||||
$timeout += sleep(3);
|
$timeout += sleep(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
for my $type (keys $jobs->%*) { # ensure the rest gets stopped
|
if (my $pids = running_job_pids($self)) {
|
||||||
my @pids = keys $jobs->{$type}->%*;
|
syslog('warn', "unresponsive job-worker, killing now: " . join(', ', $pids->@*));
|
||||||
syslog('warn', "unresponsive job-worker, killing now: " . join(', ', @pids));
|
kill 'KILL', $pids->@*;
|
||||||
kill 'KILL', @pids;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user