replace systemd timer with pvescheduler daemon

The whole thing is already prepared for this; the systemd timer was
just a fixed periodic timer with a frequency of one minute. We only
introduced it on the assumption that this approach would use less
memory, AFAIK.

But logging 4+ lines just to say that the timer was started, even if
it then does nothing, 24/7, is not that cheap and a bit annoying.

So, as a first step, add a simple daemon which forks off a child for
running jobs once a minute.
This could still be made a bit more intelligent, i.e., check whether
we have jobs to run before forking - as forking is not the cheapest
syscall. Further, we could adapt the sleep interval to the next time
we actually need to run a job (and send a SIGUSR to the daemon if a
job interval changes such that this interval gets narrower).
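
A minimal sketch of that adaptive variant (not part of this commit;
get_next_job_start is an assumed helper returning the epoch time of
the next configured job, not an existing PVE function) could look
like:

    my $next = get_next_job_start();    # assumed helper: epoch time of next due job, or undef
    my $wait = defined($next) ? $next - time() : 60;
    if ($wait > 0) {
        # a SIGUSR1 sent on schedule changes interrupts the sleep early
        local $SIG{USR1} = sub {};
        sleep($wait);
    }
    # only fork a job-runner child if something is actually due now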

We try to sync running on minute-change boundaries at start; this
emulates the systemd.timer behaviour we had until now. Also, users
can configure jobs with minute precision, so they probably expect
those to also start really close to a minute-change event.
This could be adapted to resync while running, to factor in time
drift. But, as long as enough CPU cycles are available, we run in
correct monotonic intervals, so this isn't a must, IMO.
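
For example, a daemon started at second 42 of a minute first sleeps
18 seconds and then continues in fixed 60-second steps; the
$get_sleep_time helper added below does essentially this:

    my ($current_seconds) = localtime;    # seconds into the current minute
    my $time = 60;
    # only align to the minute boundary if it is at least 5 seconds away
    $time = (60 - $current_seconds) if (60 - $current_seconds >= 5);
    sleep($time);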

Another improvement could be more fine-grained locking, i.e., not on
a per-all-local-job-runs basis, but on a per-job (per-guest?) basis,
which would reduce temporary starvation of small, high-frequency jobs
by big, less periodic jobs.
We argued that it's the user's fault if such situations arise, but
they can evolve over time without being noticed, especially in more
complex setups.
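
As a rough illustration only (the lock path and helper here are
hypothetical, not part of this commit), per-job locking could boil
down to one non-blocking flock per job id instead of a single global
lock:

    use Fcntl qw(:flock);

    # hypothetical: one lock file per job id, so a long-running job only
    # blocks itself and not every other local job run
    sub try_lock_job {
        my ($jobid) = @_;
        my $path = "/var/lock/pvescheduler-$jobid.lck";    # assumed path, illustration only
        open(my $fh, '>>', $path) or die "cannot open $path: $!\n";
        if (!flock($fh, LOCK_EX | LOCK_NB)) {
            close($fh);
            return undef;    # this job is still running, skip it without blocking others
        }
        return $fh;    # caller keeps the handle open for the duration of the job
    }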

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
Thomas Lamprecht 2021-11-08 14:07:53 +01:00
parent 04aae00d44
commit 6385fb8183
9 changed files with 171 additions and 25 deletions


@ -1,6 +1,6 @@
include ../../defines.mk
SOURCES=pvestatd.pm pveproxy.pm pvedaemon.pm spiceproxy.pm
SOURCES=pvestatd.pm pveproxy.pm pvedaemon.pm spiceproxy.pm pvescheduler.pm
all:

PVE/Service/pvescheduler.pm (new executable file, 118 lines)

@ -0,0 +1,118 @@
package PVE::Service::pvescheduler;

use strict;
use warnings;

use POSIX qw(WNOHANG);
use PVE::SafeSyslog;
use PVE::API2::Replication;

use PVE::Daemon;
use base qw(PVE::Daemon);

my $cmdline = [$0, @ARGV];
my %daemon_options = (stop_wait_time => 180, max_workers => 0);
my $daemon = __PACKAGE__->new('pvescheduler', $cmdline, %daemon_options);

my $finish_jobs = sub {
    my ($self) = @_;
    foreach my $cpid (keys %{$self->{jobs}}) {
        my $waitpid = waitpid($cpid, WNOHANG);
        if (defined($waitpid) && ($waitpid == $cpid)) {
            delete ($self->{jobs}->{$cpid});
        }
    }
};

my $get_sleep_time = sub {
    my ($calculate_offset) = @_;
    my $time = 60;

    if ($calculate_offset) {
        # try to run near minute boundaries, makes more sense to the user as he
        # configures jobs with minute precision
        my ($current_seconds) = localtime;
        $time = (60 - $current_seconds) if (60 - $current_seconds >= 5);
    }

    return $time;
};

sub run {
    my ($self) = @_;

    my $jobs = {};
    $self->{jobs} = $jobs;

    my $old_sig_chld = $SIG{CHLD};
    local $SIG{CHLD} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        $finish_jobs->($self);
        $old_sig_chld->(@_) if $old_sig_chld;
    };

    my $run_jobs = sub {
        my $child = fork();
        if (!defined($child)) {
            die "fork failed: $!\n";
        } elsif ($child == 0) {
            $self->after_fork_cleanup();
            PVE::API2::Replication::run_jobs(undef, sub {}, 0, 1);
            POSIX::_exit(0);
        }

        $jobs->{$child} = 1;
    };

    PVE::Jobs::setup_dirs();

    for (my $count = 1000;;$count++) {
        last if $self->{shutdown_request};

        $run_jobs->();

        my $sleep_time;
        if ($count >= 1000) {
            $sleep_time = $get_sleep_time->(1);
            $count = 0;
        } else {
            $sleep_time = $get_sleep_time->(0);
        }

        my $slept = 0; # SIGCHLD interrupts sleep, so we need to keep track
        while ($slept < $sleep_time) {
            last if $self->{shutdown_request};
            $slept += sleep($sleep_time - $slept);
        }
    }

    # jobs have a lock timeout of 60s, wait a bit more for graceful termination
    my $timeout = 0;
    while (keys %$jobs > 0 && $timeout < 75) {
        kill 'TERM', keys %$jobs;
        $timeout += sleep(5);
    }
    # ensure the rest gets stopped
    kill 'KILL', keys %$jobs if (keys %$jobs > 0);
}

sub shutdown {
    my ($self) = @_;

    syslog('info', 'got shutdown request, signal running jobs to stop');

    kill 'TERM', keys %{$self->{jobs}};
    $self->{shutdown_request} = 1;
}

$daemon->register_start_command();
$daemon->register_stop_command();
$daemon->register_status_command();

our $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n";} ],
};

1;


@ -6,7 +6,7 @@ export NOVIEW=1
PERL_DOC_INC_DIRS=..
include /usr/share/pve-doc-generator/pve-doc-generator.mk
SERVICES = pvestatd pveproxy pvedaemon spiceproxy
SERVICES = pvestatd pveproxy pvedaemon spiceproxy pvescheduler
CLITOOLS = vzdump pvesubscription pveceph pveam pvesr pvenode pvesh pve6to7
SCRIPTS = \
@ -52,6 +52,10 @@ pve6to7.1:
printf ".TH PVE6TO7 1\n.SH NAME\npve6to7 \- Proxmox VE upgrade checker script for 6.4 to 7.x\n" > $@
printf ".SH SYNOPSIS\npve6to7 [--full]\n" >> $@
pvescheduler.8:
# FIXME: add to doc-generator
echo ".TH pvescheduler 8" > $@
pveversion.1.pod: pveversion
pveupgrade.1.pod: pveupgrade
pvereport.1.pod: pvereport

bin/pvescheduler (new executable file, 28 lines)

@ -0,0 +1,28 @@
#!/usr/bin/perl

use strict;
use warnings;

use PVE::Service::pvescheduler;

use PVE::RPCEnvironment;
use PVE::SafeSyslog;

$SIG{'__WARN__'} = sub {
    my $err = $@;
    my $t = $_[0];
    chomp $t;
    print STDERR "$t\n";
    syslog('warning', "%s", $t);
    $@ = $err;
};

my $prepare = sub {
    my $rpcenv = PVE::RPCEnvironment->init('priv');

    $rpcenv->init_request();
    $rpcenv->set_language($ENV{LANG});
    $rpcenv->set_user('root@pam');
};

PVE::Service::pvescheduler->run_cli_handler(prepare => $prepare);

debian/postinst (3 changed lines)

@ -79,6 +79,7 @@ case "$1" in
deb-systemd-invoke reload-or-try-restart pvestatd.service
deb-systemd-invoke reload-or-try-restart pveproxy.service
deb-systemd-invoke reload-or-try-restart spiceproxy.service
deb-systemd-invoke reload-or-try-restart pvescheduler.service
exit 0;;
@ -102,7 +103,7 @@ case "$1" in
# same as dh_systemd_enable (code copied)
UNITS="pvedaemon.service pveproxy.service spiceproxy.service pvestatd.service pvebanner.service pvesr.timer pve-daily-update.timer"
UNITS="pvedaemon.service pveproxy.service spiceproxy.service pvestatd.service pvebanner.service pvescheduler.service pve-daily-update.timer"
NO_RESTART_UNITS="pvenetcommit.service pve-guests.service"
for unit in ${UNITS} ${NO_RESTART_UNITS}; do


@ -13,8 +13,7 @@ SERVICES= \
pve-storage.target \
pve-daily-update.service\
pve-daily-update.timer \
pvesr.service \
pvesr.timer
pvescheduler.service
.PHONY: install
install: ${SERVICES}


@ -0,0 +1,16 @@
[Unit]
Description=Proxmox VE scheduler
ConditionPathExists=/usr/bin/pvescheduler
Wants=pve-cluster.service
After=pve-cluster.service
After=pve-storage.target

[Service]
ExecStart=/usr/bin/pvescheduler start
ExecStop=/usr/bin/pvescheduler stop
PIDFile=/var/run/pvescheduler.pid
KillMode=process
Type=forking

[Install]
WantedBy=multi-user.target


@ -1,8 +0,0 @@
[Unit]
Description=Proxmox VE replication runner
ConditionPathExists=/usr/bin/pvesr
After=pve-cluster.service

[Service]
Type=oneshot
ExecStart=/usr/bin/pvesr run --mail 1


@ -1,12 +0,0 @@
[Unit]
Description=Proxmox VE replication runner

[Timer]
AccuracySec=1
RemainAfterElapse=no

[Timer]
OnCalendar=minutely

[Install]
WantedBy=timers.target