mirror of
https://git.proxmox.com/git/pve-manager
synced 2025-06-05 04:01:20 +00:00
577 lines
15 KiB
Perl
Executable File
577 lines
15 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
|
|
# Resource Agent for managing PVE VMs (openvz and qemu-kvm)
|
|
#
|
|
# License: GNU Affero General Public License (AGPL3)
|
|
# Copyright (C) 2011 Proxmox Server Solutions GmbH
|
|
|
|
use strict;
|
|
use File::Basename;
|
|
use File::Copy;
|
|
use PVE::Tools;
|
|
use PVE::ProcFSTools;
|
|
use PVE::Cluster;
|
|
use PVE::INotify;
|
|
use PVE::RPCEnvironment;
|
|
use PVE::OpenVZ;
|
|
use PVE::API2::OpenVZ;
|
|
use PVE::QemuServer;
|
|
use PVE::API2::Qemu;
|
|
|
|
use constant OCF_SUCCESS => 0;
|
|
use constant OCF_ERR_GENERIC => 1;
|
|
use constant OCF_ERR_ARGS => 2;
|
|
use constant OCF_ERR_UNIMPLEMENTED => 3;
|
|
use constant OCF_ERR_PERM => 4;
|
|
use constant OCF_ERR_INSTALLED => 5;
|
|
use constant OCF_ERR_CONFIGURED => 6;
|
|
use constant OCF_NOT_RUNNING => 7;
|
|
use constant OCF_RUNNING_MASTER => 8;
|
|
use constant OCF_FAILED_MASTER => 9;
|
|
|
|
# Run every external command with a fixed search path instead of whatever
# the caller's environment provides.
$ENV{PATH} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Value passed to clulog's -m option by ocf_log().
my $ocf_ressource_type = 'pvevm';

# Maps the log level names accepted by ocf_log() to the numeric value
# passed to clulog's -s option.
my $prio_hash = {
    'err'   => 3,
    'note'  => 5,
    'info'  => 6,
    'debug' => 7,
};
|
|
|
|
# Global die handler: turns every uncaught exception into a logged error and
# terminates the agent with OCF_ERR_GENERIC.
#
# Exceptions raised inside an eval block ($^S is true) are re-thrown
# unchanged so that the surrounding eval can handle them.
$SIG{__DIE__} = sub {
    die @_ if $^S; # skip if inside eval

    # Preset errno as before; the explicit exit below no longer relies on it.
    $! = OCF_ERR_GENERIC;

    ocf_log('err', @_);

    # ocf_log() prints to STDOUT and runs clulog through
    # PVE::Tools::run_command; either may overwrite errno. Exit with the
    # constant itself rather than with whatever $! holds afterwards.
    exit(OCF_ERR_GENERIC);
};
|
|
|
|
# Only root may manage VMs ($> is the effective user id).
unless ($> == 0) {
    print STDERR "Cannot control VMs. as non-root user.\n";
    exit(OCF_ERR_PERM);
}

PVE::INotify::inotify_init();

# Set up an RPC environment of type 'ha' and act as root@pam for all
# subsequent PVE API calls.
my $rpcenv = PVE::RPCEnvironment->init('ha');
$rpcenv->init_request();
$rpcenv->set_language($ENV{LANG});
$rpcenv->set_user('root@pam');

# Name of the node this agent is running on.
my $nodename = PVE::INotify::nodename();

# ssh invocation in batch mode (no interactive prompts).
my @ssh_opts = qw(-o BatchMode=yes);
my @ssh_cmd = ('ssh', @ssh_opts);
|
|
|
|
# Log a message on STDOUT and through clulog.
#
# $level - one of the keys of $prio_hash (err, note, info, debug); any
#          other value is logged with the priority of 'note'
# $msg   - message text; a trailing newline is removed
#
# Failures of the clulog command are ignored on purpose.
sub ocf_log {
    my ($level, $msg) = @_;

    chomp $msg;
    print "$level: $msg\n";

    my $priority = defined($prio_hash->{$level})
        ? $prio_hash->{$level}
        : $prio_hash->{note};

    # ignore errors
    eval {
        PVE::Tools::run_command(
            ['clulog', '-m', $ocf_ressource_type, '-s', $priority, $msg]);
    };
}
|
|
|
|
# Return the timeout for the current operation.
#
# OCF_RESKEY_RGMANAGER_meta_timeout takes precedence over
# OCF_RESKEY_CRM_meta_timeout; a variable that is unset, empty or "0" is
# skipped. If neither is usable, or the chosen value is not greater than
# zero, the built-in default of 60 is returned.
sub get_timeout {
    my $fallback = 60;

    my $requested = $ENV{OCF_RESKEY_RGMANAGER_meta_timeout}
        || $ENV{OCF_RESKEY_CRM_meta_timeout}
        || $fallback;

    return ($requested <= 0) ? $fallback : $requested;
}
|
|
|
|
# Refresh $status->{running} for the guest described by $status.
#
# $status  - hash reference with at least the keys 'type' ('qemu' or
#            'openvz') and 'vmid'; the result is stored in its 'running' key
# $verbose - accepted but not used
#
# Dies if the type is neither 'qemu' nor 'openvz'.
sub check_running {
    my ($status, $verbose) = @_;

    my $type = $status->{type};

    if ($type eq 'qemu') {
        return $status->{running} =
            PVE::QemuServer::check_running($status->{vmid}, 1);
    }

    if ($type eq 'openvz') {
        return $status->{running} =
            PVE::OpenVZ::check_running($status->{vmid});
    }

    die "got strange VM type '$type'\n";
}
|
|
|
|
# Validate the resource parameters and collect the state of the guest.
#
# Reads the VMID from $ENV{OCF_RESKEY_vmid}, looks it up in the cluster wide
# VM list and returns a hash reference with the keys:
#   vmid, type, node - taken from the environment / cluster VM list
#   name             - "VM <vmid>" for qemu guests, "CT <vmid>" otherwise
#   running          - filled in by check_running()
#
# Any failure is logged and terminates the process with OCF_ERR_ARGS.
sub validate_all {
    my $status = {};

    eval {
        my $vmid = $ENV{OCF_RESKEY_vmid};
        defined($vmid) or die "no VMID specified\n";
        $vmid =~ m/^[1-9]\d*$/ or die "got invalid VMID '$vmid'\n";

        my $vmlist = PVE::Cluster::get_vmlist();
        ($vmlist && $vmlist->{ids}) or die "got empty cluster VM list\n";

        my $entry = $vmlist->{ids}->{$vmid};
        $entry or die "VM $vmid does not exist\n";

        @$status{qw(vmid type node)} = ($vmid, $entry->{type}, $entry->{node});

        my $prefix = ($status->{type} eq 'qemu') ? 'VM' : 'CT';
        $status->{name} = "$prefix $vmid";

        check_running($status);
    };
    if (my $failure = $@) {
        ocf_log('err', $failure);
        exit(OCF_ERR_ARGS);
    }

    return $status;
}
|
|
|
|
# Block until the worker task identified by $upid has finished.
#
# The UPID is decoded into a task record; the task's process (pid plus
# start time) is then polled once per second. There is no upper bound on
# the waiting time in this function.
sub upid_wait {
    my ($upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    while (1) {
        sleep(1);

        last if !PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart});

        ocf_log('debug', "Task still active, waiting");
    }
}
|
|
|
|
# Copy the per-container script files next to the new config file.
#
# For every extension in PVE::OpenVZ::SCRIPT_EXT the file "<vmid>.<ext>" is
# copied from the directory of $oldconfig to the directory of $newconfig,
# if it exists as a plain file.
#
# Returns two array references: (copies created, originals they came from).
# If a copy fails, all copies created so far are removed and the error is
# re-thrown.
sub copy_scripts {
    my ($vmid, $oldconfig, $newconfig) = @_;

    my $srcdir = dirname($oldconfig);
    my $dstdir = dirname($newconfig);

    my (@copies, @sources);

    eval {
        for my $ext (PVE::OpenVZ::SCRIPT_EXT) {
            my $basename = "${vmid}.$ext";

            my $src = "$srcdir/$basename";
            next if !-f $src;

            my $dst = "$dstdir/$basename";

            # Record the target before copying so that a partially
            # written file is cleaned up as well.
            push @sources, $src;
            push @copies, $dst;

            copy($src, $dst) or die "copy '$src' to '$dst' failed - $!\n";
        }
    };
    if (my $failure = $@) {
        unlink($_) for @copies;
        die $failure;
    }

    return (\@copies, \@sources);
}
|
|
|
|
# Command line: <action> [<target node>]
#
# The first argument selects the action; only the 'migrate' action takes a
# second argument (the migration target). Anything left over is an error.
my $cmd = shift || '';

# Declare unconditionally and assign afterwards: a "my" combined with a
# statement modifier ("my $x = ... if COND") has undefined behaviour.
my $migratetarget;
$migratetarget = shift if $cmd eq 'migrate';

die "too many arguments\n" if scalar(@ARGV) != 0;
|
|
|
|
# Dispatch on the requested action. Branches either exit with an OCF status
# code themselves or fall through to the final exit(OCF_SUCCESS).
if ($cmd eq 'start') {
    my $status = validate_all();
    if ($status->{running}) {
        ocf_log('info', "$status->{name} is already running");
        exit(OCF_SUCCESS);
    }

    # The guest is assigned to another node: move its config file (and, for
    # containers, the accompanying script files) to this node first.
    if ($status->{node} ne $nodename) {
        ocf_log('info', "Move config for $status->{name} to local node");
        my ($oldconfig, $newconfig, $oldfiles, $newfiles);
        if ($status->{type} eq 'qemu') {
            $oldconfig = PVE::QemuServer::config_file($status->{vmid}, $status->{node});
            $newconfig = PVE::QemuServer::config_file($status->{vmid}, $nodename);
        } else {
            $oldconfig = PVE::OpenVZ::config_file($status->{vmid}, $status->{node});
            $newconfig = PVE::OpenVZ::config_file($status->{vmid}, $nodename);

            eval { ($newfiles, $oldfiles) = copy_scripts($status->{vmid}, $oldconfig, $newconfig); };
            if (my $err = $@) {
                ocf_log('err', "unable to move config scripts: $err");
                exit(OCF_ERR_GENERIC);
            }
        }

        if (!rename($oldconfig, $newconfig)) {
            ocf_log('err', "unable to move config file from '$oldconfig' to '$newconfig' - $!");
            # roll back the script copies made above
            if ($newfiles) {
                foreach my $fn (@$newfiles) {
                    unlink($fn);
                }
            }
            exit(OCF_ERR_GENERIC);
        }

        # the config was moved, so the original script files are obsolete
        if ($oldfiles) {
            foreach my $fn (@$oldfiles) {
                unlink($fn);
            }
        }
    }

    my $upid;

    if ($status->{type} eq 'qemu') {
        $upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $status->{vmid}});
    } else {
        $upid = PVE::API2::OpenVZ->vm_start({node => $nodename, vmid => $status->{vmid}});
    }

    upid_wait($upid);

    check_running($status);

    exit(OCF_ERR_GENERIC) if !$status->{running};

    # Optionally wait until the user supplied status program succeeds,
    # retrying every 3 seconds until the operation timeout expires.
    if (my $testprog = $ENV{OCF_RESKEY_status_program}) {

        my $timeout = get_timeout();

        my $wait_func = sub {
            while (system($testprog) != 0) { sleep(3); }
        };

        eval { PVE::Tools::run_with_timeout($timeout, $wait_func); };
        if (my $err = $@) {
            ocf_log('err', "Start of $status->{name} has failed");
            ocf_log('err', "error while waiting for '$testprog' - $err");
            exit(OCF_ERR_GENERIC);
        }
    }

    exit(OCF_SUCCESS);

} elsif ($cmd eq 'stop') {
    my $status = validate_all();

    if (!$status->{running}) {
        ocf_log('info', "$status->{name} is already stopped");
        exit(OCF_SUCCESS);
    }

    my $timeout = get_timeout();

    my $upid;

    my $param = {
        node => $nodename,
        vmid => $status->{vmid},
        timeout => $timeout,
        forceStop => 1,
    };

    if ($status->{type} eq 'qemu') {
        $upid = PVE::API2::Qemu->vm_shutdown($param);
    } else {
        $upid = PVE::API2::OpenVZ->vm_shutdown($param);
    }

    upid_wait($upid);

    check_running($status);

    exit($status->{running} ? OCF_ERR_GENERIC : OCF_SUCCESS);

} elsif ($cmd eq 'recover' || $cmd eq 'restart' || $cmd eq 'reload') {

    # accepted, but nothing to do
    exit(OCF_SUCCESS);

} elsif ($cmd eq 'status' || $cmd eq 'monitor') {

    my $status = validate_all();

    if (!$status->{running}) {
        ocf_log('debug', "$status->{name} is not running");
        exit(OCF_NOT_RUNNING);
    }

    ocf_log('debug', "$status->{name} is running");

    my $testprog = $ENV{OCF_RESKEY_status_program};
    my $checklevel = $ENV{OCF_CHECK_LEVEL};

    # the status program is only consulted for check level 10 and above
    if ($testprog && $checklevel && $checklevel >= 10) {
        if (system($testprog) != 0) {
            exit(OCF_NOT_RUNNING);
        }
    }

    exit(OCF_SUCCESS);

} elsif ($cmd eq 'migrate') {
    my $status = validate_all();
    if (!$status->{running}) {
        ocf_log('err', "$status->{name} is not running");
        exit(OCF_ERR_GENERIC);
    }

    if (!$migratetarget) {
        ocf_log('err', "No target specified");
        exit(OCF_ERR_ARGS);
    }

    my $upid;
    my $params = {
        node => $nodename,
        vmid => $status->{vmid},
        target => $migratetarget,
        online => 1,
    };

    my $oldconfig;
    if ($status->{type} eq 'qemu') {
        $oldconfig = PVE::QemuServer::config_file($status->{vmid}, $status->{node});
        $upid = PVE::API2::Qemu->migrate_vm($params);
    } else {
        $oldconfig = PVE::OpenVZ::config_file($status->{vmid}, $status->{node});
        $upid = PVE::API2::OpenVZ->migrate_vm($params);
    }

    upid_wait($upid);

    # something went wrong if old config file is still there
    exit((-f $oldconfig) ? OCF_ERR_GENERIC : OCF_SUCCESS);

    # NOTE: a second "elsif ($cmd eq 'stop')" branch used to follow here
    # (vm_stop followed by die "implement me"). It could never be reached
    # because the 'stop' branch above always matches first, so it was removed.

} elsif ($cmd eq 'reconfig') {
    # Reconfigure a running VM
    my $status = validate_all();

    # we do nothing here

} elsif ($cmd eq 'meta-data') {
    # print the resource agent description stored after __DATA__
    while (<DATA>) {
        print;
    }
} elsif ($cmd eq 'validate-all') {
    my $status = validate_all();
} else {
    die "usage: $0 {start|stop|recover|restart|reload|status|monitor|migrate|reconfig|meta-data|validate-all}\n";
}

exit(OCF_SUCCESS);
|
|
|
|
__DATA__
|
|
<?xml version="1.0"?>
|
|
<resource-agent version="rgmanager 2.0" name="pvevm">
|
|
<version>1.0</version>
|
|
|
|
<longdesc lang="en">
|
|
Defines a PVE Virtual Machine
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Defines a PVE Virtual Machine
|
|
</shortdesc>
|
|
|
|
<parameters>
|
|
<parameter name="vmid" primary="1">
|
|
<longdesc lang="en">
|
|
This is the VMID of the virtual machine.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
VMID
|
|
</shortdesc>
|
|
<content type="string"/>
|
|
</parameter>
|
|
|
|
<parameter name="domain" reconfig="1">
|
|
<longdesc lang="en">
|
|
Failover domains define lists of cluster members
|
|
to try in the event that the host of the virtual machine
|
|
fails.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Cluster failover Domain
|
|
</shortdesc>
|
|
<content type="string"/>
|
|
</parameter>
|
|
|
|
<parameter name="autostart" reconfig="1">
|
|
<longdesc lang="en">
|
|
If set to yes, this resource group will automatically be started
|
|
after the cluster forms a quorum. If set to no, this virtual
|
|
machine will start in the 'disabled' state after the cluster
|
|
forms a quorum.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Automatic start after quorum formation
|
|
</shortdesc>
|
|
<content type="boolean" default="1"/>
|
|
</parameter>
|
|
|
|
<parameter name="exclusive" reconfig="1">
|
|
<longdesc lang="en">
|
|
If set, this resource group will only relocate to
|
|
nodes which have no other resource groups running in the
|
|
event of a failure. If no empty nodes are available,
|
|
this resource group will not be restarted after a failure.
|
|
Additionally, resource groups will not automatically
|
|
relocate to the node running this resource group. This
|
|
option can be overridden by manual start and/or relocate
|
|
operations.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Exclusive resource group
|
|
</shortdesc>
|
|
<content type="boolean" default="0"/>
|
|
</parameter>
|
|
|
|
<parameter name="recovery" reconfig="1">
|
|
<longdesc lang="en">
|
|
This currently has three possible options: "restart" tries
|
|
to restart this virtual machine locally before
|
|
attempting to relocate (default); "relocate" does not bother
|
|
trying to restart the VM locally; "disable" disables
|
|
the VM if it fails.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Failure recovery policy
|
|
</shortdesc>
|
|
<content type="string"/>
|
|
</parameter>
|
|
|
|
<parameter name="migrate">
|
|
<longdesc lang="en">
|
|
Migration type (live or pause, default = live).
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Migration type (live or pause, default = live).
|
|
</shortdesc>
|
|
<content type="string" default="live"/>
|
|
</parameter>
|
|
|
|
<parameter name="depend">
|
|
<longdesc lang="en">
|
|
Service dependency; will not start without the specified
|
|
service running.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Top-level service this depends on, in service:name format.
|
|
</shortdesc>
|
|
<content type="string"/>
|
|
</parameter>
|
|
|
|
<parameter name="depend_mode">
|
|
<longdesc lang="en">
|
|
Service dependency mode.
|
|
hard - This service is stopped/started if its dependency
|
|
is stopped/started
|
|
soft - This service only depends on the other service for
|
|
initial startup. If the other service stops, this
|
|
service is not stopped.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Service dependency mode (soft or hard).
|
|
</shortdesc>
|
|
<content type="string" default="hard"/>
|
|
</parameter>
|
|
|
|
<parameter name="max_restarts" reconfig="1">
|
|
<longdesc lang="en">
|
|
Maximum restarts for this service.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Maximum restarts for this service.
|
|
</shortdesc>
|
|
<content type="string" default="0"/>
|
|
</parameter>
|
|
|
|
<parameter name="restart_expire_time" reconfig="1">
|
|
<longdesc lang="en">
|
|
Restart expiration time. A restart is forgotten
|
|
after this time. When combined with the max_restarts
|
|
option, this lets administrators specify a threshold
|
|
for when to fail over services. If max_restarts
|
|
is exceeded in this given expiration time, the service
|
|
is relocated instead of restarted again.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Restart expiration time; amount of time before a restart
|
|
is forgotten.
|
|
</shortdesc>
|
|
<content type="string" default="0"/>
|
|
</parameter>
|
|
|
|
<parameter name="status_program" reconfig="1">
|
|
<longdesc lang="en">
|
|
Ordinarily, only the presence/health of a virtual machine
|
|
is checked. If specified, the status_program value is
|
|
executed during a depth 10 check. The intent of this
|
|
program is to ascertain the status of critical services
|
|
within a virtual machine.
|
|
</longdesc>
|
|
<shortdesc lang="en">
|
|
Additional status check program
|
|
</shortdesc>
|
|
<content type="string" default=""/>
|
|
</parameter>
|
|
</parameters>
|
|
|
|
<actions>
|
|
<action name="start" timeout="75"/>
|
|
<action name="stop" timeout="75"/>
|
|
|
|
<action name="status" timeout="10" interval="30"/>
|
|
<action name="monitor" timeout="10" interval="30"/>
|
|
|
|
<!-- depth 10 calls the status_program -->
|
|
<action name="status" depth="10" timeout="20" interval="60"/>
|
|
<action name="monitor" depth="10" timeout="20" interval="60"/>
|
|
|
|
<!-- reconfigure - reconfigure with new OCF parameters.
|
|
NOT OCF COMPATIBLE AT ALL -->
|
|
<action name="reconfig" timeout="10"/>
|
|
|
|
<action name="migrate" timeout="10m"/>
|
|
|
|
<action name="meta-data" timeout="5"/>
|
|
<action name="validate-all" timeout="5"/>
|
|
|
|
</actions>
|
|
|
|
<special tag="rgmanager">
|
|
<!-- Destroy_on_delete / init_on_add are currently only
|
|
supported for migratory resources (no children
|
|
and the 'migrate' action; see above. Do not try this
|
|
with normal services -->
|
|
<attributes maxinstances="1" destroy_on_delete="0" init_on_add="0"/>
|
|
</special>
|
|
</resource-agent>
|
|
|