mirror of
https://git.proxmox.com/git/qemu-server
synced 2025-04-30 11:15:51 +00:00
api: clone: fork before locking
using the familiar early+repeated checks pattern from other API calls. Only intended functional changes are with regard to locking/forking. For a full clone of a running VM without guest agent, this also fixes issuing vm_{resume,suspend} calls for drive mirror completion. Previously, those just timed out, because of not getting the lock: > create full clone of drive scsi0 (rbdkvm:vm-104-disk-0) > Formatting '/var/lib/vz/images/105/vm-105-disk-0.raw', fmt=raw > size=4294967296 preallocation=off > drive mirror is starting for drive-scsi0 > drive-scsi0: transferred 2.0 MiB of 4.0 GiB (0.05%) in 0s > drive-scsi0: transferred 635.0 MiB of 4.0 GiB (15.50%) in 1s > drive-scsi0: transferred 1.6 GiB of 4.0 GiB (40.50%) in 2s > drive-scsi0: transferred 3.6 GiB of 4.0 GiB (90.23%) in 3s > drive-scsi0: transferred 4.0 GiB of 4.0 GiB (100.00%) in 4s, ready > all 'mirror' jobs are ready > suspend vm > trying to acquire lock... > can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout > drive-scsi0: Cancelling block job > drive-scsi0: Done. > resume vm > trying to acquire lock... > can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
This commit is contained in:
parent
d6cdfae417
commit
dbecb46f2a
220
PVE/API2/Qemu.pm
220
PVE/API2/Qemu.pm
@ -3079,9 +3079,7 @@ __PACKAGE__->register_method({
|
||||
|
||||
my $running = PVE::QemuServer::check_running($vmid) || 0;
|
||||
|
||||
my $clonefn = sub {
|
||||
# do all tests after lock but before forking worker - if possible
|
||||
|
||||
my $load_and_check = sub {
|
||||
my $conf = PVE::QemuConfig->load_config($vmid);
|
||||
PVE::QemuConfig->check_lock($conf);
|
||||
|
||||
@ -3091,7 +3089,7 @@ __PACKAGE__->register_method({
|
||||
die "snapshot '$snapname' does not exist\n"
|
||||
if $snapname && !defined( $conf->{snapshots}->{$snapname});
|
||||
|
||||
my $full = extract_param($param, 'full') // !PVE::QemuConfig->is_template($conf);
|
||||
my $full = $param->{full} // !PVE::QemuConfig->is_template($conf);
|
||||
|
||||
die "parameter 'storage' not allowed for linked clones\n"
|
||||
if defined($storage) && !$full;
|
||||
@ -3156,7 +3154,13 @@ __PACKAGE__->register_method({
|
||||
}
|
||||
}
|
||||
|
||||
# auto generate a new uuid
|
||||
return ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone);
|
||||
};
|
||||
|
||||
my $clonefn = sub {
|
||||
my ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone) = $load_and_check->();
|
||||
|
||||
# auto generate a new uuid
|
||||
my $smbios1 = PVE::QemuServer::parse_smbios1($newconf->{smbios1} || '');
|
||||
$smbios1->{uuid} = PVE::QemuServer::generate_uuid();
|
||||
$newconf->{smbios1} = PVE::QemuServer::print_smbios1($smbios1);
|
||||
@ -3181,105 +3185,99 @@ __PACKAGE__->register_method({
|
||||
# FIXME use PVE::QemuConfig->create_and_lock_config and adapt code
|
||||
PVE::Tools::file_set_contents($conffile, "# qmclone temporary file\nlock: clone\n");
|
||||
|
||||
my $realcmd = sub {
|
||||
my $upid = shift;
|
||||
|
||||
my $newvollist = [];
|
||||
my $jobs = {};
|
||||
|
||||
eval {
|
||||
local $SIG{INT} =
|
||||
local $SIG{TERM} =
|
||||
local $SIG{QUIT} =
|
||||
local $SIG{HUP} = sub { die "interrupted by signal\n"; };
|
||||
|
||||
PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
|
||||
|
||||
my $bwlimit = extract_param($param, 'bwlimit');
|
||||
|
||||
my $total_jobs = scalar(keys %{$drives});
|
||||
my $i = 1;
|
||||
|
||||
foreach my $opt (sort keys %$drives) {
|
||||
my $drive = $drives->{$opt};
|
||||
my $skipcomplete = ($total_jobs != $i); # finish after last drive
|
||||
my $completion = $skipcomplete ? 'skip' : 'complete';
|
||||
|
||||
my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
|
||||
my $storage_list = [ $src_sid ];
|
||||
push @$storage_list, $storage if defined($storage);
|
||||
my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
|
||||
|
||||
my $newdrive = PVE::QemuServer::clone_disk(
|
||||
$storecfg,
|
||||
$vmid,
|
||||
$running,
|
||||
$opt,
|
||||
$drive,
|
||||
$snapname,
|
||||
$newid,
|
||||
$storage,
|
||||
$format,
|
||||
$fullclone->{$opt},
|
||||
$newvollist,
|
||||
$jobs,
|
||||
$completion,
|
||||
$oldconf->{agent},
|
||||
$clonelimit,
|
||||
$oldconf
|
||||
);
|
||||
|
||||
$newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
|
||||
|
||||
PVE::QemuConfig->write_config($newid, $newconf);
|
||||
$i++;
|
||||
}
|
||||
|
||||
delete $newconf->{lock};
|
||||
|
||||
# do not write pending changes
|
||||
if (my @changes = keys %{$newconf->{pending}}) {
|
||||
my $pending = join(',', @changes);
|
||||
warn "found pending changes for '$pending', discarding for clone\n";
|
||||
delete $newconf->{pending};
|
||||
}
|
||||
|
||||
PVE::QemuConfig->write_config($newid, $newconf);
|
||||
|
||||
if ($target) {
|
||||
# always deactivate volumes - avoid lvm LVs to be active on several nodes
|
||||
PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
|
||||
PVE::Storage::deactivate_volumes($storecfg, $newvollist);
|
||||
|
||||
my $newconffile = PVE::QemuConfig->config_file($newid, $target);
|
||||
die "Failed to move config to node '$target' - rename failed: $!\n"
|
||||
if !rename($conffile, $newconffile);
|
||||
}
|
||||
|
||||
PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
|
||||
};
|
||||
if (my $err = $@) {
|
||||
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
|
||||
sleep 1; # some storage like rbd need to wait before release volume - really?
|
||||
|
||||
foreach my $volid (@$newvollist) {
|
||||
eval { PVE::Storage::vdisk_free($storecfg, $volid); };
|
||||
warn $@ if $@;
|
||||
}
|
||||
|
||||
PVE::Firewall::remove_vmfw_conf($newid);
|
||||
|
||||
unlink $conffile; # avoid races -> last thing before die
|
||||
|
||||
die "clone failed: $err";
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
PVE::Firewall::clone_vmfw_conf($vmid, $newid);
|
||||
|
||||
return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
|
||||
my $newvollist = [];
|
||||
my $jobs = {};
|
||||
|
||||
eval {
|
||||
local $SIG{INT} =
|
||||
local $SIG{TERM} =
|
||||
local $SIG{QUIT} =
|
||||
local $SIG{HUP} = sub { die "interrupted by signal\n"; };
|
||||
|
||||
PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
|
||||
|
||||
my $bwlimit = extract_param($param, 'bwlimit');
|
||||
|
||||
my $total_jobs = scalar(keys %{$drives});
|
||||
my $i = 1;
|
||||
|
||||
foreach my $opt (sort keys %$drives) {
|
||||
my $drive = $drives->{$opt};
|
||||
my $skipcomplete = ($total_jobs != $i); # finish after last drive
|
||||
my $completion = $skipcomplete ? 'skip' : 'complete';
|
||||
|
||||
my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
|
||||
my $storage_list = [ $src_sid ];
|
||||
push @$storage_list, $storage if defined($storage);
|
||||
my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
|
||||
|
||||
my $newdrive = PVE::QemuServer::clone_disk(
|
||||
$storecfg,
|
||||
$vmid,
|
||||
$running,
|
||||
$opt,
|
||||
$drive,
|
||||
$snapname,
|
||||
$newid,
|
||||
$storage,
|
||||
$format,
|
||||
$fullclone->{$opt},
|
||||
$newvollist,
|
||||
$jobs,
|
||||
$completion,
|
||||
$oldconf->{agent},
|
||||
$clonelimit,
|
||||
$oldconf
|
||||
);
|
||||
|
||||
$newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
|
||||
|
||||
PVE::QemuConfig->write_config($newid, $newconf);
|
||||
$i++;
|
||||
}
|
||||
|
||||
delete $newconf->{lock};
|
||||
|
||||
# do not write pending changes
|
||||
if (my @changes = keys %{$newconf->{pending}}) {
|
||||
my $pending = join(',', @changes);
|
||||
warn "found pending changes for '$pending', discarding for clone\n";
|
||||
delete $newconf->{pending};
|
||||
}
|
||||
|
||||
PVE::QemuConfig->write_config($newid, $newconf);
|
||||
|
||||
if ($target) {
|
||||
# always deactivate volumes - avoid lvm LVs to be active on several nodes
|
||||
PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
|
||||
PVE::Storage::deactivate_volumes($storecfg, $newvollist);
|
||||
|
||||
my $newconffile = PVE::QemuConfig->config_file($newid, $target);
|
||||
die "Failed to move config to node '$target' - rename failed: $!\n"
|
||||
if !rename($conffile, $newconffile);
|
||||
}
|
||||
|
||||
PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
|
||||
};
|
||||
if (my $err = $@) {
|
||||
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
|
||||
sleep 1; # some storage like rbd need to wait before release volume - really?
|
||||
|
||||
foreach my $volid (@$newvollist) {
|
||||
eval { PVE::Storage::vdisk_free($storecfg, $volid); };
|
||||
warn $@ if $@;
|
||||
}
|
||||
|
||||
PVE::Firewall::remove_vmfw_conf($newid);
|
||||
|
||||
unlink $conffile; # avoid races -> last thing before die
|
||||
|
||||
die "clone failed: $err";
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
# Acquire exclusive lock for $newid
|
||||
@ -3287,12 +3285,18 @@ __PACKAGE__->register_method({
|
||||
return PVE::QemuConfig->lock_config_full($newid, 1, $clonefn);
|
||||
};
|
||||
|
||||
# exclusive lock if VM is running - else shared lock is enough;
|
||||
if ($running) {
|
||||
return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
|
||||
} else {
|
||||
return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
|
||||
}
|
||||
my $lock_source_vm = sub {
|
||||
# exclusive lock if VM is running - else shared lock is enough;
|
||||
if ($running) {
|
||||
return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
|
||||
} else {
|
||||
return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
|
||||
}
|
||||
};
|
||||
|
||||
$load_and_check->(); # early checks before forking/locking
|
||||
|
||||
return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $lock_source_vm);
|
||||
}});
|
||||
|
||||
__PACKAGE__->register_method({
|
||||
|
Loading…
Reference in New Issue
Block a user