mirror of
https://git.proxmox.com/git/qemu-server
synced 2025-08-06 11:08:34 +00:00
migration: move finishing block jobs to phase2 for better/uniform error handling
This avoids the possibility of dying during phase3_cleanup and, instead of duplicating the cleanup ourselves, lets us benefit from phase2_cleanup doing it. The duplicate cleanup was also very incomplete: it didn't stop the remote kvm process (leading to 'VM already running' when trying to migrate again afterwards) even though it removed its disks, and it didn't unlock the config, didn't close the tunnel and didn't cancel the block-dirty bitmaps. Since migrate_cancel should do nothing after the (non-storage) migrate process has completed, even that cleanup step is fine here. Since phase3 is empty at the moment, the order of operations is still the same. Also add a test that, before this patch, would have complained about finish_tunnel not being called. That test also checks that local disks are not removed before the block jobs are finished. Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
This commit is contained in:
parent
a6be63ac9b
commit
0783c3c271
@ -1134,6 +1134,16 @@ sub phase2 {
|
||||
die "unable to parse migration status '$stat->{status}' - aborting\n";
|
||||
}
|
||||
}
|
||||
|
||||
if ($self->{storage_migration}) {
|
||||
# finish block-job with block-job-cancel, to disconnect source VM from NBD
|
||||
# to avoid it trying to re-establish it. We are in blockjob ready state,
|
||||
# thus, this command changes it to blockjob complete (see qapi docs)
|
||||
eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}, 'cancel'); };
|
||||
if (my $err = $@) {
|
||||
die "Failed to complete storage migration: $err\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sub phase2_cleanup {
|
||||
@ -1209,19 +1219,6 @@ sub phase3_cleanup {
|
||||
|
||||
my $tunnel = $self->{tunnel};
|
||||
|
||||
if ($self->{storage_migration}) {
|
||||
# finish block-job with block-job-cancel, to disconnect source VM from NBD
|
||||
# to avoid it trying to re-establish it. We are in blockjob ready state,
|
||||
# thus, this command changes it to blockjob complete (see qapi docs)
|
||||
eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}, 'cancel'); };
|
||||
|
||||
if (my $err = $@) {
|
||||
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };
|
||||
eval { PVE::QemuMigrate::cleanup_remotedisks($self) };
|
||||
die "Failed to complete storage migration: $err\n";
|
||||
}
|
||||
}
|
||||
|
||||
if ($self->{volume_map}) {
|
||||
my $target_drives = $self->{target_drive};
|
||||
|
||||
|
@ -139,6 +139,12 @@ $MigrationTest::Shared::qemu_server_module->mock(
|
||||
file_set_contents("${RUN_DIR_PATH}/nbd_info", to_json($nbd_info));
|
||||
},
|
||||
qemu_drive_mirror_monitor => sub {
|
||||
my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
|
||||
|
||||
if ($fail_config->{qemu_drive_mirror_monitor} &&
|
||||
$fail_config->{qemu_drive_mirror_monitor} eq $completion) {
|
||||
die "qemu_drive_mirror_monitor '$completion' error\n";
|
||||
}
|
||||
return;
|
||||
},
|
||||
set_migration_caps => sub {
|
||||
|
@ -1444,6 +1444,41 @@ my $tests = [
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name => '149_running_unused_block_job_cancel_fail',
|
||||
target => 'pve1',
|
||||
vmid => 149,
|
||||
vm_status => {
|
||||
running => 1,
|
||||
runningmachine => 'pc-q35-5.0+pve0',
|
||||
},
|
||||
opts => {
|
||||
online => 1,
|
||||
'with-local-disks' => 1,
|
||||
},
|
||||
config_patch => {
|
||||
scsi1 => undef,
|
||||
unused0 => 'local-dir:149/vm-149-disk-0.qcow2',
|
||||
},
|
||||
expected_calls => {},
|
||||
expect_die => "qemu_drive_mirror_monitor 'cancel' error",
|
||||
# note that 'cancel' is also used to finish and that's what this test is about
|
||||
fail_config => {
|
||||
'qemu_drive_mirror_monitor' => 'cancel',
|
||||
},
|
||||
expected => {
|
||||
source_volids => local_volids_for_vm(149),
|
||||
target_volids => {},
|
||||
vm_config => get_patched_config(149, {
|
||||
scsi1 => undef,
|
||||
unused0 => 'local-dir:149/vm-149-disk-0.qcow2',
|
||||
}),
|
||||
vm_status => {
|
||||
running => 1,
|
||||
runningmachine => 'pc-q35-5.0+pve0',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name => '149_offline',
|
||||
target => 'pve1',
|
||||
|
Loading…
Reference in New Issue
Block a user