From 1aa4d844a1699d4f8541d45dae513f21c8907ed7 Mon Sep 17 00:00:00 2001 From: Dominik Csapak Date: Fri, 3 Jun 2022 09:16:29 +0200 Subject: [PATCH] ReplicationState: purge state from non local vms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running replication, we don't want to keep replication states for non-local vms. Normally this would not be a problem, since on migration, we transfer the states anyway, but when the ha-manager steals a vm, it cannot do that. In that case, having an old state lying around is harmful, since the code does not expect the state to be out-of-sync with the actual snapshots on disk. One such problem is the following: Replicate vm 100 from node A to node B and C, and activate HA. When node A dies, the vm will be relocated to e.g. node B and start replicating from there. If node B now had an old state lying around for its sync to node C, it might delete the common base snapshots of B and C and then cannot sync again. Deleting the state for all non-local guests fixes that issue, since it always starts fresh, and the potentially existing old state cannot be valid anyway since we just relocated the vm here (from a dead node). Signed-off-by: Dominik Csapak Reviewed-by: Fabian Grünbichler Reviewed-by: Fabian Ebner --- src/PVE/ReplicationState.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PVE/ReplicationState.pm b/src/PVE/ReplicationState.pm index 0a5e410..8eebb42 100644 --- a/src/PVE/ReplicationState.pm +++ b/src/PVE/ReplicationState.pm @@ -215,7 +215,7 @@ sub purge_old_states { my $tid = $plugin->get_unique_target_id($jobcfg); my $vmid = $jobcfg->{guest}; $used_tids->{$vmid}->{$tid} = 1 - if defined($vms->{ids}->{$vmid}); # && $vms->{ids}->{$vmid}->{node} eq $local_node; + if defined($vms->{ids}->{$vmid}) && $vms->{ids}->{$vmid}->{node} eq $local_node; } my $purge_state = sub {