From 88126be3f70650b05af9c04e5b13785bdecb2031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Thu, 26 Mar 2020 15:42:57 +0100 Subject: [PATCH] migrate: fix replication false-positives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit by only checking for replicatable volumes when a replication job is defined, and passing only actually replicated volumes to the target node via STDIN, and back via STDOUT. otherwise this can pick up theoretically replicatable, but not actually replicated volumes and treat them wrong. Signed-off-by: Fabian Grünbichler --- PVE/API2/Qemu.pm | 5 ++++- PVE/QemuMigrate.pm | 22 ++++++++++++++++++---- PVE/QemuServer.pm | 7 +++---- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm index ef8a7c38..8a7e98c1 100644 --- a/PVE/API2/Qemu.pm +++ b/PVE/API2/Qemu.pm @@ -2073,6 +2073,7 @@ __PACKAGE__->register_method({ # read spice ticket from STDIN my $spice_ticket; my $nbd_protocol_version = 0; + my $replicated_volumes = {}; if ($stateuri && ($stateuri eq 'tcp' || $stateuri eq 'unix') && $migratedfrom && ($rpcenv->{type} eq 'cli')) { while (defined(my $line = )) { chomp $line; @@ -2080,6 +2081,8 @@ __PACKAGE__->register_method({ $spice_ticket = $1; } elsif ($line =~ m/^nbd_protocol_version: (\d+)$/) { $nbd_protocol_version = $1; + } elsif ($line =~ m/^replicated_volume: (.*)$/) { + $replicated_volumes->{$1} = 1; } else { # fallback for old source node $spice_ticket = $line; @@ -2113,7 +2116,7 @@ __PACKAGE__->register_method({ PVE::QemuServer::vm_start($storecfg, $vmid, $stateuri, $skiplock, $migratedfrom, undef, $machine, $spice_ticket, $migration_network, $migration_type, $targetstorage, $timeout, - $nbd_protocol_version); + $nbd_protocol_version, $replicated_volumes); return; }; diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm index 9cff64d0..b14ee012 100644 --- a/PVE/QemuMigrate.pm +++ b/PVE/QemuMigrate.pm @@ -330,7 +330,9 @@ sub sync_disks { }); } - my $replicatable_volumes = PVE::QemuConfig->get_replicatable_volumes($self->{storecfg}, $self->{vmid}, $conf, 0, 1); + my $rep_cfg = PVE::ReplicationConfig->new(); + my $replication_jobcfg = $rep_cfg->find_local_replication_job($vmid, $self->{node}); + my $replicatable_volumes = $replication_jobcfg ? PVE::QemuConfig->get_replicatable_volumes($self->{storecfg}, $self->{vmid}, $conf, 0, 1) : {}; my $test_volid = sub { my ($volid, $attr) = @_; @@ -449,8 +451,7 @@ sub sync_disks { } } - my $rep_cfg = PVE::ReplicationConfig->new(); - if (my $jobcfg = $rep_cfg->find_local_replication_job($vmid, $self->{node})) { + if ($replication_jobcfg) { if ($self->{running}) { my $version = PVE::QemuServer::kvm_user_version(); @@ -484,7 +485,7 @@ sub sync_disks { my $start_time = time(); my $logfunc = sub { $self->log('info', shift) }; $self->{replicated_volumes} = PVE::Replication::run_replication( - 'PVE::QemuConfig', $jobcfg, $start_time, $start_time, $logfunc); + 'PVE::QemuConfig', $replication_jobcfg, $start_time, $start_time, $logfunc); } # sizes in config have to be accurate for remote node to correctly @@ -657,6 +658,11 @@ sub phase2 { # TODO change to 'spice_ticket: \n' in 7.0 my $input = $spice_ticket ? "$spice_ticket\n" : "\n"; $input .= "nbd_protocol_version: $nbd_protocol_version\n"; + foreach my $volid (keys %{$self->{replicated_volumes}}) { + $input .= "replicated_volume: $volid\n"; + } + + my $target_replicated_volumes = {}; # Note: We try to keep $spice_ticket secret (do not pass via command line parameter) # instead we pipe it through STDIN @@ -701,6 +707,10 @@ sub phase2 { $self->{target_drive}->{$targetdrive}->{nbd_uri} = $nbd_uri; push @$tunnel_addr, "$nbd_unix_addr:$nbd_unix_addr"; push @$sock_addr, $nbd_unix_addr; + } elsif ($line =~ m/^re-using replicated volume: (\S+) - (.*)$/) { + my $drive = $1; + my $volid = $2; + $target_replicated_volumes->{$volid} = $drive; } elsif ($line =~ m/^QEMU: (.*)$/) { $self->log('info', "[$self->{node}] $1\n"); } @@ -713,6 +723,10 @@ sub phase2 { die "unable to detect remote migration address\n" if !$raddr; + if (scalar(keys %$target_replicated_volumes) != scalar(keys %{$self->{replicated_volumes}})) { + die "number of replicated disks on source and target node do not match - target node too old?\n" + } + $self->log('info', "start remote tunnel"); if ($migration_type eq 'secure') { diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index a3e3269a..9c9debfd 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -4706,7 +4706,7 @@ sub vmconfig_update_disk { sub vm_start { my ($storecfg, $vmid, $statefile, $skiplock, $migratedfrom, $paused, $forcemachine, $spice_ticket, $migration_network, $migration_type, - $targetstorage, $timeout, $nbd_protocol_version) = @_; + $targetstorage, $timeout, $nbd_protocol_version, $replicated_volumes) = @_; PVE::QemuConfig->lock_config($vmid, sub { my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom); @@ -4755,16 +4755,15 @@ sub vm_start { $local_volumes->{$ds} = [$volid, $storeid, $volname]; }); - my $replicatable_volumes = PVE::QemuConfig->get_replicatable_volumes($storecfg, $vmid, $conf, 0, 1); - my $format = undef; foreach my $opt (sort keys %$local_volumes) { my ($volid, $storeid, $volname) = @{$local_volumes->{$opt}}; - if ($replicatable_volumes->{$volid}) { + if ($replicated_volumes->{$volid}) { # re-use existing, replicated volume with bitmap on source side $local_volumes->{$opt} = $conf->{${opt}}; + print "re-using replicated volume: $opt - $volid\n"; next; } my $drive = parse_drive($opt, $conf->{$opt});