mirror of
https://git.proxmox.com/git/pve-qemu
synced 2025-10-04 20:12:16 +00:00
PVE backup: improve error handling for fleecing
See Fiona's inner commit for details. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
parent
531db7df01
commit
9e8ef15831
103
debian/patches/pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch
vendored
Normal file
103
debian/patches/pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch
vendored
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
|
Date: Thu, 7 Nov 2024 17:51:14 +0100
|
||||||
|
Subject: [PATCH] PVE backup: fixup error handling for fleecing
|
||||||
|
|
||||||
|
The drained section needs to be terminated before breaking out of the
|
||||||
|
loop in the error scenarios. Otherwise, guest IO on the drive would
|
||||||
|
become stuck.
|
||||||
|
|
||||||
|
If the job is created successfully, then the job completion callback
|
||||||
|
will clean up the snapshot access block nodes. In case failure
|
||||||
|
happened before the job is created, there was no cleanup for the
|
||||||
|
snapshot access block nodes yet. Add it.
|
||||||
|
|
||||||
|
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||||
|
---
|
||||||
|
pve-backup.c | 38 +++++++++++++++++++++++++-------------
|
||||||
|
1 file changed, 25 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pve-backup.c b/pve-backup.c
|
||||||
|
index 4e730aa3da..c4178758b3 100644
|
||||||
|
--- a/pve-backup.c
|
||||||
|
+++ b/pve-backup.c
|
||||||
|
@@ -357,22 +357,23 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
|
||||||
|
qemu_co_mutex_unlock(&backup_state.backup_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void pvebackup_complete_cb(void *opaque, int ret)
|
||||||
|
+static void cleanup_snapshot_access(PVEBackupDevInfo *di)
|
||||||
|
{
|
||||||
|
- PVEBackupDevInfo *di = opaque;
|
||||||
|
- di->completed_ret = ret;
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
||||||
|
- * won't be done as a coroutine anyways:
|
||||||
|
- * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
||||||
|
- * just spawn a BH calling bdrv_unref().
|
||||||
|
- * - For cbw, draining would need to spawn a BH.
|
||||||
|
- */
|
||||||
|
if (di->fleecing.snapshot_access) {
|
||||||
|
bdrv_unref(di->fleecing.snapshot_access);
|
||||||
|
di->fleecing.snapshot_access = NULL;
|
||||||
|
}
|
||||||
|
+ if (di->fleecing.cbw) {
|
||||||
|
+ bdrv_cbw_drop(di->fleecing.cbw);
|
||||||
|
+ di->fleecing.cbw = NULL;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void pvebackup_complete_cb(void *opaque, int ret)
|
||||||
|
+{
|
||||||
|
+ PVEBackupDevInfo *di = opaque;
|
||||||
|
+ di->completed_ret = ret;
|
||||||
|
+
|
||||||
|
if (di->fleecing.cbw) {
|
||||||
|
/*
|
||||||
|
* With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
|
||||||
|
@@ -383,10 +384,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
|
||||||
|
if (di->completed_ret == -EACCES && snapshot_error) {
|
||||||
|
di->completed_ret = snapshot_error;
|
||||||
|
}
|
||||||
|
- bdrv_cbw_drop(di->fleecing.cbw);
|
||||||
|
- di->fleecing.cbw = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
|
||||||
|
+ * won't be done as a coroutine anyways:
|
||||||
|
+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
|
||||||
|
+ * just spawn a BH calling bdrv_unref().
|
||||||
|
+ * - For cbw, draining would need to spawn a BH.
|
||||||
|
+ */
|
||||||
|
+ cleanup_snapshot_access(di);
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Needs to happen outside of coroutine, because it takes the graph write lock.
|
||||||
|
*/
|
||||||
|
@@ -587,6 +595,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||||
|
if (!di->fleecing.cbw) {
|
||||||
|
error_setg(errp, "appending cbw node for fleecing failed: %s",
|
||||||
|
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||||
|
+ bdrv_drained_end(di->bs);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -599,6 +608,8 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||||
|
if (!di->fleecing.snapshot_access) {
|
||||||
|
error_setg(errp, "setting up snapshot access for fleecing failed: %s",
|
||||||
|
local_err ? error_get_pretty(local_err) : "unknown error");
|
||||||
|
+ cleanup_snapshot_access(di);
|
||||||
|
+ bdrv_drained_end(di->bs);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
source_bs = di->fleecing.snapshot_access;
|
||||||
|
@@ -637,6 +648,7 @@ static void create_backup_jobs_bh(void *opaque) {
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!job || local_err) {
|
||||||
|
+ cleanup_snapshot_access(di);
|
||||||
|
error_setg(errp, "backup_job_create failed: %s",
|
||||||
|
local_err ? error_get_pretty(local_err) : "null");
|
||||||
|
break;
|
1
debian/patches/series
vendored
1
debian/patches/series
vendored
@ -88,3 +88,4 @@ pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
|
|||||||
pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
|
pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
|
||||||
pve/0046-PVE-backup-add-fleecing-option.patch
|
pve/0046-PVE-backup-add-fleecing-option.patch
|
||||||
pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
|
pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
|
||||||
|
pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch
|
||||||
|
Loading…
Reference in New Issue
Block a user