From 05089ab57d248a74134cd25febe9a594be1f68bc Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Tue, 12 Nov 2024 16:48:26 +0100 Subject: [PATCH] various PVE backup code refactoring/improvements Mostly preparation for our external backup plugin work, but fine to already commit now. Signed-off-by: Thomas Lamprecht --- ...r-out-setting-up-snapshot-access-for.patch | 135 ++++++++++++++++++ ...device-name-in-device-info-structure.patch | 135 ++++++++++++++++++ ...de-device-name-in-error-when-setting.patch | 25 ++++ debian/patches/series | 3 + 4 files changed, 298 insertions(+) create mode 100644 debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch create mode 100644 debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch create mode 100644 debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch diff --git a/debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch b/debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch new file mode 100644 index 0000000..81ac557 --- /dev/null +++ b/debian/patches/pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch @@ -0,0 +1,135 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Thu, 7 Nov 2024 17:51:15 +0100 +Subject: [PATCH] PVE backup: factor out setting up snapshot access for + fleecing + +Avoids some line bloat in the create_backup_jobs_bh() function and is +in preparation for setting up the snapshot access independently of +fleecing, in particular that will be useful for providing access to +the snapshot via NBD. + +Signed-off-by: Fiona Ebner +--- + pve-backup.c | 95 ++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 58 insertions(+), 37 deletions(-) + +diff --git a/pve-backup.c b/pve-backup.c +index c4178758b3..051ebffe48 100644 +--- a/pve-backup.c ++++ b/pve-backup.c +@@ -525,6 +525,62 @@ static int coroutine_fn pvebackup_co_add_config( + goto out; + } + ++/* ++ * Setup a snapshot-access block node for a device with associated fleecing image. ++ */ ++static int setup_snapshot_access(PVEBackupDevInfo *di, Error **errp) ++{ ++ Error *local_err = NULL; ++ ++ if (!di->fleecing.bs) { ++ error_setg(errp, "no associated fleecing image"); ++ return -1; ++ } ++ ++ QDict *cbw_opts = qdict_new(); ++ qdict_put_str(cbw_opts, "driver", "copy-before-write"); ++ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs)); ++ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs)); ++ ++ if (di->bitmap) { ++ /* ++ * Only guest writes to parts relevant for the backup need to be intercepted with ++ * old data being copied to the fleecing image. ++ */ ++ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs)); ++ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap)); ++ } ++ /* ++ * Fleecing storage is supposed to be fast and it's better to break backup than guest ++ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so ++ * abort a bit before that. ++ */ ++ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot"); ++ qdict_put_int(cbw_opts, "cbw-timeout", 45); ++ ++ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err); ++ ++ if (!di->fleecing.cbw) { ++ error_setg(errp, "appending cbw node for fleecing failed: %s", ++ local_err ? error_get_pretty(local_err) : "unknown error"); ++ return -1; ++ } ++ ++ QDict *snapshot_access_opts = qdict_new(); ++ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access"); ++ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw)); ++ ++ di->fleecing.snapshot_access = ++ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err); ++ if (!di->fleecing.snapshot_access) { ++ error_setg(errp, "setting up snapshot access for fleecing failed: %s", ++ local_err ? error_get_pretty(local_err) : "unknown error"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + /* + * backup_job_create can *not* be run from a coroutine, so this can't either. + * The caller is responsible that backup_mutex is held nonetheless. +@@ -569,49 +625,14 @@ static void create_backup_jobs_bh(void *opaque) { + const char *job_id = bdrv_get_device_name(di->bs); + bdrv_graph_co_rdunlock(); + if (di->fleecing.bs) { +- QDict *cbw_opts = qdict_new(); +- qdict_put_str(cbw_opts, "driver", "copy-before-write"); +- qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs)); +- qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs)); +- +- if (di->bitmap) { +- /* +- * Only guest writes to parts relevant for the backup need to be intercepted with +- * old data being copied to the fleecing image. +- */ +- qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs)); +- qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap)); +- } +- /* +- * Fleecing storage is supposed to be fast and it's better to break backup than guest +- * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so +- * abort a bit before that. +- */ +- qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot"); +- qdict_put_int(cbw_opts, "cbw-timeout", 45); +- +- di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err); +- +- if (!di->fleecing.cbw) { +- error_setg(errp, "appending cbw node for fleecing failed: %s", +- local_err ? error_get_pretty(local_err) : "unknown error"); +- bdrv_drained_end(di->bs); +- break; +- } +- +- QDict *snapshot_access_opts = qdict_new(); +- qdict_put_str(snapshot_access_opts, "driver", "snapshot-access"); +- qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw)); +- +- di->fleecing.snapshot_access = +- bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err); +- if (!di->fleecing.snapshot_access) { ++ if (setup_snapshot_access(di, &local_err) < 0) { + error_setg(errp, "setting up snapshot access for fleecing failed: %s", + local_err ? error_get_pretty(local_err) : "unknown error"); + cleanup_snapshot_access(di); + bdrv_drained_end(di->bs); + break; + } ++ + source_bs = di->fleecing.snapshot_access; + discard_source = true; + diff --git a/debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch b/debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch new file mode 100644 index 0000000..5ad62ca --- /dev/null +++ b/debian/patches/pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch @@ -0,0 +1,135 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Thu, 7 Nov 2024 17:51:16 +0100 +Subject: [PATCH] PVE backup: save device name in device info structure + +The device name needs to be queried while holding the graph read lock +and since it doesn't change during the whole operation, just get it +once during setup and avoid the need to query it again in different +places. + +Also in preparation to use it more often in error messages and for the +upcoming external backup access API. + +Signed-off-by: Fiona Ebner +--- + pve-backup.c | 29 +++++++++++++++-------------- + 1 file changed, 15 insertions(+), 14 deletions(-) + +diff --git a/pve-backup.c b/pve-backup.c +index 051ebffe48..33c23e53c2 100644 +--- a/pve-backup.c ++++ b/pve-backup.c +@@ -94,6 +94,7 @@ typedef struct PVEBackupDevInfo { + size_t size; + uint64_t block_size; + uint8_t dev_id; ++ char* device_name; + int completed_ret; // INT_MAX if not completed + BdrvDirtyBitmap *bitmap; + BlockDriverState *target; +@@ -327,6 +328,8 @@ static void coroutine_fn pvebackup_co_complete_stream(void *opaque) + } + + di->bs = NULL; ++ g_free(di->device_name); ++ di->device_name = NULL; + + assert(di->target == NULL); + +@@ -621,9 +624,6 @@ static void create_backup_jobs_bh(void *opaque) { + + BlockDriverState *source_bs = di->bs; + bool discard_source = false; +- bdrv_graph_co_rdlock(); +- const char *job_id = bdrv_get_device_name(di->bs); +- bdrv_graph_co_rdunlock(); + if (di->fleecing.bs) { + if (setup_snapshot_access(di, &local_err) < 0) { + error_setg(errp, "setting up snapshot access for fleecing failed: %s", +@@ -654,7 +654,7 @@ static void create_backup_jobs_bh(void *opaque) { + } + + BlockJob *job = backup_job_create( +- job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap, ++ di->device_name, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap, + bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, + &local_err); +@@ -751,6 +751,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info( + } + PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1); + di->bs = bs; ++ di->device_name = g_strdup(bdrv_get_device_name(bs)); + + if (fleecing && device_uses_fleecing(*d)) { + g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL); +@@ -789,6 +790,7 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info( + + PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1); + di->bs = bs; ++ di->device_name = g_strdup(bdrv_get_device_name(bs)); + di_list = g_list_append(di_list, di); + } + } +@@ -956,9 +958,6 @@ UuidInfo coroutine_fn *qmp_backup( + + di->block_size = dump_cb_block_size; + +- bdrv_graph_co_rdlock(); +- const char *devname = bdrv_get_device_name(di->bs); +- bdrv_graph_co_rdunlock(); + PBSBitmapAction action = PBS_BITMAP_ACTION_NOT_USED; + size_t dirty = di->size; + +@@ -973,7 +972,8 @@ UuidInfo coroutine_fn *qmp_backup( + } + action = PBS_BITMAP_ACTION_NEW; + } else { +- expect_only_dirty = proxmox_backup_check_incremental(pbs, devname, di->size) != 0; ++ expect_only_dirty = ++ proxmox_backup_check_incremental(pbs, di->device_name, di->size) != 0; + } + + if (expect_only_dirty) { +@@ -997,7 +997,8 @@ UuidInfo coroutine_fn *qmp_backup( + } + } + +- int dev_id = proxmox_backup_co_register_image(pbs, devname, di->size, expect_only_dirty, errp); ++ int dev_id = proxmox_backup_co_register_image(pbs, di->device_name, di->size, ++ expect_only_dirty, errp); + if (dev_id < 0) { + goto err_mutex; + } +@@ -1009,7 +1010,7 @@ UuidInfo coroutine_fn *qmp_backup( + di->dev_id = dev_id; + + PBSBitmapInfo *info = g_malloc(sizeof(*info)); +- info->drive = g_strdup(devname); ++ info->drive = g_strdup(di->device_name); + info->action = action; + info->size = di->size; + info->dirty = dirty; +@@ -1034,10 +1035,7 @@ UuidInfo coroutine_fn *qmp_backup( + goto err_mutex; + } + +- bdrv_graph_co_rdlock(); +- const char *devname = bdrv_get_device_name(di->bs); +- bdrv_graph_co_rdunlock(); +- di->dev_id = vma_writer_register_stream(vmaw, devname, di->size); ++ di->dev_id = vma_writer_register_stream(vmaw, di->device_name, di->size); + if (di->dev_id <= 0) { + error_set(errp, ERROR_CLASS_GENERIC_ERROR, + "register_stream failed"); +@@ -1148,6 +1146,9 @@ err: + bdrv_co_unref(di->target); + } + ++ g_free(di->device_name); ++ di->device_name = NULL; ++ + g_free(di); + } + g_list_free(di_list); diff --git a/debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch b/debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch new file mode 100644 index 0000000..dc9c883 --- /dev/null +++ b/debian/patches/pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch @@ -0,0 +1,25 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Thu, 7 Nov 2024 17:51:17 +0100 +Subject: [PATCH] PVE backup: include device name in error when setting up + snapshot access fails + +Signed-off-by: Fiona Ebner +--- + pve-backup.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/pve-backup.c b/pve-backup.c +index 33c23e53c2..d931746453 100644 +--- a/pve-backup.c ++++ b/pve-backup.c +@@ -626,7 +626,8 @@ static void create_backup_jobs_bh(void *opaque) { + bool discard_source = false; + if (di->fleecing.bs) { + if (setup_snapshot_access(di, &local_err) < 0) { +- error_setg(errp, "setting up snapshot access for fleecing failed: %s", ++ error_setg(errp, "%s - setting up snapshot access for fleecing failed: %s", ++ di->device_name, + local_err ? error_get_pretty(local_err) : "unknown error"); + cleanup_snapshot_access(di); + bdrv_drained_end(di->bs); diff --git a/debian/patches/series b/debian/patches/series index 8407fe6..93c97bf 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -89,3 +89,6 @@ pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch pve/0046-PVE-backup-add-fleecing-option.patch pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch pve/0048-PVE-backup-fixup-error-handling-for-fleecing.patch +pve/0049-PVE-backup-factor-out-setting-up-snapshot-access-for.patch +pve/0050-PVE-backup-save-device-name-in-device-info-structure.patch +pve/0051-PVE-backup-include-device-name-in-error-when-setting.patch