From 71365ee433125026d9744a0a37142c81ff312b53 Mon Sep 17 00:00:00 2001 From: Vincent Vanlaer Date: Sat, 26 Oct 2024 18:30:05 +0200 Subject: [PATCH 1/8] block: get type of block allocation in commit_run bdrv_co_common_block_status_above not only returns whether the block is allocated, but also if it contains zeroes. Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20241026163010.2865002-2-libvirt-e6954efa@volkihar.be> Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/commit.c b/block/commit.c index 5df3d05346..ba0ba59316 100644 --- a/block/commit.c +++ b/block/commit.c @@ -15,6 +15,8 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" #include "trace.h" +#include "block/block-common.h" +#include "block/coroutines.h" #include "block/block_int.h" #include "block/blockjob_int.h" #include "qapi/error.h" @@ -167,9 +169,13 @@ static int coroutine_fn commit_run(Job *job, Error **errp) break; } /* Copy if allocated above the base */ - ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, - offset, COMMIT_BUFFER_SIZE, &n); - copy = (ret > 0); + WITH_GRAPH_RDLOCK_GUARD() { + ret = bdrv_co_common_block_status_above(blk_bs(s->top), + s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, + &n, NULL, NULL, NULL); + } + + copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); trace_commit_one_iteration(s, offset, n, ret); if (copy) { assert(n < SIZE_MAX); From 23743ab282af4fbb80fdc049bff2c93668c73c83 Mon Sep 17 00:00:00 2001 From: Vincent Vanlaer Date: Sat, 26 Oct 2024 18:30:06 +0200 Subject: [PATCH 2/8] block: move commit_run loop to separate function Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20241026163010.2865002-3-libvirt-e6954efa@volkihar.be> Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 87 +++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/block/commit.c b/block/commit.c index ba0ba59316..3ee0ade7df 100644 --- a/block/commit.c +++ b/block/commit.c @@ -128,6 +128,55 @@ static void commit_clean(Job *job) blk_unref(s->top); } +static int commit_iteration(CommitBlockJob *s, int64_t offset, + int64_t *n, void *buf) +{ + int ret = 0; + bool copy; + bool error_in_source = true; + + /* Copy if allocated above the base */ + WITH_GRAPH_RDLOCK_GUARD() { + ret = bdrv_co_common_block_status_above(blk_bs(s->top), + s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, + n, NULL, NULL, NULL); + } + + copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); + trace_commit_one_iteration(s, offset, *n, ret); + if (copy) { + assert(*n < SIZE_MAX); + + ret = blk_co_pread(s->top, offset, *n, buf, 0); + if (ret >= 0) { + ret = blk_co_pwrite(s->base, offset, *n, buf, 0); + if (ret < 0) { + error_in_source = false; + } + } + } + if (ret < 0) { + BlockErrorAction action = block_job_error_action(&s->common, + s->on_error, + error_in_source, + -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + return ret; + } else { + *n = 0; + return 0; + } + } + /* Publish progress */ + job_progress_update(&s->common.job, *n); + + if (copy) { + block_job_ratelimit_processed_bytes(&s->common, *n); + } + + return 0; +} + static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); @@ -158,9 +207,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); for (offset = 0; offset < len; offset += n) { - bool copy; - bool error_in_source = true; - /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ @@ -168,42 +214,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (job_is_cancelled(&s->common.job)) { break; } - /* Copy if allocated above the base */ - WITH_GRAPH_RDLOCK_GUARD() { - ret = bdrv_co_common_block_status_above(blk_bs(s->top), - s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, - &n, NULL, NULL, NULL); - } - copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); - trace_commit_one_iteration(s, offset, n, ret); - if (copy) { - assert(n < SIZE_MAX); + ret = commit_iteration(s, offset, &n, buf); - ret = blk_co_pread(s->top, offset, n, buf, 0); - if (ret >= 0) { - ret = blk_co_pwrite(s->base, offset, n, buf, 0); - if (ret < 0) { - error_in_source = false; - } - } - } if (ret < 0) { - BlockErrorAction action = - block_job_error_action(&s->common, s->on_error, - error_in_source, -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - return ret; - } else { - n = 0; - continue; - } - } - /* Publish progress */ - job_progress_update(&s->common.job, n); - - if (copy) { - block_job_ratelimit_processed_bytes(&s->common, n); + return ret; } } From 0648c76ad198e91515771fbbeaac3a3807669a4a Mon Sep 17 00:00:00 2001 From: Vincent Vanlaer Date: Sat, 26 Oct 2024 18:30:07 +0200 Subject: [PATCH 3/8] block: refactor error handling of commit_iteration Signed-off-by: Vincent Vanlaer Message-Id: <20241026163010.2865002-4-libvirt-e6954efa@volkihar.be> [vsementsov]: move action declaration to the top of the function Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 67 ++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/block/commit.c b/block/commit.c index 3ee0ade7df..5c6596a52e 100644 --- a/block/commit.c +++ b/block/commit.c @@ -129,51 +129,60 @@ static void commit_clean(Job *job) } static int commit_iteration(CommitBlockJob *s, int64_t offset, - int64_t *n, void *buf) + int64_t *requested_bytes, void *buf) { + BlockErrorAction action; + int64_t bytes = *requested_bytes; int ret = 0; - bool copy; bool error_in_source = true; /* Copy if allocated above the base */ WITH_GRAPH_RDLOCK_GUARD() { ret = bdrv_co_common_block_status_above(blk_bs(s->top), s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, - n, NULL, NULL, NULL); + &bytes, NULL, NULL, NULL); } - copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); - trace_commit_one_iteration(s, offset, *n, ret); - if (copy) { - assert(*n < SIZE_MAX); + trace_commit_one_iteration(s, offset, bytes, ret); - ret = blk_co_pread(s->top, offset, *n, buf, 0); - if (ret >= 0) { - ret = blk_co_pwrite(s->base, offset, *n, buf, 0); - if (ret < 0) { - error_in_source = false; - } - } - } if (ret < 0) { - BlockErrorAction action = block_job_error_action(&s->common, - s->on_error, - error_in_source, - -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - return ret; - } else { - *n = 0; - return 0; - } + goto fail; } - /* Publish progress */ - job_progress_update(&s->common.job, *n); - if (copy) { - block_job_ratelimit_processed_bytes(&s->common, *n); + if (ret & BDRV_BLOCK_ALLOCATED) { + assert(bytes < SIZE_MAX); + + ret = blk_co_pread(s->top, offset, bytes, buf, 0); + if (ret < 0) { + goto fail; + } + + ret = blk_co_pwrite(s->base, offset, bytes, buf, 0); + if (ret < 0) { + error_in_source = false; + goto fail; + } + + block_job_ratelimit_processed_bytes(&s->common, bytes); } + /* Publish progress */ + + job_progress_update(&s->common.job, bytes); + + *requested_bytes = bytes; + + return 0; + +fail: + action = block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + return ret; + } + + *requested_bytes = 0; + return 0; } From 6f3199f99600fe75f32f78574e507f347de80854 Mon Sep 17 00:00:00 2001 From: Vincent Vanlaer Date: Sat, 26 Oct 2024 18:30:08 +0200 Subject: [PATCH 4/8] block: allow commit to unmap zero blocks Non-active block commits do not discard blocks only containing zeros, causing images to lose sparseness after the commit. This commit fixes that by writing zero blocks using blk_co_pwrite_zeroes rather than writing them out as any other arbitrary data. Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20241026163010.2865002-5-libvirt-e6954efa@volkihar.be> Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/block/commit.c b/block/commit.c index 5c6596a52e..7cc8c0f0df 100644 --- a/block/commit.c +++ b/block/commit.c @@ -150,19 +150,39 @@ static int commit_iteration(CommitBlockJob *s, int64_t offset, } if (ret & BDRV_BLOCK_ALLOCATED) { - assert(bytes < SIZE_MAX); + if (ret & BDRV_BLOCK_ZERO) { + /* + * If the top (sub)clusters are smaller than the base + * (sub)clusters, this will not unmap unless the underlying device + * does some tracking of these requests. Ideally, we would find + * the maximal extent of the zero clusters. + */ + ret = blk_co_pwrite_zeroes(s->base, offset, bytes, + BDRV_REQ_MAY_UNMAP); + if (ret < 0) { + error_in_source = false; + goto fail; + } + } else { + assert(bytes < SIZE_MAX); - ret = blk_co_pread(s->top, offset, bytes, buf, 0); - if (ret < 0) { - goto fail; - } - - ret = blk_co_pwrite(s->base, offset, bytes, buf, 0); - if (ret < 0) { - error_in_source = false; - goto fail; + ret = blk_co_pread(s->top, offset, bytes, buf, 0); + if (ret < 0) { + goto fail; + } + + ret = blk_co_pwrite(s->base, offset, bytes, buf, 0); + if (ret < 0) { + error_in_source = false; + goto fail; + } } + /* + * Whether zeroes actually end up on disk depends on the details of + * the underlying driver. Therefore, this might rate limit more than + * is necessary. + */ block_job_ratelimit_processed_bytes(&s->common, bytes); } From 68aba2a9350345d109f8036f9eff68b81b1c2167 Mon Sep 17 00:00:00 2001 From: Vincent Vanlaer Date: Sat, 26 Oct 2024 18:30:09 +0200 Subject: [PATCH 5/8] block: add test non-active commit with zeroed data Signed-off-by: Vincent Vanlaer Tested-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20241026163010.2865002-6-libvirt-e6954efa@volkihar.be> Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/tests/commit-zero-blocks | 96 +++++++++++++++++++ .../qemu-iotests/tests/commit-zero-blocks.out | 54 +++++++++++ 2 files changed, 150 insertions(+) create mode 100755 tests/qemu-iotests/tests/commit-zero-blocks create mode 100644 tests/qemu-iotests/tests/commit-zero-blocks.out diff --git a/tests/qemu-iotests/tests/commit-zero-blocks b/tests/qemu-iotests/tests/commit-zero-blocks new file mode 100755 index 0000000000..de00273e72 --- /dev/null +++ b/tests/qemu-iotests/tests/commit-zero-blocks @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +# group: rw quick +# +# Test for commit of discarded blocks +# +# This tests committing a live snapshot where some of the blocks that +# are present in the base image are discarded in the intermediate image. +# This intends to check that these blocks are also discarded in the base +# image after the commit. +# +# Copyright (C) 2024 Vincent Vanlaer. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# creator +owner=libvirt-e6954efa@volkihar.be + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_qemu + _rm_test_img "${TEST_IMG}.base" + _rm_test_img "${TEST_IMG}.mid" + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +cd .. +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_supported_proto file + +size="1M" + +TEST_IMG="$TEST_IMG.base" _make_test_img $size +TEST_IMG="$TEST_IMG.mid" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT $size +_make_test_img -b "${TEST_IMG}.mid" -F $IMGFMT $size + +$QEMU_IO -c "write -P 0x01 64k 128k" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "discard 64k 64k" "$TEST_IMG.mid" | _filter_qemu_io + +echo +echo "=== Base image info before commit ===" +TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +echo +echo "=== Middle image info before commit ===" +TEST_IMG="${TEST_IMG}.mid" _img_info | _filter_img_info +$QEMU_IMG map --output=json "$TEST_IMG.mid" | _filter_qemu_img_map + +echo +echo === Running QEMU Live Commit Test === +echo + +qemu_comm_method="qmp" +_launch_qemu -drive file="${TEST_IMG}",if=virtio,id=test +h=$QEMU_HANDLE + +_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" "return" + +_send_qemu_cmd $h "{ 'execute': 'block-commit', + 'arguments': { 'device': 'test', + 'top': '"${TEST_IMG}.mid"', + 'base': '"${TEST_IMG}.base"'} }" '"status": "null"' + +_cleanup_qemu + +echo +echo "=== Base image info after commit ===" +TEST_IMG="${TEST_IMG}.base" _img_info | _filter_img_info +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/tests/commit-zero-blocks.out b/tests/qemu-iotests/tests/commit-zero-blocks.out new file mode 100644 index 0000000000..85bdc46aaf --- /dev/null +++ b/tests/qemu-iotests/tests/commit-zero-blocks.out @@ -0,0 +1,54 @@ +QA output created by commit-zero-blocks +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1048576 +Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.mid backing_fmt=IMGFMT +wrote 131072/131072 bytes at offset 65536 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Base image info before commit === +image: TEST_DIR/t.IMGFMT.base +file format: IMGFMT +virtual size: 1 MiB (1048576 bytes) +[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}, +{ "start": 65536, "length": 131072, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}, +{ "start": 196608, "length": 851968, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}] + +=== Middle image info before commit === +image: TEST_DIR/t.IMGFMT.mid +file format: IMGFMT +virtual size: 1 MiB (1048576 bytes) +backing file: TEST_DIR/t.IMGFMT.base +backing file format: IMGFMT +[{ "start": 0, "length": 65536, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}, +{ "start": 65536, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}, +{ "start": 131072, "length": 65536, "depth": 1, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}, +{ "start": 196608, "length": 851968, "depth": 1, "present": false, "zero": true, "data": false, "compressed": false}] + +=== Running QEMU Live Commit Test === + +{ 'execute': 'qmp_capabilities' } +{"return": {}} +{ 'execute': 'block-commit', + 'arguments': { 'device': 'test', + 'top': 'TEST_DIR/t.IMGFMT.mid', + 'base': 'TEST_DIR/t.IMGFMT.base'} } +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "test"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "test"}} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "test"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "test"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "test", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "test"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "test"}} + +=== Base image info after commit === +image: TEST_DIR/t.IMGFMT.base +file format: IMGFMT +virtual size: 1 MiB (1048576 bytes) +[{ "start": 0, "length": 65536, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}, +{ "start": 65536, "length": 65536, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false}, +{ "start": 131072, "length": 65536, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}, +{ "start": 196608, "length": 851968, "depth": 0, "present": false, "zero": true, "data": false, "compressed": false}] +*** done From da17dd5c5ee8e33124da0bf91aed11491d0c04de Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 9 Apr 2025 11:42:30 +0300 Subject: [PATCH 6/8] qapi: synchronize jobs and block-jobs documentation Actualize documentation and synchronize it for commands which actually call the same functions internally. Reviewed-by: Markus Armbruster Signed-off-by: Vladimir Sementsov-Ogievskiy Message-ID: <20250409084232.28201-2-vsementsov@yandex-team.ru> --- qapi/block-core.json | 61 ++++++++++++++++++++++++++------------------ qapi/job.json | 30 ++++++++++++++++++++-- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index b1937780e1..6beab0dc12 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2956,13 +2956,14 @@ # # Pause an active background block operation. # -# This command returns immediately after marking the active background -# block operation for pausing. It is an error to call this command if -# no operation is in progress or if the job is already paused. +# This command returns immediately after marking the active job for +# pausing. Pausing an already paused job is an error. # -# The operation will pause as soon as possible. No event is emitted -# when the operation is actually paused. Cancelling a paused job -# automatically resumes it. +# The job will pause as soon as possible, which means transitioning +# into the PAUSED state if it was RUNNING, or into STANDBY if it was +# READY. The corresponding JOB_STATUS_CHANGE event will be emitted. +# +# Cancelling a paused job automatically resumes it. # # @device: The job identifier. This used to be a device name (hence # the name of the parameter), but since QEMU 2.7 it can have other @@ -2982,9 +2983,8 @@ # # Resume an active background block operation. # -# This command returns immediately after resuming a paused background -# block operation. It is an error to call this command if no -# operation is in progress or if the job is not paused. +# This command returns immediately after resuming a paused job. +# Resuming an already running job is an error. # # This command also clears the error status of the job. # @@ -3004,10 +3004,15 @@ ## # @block-job-complete: # -# Manually trigger completion of an active background block operation. -# This is supported for drive mirroring, where it also switches the -# device to write to the target path only. The ability to complete is -# signaled with a BLOCK_JOB_READY event. +# Manually trigger completion of an active job in the READY or STANDBY +# state. Completing the job in any other state is an error. +# +# This is supported only for drive mirroring, where it also switches +# the device to write to the target path only. Note that drive +# mirroring includes drive-mirror, blockdev-mirror and block-commit +# job (only in case of "active commit", when the node being commited +# is used by the guest). The ability to complete is signaled with a +# BLOCK_JOB_READY event. # # This command completes an active background block operation # synchronously. The ordering of this command's return with the @@ -3017,8 +3022,6 @@ # rerror/werror arguments that were specified when starting the # operation. # -# A cancelled or paused job cannot be completed. -# # @device: The job identifier. This used to be a device name (hence # the name of the parameter), but since QEMU 2.7 it can have other # values. @@ -3035,10 +3038,13 @@ ## # @block-job-dismiss: # -# For jobs that have already concluded, remove them from the -# block-job-query list. This command only needs to be run for jobs -# which were started with QEMU 2.12+ job lifetime management -# semantics. +# Deletes a job that is in the CONCLUDED state. This command only +# needs to be run explicitly for jobs that don't have automatic +# dismiss enabled. In turn, automatic dismiss may be enabled only +# for jobs that have @auto-dismiss option, which are drive-backup, +# blockdev-backup, drive-mirror, blockdev-mirror, block-commit and +# block-stream. @auto-dismiss is enabled by default for these +# jobs. # # This command will refuse to operate on any job that has not yet # reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that @@ -3055,12 +3061,17 @@ ## # @block-job-finalize: # -# Once a job that has manual=true reaches the pending state, it can be -# instructed to finalize any graph changes and do any necessary -# cleanup via this command. For jobs in a transaction, instructing -# one job to finalize will force ALL jobs in the transaction to -# finalize, so it is only necessary to instruct a single member job to -# finalize. +# Instructs all jobs in a transaction (or a single job if it is not +# part of any transaction) to finalize any graph changes and do any +# necessary cleanup. This command requires that all involved jobs are +# in the PENDING state. +# +# For jobs in a transaction, instructing one job to finalize will +# force ALL jobs in the transaction to finalize, so it is only +# necessary to instruct a single member job to finalize. +# +# The command is applicable only to jobs which have @auto-finalize option +# and only when this option is set to false. # # @id: The job identifier. # diff --git a/qapi/job.json b/qapi/job.json index cfc3beedd2..b03f80bc84 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -156,6 +156,9 @@ # This command returns immediately after resuming a paused job. # Resuming an already running job is an error. # +# This command also clears the error status for block-jobs (stream, +# commit, mirror, backup). +# # @id: The job identifier. # # Since: 3.0 @@ -184,7 +187,23 @@ ## # @job-complete: # -# Manually trigger completion of an active job in the READY state. +# Manually trigger completion of an active job in the READY or STANDBY +# state. Completing the job in any other state is an error. +# +# This is supported only for drive mirroring, where it also switches +# the device to write to the target path only. Note that drive +# mirroring includes drive-mirror, blockdev-mirror and block-commit +# job (only in case of "active commit", when the node being commited +# is used by the guest). The ability to complete is signaled with a +# BLOCK_JOB_READY event. +# +# This command completes an active background block operation +# synchronously. The ordering of this command's return with the +# BLOCK_JOB_COMPLETED event is not defined. Note that if an I/O error +# occurs during the processing of this command: 1) the command itself +# will fail; 2) the error will be processed according to the +# rerror/werror arguments that were specified when starting the +# operation. # # @id: The job identifier. # @@ -197,7 +216,11 @@ # # Deletes a job that is in the CONCLUDED state. This command only # needs to be run explicitly for jobs that don't have automatic -# dismiss enabled. +# dismiss enabled. In turn, automatic dismiss may be enabled only +# for jobs that have @auto-dismiss option, which are drive-backup, +# blockdev-backup, drive-mirror, blockdev-mirror, block-commit and +# block-stream. @auto-dismiss is enabled by default for these +# jobs. # # This command will refuse to operate on any job that has not yet # reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that @@ -222,6 +245,9 @@ # force ALL jobs in the transaction to finalize, so it is only # necessary to instruct a single member job to finalize. # +# The command is applicable only to jobs which have @auto-finalize option +# and only when this option is set to false. +# # @id: The identifier of any job in the transaction, or of a job that # is not part of any transaction. # From b836bf2ab68fbc1e253c10bee95fa36399762967 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 9 Apr 2025 11:42:31 +0300 Subject: [PATCH 7/8] qapi/block-core: deprecate some block-job- APIs For change, pause, resume, complete, dismiss and finalize actions corresponding job- and block-job commands are almost equal. The difference is in find_block_job_locked() vs find_job_locked() functions. What's different? 1. find_block_job_locked() checks whether the found job is a block-job. This is OK when moving to more generic API, no needs to document this change. 2. find_block_job_locked() reports DeviceNotActive on failure, when find_job_locked() reports GenericError. So, let's document this difference in deprecated.txt. Still, for dismiss and finalize errors are not documented at all, so be silent in deprecated.txt as well. ACKed-by: Peter Krempa Reviewed-by: Eric Blake Reviewed-by: Markus Armbruster Signed-off-by: Vladimir Sementsov-Ogievskiy Message-ID: <20250409084232.28201-3-vsementsov@yandex-team.ru> --- docs/about/deprecated.rst | 31 +++++++++++++++++++++++++++++++ qapi/block-core.json | 30 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 05381441a9..b0d3ae6b32 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -148,6 +148,37 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +``block-job-pause`` (since 10.1) +'''''''''''''''''''''''''''''''' + +Use ``job-pause`` instead. The only difference is that ``job-pause`` +always reports GenericError on failure when ``block-job-pause`` reports +DeviceNotActive when block-job is not found. + +``block-job-resume`` (since 10.1) +''''''''''''''''''''''''''''''''' + +Use ``job-resume`` instead. The only difference is that ``job-resume`` +always reports GenericError on failure when ``block-job-resume`` reports +DeviceNotActive when block-job is not found. + +``block-job-complete`` (since 10.1) +''''''''''''''''''''''''''''''''''' + +Use ``job-complete`` instead. The only difference is that ``job-complete`` +always reports GenericError on failure when ``block-job-complete`` reports +DeviceNotActive when block-job is not found. + +``block-job-dismiss`` (since 10.1) +'''''''''''''''''''''''''''''''''' + +Use ``job-dismiss`` instead. + +``block-job-finalize`` (since 10.1) +''''''''''''''''''''''''''''''''''' + +Use ``job-finalize`` instead. + ``query-migrationthreads`` (since 9.2) '''''''''''''''''''''''''''''''''''''' diff --git a/qapi/block-core.json b/qapi/block-core.json index 6beab0dc12..22061227ca 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2969,6 +2969,11 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-pause +# instead. +# # Errors: # - If no background operation is active on this device, # DeviceNotActive @@ -2976,6 +2981,7 @@ # Since: 1.3 ## { 'command': 'block-job-pause', 'data': { 'device': 'str' }, + 'features': ['deprecated'], 'allow-preconfig': true } ## @@ -2992,6 +2998,11 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-resume +# instead. +# # Errors: # - If no background operation is active on this device, # DeviceNotActive @@ -2999,6 +3010,7 @@ # Since: 1.3 ## { 'command': 'block-job-resume', 'data': { 'device': 'str' }, + 'features': ['deprecated'], 'allow-preconfig': true } ## @@ -3026,6 +3038,11 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-complete +# instead. +# # Errors: # - If no background operation is active on this device, # DeviceNotActive @@ -3033,6 +3050,7 @@ # Since: 1.3 ## { 'command': 'block-job-complete', 'data': { 'device': 'str' }, + 'features': ['deprecated'], 'allow-preconfig': true } ## @@ -3053,9 +3071,15 @@ # # @id: The job identifier. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-dismiss +# instead. +# # Since: 2.12 ## { 'command': 'block-job-dismiss', 'data': { 'id': 'str' }, + 'features': ['deprecated'], 'allow-preconfig': true } ## @@ -3075,9 +3099,15 @@ # # @id: The job identifier. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-finalize +# instead. +# # Since: 2.12 ## { 'command': 'block-job-finalize', 'data': { 'id': 'str' }, + 'features': ['deprecated'], 'allow-preconfig': true } ## From 3d3911f16b6ceb9ed202aadf2cd75ded516a1a0b Mon Sep 17 00:00:00 2001 From: Raman Dzehtsiar Date: Mon, 14 Apr 2025 11:00:25 +0200 Subject: [PATCH 8/8] blockdev-backup: Add error handling option for copy-before-write jobs This patch extends the blockdev-backup QMP command to allow users to specify how to behave when IO errors occur during copy-before-write operations. Previously, the behavior was fixed and could not be controlled by the user. The new 'on-cbw-error' option can be set to one of two values: - 'break-guest-write': Forwards the IO error to the guest and triggers the on-source-error policy. This preserves snapshot integrity at the expense of guest IO operations. - 'break-snapshot': Allows the guest OS to continue running normally, but invalidates the snapshot and aborts related jobs. This prioritizes guest operation over backup consistency. This enhancement provides more flexibility for backup operations in different environments where requirements for guest availability versus backup consistency may vary. The default behavior remains unchanged to maintain backward compatibility. Signed-off-by: Raman Dzehtsiar Message-ID: <20250414090025.828660-1-Raman.Dzehtsiar@gmail.com> Acked-by: Markus Armbruster [vsementsov: fix long lines] Reviewed-by: Vladimir Sementsov-Ogievskiy Tested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/backup.c | 3 +- block/copy-before-write.c | 2 + block/copy-before-write.h | 1 + block/replication.c | 4 +- blockdev.c | 6 ++ include/block/block_int-global-state.h | 2 + qapi/block-core.json | 4 + tests/qemu-iotests/tests/copy-before-write | 95 +++++++++++++++++++ .../qemu-iotests/tests/copy-before-write.out | 4 +- 9 files changed, 117 insertions(+), 4 deletions(-) diff --git a/block/backup.c b/block/backup.c index 79652bf57b..0151e84395 100644 --- a/block/backup.c +++ b/block/backup.c @@ -361,6 +361,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BackupPerf *perf, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + OnCbwError on_cbw_error, int creation_flags, BlockCompletionFunc *cb, void *opaque, JobTxn *txn, Error **errp) @@ -458,7 +459,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source, - perf->min_cluster_size, &bcs, errp); + perf->min_cluster_size, &bcs, on_cbw_error, errp); if (!cbw) { goto error; } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index fd470f5f92..00af0b18ac 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -551,6 +551,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, bool discard_source, uint64_t min_cluster_size, BlockCopyState **bcs, + OnCbwError on_cbw_error, Error **errp) { BDRVCopyBeforeWriteState *state; @@ -568,6 +569,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, } qdict_put_str(opts, "file", bdrv_get_node_name(source)); qdict_put_str(opts, "target", bdrv_get_node_name(target)); + qdict_put_str(opts, "on-cbw-error", OnCbwError_str(on_cbw_error)); if (min_cluster_size > INT64_MAX) { error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 2a5d4ba693..eb93364e85 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -42,6 +42,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, bool discard_source, uint64_t min_cluster_size, BlockCopyState **bcs, + OnCbwError on_cbw_error, Error **errp); void bdrv_cbw_drop(BlockDriverState *bs); diff --git a/block/replication.c b/block/replication.c index d6625c51fe..07f274de9e 100644 --- a/block/replication.c +++ b/block/replication.c @@ -583,7 +583,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, - BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, + BLOCKDEV_ON_ERROR_REPORT, + ON_CBW_ERROR_BREAK_GUEST_WRITE, + JOB_INTERNAL, backup_job_completed, bs, NULL, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/blockdev.c b/blockdev.c index 1d1f27cfff..818ec42511 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2641,6 +2641,7 @@ static BlockJob *do_backup_common(BackupCommon *backup, BdrvDirtyBitmap *bmap = NULL; BackupPerf perf = { .max_workers = 64 }; int job_flags = JOB_DEFAULT; + OnCbwError on_cbw_error = ON_CBW_ERROR_BREAK_GUEST_WRITE; if (!backup->has_speed) { backup->speed = 0; @@ -2745,6 +2746,10 @@ static BlockJob *do_backup_common(BackupCommon *backup, job_flags |= JOB_MANUAL_DISMISS; } + if (backup->has_on_cbw_error) { + on_cbw_error = backup->on_cbw_error; + } + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, backup->sync, bmap, backup->bitmap_mode, backup->compress, backup->discard_source, @@ -2752,6 +2757,7 @@ static BlockJob *do_backup_common(BackupCommon *backup, &perf, backup->on_source_error, backup->on_target_error, + on_cbw_error, job_flags, NULL, NULL, txn, errp); return job; } diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index eb2d92a226..0d93783763 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -179,6 +179,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, * all ".has_*" fields are ignored. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. + * @on_cbw_error: The action to take upon error in copy-before-write operations. * @creation_flags: Flags that control the behavior of the Job lifetime. * See @BlockJobCreateFlags * @cb: Completion function for the job. @@ -198,6 +199,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BackupPerf *perf, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + OnCbwError on_cbw_error, int creation_flags, BlockCompletionFunc *cb, void *opaque, JobTxn *txn, Error **errp); diff --git a/qapi/block-core.json b/qapi/block-core.json index 22061227ca..91c70e24a7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1602,6 +1602,9 @@ # default 'report' (no limitations, since this applies to a # different block device than @device). # +# @on-cbw-error: policy defining behavior on I/O errors in +# copy-before-write jobs; defaults to break-guest-write. (Since 10.1) +# # @auto-finalize: When false, this job will wait in a PENDING state # after it has finished its work, waiting for @block-job-finalize # before making any block graph changes. When true, this job will @@ -1641,6 +1644,7 @@ '*compress': 'bool', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError', + '*on-cbw-error': 'OnCbwError', '*auto-finalize': 'bool', '*auto-dismiss': 'bool', '*filter-node-name': 'str', '*discard-source': 'bool', diff --git a/tests/qemu-iotests/tests/copy-before-write b/tests/qemu-iotests/tests/copy-before-write index 498c558008..236cb8ac37 100755 --- a/tests/qemu-iotests/tests/copy-before-write +++ b/tests/qemu-iotests/tests/copy-before-write @@ -99,6 +99,68 @@ class TestCbwError(iotests.QMPTestCase): log = iotests.filter_qemu_io(log) return log + def do_cbw_error_via_blockdev_backup(self, on_cbw_error=None): + self.vm.cmd('blockdev-add', { + 'node-name': 'source', + 'driver': iotests.imgfmt, + 'file': { + 'driver': 'file', + 'filename': source_img + } + }) + + self.vm.cmd('blockdev-add', { + 'node-name': 'target', + 'driver': iotests.imgfmt, + 'file': { + 'driver': 'blkdebug', + 'image': { + 'driver': 'file', + 'filename': temp_img + }, + 'inject-error': [ + { + 'event': 'write_aio', + 'errno': 5, + 'immediately': False, + 'once': True + } + ] + } + }) + + blockdev_backup_options = { + 'device': 'source', + 'target': 'target', + 'sync': 'none', + 'job-id': 'job-id', + 'filter-node-name': 'cbw' + } + + if on_cbw_error: + blockdev_backup_options['on-cbw-error'] = on_cbw_error + + self.vm.cmd('blockdev-backup', blockdev_backup_options) + + self.vm.cmd('blockdev-add', { + 'node-name': 'access', + 'driver': 'snapshot-access', + 'file': 'cbw' + }) + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io cbw "write 0 1M"') + self.assert_qmp(result, 'return', '') + + result = self.vm.qmp('human-monitor-command', + command_line='qemu-io access "read 0 1M"') + self.assert_qmp(result, 'return', '') + + self.vm.shutdown() + log = self.vm.get_log() + log = iotests.filter_qemu_io(log) + return log + def test_break_snapshot_on_cbw_error(self): """break-snapshot behavior: Guest write succeed, but further snapshot-read fails, as snapshot is @@ -123,6 +185,39 @@ read failed: Permission denied write failed: Input/output error read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +""") + + def test_break_snapshot_policy_forwarding(self): + """Ensure CBW filter accepts break-snapshot policy + specified in blockdev-backup QMP command. + """ + log = self.do_cbw_error_via_blockdev_backup('break-snapshot') + self.assertEqual(log, """\ +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read failed: Permission denied +""") + + def test_break_guest_write_policy_forwarding(self): + """Ensure CBW filter accepts break-guest-write policy + specified in blockdev-backup QMP command. + """ + log = self.do_cbw_error_via_blockdev_backup('break-guest-write') + self.assertEqual(log, """\ +write failed: Input/output error +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +""") + + def test_default_on_cbw_error_policy_forwarding(self): + """Ensure break-guest-write policy is used by default when + on-cbw-error is not explicitly specified. + """ + log = self.do_cbw_error_via_blockdev_backup() + self.assertEqual(log, """\ +write failed: Input/output error +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) """) def do_cbw_timeout(self, on_cbw_error): diff --git a/tests/qemu-iotests/tests/copy-before-write.out b/tests/qemu-iotests/tests/copy-before-write.out index 89968f35d7..2f7d3902f2 100644 --- a/tests/qemu-iotests/tests/copy-before-write.out +++ b/tests/qemu-iotests/tests/copy-before-write.out @@ -1,5 +1,5 @@ -.... +....... ---------------------------------------------------------------------- -Ran 4 tests +Ran 7 tests OK