From 9b9877ee9f1c27588a286f591852c0b7c0548b6a Mon Sep 17 00:00:00 2001 From: Wenchao Xia Date: Wed, 8 May 2013 18:25:12 +0800 Subject: [PATCH 01/11] block: package preparation code in qmp_transaction() The code before really committing is moved into a function. Most code is simply moved from qmp_transaction(), except that on fail it just returns now. Other code such as input parsing is not touched, to make it easier in review. Signed-off-by: Wenchao Xia Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 141 +++++++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 63 deletions(-) diff --git a/blockdev.c b/blockdev.c index 7c9d8dd0ac..1cb96403e1 100644 --- a/blockdev.c +++ b/blockdev.c @@ -785,6 +785,78 @@ typedef struct BlkTransactionStates { QSIMPLEQ_ENTRY(BlkTransactionStates) entry; } BlkTransactionStates; +static void external_snapshot_prepare(const char *device, + const char *format, + const char *new_image_file, + enum NewImageMode mode, + BlkTransactionStates *states, + Error **errp) +{ + BlockDriver *proto_drv; + BlockDriver *drv; + int flags, ret; + Error *local_err = NULL; + + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + + states->old_bs = bdrv_find(device); + if (!states->old_bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (!bdrv_is_inserted(states->old_bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + return; + } + + if (bdrv_in_use(states->old_bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } + + if (!bdrv_is_read_only(states->old_bs)) { + if (bdrv_flush(states->old_bs)) { + error_set(errp, QERR_IO_ERROR); + return; + } + } + + flags = states->old_bs->open_flags; + + proto_drv = bdrv_find_protocol(new_image_file); + if (!proto_drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + + /* create new image w/backing file */ + if (mode != NEW_IMAGE_MODE_EXISTING) { + bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags, &local_err, false); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + } + + /* We will manually add the backing_hd field to the bs later */ + states->new_bs = bdrv_new(""); + /* TODO Inherit bs->options or only take explicit options with an + * extended QMP command? */ + ret = bdrv_open(states->new_bs, new_image_file, NULL, + flags | BDRV_O_NO_BACKING, drv); + if (ret != 0) { + error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + } +} + /* * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the @@ -792,7 +864,6 @@ typedef struct BlkTransactionStates { */ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) { - int ret = 0; BlockdevActionList *dev_entry = dev_list; BlkTransactionStates *states, *next; Error *local_err = NULL; @@ -806,9 +877,6 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { BlockdevAction *dev_info = NULL; - BlockDriver *proto_drv; - BlockDriver *drv; - int flags; enum NewImageMode mode; const char *new_image_file; const char *device; @@ -831,70 +899,17 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) format = dev_info->blockdev_snapshot_sync->format; } mode = dev_info->blockdev_snapshot_sync->mode; + external_snapshot_prepare(device, format, new_image_file, + mode, states, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto delete_and_fail; + } break; default: abort(); } - drv = bdrv_find_format(format); - if (!drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - states->old_bs = bdrv_find(device); - if (!states->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); - goto delete_and_fail; - } - - if (!bdrv_is_inserted(states->old_bs)) { - error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - goto delete_and_fail; - } - - if (bdrv_in_use(states->old_bs)) { - error_set(errp, QERR_DEVICE_IN_USE, device); - goto delete_and_fail; - } - - if (!bdrv_is_read_only(states->old_bs)) { - if (bdrv_flush(states->old_bs)) { - error_set(errp, QERR_IO_ERROR); - goto delete_and_fail; - } - } - - flags = states->old_bs->open_flags; - - proto_drv = bdrv_find_protocol(new_image_file); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - goto delete_and_fail; - } - - /* create new image w/backing file */ - if (mode != NEW_IMAGE_MODE_EXISTING) { - bdrv_img_create(new_image_file, format, - states->old_bs->filename, - states->old_bs->drv->format_name, - NULL, -1, flags, &local_err, false); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - goto delete_and_fail; - } - } - - /* We will manually add the backing_hd field to the bs later */ - states->new_bs = bdrv_new(""); - /* TODO Inherit bs->options or only take explicit options with an - * extended QMP command? */ - ret = bdrv_open(states->new_bs, new_image_file, NULL, - flags | BDRV_O_NO_BACKING, drv); - if (ret != 0) { - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); - goto delete_and_fail; - } } From e2a31e8798e8246bed8ab396a71cd06bf95edde6 Mon Sep 17 00:00:00 2001 From: Wenchao Xia Date: Wed, 8 May 2013 18:25:13 +0800 Subject: [PATCH 02/11] block: move input parsing code in qmp_transaction() The code is moved into preparation function, and changed a bit to tip more clearly what it is doing. Signed-off-by: Wenchao Xia Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/blockdev.c b/blockdev.c index 1cb96403e1..0115f4609c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -785,10 +785,7 @@ typedef struct BlkTransactionStates { QSIMPLEQ_ENTRY(BlkTransactionStates) entry; } BlkTransactionStates; -static void external_snapshot_prepare(const char *device, - const char *format, - const char *new_image_file, - enum NewImageMode mode, +static void external_snapshot_prepare(BlockdevAction *action, BlkTransactionStates *states, Error **errp) { @@ -796,7 +793,24 @@ static void external_snapshot_prepare(const char *device, BlockDriver *drv; int flags, ret; Error *local_err = NULL; + const char *device; + const char *new_image_file; + const char *format = "qcow2"; + enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + /* get parameters */ + g_assert(action->kind == BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); + + device = action->blockdev_snapshot_sync->device; + new_image_file = action->blockdev_snapshot_sync->snapshot_file; + if (action->blockdev_snapshot_sync->has_format) { + format = action->blockdev_snapshot_sync->format; + } + if (action->blockdev_snapshot_sync->has_mode) { + mode = action->blockdev_snapshot_sync->mode; + } + + /* start processing */ drv = bdrv_find_format(format); if (!drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); @@ -877,10 +891,6 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { BlockdevAction *dev_info = NULL; - enum NewImageMode mode; - const char *new_image_file; - const char *device; - const char *format = "qcow2"; dev_info = dev_entry->value; dev_entry = dev_entry->next; @@ -890,17 +900,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) switch (dev_info->kind) { case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: - device = dev_info->blockdev_snapshot_sync->device; - if (!dev_info->blockdev_snapshot_sync->has_mode) { - dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; - } - new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file; - if (dev_info->blockdev_snapshot_sync->has_format) { - format = dev_info->blockdev_snapshot_sync->format; - } - mode = dev_info->blockdev_snapshot_sync->mode; - external_snapshot_prepare(device, format, new_image_file, - mode, states, &local_err); + external_snapshot_prepare(dev_info, states, errp); if (error_is_set(&local_err)) { error_propagate(errp, local_err); goto delete_and_fail; From 3b0047e86a1c215d830b1ae1da0778db4636b83a Mon Sep 17 00:00:00 2001 From: Wenchao Xia Date: Wed, 8 May 2013 18:25:14 +0800 Subject: [PATCH 03/11] block: package committing code in qmp_transaction() The code is simply moved into a separate function. Signed-off-by: Wenchao Xia Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/blockdev.c b/blockdev.c index 0115f4609c..76f053219a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -871,6 +871,17 @@ static void external_snapshot_prepare(BlockdevAction *action, } } +static void external_snapshot_commit(BlkTransactionStates *states) +{ + /* This removes our old bs from the bdrv_states, and adds the new bs */ + bdrv_append(states->new_bs, states->old_bs); + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + NULL); +} + /* * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the @@ -916,13 +927,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* Now we are going to do the actual pivot. Everything up to this point * is reversible, but we are committed at this point */ QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - /* This removes our old bs from the bdrv_states, and adds the new bs */ - bdrv_append(states->new_bs, states->old_bs); - /* We don't need (or want) to use the transactional - * bdrv_reopen_multiple() across all the entries at once, because we - * don't want to abort all of them if one of them fails the reopen */ - bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, - NULL); + external_snapshot_commit(states); } /* success */ From 96b86bf72de0c6eda2799201517ef32910beb340 Mon Sep 17 00:00:00 2001 From: Wenchao Xia Date: Wed, 8 May 2013 18:25:15 +0800 Subject: [PATCH 04/11] block: package rollback code in qmp_transaction() Signed-off-by: Wenchao Xia Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/blockdev.c b/blockdev.c index 76f053219a..2131e134ba 100644 --- a/blockdev.c +++ b/blockdev.c @@ -882,6 +882,13 @@ static void external_snapshot_commit(BlkTransactionStates *states) NULL); } +static void external_snapshot_abort(BlkTransactionStates *states) +{ + if (states->new_bs) { + bdrv_delete(states->new_bs); + } +} + /* * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the @@ -939,9 +946,7 @@ delete_and_fail: * the original bs for all images */ QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - if (states->new_bs) { - bdrv_delete(states->new_bs); - } + external_snapshot_abort(states); } exit: QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { From ba0c86a34e29b31ef360feda74c94200a5403fdd Mon Sep 17 00:00:00 2001 From: Wenchao Xia Date: Wed, 8 May 2013 18:25:16 +0800 Subject: [PATCH 05/11] block: make all steps in qmp_transaction() as callback Make it easier to add other operations to qmp_transaction() by using callbacks, with external snapshots serving as an example implementation of the callbacks. Signed-off-by: Wenchao Xia Reviewed-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 95 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 24 deletions(-) diff --git a/blockdev.c b/blockdev.c index 2131e134ba..617501c097 100644 --- a/blockdev.c +++ b/blockdev.c @@ -779,14 +779,43 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, /* New and old BlockDriverState structs for group snapshots */ -typedef struct BlkTransactionStates { + +typedef struct BlkTransactionStates BlkTransactionStates; + +/* Only prepare() may fail. In a single transaction, only one of commit() or + abort() will be called, clean() will always be called if it present. */ +typedef struct BdrvActionOps { + /* Size of state struct, in bytes. */ + size_t instance_size; + /* Prepare the work, must NOT be NULL. */ + void (*prepare)(BlkTransactionStates *common, Error **errp); + /* Commit the changes, must NOT be NULL. */ + void (*commit)(BlkTransactionStates *common); + /* Abort the changes on fail, can be NULL. */ + void (*abort)(BlkTransactionStates *common); + /* Clean up resource in the end, can be NULL. */ + void (*clean)(BlkTransactionStates *common); +} BdrvActionOps; + +/* + * This structure must be arranged as first member in child type, assuming + * that compiler will also arrange it to the same address with parent instance. + * Later it will be used in free(). + */ +struct BlkTransactionStates { + BlockdevAction *action; + const BdrvActionOps *ops; + QSIMPLEQ_ENTRY(BlkTransactionStates) entry; +}; + +/* external snapshot private data */ +typedef struct ExternalSnapshotStates { + BlkTransactionStates common; BlockDriverState *old_bs; BlockDriverState *new_bs; - QSIMPLEQ_ENTRY(BlkTransactionStates) entry; -} BlkTransactionStates; +} ExternalSnapshotStates; -static void external_snapshot_prepare(BlockdevAction *action, - BlkTransactionStates *states, +static void external_snapshot_prepare(BlkTransactionStates *common, Error **errp) { BlockDriver *proto_drv; @@ -797,6 +826,9 @@ static void external_snapshot_prepare(BlockdevAction *action, const char *new_image_file; const char *format = "qcow2"; enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + ExternalSnapshotStates *states = + DO_UPCAST(ExternalSnapshotStates, common, common); + BlockdevAction *action = common->action; /* get parameters */ g_assert(action->kind == BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); @@ -871,8 +903,11 @@ static void external_snapshot_prepare(BlockdevAction *action, } } -static void external_snapshot_commit(BlkTransactionStates *states) +static void external_snapshot_commit(BlkTransactionStates *common) { + ExternalSnapshotStates *states = + DO_UPCAST(ExternalSnapshotStates, common, common); + /* This removes our old bs from the bdrv_states, and adds the new bs */ bdrv_append(states->new_bs, states->old_bs); /* We don't need (or want) to use the transactional @@ -882,13 +917,24 @@ static void external_snapshot_commit(BlkTransactionStates *states) NULL); } -static void external_snapshot_abort(BlkTransactionStates *states) +static void external_snapshot_abort(BlkTransactionStates *common) { + ExternalSnapshotStates *states = + DO_UPCAST(ExternalSnapshotStates, common, common); if (states->new_bs) { bdrv_delete(states->new_bs); } } +static const BdrvActionOps actions[] = { + [BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { + .instance_size = sizeof(ExternalSnapshotStates), + .prepare = external_snapshot_prepare, + .commit = external_snapshot_commit, + .abort = external_snapshot_abort, + }, +}; + /* * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail * then we do not pivot any of the devices in the group, and abandon the @@ -909,32 +955,28 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { BlockdevAction *dev_info = NULL; + const BdrvActionOps *ops; dev_info = dev_entry->value; dev_entry = dev_entry->next; - states = g_malloc0(sizeof(BlkTransactionStates)); + assert(dev_info->kind < ARRAY_SIZE(actions)); + + ops = &actions[dev_info->kind]; + states = g_malloc0(ops->instance_size); + states->ops = ops; + states->action = dev_info; QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); - switch (dev_info->kind) { - case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC: - external_snapshot_prepare(dev_info, states, errp); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - goto delete_and_fail; - } - break; - default: - abort(); + states->ops->prepare(states, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto delete_and_fail; } - } - - /* Now we are going to do the actual pivot. Everything up to this point - * is reversible, but we are committed at this point */ QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - external_snapshot_commit(states); + states->ops->commit(states); } /* success */ @@ -946,10 +988,15 @@ delete_and_fail: * the original bs for all images */ QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - external_snapshot_abort(states); + if (states->ops->abort) { + states->ops->abort(states); + } } exit: QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { + if (states->ops->clean) { + states->ops->clean(states); + } g_free(states); } } From c8a83e8500329d82f1282c4905be11a39078007f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 13 May 2013 10:43:43 +0200 Subject: [PATCH 06/11] blockdev: Rename BlockdevAction -> TransactionAction There's no reason to restrict transactions to operations related to block devices, so rename the type now before schema introspection stops us from doing so. Also change the schema documentation of 'transaction' to not refer to block devices or snapshots any more. Signed-off-by: Kevin Wolf --- blockdev.c | 22 +++++++++++----------- qapi-schema.json | 21 ++++++++++----------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/blockdev.c b/blockdev.c index 617501c097..d1ec99af73 100644 --- a/blockdev.c +++ b/blockdev.c @@ -750,8 +750,8 @@ void do_commit(Monitor *mon, const QDict *qdict) static void blockdev_do_action(int kind, void *data, Error **errp) { - BlockdevAction action; - BlockdevActionList list; + TransactionAction action; + TransactionActionList list; action.kind = kind; action.data = data; @@ -773,8 +773,8 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, .has_mode = has_mode, .mode = mode, }; - blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot, - errp); + blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, + &snapshot, errp); } @@ -803,7 +803,7 @@ typedef struct BdrvActionOps { * Later it will be used in free(). */ struct BlkTransactionStates { - BlockdevAction *action; + TransactionAction *action; const BdrvActionOps *ops; QSIMPLEQ_ENTRY(BlkTransactionStates) entry; }; @@ -828,10 +828,10 @@ static void external_snapshot_prepare(BlkTransactionStates *common, enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ExternalSnapshotStates *states = DO_UPCAST(ExternalSnapshotStates, common, common); - BlockdevAction *action = common->action; + TransactionAction *action = common->action; /* get parameters */ - g_assert(action->kind == BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); + g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); device = action->blockdev_snapshot_sync->device; new_image_file = action->blockdev_snapshot_sync->snapshot_file; @@ -927,7 +927,7 @@ static void external_snapshot_abort(BlkTransactionStates *common) } static const BdrvActionOps actions[] = { - [BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { + [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotStates), .prepare = external_snapshot_prepare, .commit = external_snapshot_commit, @@ -940,9 +940,9 @@ static const BdrvActionOps actions[] = { * then we do not pivot any of the devices in the group, and abandon the * snapshots */ -void qmp_transaction(BlockdevActionList *dev_list, Error **errp) +void qmp_transaction(TransactionActionList *dev_list, Error **errp) { - BlockdevActionList *dev_entry = dev_list; + TransactionActionList *dev_entry = dev_list; BlkTransactionStates *states, *next; Error *local_err = NULL; @@ -954,7 +954,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* We don't do anything in this loop that commits us to the snapshot */ while (NULL != dev_entry) { - BlockdevAction *dev_info = NULL; + TransactionAction *dev_info = NULL; const BdrvActionOps *ops; dev_info = dev_entry->value; diff --git a/qapi-schema.json b/qapi-schema.json index 664b31f035..ef1f657efb 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1609,12 +1609,12 @@ '*mode': 'NewImageMode' } } ## -# @BlockdevAction +# @TransactionAction # # A discriminated record of operations that can be performed with # @transaction. ## -{ 'union': 'BlockdevAction', +{ 'union': 'TransactionAction', 'data': { 'blockdev-snapshot-sync': 'BlockdevSnapshot' } } @@ -1622,25 +1622,24 @@ ## # @transaction # -# Atomically operate on a group of one or more block devices. If -# any operation fails, then the entire set of actions will be -# abandoned and the appropriate error returned. The only operation -# supported is currently blockdev-snapshot-sync. +# Executes a number of transactionable QMP commands atomically. If any +# operation fails, then the entire set of actions will be abandoned and the +# appropriate error returned. # # List of: -# @BlockdevAction: information needed for the device snapshot +# @TransactionAction: information needed for the respective operation # # Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound +# Errors depend on the operations of the transaction # -# Note: The transaction aborts on the first failure. Therefore, there will -# be only one device or snapshot file returned in an error condition, and +# Note: The transaction aborts on the first failure. Therefore, there will be +# information on only one failed operation returned in an error condition, and # subsequent actions will not have been attempted. # # Since 1.1 ## { 'command': 'transaction', - 'data': { 'actions': [ 'BlockdevAction' ] } } + 'data': { 'actions': [ 'TransactionAction' ] } } ## # @blockdev-snapshot-sync From a00e81e98f71c91a35b96bcd8ae431a86f42378d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 13 May 2013 15:31:34 +0200 Subject: [PATCH 07/11] qemu-io: Fix 'map' output The output of the 'map' command in qemu-io used to directly resemble bdrv_is_allocated() and could contain many lines for small chunks that all have the same allocation status. After this patch, they will be coalesced into a single output line for a large chunk. As a side effect, the command gains some error handling. Signed-off-by: Kevin Wolf --- qemu-io.c | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 475a8bd034..5e6680b247 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -1635,12 +1635,43 @@ static const cmdinfo_t alloc_cmd = { .oneline = "checks if a sector is present in the file", }; + +static int map_is_allocated(int64_t sector_num, int64_t nb_sectors, int64_t *pnum) +{ + int num, num_checked; + int ret, firstret; + + num_checked = MIN(nb_sectors, INT_MAX); + ret = bdrv_is_allocated(bs, sector_num, num_checked, &num); + if (ret < 0) { + return ret; + } + + firstret = ret; + *pnum = num; + + while (nb_sectors > 0 && ret == firstret) { + sector_num += num; + nb_sectors -= num; + + num_checked = MIN(nb_sectors, INT_MAX); + ret = bdrv_is_allocated(bs, sector_num, num_checked, &num); + if (ret == firstret) { + *pnum += num; + } else { + break; + } + } + + return firstret; +} + static int map_f(int argc, char **argv) { int64_t offset; int64_t nb_sectors; char s1[64]; - int num, num_checked; + int64_t num; int ret; const char *retstr; @@ -1648,12 +1679,17 @@ static int map_f(int argc, char **argv) nb_sectors = bs->total_sectors; do { - num_checked = MIN(nb_sectors, INT_MAX); - ret = bdrv_is_allocated(bs, offset, num_checked, &num); + ret = map_is_allocated(offset, nb_sectors, &num); + if (ret < 0) { + error_report("Failed to get allocation status: %s", strerror(-ret)); + return 0; + } + retstr = ret ? " allocated" : "not allocated"; cvtstr(offset << 9ULL, s1, sizeof(s1)); - printf("[% 24" PRId64 "] % 8d/% 8d sectors %s at offset %s (%d)\n", - offset << 9ULL, num, num_checked, retstr, s1, ret); + printf("[% 24" PRId64 "] % 8" PRId64 "/% 8" PRId64 " sectors %s " + "at offset %s (%d)\n", + offset << 9ULL, num, nb_sectors, retstr, s1, ret); offset += num; nb_sectors -= num; From c93331c9146719958a4b102435fcd0566da45ea2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 13 May 2013 14:00:15 +0200 Subject: [PATCH 08/11] qcow2.py: Subcommand for changing header fields Signed-off-by: Kevin Wolf --- tests/qemu-iotests/qcow2.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py index fecf5b9a59..44a2b45641 100755 --- a/tests/qemu-iotests/qcow2.py +++ b/tests/qemu-iotests/qcow2.py @@ -149,6 +149,22 @@ def cmd_dump_header(fd): h.dump() h.dump_extensions() +def cmd_set_header(fd, name, value): + try: + value = int(value, 0) + except: + print "'%s' is not a valid number" % value + sys.exit(1) + + fields = (field[2] for field in QcowHeader.fields) + if not name in fields: + print "'%s' is not a known header field" % name + sys.exit(1) + + h = QcowHeader(fd) + h.__dict__[name] = value + h.update(fd) + def cmd_add_header_ext(fd, magic, data): try: magic = int(magic, 0) @@ -205,6 +221,7 @@ def cmd_set_feature_bit(fd, group, bit): cmds = [ [ 'dump-header', cmd_dump_header, 0, 'Dump image header and header extensions' ], + [ 'set-header', cmd_set_header, 2, 'Set a field in the header'], [ 'add-header-ext', cmd_add_header_ext, 2, 'Add a header extension' ], [ 'del-header-ext', cmd_del_header_ext, 1, 'Delete a header extension' ], [ 'set-feature-bit', cmd_set_feature_bit, 2, 'Set a feature bit'], From bd91ecbf5b43b52321c4d938e16a612b9c68bf06 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 13 May 2013 14:11:13 +0200 Subject: [PATCH 09/11] qemu-iotests: Try creating huge qcow2 image It's supposed to fail gracefully instead of segfaulting. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/054 | 58 ++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/054.out | 10 +++++++ tests/qemu-iotests/common.rc | 2 +- tests/qemu-iotests/group | 1 + 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 tests/qemu-iotests/054 create mode 100644 tests/qemu-iotests/054.out diff --git a/tests/qemu-iotests/054 b/tests/qemu-iotests/054 new file mode 100755 index 0000000000..b36042958c --- /dev/null +++ b/tests/qemu-iotests/054 @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Test huge qcow2 images +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +echo +echo "creating too large image (1 EB)" +_make_test_img $((1024*1024))T + +echo +echo "creating too large image (1 EB) using qcow2.py" +_make_test_img 4G +./qcow2.py $TEST_IMG set-header size $((1024 ** 6)) +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/054.out b/tests/qemu-iotests/054.out new file mode 100644 index 0000000000..0b2fe301ea --- /dev/null +++ b/tests/qemu-iotests/054.out @@ -0,0 +1,10 @@ +QA output created by 054 + +creating too large image (1 EB) +qemu-img: The image size is too large for file format 'qcow2' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1152921504606846976 + +creating too large image (1 EB) using qcow2.py +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 +qemu-img: Could not open 'TEST_DIR/t.qcow2': File too large +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 31eb62b604..e9ba3586b5 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -167,7 +167,7 @@ _cleanup_test_img() _check_test_img() { - $QEMU_IMG check "$@" -f $IMGFMT $TEST_IMG 2>&1 | \ + $QEMU_IMG check "$@" -f $IMGFMT $TEST_IMG 2>&1 | _filter_testdir | \ sed -e '/allocated.*fragmented.*compressed clusters/d' \ -e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \ -e '/Image end offset: [0-9]\+/d' diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index bf944d9123..387b050987 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -60,3 +60,4 @@ #051 rw auto 052 rw auto backing 053 rw auto +054 rw auto From b84c4586234b26ccc875595713f6f4491e5b3385 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 17 May 2013 15:51:25 +0200 Subject: [PATCH 10/11] coroutine: protect global pool with a mutex The coroutine freelist is a global pool of unused coroutines. It avoids the setup/teardown overhead associated with the coroutine lifecycle. Since the pool is global, we need to synchronize access so that coroutines can be used outside the BQL. Signed-off-by: Stefan Hajnoczi --- qemu-coroutine.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/qemu-coroutine.c b/qemu-coroutine.c index 25a14c605d..60ac79e680 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -14,6 +14,7 @@ #include "trace.h" #include "qemu-common.h" +#include "qemu/thread.h" #include "block/coroutine.h" #include "block/coroutine_int.h" @@ -23,6 +24,7 @@ enum { }; /** Free list to speed up creation */ +static QemuMutex pool_lock; static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); static unsigned int pool_size; @@ -30,11 +32,15 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) { Coroutine *co; + qemu_mutex_lock(&pool_lock); co = QSLIST_FIRST(&pool); if (co) { QSLIST_REMOVE_HEAD(&pool, pool_next); pool_size--; - } else { + } + qemu_mutex_unlock(&pool_lock); + + if (!co) { co = qemu_coroutine_new(); } @@ -44,17 +50,25 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) static void coroutine_delete(Coroutine *co) { + qemu_mutex_lock(&pool_lock); if (pool_size < POOL_MAX_SIZE) { QSLIST_INSERT_HEAD(&pool, co, pool_next); co->caller = NULL; pool_size++; + qemu_mutex_unlock(&pool_lock); return; } + qemu_mutex_unlock(&pool_lock); qemu_coroutine_delete(co); } -static void __attribute__((destructor)) coroutine_cleanup(void) +static void __attribute__((constructor)) coroutine_pool_init(void) +{ + qemu_mutex_init(&pool_lock); +} + +static void __attribute__((destructor)) coroutine_pool_cleanup(void) { Coroutine *co; Coroutine *tmp; @@ -63,6 +77,8 @@ static void __attribute__((destructor)) coroutine_cleanup(void) QSLIST_REMOVE_HEAD(&pool, pool_next); qemu_coroutine_delete(co); } + + qemu_mutex_destroy(&pool_lock); } static void coroutine_swap(Coroutine *from, Coroutine *to) From 02ffb504485f0920cfc75a0982a602f824a9a4f4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 17 May 2013 15:51:26 +0200 Subject: [PATCH 11/11] coroutine: stop using AioContext in CoQueue qemu_co_queue_next(&queue) arranges that the next queued coroutine is run at a later point in time. This deferred restart is useful because the caller may not want to transfer control yet. This behavior was implemented using QEMUBH in the past, which meant that CoQueue (and hence CoMutex and CoRwlock) had a dependency on the AioContext event loop. This hidden dependency causes trouble when we move to a world with multiple event loops - now qemu_co_queue_next() needs to know which event loop to schedule the QEMUBH in. After pondering how to stash AioContext I realized the best solution is to not use AioContext at all. This patch implements the deferred restart behavior purely in terms of coroutines and no longer uses QEMUBH. Here is how it works: Each Coroutine has a wakeup queue that starts out empty. When qemu_co_queue_next() is called, the next coroutine is added to our wakeup queue. The wakeup queue is processed when we yield or terminate. Signed-off-by: Stefan Hajnoczi --- include/block/coroutine_int.h | 4 +++ qemu-coroutine-lock.c | 58 +++++++++++++---------------------- qemu-coroutine.c | 3 ++ trace-events | 2 +- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/include/block/coroutine_int.h b/include/block/coroutine_int.h index 17eb71e723..f133d65af8 100644 --- a/include/block/coroutine_int.h +++ b/include/block/coroutine_int.h @@ -38,6 +38,9 @@ struct Coroutine { void *entry_arg; Coroutine *caller; QSLIST_ENTRY(Coroutine) pool_next; + + /* Coroutines that should be woken up when we yield or terminate */ + QTAILQ_HEAD(, Coroutine) co_queue_wakeup; QTAILQ_ENTRY(Coroutine) co_queue_next; }; @@ -45,5 +48,6 @@ Coroutine *qemu_coroutine_new(void); void qemu_coroutine_delete(Coroutine *co); CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, CoroutineAction action); +void coroutine_fn qemu_co_queue_run_restart(Coroutine *co); #endif diff --git a/qemu-coroutine-lock.c b/qemu-coroutine-lock.c index 86efe1f86a..d9fea4989d 100644 --- a/qemu-coroutine-lock.c +++ b/qemu-coroutine-lock.c @@ -26,39 +26,11 @@ #include "block/coroutine.h" #include "block/coroutine_int.h" #include "qemu/queue.h" -#include "block/aio.h" #include "trace.h" -/* Coroutines are awoken from a BH to allow the current coroutine to complete - * its flow of execution. The BH may run after the CoQueue has been destroyed, - * so keep BH data in a separate heap-allocated struct. - */ -typedef struct { - QEMUBH *bh; - QTAILQ_HEAD(, Coroutine) entries; -} CoQueueNextData; - -static void qemu_co_queue_next_bh(void *opaque) -{ - CoQueueNextData *data = opaque; - Coroutine *next; - - trace_qemu_co_queue_next_bh(); - while ((next = QTAILQ_FIRST(&data->entries))) { - QTAILQ_REMOVE(&data->entries, next, co_queue_next); - qemu_coroutine_enter(next, NULL); - } - - qemu_bh_delete(data->bh); - g_slice_free(CoQueueNextData, data); -} - void qemu_co_queue_init(CoQueue *queue) { QTAILQ_INIT(&queue->entries); - - /* This will be exposed to callers once there are multiple AioContexts */ - queue->ctx = qemu_get_aio_context(); } void coroutine_fn qemu_co_queue_wait(CoQueue *queue) @@ -77,23 +49,37 @@ void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue) assert(qemu_in_coroutine()); } -static bool qemu_co_queue_do_restart(CoQueue *queue, bool single) +/** + * qemu_co_queue_run_restart: + * + * Enter each coroutine that was previously marked for restart by + * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is + * invoked by the core coroutine code when the current coroutine yields or + * terminates. + */ +void qemu_co_queue_run_restart(Coroutine *co) { Coroutine *next; - CoQueueNextData *data; + + trace_qemu_co_queue_run_restart(co); + while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) { + QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + } +} + +static bool qemu_co_queue_do_restart(CoQueue *queue, bool single) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *next; if (QTAILQ_EMPTY(&queue->entries)) { return false; } - data = g_slice_new(CoQueueNextData); - data->bh = aio_bh_new(queue->ctx, qemu_co_queue_next_bh, data); - QTAILQ_INIT(&data->entries); - qemu_bh_schedule(data->bh); - while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) { QTAILQ_REMOVE(&queue->entries, next, co_queue_next); - QTAILQ_INSERT_TAIL(&data->entries, next, co_queue_next); + QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next); trace_qemu_co_queue_next(next); if (single) { break; diff --git a/qemu-coroutine.c b/qemu-coroutine.c index 60ac79e680..423430d3a0 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -45,6 +45,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) } co->entry = entry; + QTAILQ_INIT(&co->co_queue_wakeup); return co; } @@ -87,6 +88,8 @@ static void coroutine_swap(Coroutine *from, Coroutine *to) ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD); + qemu_co_queue_run_restart(to); + switch (ret) { case COROUTINE_YIELD: return; diff --git a/trace-events b/trace-events index 9c73931f37..f51408aeb2 100644 --- a/trace-events +++ b/trace-events @@ -825,7 +825,7 @@ qemu_coroutine_yield(void *from, void *to) "from %p to %p" qemu_coroutine_terminate(void *co) "self %p" # qemu-coroutine-lock.c -qemu_co_queue_next_bh(void) "" +qemu_co_queue_run_restart(void *co) "co %p" qemu_co_queue_next(void *nxt) "next %p" qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p" qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p"