From a9a443c874cf92b583852cf8ea5b75df8580df32 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 1 Nov 2013 17:35:29 +0800 Subject: [PATCH 01/37] qapi: Fix comment for create-type to match code. Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- qapi-schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi-schema.json b/qapi-schema.json index 81a375ba0..76c98a726 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -227,7 +227,7 @@ ## # @ImageInfoSpecificVmdk: # -# @create_type: The create type of VMDK image +# @create-type: The create type of VMDK image # # @cid: Content id of image # From 64815e2a966f0a3f18818b9d542f1ef02dc992a2 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 2 Nov 2013 14:52:11 +0100 Subject: [PATCH 02/37] qemu-iotests: Filter out actual image size in 067 The actual size of the image file may differ depending on the Linux kernel currently running on the host. Filtering out this value makes this test pass in such cases. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/067 | 2 +- tests/qemu-iotests/067.out | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 79dc38bc0..d025192c8 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -45,7 +45,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 4bb9ff965..8d271cc41 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virtio-blk-pci,drive=disk,id=virtio0 QMP_VERSION {"return": {}} -{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}} @@ -24,7 +24,7 @@ QMP_VERSION Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk QMP_VERSION {"return": {}} -{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} +{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} @@ -44,7 +44,7 @@ Testing: QMP_VERSION {"return": {}} {"return": "OK\r\n"} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} @@ -64,14 +64,14 @@ Testing: QMP_VERSION {"return": {}} {"return": {}} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": "/machine/peripheral/virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "RESET"} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} From 511018e4b49a5f0f1c7b051bb79fe8eccbb590fe Mon Sep 17 00:00:00 2001 From: Andreas Tobler Date: Thu, 31 Oct 2013 22:41:46 +0100 Subject: [PATCH 03/37] block/raw-posix: fix FreeBSD compilation The below patch is needed to compile qemu trunk on FreeBSD with gcc48, clang will fail.... ;). Host x84_64-freebsd. Signed-off-by: Andreas Tobler Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index f6d48bbdb..ace5d962e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1842,7 +1842,8 @@ static BlockDriver bdrv_host_cdrom = { #endif /* __linux__ */ #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) -static int cdrom_open(BlockDriverState *bs, QDict *options, int flags) +static int cdrom_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { BDRVRawState *s = bs->opaque; Error *local_err = NULL; From 0084043888f6773d905c1b5d644b89c79d1c7714 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:36 -0400 Subject: [PATCH 04/37] block: qemu-iotests, add quotes to $TEST_IMG usage io pattern tests The usage of $TEST_IMG was not properly quoted everywhere in common.pattern. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/common.pattern | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern index 00e0f605f..ddfbca1b7 100644 --- a/tests/qemu-iotests/common.pattern +++ b/tests/qemu-iotests/common.pattern @@ -28,7 +28,7 @@ function do_is_allocated() { } function is_allocated() { - do_is_allocated "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_is_allocated "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function do_io() { @@ -46,18 +46,18 @@ function do_io() { } function io_pattern() { - do_io "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io() { local start=$2 local pattern=$(( (start >> 9) % 256 )) - do_io "$@" $pattern | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" $pattern | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io_zero() { - do_io "$@" 0 | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" 0 | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io_test() { @@ -117,8 +117,8 @@ function io_test2() { echo === Clusters to be compressed [3] io_pattern writev $((offset + 8 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165 - mv $TEST_IMG $TEST_IMG.orig - $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c $TEST_IMG.orig $TEST_IMG + mv "$TEST_IMG" "$TEST_IMG.orig" + $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG" # Write the used clusters echo === Used clusters [1] From 0018c03f5d37f3a7df23eef0e9f1e6e6189ed634 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:37 -0400 Subject: [PATCH 05/37] block: qemu-iotests, fix _make_test_img() to work with spaced pathnames _make_test_img() currently works with spaced pathnames only when not specifying a backing file. This fixes it so that the backing file argument is properly quoted. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/common.rc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 4e826040d..d24de2c95 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -111,6 +111,8 @@ _make_test_img() local image_size=$* local optstr="" local img_name="" + local use_backing=0 + local backing_file="" if [ -n "$TEST_IMG_FILE" ]; then img_name=$TEST_IMG_FILE @@ -123,7 +125,8 @@ _make_test_img() fi if [ "$1" = "-b" ]; then - extra_img_options="$1 $2" + use_backing=1 + backing_file=$2 image_size=$3 fi if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then @@ -135,7 +138,13 @@ _make_test_img() fi # XXX(hch): have global image options? - $QEMU_IMG create -f $IMGFMT $extra_img_options $img_name $image_size 2>&1 | \ + ( + if [ $use_backing = 1 ]; then + $QEMU_IMG create -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1 + else + $QEMU_IMG create -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1 + fi + ) | \ sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ From 02cde1688131090d3abb2f210e30a44d96804167 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:38 -0400 Subject: [PATCH 06/37] block: qemu-iotests, add quotes to $TEST_IMG.base usage in 017 $TEST_IMG.base is used unquoted. Add quotes so that pathnames with spaces are supported. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/017 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index 45f2c0b05..aba3faf71 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -66,7 +66,7 @@ echo "Creating test image with backing file" echo TEST_IMG=$TEST_IMG_SAVE -_make_test_img -b $TEST_IMG.base 6G +_make_test_img -b "$TEST_IMG.base" 6G echo "Filling test image" echo From 3cbe3e8d25529ecb4e1838d1c6856e199b3a66d4 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:39 -0400 Subject: [PATCH 07/37] block: qemu-iotests, add quotes to $TEST_IMG usage in 019 There were still instances of $TEST_IMG not being properly quoted. This was in the usage of a string built up for a 'for' loop; modify the loop so we can quote $TEST_IMG properly. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/019 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019 index cd3582cf6..5bb18d0c0 100755 --- a/tests/qemu-iotests/019 +++ b/tests/qemu-iotests/019 @@ -90,12 +90,12 @@ mv "$TEST_IMG" "$TEST_IMG.orig" # Test the conversion twice: One test with the old-style -B option and another # one with -o backing_file -for backing_option in "-B $TEST_IMG.base" "-o backing_file=$TEST_IMG.base"; do +for backing_option in "-B " "-o backing_file="; do echo - echo Testing conversion with $backing_option | _filter_testdir | _filter_imgfmt + echo Testing conversion with $backing_option$TEST_IMG.base | _filter_testdir | _filter_imgfmt echo - $QEMU_IMG convert -O $IMGFMT $backing_option "$TEST_IMG.orig" "$TEST_IMG" + $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" "$TEST_IMG.orig" "$TEST_IMG" echo "Checking if backing clusters are allocated when they shouldn't" echo From f897e3939cbbcec26e99415a499654ae7ae09b64 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:40 -0400 Subject: [PATCH 08/37] block: qemu-iotests, removes duplicate double quotes in 039 Test 039 had $TEST_IMG with duplicate double quotes - remove duplicate. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/039 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index f85b4ce63..8bade92a8 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -54,7 +54,7 @@ echo "== Checking that image is clean on shutdown ==" IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img $size -$QEMU_IO -c "write -P 0x5a 0 512" ""$TEST_IMG"" | _filter_qemu_io +$QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io # The dirty bit must not be set ./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features From 1ab391938d29ebc3956d07b622c987c350434eaf Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:41 -0400 Subject: [PATCH 09/37] block: qemu-iotests, add quotes to $TEST_IMG usage for 051 There were still a couple of instances of unquoted usage of $TEST_IMG and $TEST_IMG.orig. Quoted these so they will not fail on pathnames with spaces in them. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/051 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 356c3756f..0a4971d43 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -64,9 +64,9 @@ function run_qemu() size=128M _make_test_img $size -cp $TEST_IMG $TEST_IMG.orig -mv $TEST_IMG $TEST_IMG.base -_make_test_img -b $TEST_IMG.base $size +cp "$TEST_IMG" "$TEST_IMG.orig" +mv "$TEST_IMG" "$TEST_IMG.base" +_make_test_img -b "$TEST_IMG.base" $size echo echo === Unknown option === @@ -81,7 +81,7 @@ echo echo === Overriding backing file === echo -echo "info block" | run_qemu -drive file=$TEST_IMG,driver=qcow2,backing.file.filename=$TEST_IMG.orig -nodefaults +echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults echo echo === Enable and disable lazy refcounting on the command line, plus some invalid values === From db5dc557288fb1b7a17ffabe3b92f299fc2dc187 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 31 Oct 2013 11:57:42 -0400 Subject: [PATCH 10/37] block: qemu-iotests, add quotes to $TEST_IMG usage in 061 When creating images with backing files in the test, the backing file argument was not quoted properly. This caused the test to fail when run from a pathname with a space. Pass the backing argument in with the -b option to _make_test_img, so it can be properly quoted. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/061 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index fa9319da2..e42f9bd5e 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -163,7 +163,7 @@ echo "=== Testing zero expansion on backed image ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "read -P 0x2a 0 128k" -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" _check_test_img @@ -174,7 +174,7 @@ echo "=== Testing zero expansion on backed inactive clusters ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG snapshot -c foo "$TEST_IMG" $QEMU_IO -c "write -P 0x42 0 128k" "$TEST_IMG" | _filter_qemu_io @@ -190,7 +190,7 @@ echo "=== Testing zero expansion on backed image with shared L2 table ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG snapshot -c foo "$TEST_IMG" $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" From ec9c10d29c6bb5613a680af62f5825d3bb2d31d4 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Oct 2013 14:54:30 +0100 Subject: [PATCH 11/37] blockdev: fix drive_init() opts and bs_opts leaks These memory leaks also make drive_add if=none,id=drive0 without a file= option leak the options list. This keeps ID "drive0" around forever. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake --- blockdev.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/blockdev.c b/blockdev.c index b260477f1..86e6bffdc 100644 --- a/blockdev.c +++ b/blockdev.c @@ -341,7 +341,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, qemu_opts_absorb_qdict(opts, bs_opts, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } if (id) { @@ -361,7 +361,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if ((buf = qemu_opt_get(opts, "discard")) != NULL) { if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) { error_setg(errp, "invalid discard option"); - return NULL; + goto early_err; } } @@ -383,7 +383,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, /* this is the default */ } else { error_setg(errp, "invalid aio option"); - return NULL; + goto early_err; } } #endif @@ -393,13 +393,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, error_printf("Supported formats:"); bdrv_iterate_format(bdrv_format_print, NULL); error_printf("\n"); - return NULL; + goto early_err; } drv = bdrv_find_format(buf); if (!drv) { error_setg(errp, "'%s' invalid format", buf); - return NULL; + goto early_err; } } @@ -435,20 +435,20 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if (!check_throttle_config(&cfg, &error)) { error_propagate(errp, error); - return NULL; + goto early_err; } on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { error_setg(errp, "werror is not supported by this bus type"); - return NULL; + goto early_err; } on_write_error = parse_block_error_action(buf, 0, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } } @@ -456,13 +456,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) { error_report("rerror is not supported by this bus type"); - return NULL; + goto early_err; } on_read_error = parse_block_error_action(buf, 1, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } } @@ -491,6 +491,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if (has_driver_specific_opts) { file = NULL; } else { + QDECREF(bs_opts); + qemu_opts_del(opts); return dinfo; } } @@ -529,12 +531,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, return dinfo; err: - qemu_opts_del(opts); - QDECREF(bs_opts); bdrv_unref(dinfo->bdrv); g_free(dinfo->id); QTAILQ_REMOVE(&drives, dinfo, next); g_free(dinfo); +early_err: + QDECREF(bs_opts); + qemu_opts_del(opts); return NULL; } From 0d1aa05e9eba2437fdcdfbaa846c850c986bf7c6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Oct 2013 14:54:32 +0100 Subject: [PATCH 12/37] libqtest: rename qmp() to qmp_discard_response() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Existing qmp() callers do not expect a response object. In order to implement real QMP test cases it will be necessary to inspect the response object. Rename qmp() to qmp_discard_response(). Later patches will introduce a qmp() function that returns the response object and tests that use it. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Andreas Färber --- tests/boot-order-test.c | 4 ++-- tests/fdc-test.c | 15 +++++++++------ tests/ide-test.c | 10 ++++++---- tests/libqtest.c | 10 +++++----- tests/libqtest.h | 20 ++++++++++---------- 5 files changed, 32 insertions(+), 27 deletions(-) diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c index 4b233d0b2..da158c32b 100644 --- a/tests/boot-order-test.c +++ b/tests/boot-order-test.c @@ -41,12 +41,12 @@ static void test_a_boot_order(const char *machine, qtest_start(args); actual = read_boot_order(); g_assert_cmphex(actual, ==, expected_boot); - qmp("{ 'execute': 'system_reset' }"); + qmp_discard_response("{ 'execute': 'system_reset' }"); /* * system_reset only requests reset. We get a RESET event after * the actual reset completes. Need to wait for that. */ - qmp(""); /* HACK: wait for event */ + qmp_discard_response(""); /* HACK: wait for event */ actual = read_boot_order(); g_assert_cmphex(actual, ==, expected_reboot); qtest_quit(global_qtest); diff --git a/tests/fdc-test.c b/tests/fdc-test.c index fd198dcf8..38b5b178d 100644 --- a/tests/fdc-test.c +++ b/tests/fdc-test.c @@ -290,10 +290,12 @@ static void test_media_insert(void) /* Insert media in drive. DSKCHK should not be reset until a step pulse * is sent. */ - qmp("{'execute':'change', 'arguments':{ 'device':'floppy0', " - "'target': '%s' }}", test_image); - qmp(""); /* ignore event (FIXME open -> open transition?!) */ - qmp(""); /* ignore event */ + qmp_discard_response("{'execute':'change', 'arguments':{" + " 'device':'floppy0', 'target': '%s' }}", + test_image); + qmp_discard_response(""); /* ignore event + (FIXME open -> open transition?!) */ + qmp_discard_response(""); /* ignore event */ dir = inb(FLOPPY_BASE + reg_dir); assert_bit_set(dir, DSKCHG); @@ -322,8 +324,9 @@ static void test_media_change(void) /* Eject the floppy and check that DSKCHG is set. Reading it out doesn't * reset the bit. */ - qmp("{'execute':'eject', 'arguments':{ 'device':'floppy0' }}"); - qmp(""); /* ignore event */ + qmp_discard_response("{'execute':'eject', 'arguments':{" + " 'device':'floppy0' }}"); + qmp_discard_response(""); /* ignore event */ dir = inb(FLOPPY_BASE + reg_dir); assert_bit_set(dir, DSKCHG); diff --git a/tests/ide-test.c b/tests/ide-test.c index bc824a814..d5cec5a1f 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -460,8 +460,9 @@ static void test_flush(void) tmp_path); /* Delay the completion of the flush request until we explicitly do it */ - qmp("{'execute':'human-monitor-command', 'arguments': { " - "'command-line': 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }"); + qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {" + " 'command-line':" + " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }"); /* FLUSH CACHE command on device 0*/ outb(IDE_BASE + reg_device, 0); @@ -473,8 +474,9 @@ static void test_flush(void) assert_bit_clear(data, DF | ERR | DRQ); /* Complete the command */ - qmp("{'execute':'human-monitor-command', 'arguments': { " - "'command-line': 'qemu-io ide0-hd0 \"resume A\"'} }"); + qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {" + " 'command-line':" + " 'qemu-io ide0-hd0 \"resume A\"'} }"); /* Check registers */ data = inb(IDE_BASE + reg_device); diff --git a/tests/libqtest.c b/tests/libqtest.c index bb82069f5..dc4c9833b 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -151,8 +151,8 @@ QTestState *qtest_init(const char *extra_args) } /* Read the QMP greeting and then do the handshake */ - qtest_qmp(s, ""); - qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }"); + qtest_qmp_discard_response(s, ""); + qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }"); if (getenv("QTEST_STOP")) { kill(qtest_qemu_pid(s), SIGSTOP); @@ -291,7 +291,7 @@ redo: return words; } -void qtest_qmpv(QTestState *s, const char *fmt, va_list ap) +void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap) { bool has_reply = false; int nesting = 0; @@ -326,12 +326,12 @@ void qtest_qmpv(QTestState *s, const char *fmt, va_list ap) } } -void qtest_qmp(QTestState *s, const char *fmt, ...) +void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - qtest_qmpv(s, fmt, ap); + qtest_qmpv_discard_response(s, fmt, ap); va_end(ap); } diff --git a/tests/libqtest.h b/tests/libqtest.h index a6e99bd02..4f1b06007 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -44,23 +44,23 @@ QTestState *qtest_init(const char *extra_args); void qtest_quit(QTestState *s); /** - * qtest_qmp: + * qtest_qmp_discard_response: * @s: #QTestState instance to operate on. * @fmt...: QMP message to send to qemu * - * Sends a QMP message to QEMU + * Sends a QMP message to QEMU and consumes the response. */ -void qtest_qmp(QTestState *s, const char *fmt, ...); +void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...); /** - * qtest_qmpv: + * qtest_qmpv_discard_response: * @s: #QTestState instance to operate on. * @fmt: QMP message to send to QEMU * @ap: QMP message arguments * - * Sends a QMP message to QEMU. + * Sends a QMP message to QEMU and consumes the response. */ -void qtest_qmpv(QTestState *s, const char *fmt, va_list ap); +void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap); /** * qtest_get_irq: @@ -331,17 +331,17 @@ static inline void qtest_end(void) } /** - * qmp: + * qmp_discard_response: * @fmt...: QMP message to send to qemu * - * Sends a QMP message to QEMU + * Sends a QMP message to QEMU and consumes the response. */ -static inline void qmp(const char *fmt, ...) +static inline void qmp_discard_response(const char *fmt, ...) { va_list ap; va_start(ap, fmt); - qtest_qmpv(global_qtest, fmt, ap); + qtest_qmpv_discard_response(global_qtest, fmt, ap); va_end(ap); } From 0c460dac03e7919079525d8e24ef2c4c607c219d Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Oct 2013 14:54:33 +0100 Subject: [PATCH 13/37] libqtest: add qmp(fmt, ...) -> QDict* function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a qtest qmp() function that returns the response object. This allows test cases to verify the result or to check for error responses. It also allows waiting for QMP events. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Andreas Färber --- tests/libqtest.c | 66 ++++++++++++++++++++++++++++++++++++++---------- tests/libqtest.h | 37 +++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/tests/libqtest.c b/tests/libqtest.c index dc4c9833b..83424c3c6 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -30,6 +30,8 @@ #include "qemu/compiler.h" #include "qemu/osdep.h" +#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #define MAX_IRQ 256 @@ -291,16 +293,38 @@ redo: return words; } -void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap) +typedef struct { + JSONMessageParser parser; + QDict *response; +} QMPResponseParser; + +static void qmp_response(JSONMessageParser *parser, QList *tokens) { - bool has_reply = false; - int nesting = 0; + QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser); + QObject *obj; + + obj = json_parser_parse(tokens, NULL); + if (!obj) { + fprintf(stderr, "QMP JSON response parsing failed\n"); + exit(1); + } + + g_assert(qobject_type(obj) == QTYPE_QDICT); + g_assert(!qmp->response); + qmp->response = (QDict *)obj; +} + +QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap) +{ + QMPResponseParser qmp; /* Send QMP request */ socket_sendf(s->qmp_fd, fmt, ap); /* Receive reply */ - while (!has_reply || nesting > 0) { + qmp.response = NULL; + json_message_parser_init(&qmp.parser, qmp_response); + while (!qmp.response) { ssize_t len; char c; @@ -314,25 +338,39 @@ void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap) exit(1); } - switch (c) { - case '{': - nesting++; - has_reply = true; - break; - case '}': - nesting--; - break; - } + json_message_parser_feed(&qmp.parser, &c, 1); } + json_message_parser_destroy(&qmp.parser); + + return qmp.response; +} + +QDict *qtest_qmp(QTestState *s, const char *fmt, ...) +{ + va_list ap; + QDict *response; + + va_start(ap, fmt); + response = qtest_qmpv(s, fmt, ap); + va_end(ap); + return response; +} + +void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap) +{ + QDict *response = qtest_qmpv(s, fmt, ap); + QDECREF(response); } void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...) { va_list ap; + QDict *response; va_start(ap, fmt); - qtest_qmpv_discard_response(s, fmt, ap); + response = qtest_qmpv(s, fmt, ap); va_end(ap); + QDECREF(response); } const char *qtest_get_arch(void) diff --git a/tests/libqtest.h b/tests/libqtest.h index 4f1b06007..9deebdcdf 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -22,6 +22,7 @@ #include #include #include +#include "qapi/qmp/qdict.h" typedef struct QTestState QTestState; @@ -52,6 +53,15 @@ void qtest_quit(QTestState *s); */ void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...); +/** + * qtest_qmp: + * @s: #QTestState instance to operate on. + * @fmt...: QMP message to send to qemu + * + * Sends a QMP message to QEMU and returns the response. + */ +QDict *qtest_qmp(QTestState *s, const char *fmt, ...); + /** * qtest_qmpv_discard_response: * @s: #QTestState instance to operate on. @@ -62,6 +72,16 @@ void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...); */ void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap); +/** + * qtest_qmpv: + * @s: #QTestState instance to operate on. + * @fmt: QMP message to send to QEMU + * @ap: QMP message arguments + * + * Sends a QMP message to QEMU and returns the response. + */ +QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap); + /** * qtest_get_irq: * @s: #QTestState instance to operate on. @@ -330,6 +350,23 @@ static inline void qtest_end(void) global_qtest = NULL; } +/** + * qmp: + * @fmt...: QMP message to send to qemu + * + * Sends a QMP message to QEMU and returns the response. + */ +static inline QDict *qmp(const char *fmt, ...) +{ + va_list ap; + QDict *response; + + va_start(ap, fmt); + response = qtest_qmpv(global_qtest, fmt, ap); + va_end(ap); + return response; +} + /** * qmp_discard_response: * @fmt...: QMP message to send to qemu From 7ceeedd016facf8d58e14a0d1417fa7225d71072 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Oct 2013 14:54:34 +0100 Subject: [PATCH 14/37] blockdev-test: add test case for drive_add duplicate IDs The following should work: (qemu) drive_add if=none,id=drive0 (qemu) drive_del drive0 (qemu) drive_add if=none,id=drive0 Previous versions of QEMU produced a duplicate ID error because drive_add leaked the options. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake --- tests/Makefile | 2 ++ tests/blockdev-test.c | 59 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 tests/blockdev-test.c diff --git a/tests/Makefile b/tests/Makefile index f414f2c80..973f497e6 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -68,6 +68,7 @@ check-qtest-i386-y += tests/rtc-test$(EXESUF) check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) check-qtest-i386-y += tests/qom-test$(EXESUF) +check-qtest-i386-y += tests/blockdev-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -200,6 +201,7 @@ tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) tests/qom-test$(EXESUF): tests/qom-test.o +tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o # QTest rules diff --git a/tests/blockdev-test.c b/tests/blockdev-test.c new file mode 100644 index 000000000..c940e0069 --- /dev/null +++ b/tests/blockdev-test.c @@ -0,0 +1,59 @@ +/* + * blockdev.c test cases + * + * Copyright (C) 2013 Red Hat Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include +#include "libqtest.h" + +static void test_drive_add_empty(void) +{ + QDict *response; + const char *response_return; + + /* Start with an empty drive */ + qtest_start("-drive if=none,id=drive0"); + + /* Delete the drive */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_del drive0\"" + "}}"); + g_assert(response); + response_return = qdict_get_try_str(response, "return"); + g_assert(response_return); + g_assert(strcmp(response_return, "") == 0); + QDECREF(response); + + /* Ensure re-adding the drive works - there should be no duplicate ID error + * because the old drive must be gone. + */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_add 0 if=none,id=drive0\"" + "}}"); + g_assert(response); + response_return = qdict_get_try_str(response, "return"); + g_assert(response_return); + g_assert(strcmp(response_return, "OK\r\n") == 0); + QDECREF(response); + + qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/qmp/drive_add_empty", test_drive_add_empty); + + return g_test_run(); +} From 43cd209803d6cffb1e1a028c9ff2fd0ff4fce954 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 30 Oct 2013 14:54:35 +0100 Subject: [PATCH 15/37] qdev-monitor-test: add device_add leak test cases Ensure that the device_add error code path deletes device objects. Failure to do so not only leaks the objects but can also keep other objects (like drive or netdev) alive due to qdev properties holding references. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake --- tests/Makefile | 2 + tests/qdev-monitor-test.c | 81 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/qdev-monitor-test.c diff --git a/tests/Makefile b/tests/Makefile index 973f497e6..379cdd9ad 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -69,6 +69,7 @@ check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) check-qtest-i386-y += tests/qom-test$(EXESUF) check-qtest-i386-y += tests/blockdev-test$(EXESUF) +check-qtest-i386-y += tests/qdev-monitor-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -202,6 +203,7 @@ tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) tests/qom-test$(EXESUF): tests/qom-test.o tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y) +tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o # QTest rules diff --git a/tests/qdev-monitor-test.c b/tests/qdev-monitor-test.c new file mode 100644 index 000000000..33a8ea4b9 --- /dev/null +++ b/tests/qdev-monitor-test.c @@ -0,0 +1,81 @@ +/* + * qdev-monitor.c test cases + * + * Copyright (C) 2013 Red Hat Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include +#include "libqtest.h" +#include "qapi/qmp/qjson.h" + +static void test_device_add(void) +{ + QDict *response; + QDict *error; + + qtest_start("-drive if=none,id=drive0"); + + /* Make device_add fail. If this leaks the virtio-blk-pci device then a + * reference to drive0 will also be held (via qdev properties). + */ + response = qmp("{\"execute\": \"device_add\"," + " \"arguments\": {" + " \"driver\": \"virtio-blk-pci\"," + " \"drive\": \"drive0\"" + "}}"); + g_assert(response); + error = qdict_get_qdict(response, "error"); + g_assert(!strcmp(qdict_get_try_str(error, "class") ?: "", + "GenericError")); + g_assert(!strcmp(qdict_get_try_str(error, "desc") ?: "", + "Device initialization failed.")); + QDECREF(response); + + /* Delete the drive */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_del drive0\"" + "}}"); + g_assert(response); + g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "(null)", "")); + QDECREF(response); + + /* Try to re-add the drive. This fails with duplicate IDs if a leaked + * virtio-blk-pci exists that holds a reference to the old drive0. + */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_add pci-addr=auto if=none,id=drive0\"" + "}}"); + g_assert(response); + g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "", + "OK\r\n")); + QDECREF(response); + + qtest_end(); +} + +int main(int argc, char **argv) +{ + const char *arch = qtest_get_arch(); + + /* Check architecture */ + if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) { + g_test_message("Skipping test for non-x86\n"); + return 0; + } + + /* Run the tests */ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/qmp/device_add", test_device_add); + + return g_test_run(); +} From 17826bc159893300a05551d8a830275c5ecf0092 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 5 Nov 2013 20:03:33 +0100 Subject: [PATCH 16/37] block: Save errno before error_setg_errno error_setg_errno() may overwrite errno; therefore, its value should be read before calling that function and not afterwards. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 58efb5b4e..0e96a223a 100644 --- a/block.c +++ b/block.c @@ -1084,8 +1084,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, snprintf(backing_filename, sizeof(backing_filename), "%s", filename); } else if (!realpath(filename, backing_filename)) { - error_setg_errno(errp, errno, "Could not resolve path '%s'", filename); ret = -errno; + error_setg_errno(errp, errno, "Could not resolve path '%s'", filename); goto fail; } From 0173e7bbf3c892abd4e35507ddeb2f23c1a44829 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 21 Oct 2013 16:00:18 +0200 Subject: [PATCH 17/37] block/vpc: fix virtual size for images created with disk2vhd Signed-off-by: Peter Lieven Signed-off-by: Stefan Hajnoczi --- block/vpc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/vpc.c b/block/vpc.c index 627d11cb9..577cc4599 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -211,6 +211,15 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; + /* images created with disk2vhd report a far higher virtual size + * than expected with the cyls * heads * sectors_per_cyl formula. + * use the footer->size instead if the image was created with + * disk2vhd. + */ + if (!strncmp(footer->creator_app, "d2v", 4)) { + bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE; + } + /* Allow a maximum disk size of approximately 2 TB */ if (bs->total_sectors >= 65535LL * 255 * 255) { ret = -EFBIG; From 6e9d290bf62406098ca3d7bf3796463681ed3c39 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:38 -0400 Subject: [PATCH 18/37] block: vhdx - minor comments and typo correction. Just a couple of minor comments to help note where allocated buffers are freed, and a typo fix. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 6 ++++-- block/vhdx.h | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 6cb04122b..b497c274d 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -6,9 +6,9 @@ * Authors: * Jeff Cody * - * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 * by Microsoft: - * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. @@ -264,6 +264,7 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) uint64_t h2_seq = 0; uint8_t *buffer; + /* header1 & header2 are freed in vhdx_close() */ header1 = qemu_blockalign(bs, sizeof(VHDXHeader)); header2 = qemu_blockalign(bs, sizeof(VHDXHeader)); @@ -790,6 +791,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + /* s->bat is freed in vhdx_close() */ s->bat = qemu_blockalign(bs, s->bat_rt.length); ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); diff --git a/block/vhdx.h b/block/vhdx.h index fb687ed2d..9eb6b9723 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -6,9 +6,9 @@ * Authors: * Jeff Cody * - * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 * by Microsoft: - * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. @@ -116,7 +116,7 @@ typedef struct QEMU_PACKED VHDXHeader { valid. */ uint16_t log_version; /* version of the log format. Mustn't be zero, unless log_guid is also zero */ - uint16_t version; /* version of th evhdx file. Currently, + uint16_t version; /* version of the vhdx file. Currently, only supported version is "1" */ uint32_t log_length; /* length of the log. Must be multiple of 1MB */ From 4f18b7824ab5eda9fe051f5b24e90e5f34d08a23 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:39 -0400 Subject: [PATCH 19/37] block: vhdx - add header update capability. This adds the ability to update the headers in a VHDX image, including generating a new MS-compatible GUID. As VHDX depends on uuid.h, VHDX is now a configurable build option. If VHDX support is enabled, that will also enable uuid as well. The default is to have VHDX enabled. To enable/disable VHDX: --enable-vhdx, --disable-vhdx Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/Makefile.objs | 2 +- block/vhdx.c | 161 +++++++++++++++++++++++++++++++++++++++++++- block/vhdx.h | 14 +++- configure | 24 +++++++ 4 files changed, 196 insertions(+), 5 deletions(-) diff --git a/block/Makefile.objs b/block/Makefile.objs index 3bb85b535..e7214de12 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o v block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-y += vhdx.o +block-obj-$(CONFIG_VHDX) += vhdx.o block-obj-y += parallels.o blkdebug.o blkverify.o block-obj-y += snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o diff --git a/block/vhdx.c b/block/vhdx.c index b497c274d..7b94c42ee 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -22,6 +22,7 @@ #include "block/vhdx.h" #include "migration/migration.h" +#include /* Several metadata and region table data entries are identified by * guids in a MS-specific GUID format. */ @@ -157,12 +158,41 @@ typedef struct BDRVVHDXState { VHDXBatEntry *bat; uint64_t bat_offset; + MSGUID session_guid; + + VHDXParentLocatorHeader parent_header; VHDXParentLocatorEntry *parent_entries; Error *migration_blocker; } BDRVVHDXState; +/* Calculates new checksum. + * + * Zero is substituted during crc calculation for the original crc field + * crc_offset: byte offset in buf of the buffer crc + * buf: buffer pointer + * size: size of buffer (must be > crc_offset+4) + * + * Note: The resulting checksum is in the CPU endianness, not necessarily + * in the file format endianness (LE). Any header export to disk should + * make sure that vhdx_header_le_export() is used to convert to the + * correct endianness + */ +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) +{ + uint32_t crc; + + assert(buf != NULL); + assert(size > (crc_offset + sizeof(crc))); + + memset(buf + crc_offset, 0, sizeof(crc)); + crc = crc32c(0xffffffff, buf, size); + memcpy(buf + crc_offset, &crc, sizeof(crc)); + + return crc; +} + uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, int crc_offset) { @@ -213,6 +243,19 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) } +/* + * This generates a UUID that is compliant with the MS GUIDs used + * in the VHDX spec (and elsewhere). + */ +void vhdx_guid_generate(MSGUID *guid) +{ + uuid_t uuid; + assert(guid != NULL); + + uuid_generate(uuid); + memcpy(guid, uuid, sizeof(MSGUID)); +} + /* * Per the MS VHDX Specification, for every VHDX file: * - The header section is fixed size - 1 MB @@ -251,6 +294,113 @@ static void vhdx_header_le_import(VHDXHeader *h) le64_to_cpus(&h->log_offset); } +/* All VHDX structures on disk are little endian */ +static void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h) +{ + assert(orig_h != NULL); + assert(new_h != NULL); + + new_h->signature = cpu_to_le32(orig_h->signature); + new_h->checksum = cpu_to_le32(orig_h->checksum); + new_h->sequence_number = cpu_to_le64(orig_h->sequence_number); + + new_h->file_write_guid = orig_h->file_write_guid; + new_h->data_write_guid = orig_h->data_write_guid; + new_h->log_guid = orig_h->log_guid; + + cpu_to_leguids(&new_h->file_write_guid); + cpu_to_leguids(&new_h->data_write_guid); + cpu_to_leguids(&new_h->log_guid); + + new_h->log_version = cpu_to_le16(orig_h->log_version); + new_h->version = cpu_to_le16(orig_h->version); + new_h->log_length = cpu_to_le32(orig_h->log_length); + new_h->log_offset = cpu_to_le64(orig_h->log_offset); +} + +/* Update the VHDX headers + * + * This follows the VHDX spec procedures for header updates. + * + * - non-current header is updated with largest sequence number + */ +static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid) +{ + int ret = 0; + int hdr_idx = 0; + uint64_t header_offset = VHDX_HEADER1_OFFSET; + + VHDXHeader *active_header; + VHDXHeader *inactive_header; + VHDXHeader header_le; + uint8_t *buffer; + + /* operate on the non-current header */ + if (s->curr_header == 0) { + hdr_idx = 1; + header_offset = VHDX_HEADER2_OFFSET; + } + + active_header = s->headers[s->curr_header]; + inactive_header = s->headers[hdr_idx]; + + inactive_header->sequence_number = active_header->sequence_number + 1; + + /* a new file guid must be generated before any file write, including + * headers */ + inactive_header->file_write_guid = s->session_guid; + + /* a new data guid only needs to be generated before any guest-visible + * writes (i.e. something observable via virtual disk read) */ + if (generate_data_write_guid) { + vhdx_guid_generate(&inactive_header->data_write_guid); + } + + /* the header checksum is not over just the packed size of VHDXHeader, + * but rather over the entire 'reserved' range for the header, which is + * 4KB (VHDX_HEADER_SIZE). */ + + buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); + /* we can't assume the extra reserved bytes are 0 */ + ret = bdrv_pread(bs->file, header_offset, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto exit; + } + /* overwrite the actual VHDXHeader portion */ + memcpy(buffer, inactive_header, sizeof(VHDXHeader)); + inactive_header->checksum = + vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, + offsetof(VHDXHeader, checksum)); + vhdx_header_le_export(inactive_header, &header_le); + ret = bdrv_pwrite_sync(bs->file, header_offset, &header_le, + sizeof(VHDXHeader)); + if (ret < 0) { + goto exit; + } + s->curr_header = hdr_idx; + +exit: + qemu_vfree(buffer); + return ret; +} + +/* + * The VHDX spec calls for header updates to be performed twice, so that both + * the current and non-current header have valid info + */ +static int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid) +{ + int ret; + + ret = vhdx_update_header(bs, s, generate_data_write_guid); + if (ret < 0) { + return ret; + } + ret = vhdx_update_header(bs, s, generate_data_write_guid); + return ret; +} /* opens the specified header block from the VHDX file header section */ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) @@ -742,6 +892,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + /* This is used for any header updates, for the file_write_guid. + * The spec dictates that a new value should be used for the first + * header update */ + vhdx_guid_generate(&s->session_guid); + ret = vhdx_parse_header(bs, s); if (ret) { goto fail; @@ -804,8 +959,10 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, } if (flags & BDRV_O_RDWR) { - ret = -ENOTSUP; - goto fail; + ret = vhdx_update_headers(bs, s, false); + if (ret < 0) { + goto fail; + } } /* TODO: differencing files, write */ diff --git a/block/vhdx.h b/block/vhdx.h index 9eb6b9723..403f76645 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -67,7 +67,7 @@ typedef struct VHDXFileIdentifier { * Microsoft is not just 16 bytes though - it is a structure that is defined, * so we need to follow it here so that endianness does not trip us up */ -typedef struct MSGUID { +typedef struct QEMU_PACKED MSGUID { uint32_t data1; uint16_t data2; uint16_t data3; @@ -309,17 +309,27 @@ typedef struct QEMU_PACKED VHDXParentLocatorEntry { /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ +void vhdx_guid_generate(MSGUID *guid); + +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, int crc_offset); bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); -static void leguid_to_cpus(MSGUID *guid) +static inline void leguid_to_cpus(MSGUID *guid) { le32_to_cpus(&guid->data1); le16_to_cpus(&guid->data2); le16_to_cpus(&guid->data3); } +static inline void cpu_to_leguids(MSGUID *guid) +{ + cpu_to_le32s(&guid->data1); + cpu_to_le16s(&guid->data2); + cpu_to_le16s(&guid->data3); +} + #endif diff --git a/configure b/configure index 9addff190..8e7f356dc 100755 --- a/configure +++ b/configure @@ -247,6 +247,7 @@ gtk="" gtkabi="2.0" tpm="no" libssh2="" +vhdx="" # parse CC options first for opt do @@ -972,6 +973,10 @@ for opt do ;; --enable-libssh2) libssh2="yes" ;; + --enable-vhdx) vhdx="yes" + ;; + --disable-vhdx) vhdx="no" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -1204,6 +1209,8 @@ echo " --gcov=GCOV use specified gcov [$gcov_tool]" echo " --enable-tpm enable TPM support" echo " --disable-libssh2 disable ssh block device support" echo " --enable-libssh2 enable ssh block device support" +echo " --disable-vhdx disables support for the Microsoft VHDX image format" +echo " --enable-vhdx enable support for the Microsoft VHDX image format" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2004,6 +2011,18 @@ EOF fi fi +if test "$vhdx" = "yes" ; then + if test "$uuid" = "no" ; then + error_exit "uuid required for VHDX support" + fi +elif test "$vhdx" != "no" ; then + if test "$uuid" = "yes" ; then + vhdx=yes + else + vhdx=no + fi +fi + ########################################## # xfsctl() probe, used for raw-posix if test "$xfs" != "no" ; then @@ -3747,6 +3766,7 @@ echo "TPM support $tpm" echo "libssh2 support $libssh2" echo "TPM passthrough $tpm_passthrough" echo "QOM debugging $qom_cast_debug" +echo "vhdx $vhdx" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -4141,6 +4161,10 @@ if test "$virtio_blk_data_plane" = "yes" ; then echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak fi +if test "$vhdx" = "yes" ; then + echo "CONFIG_VHDX=y" >> $config_host_mak +fi + # USB host support if test "$libusb" = "yes"; then echo "HOST_USB=libusb legacy" >> $config_host_mak From 28541d46280733b9afe7b106a3a6665275a45e5f Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:40 -0400 Subject: [PATCH 20/37] block: vhdx code movement - VHDXMetadataEntries and BDRVVHDXState to header. In preparation for VHDX log support, move these structures to the header. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 52 ---------------------------------------------------- block/vhdx.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 7b94c42ee..2c921cff4 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -105,16 +105,6 @@ static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7, META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ META_PHYS_SECTOR_SIZE_PRESENT) -typedef struct VHDXMetadataEntries { - VHDXMetadataTableEntry file_parameters_entry; - VHDXMetadataTableEntry virtual_disk_size_entry; - VHDXMetadataTableEntry page83_data_entry; - VHDXMetadataTableEntry logical_sector_size_entry; - VHDXMetadataTableEntry phys_sector_size_entry; - VHDXMetadataTableEntry parent_locator_entry; - uint16_t present; -} VHDXMetadataEntries; - typedef struct VHDXSectorInfo { uint32_t bat_idx; /* BAT entry index */ @@ -125,48 +115,6 @@ typedef struct VHDXSectorInfo { uint64_t block_offset; /* block offset, in bytes */ } VHDXSectorInfo; - - -typedef struct BDRVVHDXState { - CoMutex lock; - - int curr_header; - VHDXHeader *headers[2]; - - VHDXRegionTableHeader rt; - VHDXRegionTableEntry bat_rt; /* region table for the BAT */ - VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ - - VHDXMetadataTableHeader metadata_hdr; - VHDXMetadataEntries metadata_entries; - - VHDXFileParameters params; - uint32_t block_size; - uint32_t block_size_bits; - uint32_t sectors_per_block; - uint32_t sectors_per_block_bits; - - uint64_t virtual_disk_size; - uint32_t logical_sector_size; - uint32_t physical_sector_size; - - uint64_t chunk_ratio; - uint32_t chunk_ratio_bits; - uint32_t logical_sector_size_bits; - - uint32_t bat_entries; - VHDXBatEntry *bat; - uint64_t bat_offset; - - MSGUID session_guid; - - - VHDXParentLocatorHeader parent_header; - VHDXParentLocatorEntry *parent_entries; - - Error *migration_blocker; -} BDRVVHDXState; - /* Calculates new checksum. * * Zero is substituted during crc calculation for the original crc field diff --git a/block/vhdx.h b/block/vhdx.h index 403f76645..2f7461d67 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -308,6 +308,54 @@ typedef struct QEMU_PACKED VHDXParentLocatorEntry { /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ +typedef struct VHDXMetadataEntries { + VHDXMetadataTableEntry file_parameters_entry; + VHDXMetadataTableEntry virtual_disk_size_entry; + VHDXMetadataTableEntry page83_data_entry; + VHDXMetadataTableEntry logical_sector_size_entry; + VHDXMetadataTableEntry phys_sector_size_entry; + VHDXMetadataTableEntry parent_locator_entry; + uint16_t present; +} VHDXMetadataEntries; + +typedef struct BDRVVHDXState { + CoMutex lock; + + int curr_header; + VHDXHeader *headers[2]; + + VHDXRegionTableHeader rt; + VHDXRegionTableEntry bat_rt; /* region table for the BAT */ + VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ + + VHDXMetadataTableHeader metadata_hdr; + VHDXMetadataEntries metadata_entries; + + VHDXFileParameters params; + uint32_t block_size; + uint32_t block_size_bits; + uint32_t sectors_per_block; + uint32_t sectors_per_block_bits; + + uint64_t virtual_disk_size; + uint32_t logical_sector_size; + uint32_t physical_sector_size; + + uint64_t chunk_ratio; + uint32_t chunk_ratio_bits; + uint32_t logical_sector_size_bits; + + uint32_t bat_entries; + VHDXBatEntry *bat; + uint64_t bat_offset; + + MSGUID session_guid; + + VHDXParentLocatorHeader parent_header; + VHDXParentLocatorEntry *parent_entries; + + Error *migration_blocker; +} BDRVVHDXState; void vhdx_guid_generate(MSGUID *guid); From 625565d27e8d7c7f7238ccd118a2cd49c1c52963 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:41 -0400 Subject: [PATCH 21/37] block: vhdx - log support struct and defines This adds some magic number defines, and internal structure definitions for VHDX log replay support. The struct VHDXLogEntries does not reflect an on-disk data structure, and thus does not need to be packed. Some minor code style fixes are applied as well. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.h | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/block/vhdx.h b/block/vhdx.h index 2f7461d67..154c55a16 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -30,12 +30,12 @@ * 0.........64KB...........128KB........192KB..........256KB................1MB */ -#define VHDX_HEADER_BLOCK_SIZE (64*1024) +#define VHDX_HEADER_BLOCK_SIZE (64 * 1024) #define VHDX_FILE_ID_OFFSET 0 -#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1) -#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2) -#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3) +#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) +#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) +#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) /* @@ -77,10 +77,10 @@ typedef struct QEMU_PACKED MSGUID { #define guid_eq(a, b) \ (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) -#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk - is only 582 bytes, for purposes of crc - the header is the first 4KB of the 64KB - block */ +#define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk + is only 582 bytes, for purposes of crc + the header is the first 4KB of the 64KB + block */ /* The full header is 4KB, although the actual header data is much smaller. * But for the checksum calculation, it is over the entire 4KB structure, @@ -92,7 +92,7 @@ typedef struct QEMU_PACKED VHDXHeader { VHDX file has 2 of these headers, and only the header with the highest sequence number is valid */ - MSGUID file_write_guid; /* 128 bit unique identifier. Must be + MSGUID file_write_guid; /* 128 bit unique identifier. Must be updated to new, unique value before the first modification is made to file */ @@ -151,7 +151,10 @@ typedef struct QEMU_PACKED VHDXRegionTableEntry { /* ---- LOG ENTRY STRUCTURES ---- */ +#define VHDX_LOG_MIN_SIZE (1024 * 1024) +#define VHDX_LOG_SECTOR_SIZE 4096 #define VHDX_LOG_HDR_SIZE 64 +#define VHDX_LOG_SIGNATURE 0x65676f6c typedef struct QEMU_PACKED VHDXLogEntryHeader { uint32_t signature; /* "loge" in ASCII */ uint32_t checksum; /* CRC-32C hash of the 64KB table */ @@ -174,7 +177,8 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader { } VHDXLogEntryHeader; #define VHDX_LOG_DESC_SIZE 32 - +#define VHDX_LOG_DESC_SIGNATURE 0x63736564 +#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a typedef struct QEMU_PACKED VHDXLogDescriptor { uint32_t signature; /* "zero" or "desc" in ASCII */ union { @@ -194,6 +198,7 @@ typedef struct QEMU_PACKED VHDXLogDescriptor { vhdx_log_entry_header */ } VHDXLogDescriptor; +#define VHDX_LOG_DATA_SIGNATURE 0x61746164 typedef struct QEMU_PACKED VHDXLogDataSector { uint32_t data_signature; /* "data" in ASCII */ uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ @@ -219,12 +224,12 @@ typedef struct QEMU_PACKED VHDXLogDataSector { #define SB_BLOCK_PRESENT 6 /* per the spec */ -#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23) +#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state other bits are reserved */ #define VHDX_BAT_STATE_BIT_MASK 0x07 -#define VHDX_BAT_FILE_OFF_BITS (64-44) +#define VHDX_BAT_FILE_OFF_BITS (64 - 44) typedef uint64_t VHDXBatEntry; /* ---- METADATA REGION STRUCTURES ---- */ @@ -252,8 +257,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry { metadata region */ /* note: if length = 0, so is offset */ uint32_t length; /* length of metadata. <= 1MB. */ - uint32_t data_bits; /* least-significant 3 bits are flags, the - rest are reserved (see above) */ + uint32_t data_bits; /* least-significant 3 bits are flags, + the rest are reserved (see above) */ uint32_t reserved2; } VHDXMetadataTableEntry; @@ -265,8 +270,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry { typedef struct QEMU_PACKED VHDXFileParameters { uint32_t block_size; /* size of each payload block, always power of 2, <= 256MB and >= 1MB. */ - uint32_t data_bits; /* least-significant 2 bits are flags, the rest - are reserved (see above) */ + uint32_t data_bits; /* least-significant 2 bits are flags, + the rest are reserved (see above) */ } VHDXFileParameters; typedef struct QEMU_PACKED VHDXVirtualDiskSize { @@ -318,6 +323,13 @@ typedef struct VHDXMetadataEntries { uint16_t present; } VHDXMetadataEntries; +typedef struct VHDXLogEntries { + uint64_t offset; + uint64_t length; + uint32_t head; + uint32_t tail; +} VHDXLogEntries; + typedef struct BDRVVHDXState { CoMutex lock; @@ -351,6 +363,8 @@ typedef struct BDRVVHDXState { MSGUID session_guid; + VHDXLogEntries log; + VHDXParentLocatorHeader parent_header; VHDXParentLocatorEntry *parent_entries; From 0f48e8f0978afe0bd44c63749e7df6411da6c437 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:42 -0400 Subject: [PATCH 22/37] block: vhdx - break endian translation functions out This moves the endian translation functions out from the vhdx.c source, into a separate source file. In addition to the previously defined endian functions, new endian translation functions for log support are added as well. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/Makefile.objs | 2 +- block/vhdx-endian.c | 141 ++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.c | 43 -------------- block/vhdx.h | 8 +++ 4 files changed, 150 insertions(+), 44 deletions(-) create mode 100644 block/vhdx-endian.c diff --git a/block/Makefile.objs b/block/Makefile.objs index e7214de12..2fc496ab6 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o v block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-$(CONFIG_VHDX) += vhdx.o +block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o block-obj-y += parallels.o blkdebug.o blkverify.o block-obj-y += snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c new file mode 100644 index 000000000..3e93e635a --- /dev/null +++ b/block/vhdx-endian.c @@ -0,0 +1,141 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "block/block_int.h" +#include "block/vhdx.h" + +#include + + +/* + * All the VHDX formats on disk are little endian - the following + * are helper import/export functions to correctly convert + * endianness from disk read to native cpu format, and back again. + */ + + +/* VHDX File Header */ + + +void vhdx_header_le_import(VHDXHeader *h) +{ + assert(h != NULL); + + le32_to_cpus(&h->signature); + le32_to_cpus(&h->checksum); + le64_to_cpus(&h->sequence_number); + + leguid_to_cpus(&h->file_write_guid); + leguid_to_cpus(&h->data_write_guid); + leguid_to_cpus(&h->log_guid); + + le16_to_cpus(&h->log_version); + le16_to_cpus(&h->version); + le32_to_cpus(&h->log_length); + le64_to_cpus(&h->log_offset); +} + +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h) +{ + assert(orig_h != NULL); + assert(new_h != NULL); + + new_h->signature = cpu_to_le32(orig_h->signature); + new_h->checksum = cpu_to_le32(orig_h->checksum); + new_h->sequence_number = cpu_to_le64(orig_h->sequence_number); + + new_h->file_write_guid = orig_h->file_write_guid; + new_h->data_write_guid = orig_h->data_write_guid; + new_h->log_guid = orig_h->log_guid; + + cpu_to_leguids(&new_h->file_write_guid); + cpu_to_leguids(&new_h->data_write_guid); + cpu_to_leguids(&new_h->log_guid); + + new_h->log_version = cpu_to_le16(orig_h->log_version); + new_h->version = cpu_to_le16(orig_h->version); + new_h->log_length = cpu_to_le32(orig_h->log_length); + new_h->log_offset = cpu_to_le64(orig_h->log_offset); +} + + +/* VHDX Log Headers */ + + +void vhdx_log_desc_le_import(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + le32_to_cpus(&d->signature); + le32_to_cpus(&d->trailing_bytes); + le64_to_cpus(&d->leading_bytes); + le64_to_cpus(&d->file_offset); + le64_to_cpus(&d->sequence_number); +} + +void vhdx_log_desc_le_export(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + cpu_to_le32s(&d->signature); + cpu_to_le32s(&d->trailing_bytes); + cpu_to_le64s(&d->leading_bytes); + cpu_to_le64s(&d->file_offset); + cpu_to_le64s(&d->sequence_number); +} + +void vhdx_log_data_le_export(VHDXLogDataSector *d) +{ + assert(d != NULL); + + cpu_to_le32s(&d->data_signature); + cpu_to_le32s(&d->sequence_high); + cpu_to_le32s(&d->sequence_low); +} + +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + le32_to_cpus(&hdr->signature); + le32_to_cpus(&hdr->checksum); + le32_to_cpus(&hdr->entry_length); + le32_to_cpus(&hdr->tail); + le64_to_cpus(&hdr->sequence_number); + le32_to_cpus(&hdr->descriptor_count); + leguid_to_cpus(&hdr->log_guid); + le64_to_cpus(&hdr->flushed_file_offset); + le64_to_cpus(&hdr->last_file_offset); +} + +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le32s(&hdr->signature); + cpu_to_le32s(&hdr->checksum); + cpu_to_le32s(&hdr->entry_length); + cpu_to_le32s(&hdr->tail); + cpu_to_le64s(&hdr->sequence_number); + cpu_to_le32s(&hdr->descriptor_count); + cpu_to_leguids(&hdr->log_guid); + cpu_to_le64s(&hdr->flushed_file_offset); + cpu_to_le64s(&hdr->last_file_offset); +} + + diff --git a/block/vhdx.c b/block/vhdx.c index 2c921cff4..68663c6aa 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -223,49 +223,6 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } -/* All VHDX structures on disk are little endian */ -static void vhdx_header_le_import(VHDXHeader *h) -{ - assert(h != NULL); - - le32_to_cpus(&h->signature); - le32_to_cpus(&h->checksum); - le64_to_cpus(&h->sequence_number); - - leguid_to_cpus(&h->file_write_guid); - leguid_to_cpus(&h->data_write_guid); - leguid_to_cpus(&h->log_guid); - - le16_to_cpus(&h->log_version); - le16_to_cpus(&h->version); - le32_to_cpus(&h->log_length); - le64_to_cpus(&h->log_offset); -} - -/* All VHDX structures on disk are little endian */ -static void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h) -{ - assert(orig_h != NULL); - assert(new_h != NULL); - - new_h->signature = cpu_to_le32(orig_h->signature); - new_h->checksum = cpu_to_le32(orig_h->checksum); - new_h->sequence_number = cpu_to_le64(orig_h->sequence_number); - - new_h->file_write_guid = orig_h->file_write_guid; - new_h->data_write_guid = orig_h->data_write_guid; - new_h->log_guid = orig_h->log_guid; - - cpu_to_leguids(&new_h->file_write_guid); - cpu_to_leguids(&new_h->data_write_guid); - cpu_to_leguids(&new_h->log_guid); - - new_h->log_version = cpu_to_le16(orig_h->log_version); - new_h->version = cpu_to_le16(orig_h->version); - new_h->log_length = cpu_to_le32(orig_h->log_length); - new_h->log_offset = cpu_to_le64(orig_h->log_offset); -} - /* Update the VHDX headers * * This follows the VHDX spec procedures for header updates. diff --git a/block/vhdx.h b/block/vhdx.h index 154c55a16..23028af04 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -394,4 +394,12 @@ static inline void cpu_to_leguids(MSGUID *guid) cpu_to_le16s(&guid->data3); } +void vhdx_header_le_import(VHDXHeader *h); +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); +void vhdx_log_desc_le_import(VHDXLogDescriptor *d); +void vhdx_log_desc_le_export(VHDXLogDescriptor *d); +void vhdx_log_data_le_export(VHDXLogDataSector *d); +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); + #endif From c3906c5e8281b37a526c706596af8575d6ac00d3 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:43 -0400 Subject: [PATCH 23/37] block: vhdx - update log guid in header, and first write tracker Allow tracking of first file write in the VHDX image, as well as the ability to update the GUID in the header. This is in preparation for log support. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 30 ++++++++++++++++++++++++------ block/vhdx.h | 6 ++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 68663c6aa..241703a1a 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -230,7 +230,7 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) * - non-current header is updated with largest sequence number */ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, - bool generate_data_write_guid) + bool generate_data_write_guid, MSGUID *log_guid) { int ret = 0; int hdr_idx = 0; @@ -262,6 +262,11 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, vhdx_guid_generate(&inactive_header->data_write_guid); } + /* update the log guid if present */ + if (log_guid) { + inactive_header->log_guid = *log_guid; + } + /* the header checksum is not over just the packed size of VHDXHeader, * but rather over the entire 'reserved' range for the header, which is * 4KB (VHDX_HEADER_SIZE). */ @@ -294,16 +299,16 @@ exit: * The VHDX spec calls for header updates to be performed twice, so that both * the current and non-current header have valid info */ -static int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, - bool generate_data_write_guid) +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid, MSGUID *log_guid) { int ret; - ret = vhdx_update_header(bs, s, generate_data_write_guid); + ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); if (ret < 0) { return ret; } - ret = vhdx_update_header(bs, s, generate_data_write_guid); + ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); return ret; } @@ -784,6 +789,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, s->bat = NULL; + s->first_visible_write = true; qemu_co_mutex_init(&s->lock); @@ -864,7 +870,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, } if (flags & BDRV_O_RDWR) { - ret = vhdx_update_headers(bs, s, false); + ret = vhdx_update_headers(bs, s, false, NULL); if (ret < 0) { goto fail; } @@ -1010,6 +1016,18 @@ exit: +/* Per the spec, on the first write of guest-visible data to the file the + * data write guid must be updated in the header */ +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + if (s->first_visible_write) { + s->first_visible_write = false; + ret = vhdx_update_headers(bs, s, true, NULL); + } + return ret; +} + static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { diff --git a/block/vhdx.h b/block/vhdx.h index 23028af04..6c35737b0 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -361,6 +361,7 @@ typedef struct BDRVVHDXState { VHDXBatEntry *bat; uint64_t bat_offset; + bool first_visible_write; MSGUID session_guid; VHDXLogEntries log; @@ -373,6 +374,9 @@ typedef struct BDRVVHDXState { void vhdx_guid_generate(MSGUID *guid); +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw, + MSGUID *log_guid); + uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, int crc_offset); @@ -402,4 +406,6 @@ void vhdx_log_data_le_export(VHDXLogDataSector *d); void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); + #endif From c46415afc2c99ea052f52f9d68ed9a78799f2c10 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:44 -0400 Subject: [PATCH 24/37] block: vhdx code movement - move vhdx_close() above vhdx_open() Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 241703a1a..3f06ce33e 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -778,6 +778,17 @@ exit: } +static void vhdx_close(BlockDriverState *bs) +{ + BDRVVHDXState *s = bs->opaque; + qemu_vfree(s->headers[0]); + qemu_vfree(s->headers[1]); + qemu_vfree(s->bat); + qemu_vfree(s->parent_entries); + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); +} + static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1035,17 +1046,6 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, } -static void vhdx_close(BlockDriverState *bs) -{ - BDRVVHDXState *s = bs->opaque; - qemu_vfree(s->headers[0]); - qemu_vfree(s->headers[1]); - qemu_vfree(s->bat); - qemu_vfree(s->parent_entries); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); -} - static BlockDriver bdrv_vhdx = { .format_name = "vhdx", .instance_size = sizeof(BDRVVHDXState), From 0a43a1b5d7c33208120eeb2d98ebb9ab15dc2c87 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:45 -0400 Subject: [PATCH 25/37] block: vhdx - log parsing, replay, and flush support This adds support for VHDX v0 logs, as specified in Microsoft's VHDX Specification Format v1.00: https://www.microsoft.com/en-us/download/details.aspx?id=34750 The following support is added: * Log parsing, and validation - validate that an existing log is correct. * Log search - search through an existing log, to find any valid sequence of entries. * Log replay and flush - replay an existing log, and flush/clear the log when complete. The VHDX log is a circular buffer, with elements (sectors) of 4KB. A log entry is a variably-length number of sectors, that is comprised of a header and 'descriptors', that describe each sector. A log may contain multiple entries, know as a log sequence. In a log sequence, each log entry immediately follows the previous entry, with an incrementing sequence number. There can only ever be one active and valid sequence in the log. Each log entry must match the file log GUID in order to be valid (along with other criteria). Once we have flushed all valid log entries, we marked the file log GUID to be zero, which indicates a buffer with no valid entries. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/Makefile.objs | 2 +- block/vhdx-log.c | 728 ++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.c | 65 +--- block/vhdx.h | 7 +- 4 files changed, 749 insertions(+), 53 deletions(-) create mode 100644 block/vhdx-log.c diff --git a/block/Makefile.objs b/block/Makefile.objs index 2fc496ab6..f43ecbc04 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o v block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o +block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += parallels.o blkdebug.o blkverify.o block-obj-y += snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o diff --git a/block/vhdx-log.c b/block/vhdx-log.c new file mode 100644 index 000000000..0284729dd --- /dev/null +++ b/block/vhdx-log.c @@ -0,0 +1,728 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This file covers the functionality of the metadata log writing, parsing, and + * replay. + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ +#include "qemu-common.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "block/vhdx.h" + + +typedef struct VHDXLogSequence { + bool valid; + uint32_t count; + VHDXLogEntries log; + VHDXLogEntryHeader hdr; +} VHDXLogSequence; + +typedef struct VHDXLogDescEntries { + VHDXLogEntryHeader hdr; + VHDXLogDescriptor desc[]; +} VHDXLogDescEntries; + +static const MSGUID zero_guid = { 0 }; + +/* The log located on the disk is circular buffer containing + * sectors of 4096 bytes each. + * + * It is assumed for the read/write functions below that the + * circular buffer scheme uses a 'one sector open' to indicate + * the buffer is full. Given the validation methods used for each + * sector, this method should be compatible with other methods that + * do not waste a sector. + */ + + +/* Allow peeking at the hdr entry at the beginning of the current + * read index, without advancing the read index */ +static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, + VHDXLogEntryHeader *hdr) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + assert(hdr != NULL); + + /* peek is only supported on sector boundaries */ + if (log->read % VHDX_LOG_SECTOR_SIZE) { + ret = -EFAULT; + goto exit; + } + + read = log->read; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. So, there + * is always a round number of sectors in the buffer */ + if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { + read = 0; + } + + if (read == log->write) { + ret = -EINVAL; + goto exit; + } + + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); + if (ret < 0) { + goto exit; + } + +exit: + return ret; +} + +/* Index increment for log, based on sector boundaries */ +static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) +{ + idx += VHDX_LOG_SECTOR_SIZE; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. So, there + * is always a round number of sectors in the buffer */ + return idx >= length ? 0 : idx; +} + + +/* Reset the log to empty */ +static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) +{ + MSGUID guid = { 0 }; + s->log.read = s->log.write = 0; + /* a log guid of 0 indicates an empty log to any parser of v0 + * VHDX logs */ + vhdx_update_headers(bs, s, false, &guid); +} + +/* Reads num_sectors from the log (all log sectors are 4096 bytes), + * into buffer 'buffer'. Upon return, *sectors_read will contain + * the number of sectors successfully read. + * + * It is assumed that 'buffer' is already allocated, and of sufficient + * size (i.e. >= 4096*num_sectors). + * + * If 'peek' is true, then the tail (read) pointer for the circular buffer is + * not modified. + * + * 0 is returned on success, -errno otherwise. */ +static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_read, void *buffer, + uint32_t num_sectors, bool peek) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + read = log->read; + + *sectors_read = 0; + while (num_sectors) { + if (read == log->write) { + /* empty */ + break; + } + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + read = vhdx_log_inc_idx(read, log->length); + + *sectors_read = *sectors_read + 1; + num_sectors--; + } + +exit: + if (!peek) { + log->read = read; + } + return ret; +} + +/* Validates a log entry header */ +static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, + BDRVVHDXState *s) +{ + int valid = false; + + if (memcmp(&hdr->signature, "loge", 4)) { + goto exit; + } + + /* if the individual entry length is larger than the whole log + * buffer, that is obviously invalid */ + if (log->length < hdr->entry_length) { + goto exit; + } + + /* length of entire entry must be in units of 4KB (log sector size) */ + if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { + goto exit; + } + + /* per spec, sequence # must be > 0 */ + if (hdr->sequence_number == 0) { + goto exit; + } + + /* log entries are only valid if they match the file-wide log guid + * found in the active header */ + if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { + goto exit; + } + + if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { + goto exit; + } + + valid = true; + +exit: + return valid; +} + +/* + * Given a log header, this will validate that the descriptors and the + * corresponding data sectors (if applicable) + * + * Validation consists of: + * 1. Making sure the sequence numbers matches the entry header + * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) + * 3. File offset field is a multiple of 4KB + * 4. If a data descriptor, the corresponding data sector + * has its signature ('data') and matching sequence number + * + * @desc: the data buffer containing the descriptor + * @hdr: the log entry header + * + * Returns true if valid + */ +static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, + VHDXLogEntryHeader *hdr) +{ + bool ret = false; + + if (desc->sequence_number != hdr->sequence_number) { + goto exit; + } + if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { + goto exit; + } + + if (!memcmp(&desc->signature, "zero", 4)) { + if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { + /* valid */ + ret = true; + } + } else if (!memcmp(&desc->signature, "desc", 4)) { + /* valid */ + ret = true; + } + +exit: + return ret; +} + + +/* Prior to sector data for a log entry, there is the header + * and the descriptors referenced in the header: + * + * [] = 4KB sector + * + * [ hdr, desc ][ desc ][ ... ][ data ][ ... ] + * + * The first sector in a log entry has a 64 byte header, and + * up to 126 32-byte descriptors. If more descriptors than + * 126 are required, then subsequent sectors can have up to 128 + * descriptors. Each sector is 4KB. Data follows the descriptor + * sectors. + * + * This will return the number of sectors needed to encompass + * the passed number of descriptors in desc_cnt. + * + * This will never return 0, even if desc_cnt is 0. + */ +static int vhdx_compute_desc_sectors(uint32_t desc_cnt) +{ + uint32_t desc_sectors; + + desc_cnt += 2; /* account for header in first sector */ + desc_sectors = desc_cnt / 128; + if (desc_cnt % 128) { + desc_sectors++; + } + + return desc_sectors; +} + + +/* Reads the log header, and subsequent descriptors (if any). This + * will allocate all the space for buffer, which must be NULL when + * passed into this function. Each descriptor will also be validated, + * and error returned if any are invalid. */ +static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, VHDXLogDescEntries **buffer) +{ + int ret = 0; + uint32_t desc_sectors; + uint32_t sectors_read; + VHDXLogEntryHeader hdr; + VHDXLogDescEntries *desc_entries = NULL; + int i; + + assert(*buffer == NULL); + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto exit; + } + vhdx_log_entry_hdr_le_import(&hdr); + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + ret = -EINVAL; + goto exit; + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE); + + ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries, + desc_sectors, false); + if (ret < 0) { + goto free_and_exit; + } + if (sectors_read != desc_sectors) { + ret = -EINVAL; + goto free_and_exit; + } + + /* put in proper endianness, and validate each desc */ + for (i = 0; i < hdr.descriptor_count; i++) { + vhdx_log_desc_le_import(&desc_entries->desc[i]); + if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) { + ret = -EINVAL; + goto free_and_exit; + } + } + + *buffer = desc_entries; + goto exit; + +free_and_exit: + qemu_vfree(desc_entries); +exit: + return ret; +} + + +/* Flushes the descriptor described by desc to the VHDX image file. + * If the descriptor is a data descriptor, than 'data' must be non-NULL, + * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be + * written. + * + * Verification is performed to make sure the sequence numbers of a data + * descriptor match the sequence number in the desc. + * + * For a zero descriptor, it may describe multiple sectors to fill with zeroes. + * In this case, it should be noted that zeroes are written to disk, and the + * image file is not extended as a sparse file. */ +static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, + VHDXLogDataSector *data) +{ + int ret = 0; + uint64_t seq, file_offset; + uint32_t offset = 0; + void *buffer = NULL; + uint64_t count = 1; + int i; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + if (!memcmp(&desc->signature, "desc", 4)) { + /* data sector */ + if (data == NULL) { + ret = -EFAULT; + goto exit; + } + + /* The sequence number of the data sector must match that + * in the descriptor */ + seq = data->sequence_high; + seq <<= 32; + seq |= data->sequence_low & 0xffffffff; + + if (seq != desc->sequence_number) { + ret = -EINVAL; + goto exit; + } + + /* Each data sector is in total 4096 bytes, however the first + * 8 bytes, and last 4 bytes, are located in the descriptor */ + memcpy(buffer, &desc->leading_bytes, 8); + offset += 8; + + memcpy(buffer+offset, data->data, 4084); + offset += 4084; + + memcpy(buffer+offset, &desc->trailing_bytes, 4); + + } else if (!memcmp(&desc->signature, "zero", 4)) { + /* write 'count' sectors of sector */ + memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); + count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; + } + + file_offset = desc->file_offset; + + /* count is only > 1 if we are writing zeroes */ + for (i = 0; i < count; i++) { + ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + file_offset += VHDX_LOG_SECTOR_SIZE; + } + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Flush the entire log (as described by 'logs') to the VHDX image + * file, and then set the log to 'empty' status once complete. + * + * The log entries should be validate prior to flushing */ +static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + int i; + uint32_t cnt, sectors_read; + uint64_t new_file_size; + void *data = NULL; + VHDXLogDescEntries *desc_entries = NULL; + VHDXLogEntryHeader hdr_tmp = { 0 }; + + cnt = logs->count; + + data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + /* each iteration represents one log sequence, which may span multiple + * sectors */ + while (cnt--) { + ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); + if (ret < 0) { + goto exit; + } + /* if the log shows a FlushedFileOffset larger than our current file + * size, then that means the file has been truncated / corrupted, and + * we must refused to open it / use it */ + if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) { + ret = -EINVAL; + goto exit; + } + + ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries); + if (ret < 0) { + goto exit; + } + + for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { + if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) { + /* data sector, so read a sector to flush */ + ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read, + data, 1, false); + if (ret < 0) { + goto exit; + } + if (sectors_read != 1) { + ret = -EINVAL; + goto exit; + } + } + + ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); + if (ret < 0) { + goto exit; + } + } + if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) { + new_file_size = desc_entries->hdr.last_file_offset; + if (new_file_size % (1024*1024)) { + /* round up to nearest 1MB boundary */ + new_file_size = ((new_file_size >> 20) + 1) << 20; + bdrv_truncate(bs->file, new_file_size); + } + } + qemu_vfree(desc_entries); + desc_entries = NULL; + } + + bdrv_flush(bs); + /* once the log is fully flushed, indicate that we have an empty log + * now. This also sets the log guid to 0, to indicate an empty log */ + vhdx_log_reset(bs, s); + +exit: + qemu_vfree(data); + qemu_vfree(desc_entries); + return ret; +} + +static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, uint64_t seq, + bool *valid, VHDXLogEntryHeader *entry) +{ + int ret = 0; + VHDXLogEntryHeader hdr; + void *buffer = NULL; + uint32_t i, desc_sectors, total_sectors, crc; + uint32_t sectors_read = 0; + VHDXLogDescEntries *desc_buffer = NULL; + + *valid = false; + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto inc_and_exit; + } + + vhdx_log_entry_hdr_le_import(&hdr); + + + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + goto inc_and_exit; + } + + if (seq > 0) { + if (hdr.sequence_number != seq + 1) { + goto inc_and_exit; + } + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + + /* Read desc sectors, and calculate log checksum */ + + total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; + + + /* read_desc() will incrememnt the read idx */ + ret = vhdx_log_read_desc(bs, s, log, &desc_buffer); + if (ret < 0) { + goto free_and_exit; + } + + crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, + desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); + crc ^= 0xffffffff; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + if (total_sectors > desc_sectors) { + for (i = 0; i < total_sectors - desc_sectors; i++) { + sectors_read = 0; + ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer, + 1, false); + if (ret < 0 || sectors_read != 1) { + goto free_and_exit; + } + crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); + crc ^= 0xffffffff; + } + } + crc ^= 0xffffffff; + if (crc != desc_buffer->hdr.checksum) { + goto free_and_exit; + } + + *valid = true; + *entry = hdr; + goto free_and_exit; + +inc_and_exit: + log->read = vhdx_log_inc_idx(log->read, log->length); + +free_and_exit: + qemu_vfree(buffer); + qemu_vfree(desc_buffer); + return ret; +} + +/* Search through the log circular buffer, and find the valid, active + * log sequence, if any exists + * */ +static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + uint32_t tail; + bool seq_valid = false; + VHDXLogSequence candidate = { 0 }; + VHDXLogEntryHeader hdr = { 0 }; + VHDXLogEntries curr_log; + + memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); + curr_log.write = curr_log.length; /* assume log is full */ + curr_log.read = 0; + + + /* now we will go through the whole log sector by sector, until + * we find a valid, active log sequence, or reach the end of the + * log buffer */ + for (;;) { + uint64_t curr_seq = 0; + VHDXLogSequence current = { 0 }; + + tail = curr_log.read; + + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + + if (seq_valid) { + current.valid = true; + current.log = curr_log; + current.log.read = tail; + current.log.write = curr_log.read; + current.count = 1; + current.hdr = hdr; + + + for (;;) { + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + if (seq_valid == false) { + break; + } + current.log.write = curr_log.read; + current.count++; + + curr_seq = hdr.sequence_number; + } + } + + if (current.valid) { + if (candidate.valid == false || + current.hdr.sequence_number > candidate.hdr.sequence_number) { + candidate = current; + } + } + + if (curr_log.read < tail) { + break; + } + } + + *logs = candidate; + + if (candidate.valid) { + /* this is the next sequence number, for writes */ + s->log.sequence = candidate.hdr.sequence_number + 1; + } + + +exit: + return ret; +} + +/* Parse the replay log. Per the VHDX spec, if the log is present + * it must be replayed prior to opening the file, even read-only. + * + * If read-only, we must replay the log in RAM (or refuse to open + * a dirty VHDX file read-only) */ +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed) +{ + int ret = 0; + VHDXHeader *hdr; + VHDXLogSequence logs = { 0 }; + + hdr = s->headers[s->curr_header]; + + *flushed = false; + + /* s->log.hdr is freed in vhdx_close() */ + if (s->log.hdr == NULL) { + s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); + } + + s->log.offset = hdr->log_offset; + s->log.length = hdr->log_length; + + if (s->log.offset < VHDX_LOG_MIN_SIZE || + s->log.offset % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + /* per spec, only log version of 0 is supported */ + if (hdr->log_version != 0) { + ret = -EINVAL; + goto exit; + } + + /* If either the log guid, or log length is zero, + * then a replay log is not present */ + if (guid_eq(hdr->log_guid, zero_guid)) { + goto exit; + } + + if (hdr->log_length == 0) { + goto exit; + } + + if (hdr->log_length % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + + /* The log is present, we need to find if and where there is an active + * sequence of valid entries present in the log. */ + + ret = vhdx_log_search(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + if (logs.valid) { + /* now flush the log */ + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + *flushed = true; + } + + +exit: + return ret; +} + + diff --git a/block/vhdx.c b/block/vhdx.c index 3f06ce33e..8fbfbd60a 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -735,58 +735,22 @@ exit: return ret; } -/* Parse the replay log. Per the VHDX spec, if the log is present - * it must be replayed prior to opening the file, even read-only. - * - * If read-only, we must replay the log in RAM (or refuse to open - * a dirty VHDX file read-only */ -static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s) -{ - int ret = 0; - int i; - VHDXHeader *hdr; - - hdr = s->headers[s->curr_header]; - - /* either the log guid, or log length is zero, - * then a replay log is present */ - for (i = 0; i < sizeof(hdr->log_guid.data4); i++) { - ret |= hdr->log_guid.data4[i]; - } - if (hdr->log_guid.data1 == 0 && - hdr->log_guid.data2 == 0 && - hdr->log_guid.data3 == 0 && - ret == 0) { - goto exit; - } - - /* per spec, only log version of 0 is supported */ - if (hdr->log_version != 0) { - ret = -EINVAL; - goto exit; - } - - if (hdr->log_length == 0) { - goto exit; - } - - /* We currently do not support images with logs to replay */ - ret = -ENOTSUP; - -exit: - return ret; -} - static void vhdx_close(BlockDriverState *bs) { BDRVVHDXState *s = bs->opaque; qemu_vfree(s->headers[0]); + s->headers[0] = NULL; qemu_vfree(s->headers[1]); + s->headers[1] = NULL; qemu_vfree(s->bat); + s->bat = NULL; qemu_vfree(s->parent_entries); + s->parent_entries = NULL; migrate_del_blocker(s->migration_blocker); error_free(s->migration_blocker); + qemu_vfree(s->log.hdr); + s->log.hdr = NULL; } static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, @@ -797,6 +761,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, uint32_t i; uint64_t signature; uint32_t data_blocks_cnt, bitmap_blocks_cnt; + bool log_flushed = false; s->bat = NULL; @@ -820,24 +785,25 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, vhdx_guid_generate(&s->session_guid); ret = vhdx_parse_header(bs, s); - if (ret) { + if (ret < 0) { goto fail; } - ret = vhdx_parse_log(bs, s); - if (ret) { + ret = vhdx_parse_log(bs, s, &log_flushed); + if (ret < 0) { goto fail; } ret = vhdx_open_region_tables(bs, s); - if (ret) { + if (ret < 0) { goto fail; } ret = vhdx_parse_metadata(bs, s); - if (ret) { + if (ret < 0) { goto fail; } + s->block_size = s->params.block_size; /* the VHDX spec dictates that virtual_disk_size is always a multiple of @@ -897,10 +863,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, return 0; fail: - qemu_vfree(s->headers[0]); - qemu_vfree(s->headers[1]); - qemu_vfree(s->bat); - qemu_vfree(s->parent_entries); + vhdx_close(bs); return ret; } diff --git a/block/vhdx.h b/block/vhdx.h index 6c35737b0..584ebec8f 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -326,7 +326,11 @@ typedef struct VHDXMetadataEntries { typedef struct VHDXLogEntries { uint64_t offset; uint64_t length; - uint32_t head; + uint32_t write; + uint32_t read; + VHDXLogEntryHeader *hdr; + void *desc_buffer; + uint64_t sequence; uint32_t tail; } VHDXLogEntries; @@ -383,6 +387,7 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); static inline void leguid_to_cpus(MSGUID *guid) { From 1a848fd4517820981b542e0d10e64c0426414229 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:46 -0400 Subject: [PATCH 26/37] block: vhdx - add region overlap detection for image files Regions in the image file cannot overlap - the log, region tables, and metdata must all be unique and non-overlapping. This adds region checking by means of a QLIST; there can be a variable number of regions and metadata (there may be metadata or region tables that we do not recognize / know about, but are not required). This adds the capability to register a region for later checking, and to check against registered regions for any overlap. Also, if neither the BAT or Metadata region tables are found, return error. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.h | 9 ++++++ 2 files changed, 91 insertions(+) diff --git a/block/vhdx.c b/block/vhdx.c index 8fbfbd60a..574ac4c93 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -204,6 +204,50 @@ void vhdx_guid_generate(MSGUID *guid) memcpy(guid, uuid, sizeof(MSGUID)); } +/* Check for region overlaps inside the VHDX image */ +static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length) +{ + int ret = 0; + uint64_t end; + VHDXRegionEntry *r; + + end = start + length; + QLIST_FOREACH(r, &s->regions, entries) { + if (!((start >= r->end) || (end <= r->start))) { + ret = -EINVAL; + goto exit; + } + } + +exit: + return ret; +} + +/* Register a region for future checks */ +static void vhdx_region_register(BDRVVHDXState *s, + uint64_t start, uint64_t length) +{ + VHDXRegionEntry *r; + + r = g_malloc0(sizeof(*r)); + + r->start = start; + r->end = start + length; + + QLIST_INSERT_HEAD(&s->regions, r, entries); +} + +/* Free all registered regions */ +static void vhdx_region_unregister_all(BDRVVHDXState *s) +{ + VHDXRegionEntry *r, *r_next; + + QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) { + QLIST_REMOVE(r, entries); + g_free(r); + } +} + /* * Per the MS VHDX Specification, for every VHDX file: * - The header section is fixed size - 1 MB @@ -389,6 +433,9 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) } } + vhdx_region_register(s, s->headers[s->curr_header]->log_offset, + s->headers[s->curr_header]->log_length); + ret = 0; goto exit; @@ -452,6 +499,15 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) le32_to_cpus(&rt_entry.length); le32_to_cpus(&rt_entry.data_bits); + /* check for region overlap between these entries, and any + * other memory regions in the file */ + ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length); + if (ret < 0) { + goto fail; + } + + vhdx_region_register(s, rt_entry.file_offset, rt_entry.length); + /* see if we recognize the entry */ if (guid_eq(rt_entry.guid, bat_guid)) { /* must be unique; if we have already found it this is invalid */ @@ -482,6 +538,12 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) goto fail; } } + + if (!bat_rt_found || !metadata_rt_found) { + ret = -EINVAL; + goto fail; + } + ret = 0; fail: @@ -751,6 +813,7 @@ static void vhdx_close(BlockDriverState *bs) error_free(s->migration_blocker); qemu_vfree(s->log.hdr); s->log.hdr = NULL; + vhdx_region_unregister_all(s); } static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, @@ -768,6 +831,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, s->first_visible_write = true; qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->regions); /* validate the file signature */ ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t)); @@ -842,8 +906,26 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + uint64_t payblocks = s->chunk_ratio; + /* endian convert, and verify populated BAT field file offsets against + * region table and log entries */ for (i = 0; i < s->bat_entries; i++) { le64_to_cpus(&s->bat[i]); + if (payblocks--) { + /* payload bat entries */ + if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == + PAYLOAD_BLOCK_FULL_PRESENT) { + ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK, + s->block_size); + if (ret < 0) { + goto fail; + } + } + } else { + payblocks = s->chunk_ratio; + /* Once differencing files are supported, verify sector bitmap + * blocks here */ + } } if (flags & BDRV_O_RDWR) { diff --git a/block/vhdx.h b/block/vhdx.h index 584ebec8f..d9065595f 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -230,6 +230,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector { other bits are reserved */ #define VHDX_BAT_STATE_BIT_MASK 0x07 #define VHDX_BAT_FILE_OFF_BITS (64 - 44) +#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */ typedef uint64_t VHDXBatEntry; /* ---- METADATA REGION STRUCTURES ---- */ @@ -334,6 +335,12 @@ typedef struct VHDXLogEntries { uint32_t tail; } VHDXLogEntries; +typedef struct VHDXRegionEntry { + uint64_t start; + uint64_t end; + QLIST_ENTRY(VHDXRegionEntry) entries; +} VHDXRegionEntry; + typedef struct BDRVVHDXState { CoMutex lock; @@ -374,6 +381,8 @@ typedef struct BDRVVHDXState { VHDXParentLocatorEntry *parent_entries; Error *migration_blocker; + + QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions; } BDRVVHDXState; void vhdx_guid_generate(MSGUID *guid); From 8adc52336d9c44ab4c7b9358a7be22ac0ef962bf Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:47 -0400 Subject: [PATCH 27/37] block: vhdx - add log write support This adds support for writing to the VHDX log. For spec details, see VHDX Specification Format v1.00: https://www.microsoft.com/en-us/download/details.aspx?id=34750 There are a few limitations to this log support: 1.) There is no caching yet 2.) The log is flushed after each entry The primary write interface, vhdx_log_write_and_flush(), performs a log write followed by an immediate flush of the log. As each log entry sector is a minimum of 4KB, partial sector writes are filled in with data from the disk write destination. If the current file log GUID is 0, a new GUID is generated and updated in the header. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx-log.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.h | 3 + 2 files changed, 285 insertions(+) diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 0284729dd..ee5583c30 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -156,6 +156,55 @@ exit: return ret; } +/* Writes num_sectors to the log (all log sectors are 4096 bytes), + * from buffer 'buffer'. Upon return, *sectors_written will contain + * the number of sectors successfully written. + * + * It is assumed that 'buffer' is at least 4096*num_sectors large. + * + * 0 is returned on success, -errno otherwise */ +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_written, void *buffer, + uint32_t num_sectors) +{ + int ret = 0; + uint64_t offset; + uint32_t write; + void *buffer_tmp; + BDRVVHDXState *s = bs->opaque; + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + write = log->write; + + buffer_tmp = buffer; + while (num_sectors) { + + offset = log->offset + write; + write = vhdx_log_inc_idx(write, log->length); + if (write == log->read) { + /* full */ + break; + } + ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + buffer_tmp += VHDX_LOG_SECTOR_SIZE; + + log->write = write; + *sectors_written = *sectors_written + 1; + num_sectors--; + } + +exit: + return ret; +} + + /* Validates a log entry header */ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, BDRVVHDXState *s) @@ -726,3 +775,236 @@ exit: } + +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, + VHDXLogDataSector *sector, void *data, + uint64_t seq) +{ + /* 8 + 4084 + 4 = 4096, 1 log sector */ + memcpy(&desc->leading_bytes, data, 8); + data += 8; + cpu_to_le64s(&desc->leading_bytes); + memcpy(sector->data, data, 4084); + data += 4084; + memcpy(&desc->trailing_bytes, data, 4); + cpu_to_le32s(&desc->trailing_bytes); + data += 4; + + sector->sequence_high = (uint32_t) (seq >> 32); + sector->sequence_low = (uint32_t) (seq & 0xffffffff); + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; + + vhdx_log_desc_le_export(desc); + vhdx_log_data_le_export(sector); +} + + +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + void *buffer = NULL; + void *merged_sector = NULL; + void *data_tmp, *sector_write; + unsigned int i; + int sector_offset; + uint32_t desc_sectors, sectors, total_length; + uint32_t sectors_written = 0; + uint32_t aligned_length; + uint32_t leading_length = 0; + uint32_t trailing_length = 0; + uint32_t partial_sectors = 0; + uint32_t bytes_written = 0; + uint64_t file_offset; + VHDXHeader *header; + VHDXLogEntryHeader new_hdr; + VHDXLogDescriptor *new_desc = NULL; + VHDXLogDataSector *data_sector = NULL; + MSGUID new_guid = { 0 }; + + header = s->headers[s->curr_header]; + + /* need to have offset read data, and be on 4096 byte boundary */ + + if (length > header->log_length) { + /* no log present. we could create a log here instead of failing */ + ret = -EINVAL; + goto exit; + } + + if (guid_eq(header->log_guid, zero_guid)) { + vhdx_guid_generate(&new_guid); + vhdx_update_headers(bs, s, false, &new_guid); + } else { + /* currently, we require that the log be flushed after + * every write. */ + ret = -ENOTSUP; + goto exit; + } + + /* 0 is an invalid sequence number, but may also represent the first + * log write (or a wrapped seq) */ + if (s->log.sequence == 0) { + s->log.sequence = 1; + } + + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; + + aligned_length = length; + + /* add in the unaligned head and tail bytes */ + if (sector_offset) { + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); + leading_length = leading_length > length ? length : leading_length; + aligned_length -= leading_length; + partial_sectors++; + } + + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); + if (trailing_length) { + partial_sectors++; + } + + sectors += partial_sectors; + + /* sectors is now how many sectors the data itself takes, not + * including the header and descriptor metadata */ + + new_hdr = (VHDXLogEntryHeader) { + .signature = VHDX_LOG_SIGNATURE, + .tail = s->log.tail, + .sequence_number = s->log.sequence, + .descriptor_count = sectors, + .reserved = 0, + .flushed_file_offset = bdrv_getlength(bs->file), + .last_file_offset = bdrv_getlength(bs->file), + }; + + new_hdr.log_guid = header->log_guid; + + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); + + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; + new_hdr.entry_length = total_length; + + vhdx_log_entry_hdr_le_export(&new_hdr); + + buffer = qemu_blockalign(bs, total_length); + memcpy(buffer, &new_hdr, sizeof(new_hdr)); + + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); + data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE); + data_tmp = data; + + /* All log sectors are 4KB, so for any partial sectors we must + * merge the data with preexisting data from the final file + * destination */ + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + for (i = 0; i < sectors; i++) { + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; + new_desc->sequence_number = s->log.sequence; + new_desc->file_offset = file_offset; + + if (i == 0 && leading_length) { + /* partial sector at the front of the buffer */ + ret = bdrv_pread(bs->file, file_offset, merged_sector, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector + sector_offset, data_tmp, leading_length); + bytes_written = leading_length; + sector_write = merged_sector; + } else if (i == sectors - 1 && trailing_length) { + /* partial sector at the end of the buffer */ + ret = bdrv_pread(bs->file, + file_offset, + merged_sector + trailing_length, + VHDX_LOG_SECTOR_SIZE - trailing_length); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector, data_tmp, trailing_length); + bytes_written = trailing_length; + sector_write = merged_sector; + } else { + bytes_written = VHDX_LOG_SECTOR_SIZE; + sector_write = data_tmp; + } + + /* populate the raw sector data into the proper structures, + * as well as update the descriptor, and convert to proper + * endianness */ + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, + s->log.sequence); + + data_tmp += bytes_written; + data_sector++; + new_desc++; + file_offset += VHDX_LOG_SECTOR_SIZE; + } + + /* checksum covers entire entry, from the log header through the + * last data sector */ + vhdx_update_checksum(buffer, total_length, + offsetof(VHDXLogEntryHeader, checksum)); + cpu_to_le32s((uint32_t *)(buffer + 4)); + + /* now write to the log */ + vhdx_log_write_sectors(bs, &s->log, §ors_written, buffer, + desc_sectors + sectors); + if (ret < 0) { + goto exit; + } + + if (sectors_written != desc_sectors + sectors) { + /* instead of failing, we could flush the log here */ + ret = -EINVAL; + goto exit; + } + + s->log.sequence++; + /* write new tail */ + s->log.tail = s->log.write; + +exit: + qemu_vfree(buffer); + qemu_vfree(merged_sector); + return ret; +} + +/* Perform a log write, and then immediately flush the entire log */ +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + VHDXLogSequence logs = { .valid = true, + .count = 1, + .hdr = { 0 } }; + + + /* Make sure data written (new and/or changed blocks) is stable + * on disk, before creating log entry */ + bdrv_flush(bs); + ret = vhdx_log_write(bs, s, data, length, offset); + if (ret < 0) { + goto exit; + } + logs.log = s->log; + + /* Make sure log is stable on disk */ + bdrv_flush(bs); + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + s->log = logs.log; + +exit: + return ret; +} + diff --git a/block/vhdx.h b/block/vhdx.h index d9065595f..6abbf5090 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -398,6 +398,9 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset); + static inline void leguid_to_cpus(MSGUID *guid) { le32_to_cpus(&guid->data1); From d92aa8833c051b53d3bf2614ff885df0037f10bb Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:48 -0400 Subject: [PATCH 28/37] block: vhdx write support This adds support for writing to VHDX image files, using coroutines. Writes into the BAT table goes through the VHDX log. Currently, BAT table writes occur when expanding a dynamic VHDX file, and allocating a new BAT entry. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++- block/vhdx.h | 2 +- 2 files changed, 209 insertions(+), 5 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 574ac4c93..050f0719c 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -914,7 +914,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, if (payblocks--) { /* payload bat entries */ if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == - PAYLOAD_BLOCK_FULL_PRESENT) { + PAYLOAD_BLOCK_FULLY_PRESENT) { ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK, s->block_size); if (ret < 0) { @@ -935,7 +935,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, } } - /* TODO: differencing files, write */ + /* TODO: differencing files */ /* Disable migration when VHDX images are used */ error_set(&s->migration_blocker, @@ -1040,7 +1040,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, /* return zero */ qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail); break; - case PAYLOAD_BLOCK_FULL_PRESENT: + case PAYLOAD_BLOCK_FULLY_PRESENT: qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_readv(bs->file, sinfo.file_offset >> BDRV_SECTOR_BITS, @@ -1070,7 +1070,43 @@ exit: return ret; } +/* + * Allocate a new payload block at the end of the file. + * + * Allocation will happen at 1MB alignment inside the file + * + * Returns the file offset start of the new payload block + */ +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t *new_offset) +{ + *new_offset = bdrv_getlength(bs->file); + /* per the spec, the address for a block is in units of 1MB */ + *new_offset = ROUND_UP(*new_offset, 1024 * 1024); + + return bdrv_truncate(bs->file, *new_offset + s->block_size); +} + +/* + * Update the BAT table entry with the new file offset, and the new entry + * state */ +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXSectorInfo *sinfo, + uint64_t *bat_entry_le, + uint64_t *bat_offset, int state) +{ + /* The BAT entry is a uint64, with 44 bits for the file offset in units of + * 1MB, and 3 bits for the block state. */ + s->bat[sinfo->bat_idx] = ((sinfo->file_offset>>20) << + VHDX_BAT_FILE_OFF_BITS); + + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; + + *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]); + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); + +} /* Per the spec, on the first write of guest-visible data to the file the * data write guid must be updated in the header */ @@ -1087,7 +1123,175 @@ int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return -ENOTSUP; + int ret = -ENOTSUP; + BDRVVHDXState *s = bs->opaque; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + uint64_t bat_entry = 0; + uint64_t bat_entry_offset = 0; + QEMUIOVector hd_qiov; + struct iovec iov1 = { 0 }; + struct iovec iov2 = { 0 }; + int sectors_to_write; + int bat_state; + uint64_t bat_prior_offset = 0; + bool bat_update = false; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + while (nb_sectors > 0) { + bool use_zero_buffers = false; + bat_update = false; + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + sectors_to_write = sinfo.sectors_avail; + + qemu_iovec_reset(&hd_qiov); + /* check the payload block state */ + bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK; + switch (bat_state) { + case PAYLOAD_BLOCK_ZERO: + /* in this case, we need to preserve zero writes for + * data that is not part of this write, so we must pad + * the rest of the buffer to zeroes */ + + /* if we are on a posix system with ftruncate() that extends + * a file, then it is zero-filled for us. On Win32, the raw + * layer uses SetFilePointer and SetFileEnd, which does not + * zero fill AFAIK */ + + /* Queue another write of zero buffers if the underlying file + * does not zero-fill on file extension */ + + if (bdrv_has_zero_init(bs->file) == 0) { + use_zero_buffers = true; + + /* zero fill the front, if any */ + if (sinfo.block_offset) { + iov1.iov_len = sinfo.block_offset; + iov1.iov_base = qemu_blockalign(bs, iov1.iov_len); + memset(iov1.iov_base, 0, iov1.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0, + sinfo.block_offset); + sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS; + } + + /* our actual data */ + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + + /* zero fill the back, if any */ + if ((sinfo.bytes_avail - sinfo.block_offset) < + s->block_size) { + iov2.iov_len = s->block_size - + (sinfo.bytes_avail + sinfo.block_offset); + iov2.iov_base = qemu_blockalign(bs, iov2.iov_len); + memset(iov2.iov_base, 0, iov2.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0, + sinfo.block_offset); + sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS; + } + } + + /* fall through */ + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ + bat_prior_offset = sinfo.file_offset; + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); + if (ret < 0) { + goto exit; + } + /* once we support differencing files, this may also be + * partially present */ + /* update block state to the newly specified state */ + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, + PAYLOAD_BLOCK_FULLY_PRESENT); + bat_update = true; + /* since we just allocated a block, file_offset is the + * beginning of the payload block. It needs to be the + * write address, which includes the offset into the block */ + if (!use_zero_buffers) { + sinfo.file_offset += sinfo.block_offset; + } + /* fall through */ + case PAYLOAD_BLOCK_FULLY_PRESENT: + /* if the file offset address is in the header zone, + * there is a problem */ + if (sinfo.file_offset < (1024 * 1024)) { + ret = -EFAULT; + goto error_bat_restore; + } + + if (!use_zero_buffers) { + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + } + /* block exists, so we can just overwrite it */ + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->file, + sinfo.file_offset >> BDRV_SECTOR_BITS, + sectors_to_write, &hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto error_bat_restore; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + + if (bat_update) { + /* this will update the BAT entry into the log journal, and + * then flush the log journal out to disk */ + ret = vhdx_log_write_and_flush(bs, s, &bat_entry, + sizeof(VHDXBatEntry), + bat_entry_offset); + if (ret < 0) { + goto exit; + } + } + + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + + } + } + + goto exit; + +error_bat_restore: + if (bat_update) { + /* keep metadata in sync, and restore the bat entry state + * if error. */ + sinfo.file_offset = bat_prior_offset; + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, bat_state); + } +exit: + qemu_vfree(iov1.iov_base); + qemu_vfree(iov2.iov_base); + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); + return ret; } diff --git a/block/vhdx.h b/block/vhdx.h index 6abbf5090..f3315480e 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -217,7 +217,7 @@ typedef struct QEMU_PACKED VHDXLogDataSector { #define PAYLOAD_BLOCK_UNDEFINED 1 #define PAYLOAD_BLOCK_ZERO 2 #define PAYLOAD_BLOCK_UNMAPPED 5 -#define PAYLOAD_BLOCK_FULL_PRESENT 6 +#define PAYLOAD_BLOCK_FULLY_PRESENT 6 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 #define SB_BLOCK_NOT_PRESENT 0 From 0b7da092b40734538631c3ad461c1753a87535fc Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:49 -0400 Subject: [PATCH 29/37] block: vhdx - remove BAT file offset bit shifting Bit shifting can be fun, but in this case it was unnecessary. The upper 44 bits of the 64-bit BAT entry is specifies the File Offset, so we shifted the bits to get access to the value. However, per the spec the value is in MB. So we dutifully shifted back to the left by 20 bits, to convert to a true uint64_t file offset. This replaces those steps with just a bit mask, to get rid of the lower 20 bits instead. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 6 ++---- block/vhdx.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 050f0719c..158edab7b 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -985,7 +985,7 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits; - sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS; + sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK; sinfo->block_offset = block_offset << s->logical_sector_size_bits; @@ -999,7 +999,6 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, * in the block, and add in the payload data block offset * in the file, in bytes, to get the final read address */ - sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */ sinfo->file_offset += sinfo->block_offset; } @@ -1098,8 +1097,7 @@ static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, { /* The BAT entry is a uint64, with 44 bits for the file offset in units of * 1MB, and 3 bits for the block state. */ - s->bat[sinfo->bat_idx] = ((sinfo->file_offset>>20) << - VHDX_BAT_FILE_OFF_BITS); + s->bat[sinfo->bat_idx] = sinfo->file_offset; s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; diff --git a/block/vhdx.h b/block/vhdx.h index f3315480e..d3598e0a2 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -229,7 +229,6 @@ typedef struct QEMU_PACKED VHDXLogDataSector { /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state other bits are reserved */ #define VHDX_BAT_STATE_BIT_MASK 0x07 -#define VHDX_BAT_FILE_OFF_BITS (64 - 44) #define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */ typedef uint64_t VHDXBatEntry; From c325ee1de84dde5b2a90c30e1b788defa31c5d53 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:50 -0400 Subject: [PATCH 30/37] block: vhdx - move more endian translations to vhdx-endian.c In preparation for vhdx_create(), move more endian translation functions out to vhdx-endian.c. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx-endian.c | 75 +++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.c | 20 +++--------- block/vhdx.h | 9 +++++- 3 files changed, 87 insertions(+), 17 deletions(-) diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c index 3e93e635a..fe879ed99 100644 --- a/block/vhdx-endian.c +++ b/block/vhdx-endian.c @@ -139,3 +139,78 @@ void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr) } +/* Region table entries */ +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + le32_to_cpus(&hdr->signature); + le32_to_cpus(&hdr->checksum); + le32_to_cpus(&hdr->entry_count); +} + +void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le32s(&hdr->signature); + cpu_to_le32s(&hdr->checksum); + cpu_to_le32s(&hdr->entry_count); +} + +void vhdx_region_entry_le_import(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->guid); + le64_to_cpus(&e->file_offset); + le32_to_cpus(&e->length); + le32_to_cpus(&e->data_bits); +} + +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->guid); + cpu_to_le64s(&e->file_offset); + cpu_to_le32s(&e->length); + cpu_to_le32s(&e->data_bits); +} + + +/* Metadata headers & table */ +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + le64_to_cpus(&hdr->signature); + le16_to_cpus(&hdr->entry_count); +} + +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le64s(&hdr->signature); + cpu_to_le16s(&hdr->entry_count); +} + +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->item_id); + le32_to_cpus(&e->offset); + le32_to_cpus(&e->length); + le32_to_cpus(&e->data_bits); +} +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->item_id); + cpu_to_le32s(&e->offset); + cpu_to_le32s(&e->length); + cpu_to_le32s(&e->data_bits); +} diff --git a/block/vhdx.c b/block/vhdx.c index 158edab7b..7da149a8d 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -472,10 +472,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) goto fail; } memcpy(&s->rt, buffer, sizeof(s->rt)); - le32_to_cpus(&s->rt.signature); - le32_to_cpus(&s->rt.checksum); - le32_to_cpus(&s->rt.entry_count); - le32_to_cpus(&s->rt.reserved); + vhdx_region_header_le_import(&s->rt); offset += sizeof(s->rt); if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) || @@ -494,10 +491,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&rt_entry, buffer + offset, sizeof(rt_entry)); offset += sizeof(rt_entry); - leguid_to_cpus(&rt_entry.guid); - le64_to_cpus(&rt_entry.file_offset); - le32_to_cpus(&rt_entry.length); - le32_to_cpus(&rt_entry.data_bits); + vhdx_region_entry_le_import(&rt_entry); /* check for region overlap between these entries, and any * other memory regions in the file */ @@ -587,9 +581,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr)); offset += sizeof(s->metadata_hdr); - le64_to_cpus(&s->metadata_hdr.signature); - le16_to_cpus(&s->metadata_hdr.reserved); - le16_to_cpus(&s->metadata_hdr.entry_count); + vhdx_metadata_header_le_import(&s->metadata_hdr); if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) { ret = -EINVAL; @@ -608,11 +600,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&md_entry, buffer + offset, sizeof(md_entry)); offset += sizeof(md_entry); - leguid_to_cpus(&md_entry.item_id); - le32_to_cpus(&md_entry.offset); - le32_to_cpus(&md_entry.length); - le32_to_cpus(&md_entry.data_bits); - le32_to_cpus(&md_entry.reserved2); + vhdx_metadata_entry_le_import(&md_entry); if (guid_eq(md_entry.item_id, file_param_guid)) { if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) { diff --git a/block/vhdx.h b/block/vhdx.h index d3598e0a2..15486c742 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -421,7 +421,14 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d); void vhdx_log_data_le_export(VHDXLogDataSector *d); void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); - +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr); +void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr); +void vhdx_region_entry_le_import(VHDXRegionTableEntry *e); +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e); +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e); +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e); int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); #endif From 1e74a971cb3229813112c74dca81e599812b66f3 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:51 -0400 Subject: [PATCH 31/37] block: vhdx - break out code operations to functions This is preperation for vhdx_create(). The ability to write headers, and calculate the number of BAT entries will be needed within the create() functions, so move this relevant code into helper functions. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 121 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 41 deletions(-) diff --git a/block/vhdx.c b/block/vhdx.c index 7da149a8d..7bf7cd69a 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -248,6 +248,14 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s) } } +static void vhdx_set_shift_bits(BDRVVHDXState *s) +{ + s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); + s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); + s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); + s->block_size_bits = 31 - clz32(s->block_size); +} + /* * Per the MS VHDX Specification, for every VHDX file: * - The header section is fixed size - 1 MB @@ -267,6 +275,50 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } +/* + * Writes the header to the specified offset. + * + * This will optionally read in buffer data from disk (otherwise zero-fill), + * and then update the header checksum. Header is converted to proper + * endianness before being written to the specified file offset + */ +static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr, + uint64_t offset, bool read) +{ + uint8_t *buffer = NULL; + int ret; + VHDXHeader header_le; + + assert(bs_file != NULL); + assert(hdr != NULL); + + /* the header checksum is not over just the packed size of VHDXHeader, + * but rather over the entire 'reserved' range for the header, which is + * 4KB (VHDX_HEADER_SIZE). */ + + buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE); + if (read) { + /* if true, we can't assume the extra reserved bytes are 0 */ + ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto exit; + } + } else { + memset(buffer, 0, VHDX_HEADER_SIZE); + } + + /* overwrite the actual VHDXHeader portion */ + memcpy(buffer, hdr, sizeof(VHDXHeader)); + hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, + offsetof(VHDXHeader, checksum)); + vhdx_header_le_export(hdr, &header_le); + ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader)); + +exit: + qemu_vfree(buffer); + return ret; +} + /* Update the VHDX headers * * This follows the VHDX spec procedures for header updates. @@ -282,8 +334,6 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, VHDXHeader *active_header; VHDXHeader *inactive_header; - VHDXHeader header_le; - uint8_t *buffer; /* operate on the non-current header */ if (s->curr_header == 0) { @@ -311,31 +361,13 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, inactive_header->log_guid = *log_guid; } - /* the header checksum is not over just the packed size of VHDXHeader, - * but rather over the entire 'reserved' range for the header, which is - * 4KB (VHDX_HEADER_SIZE). */ - - buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); - /* we can't assume the extra reserved bytes are 0 */ - ret = bdrv_pread(bs->file, header_offset, buffer, VHDX_HEADER_SIZE); - if (ret < 0) { - goto exit; - } - /* overwrite the actual VHDXHeader portion */ - memcpy(buffer, inactive_header, sizeof(VHDXHeader)); - inactive_header->checksum = - vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, - offsetof(VHDXHeader, checksum)); - vhdx_header_le_export(inactive_header, &header_le); - ret = bdrv_pwrite_sync(bs->file, header_offset, &header_le, - sizeof(VHDXHeader)); + vhdx_write_header(bs->file, inactive_header, header_offset, true); if (ret < 0) { goto exit; } s->curr_header = hdr_idx; exit: - qemu_vfree(buffer); return ret; } @@ -773,10 +805,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) goto exit; } - s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); - s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); - s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); - s->block_size_bits = 31 - clz32(s->block_size); + vhdx_set_shift_bits(s); ret = 0; @@ -785,6 +814,31 @@ exit: return ret; } +/* + * Calculate the number of BAT entries, including sector + * bitmap entries. + */ +static void vhdx_calc_bat_entries(BDRVVHDXState *s) +{ + uint32_t data_blocks_cnt, bitmap_blocks_cnt; + + data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; + if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { + data_blocks_cnt++; + } + bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; + if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { + bitmap_blocks_cnt++; + } + + if (s->parent_entries) { + s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); + } else { + s->bat_entries = data_blocks_cnt + + ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); + } + +} static void vhdx_close(BlockDriverState *bs) { @@ -811,7 +865,6 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, int ret = 0; uint32_t i; uint64_t signature; - uint32_t data_blocks_cnt, bitmap_blocks_cnt; bool log_flushed = false; @@ -862,21 +915,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, * logical_sector_size */ bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits; - data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; - if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { - data_blocks_cnt++; - } - bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; - if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { - bitmap_blocks_cnt++; - } - - if (s->parent_entries) { - s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); - } else { - s->bat_entries = data_blocks_cnt + - ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); - } + vhdx_calc_bat_entries(s); s->bat_offset = s->bat_rt.file_offset; From 61c02e5687dcc581c9d3413b61040b023adeaa9c Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:52 -0400 Subject: [PATCH 32/37] block: vhdx - fix comment typos in header, fix incorrect struct fields VHDXPage83Data and VHDXParentLocatorHeader both incorrectly had their MSGUID fields set as arrays of 16. This is incorrect (it stems from an early version where those fields were uint_8 arrays). Those fields were, up to this patch, unused. Also, there were a couple of typos and incorrect wording in comments, and those have been fixed up as well. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/vhdx.h b/block/vhdx.h index 15486c742..f222d1876 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -58,7 +58,7 @@ typedef struct VHDXFileIdentifier { uint64_t signature; /* "vhdxfile" in ASCII */ uint16_t creator[256]; /* optional; utf-16 string to identify - the vhdx file creator. Diagnotistic + the vhdx file creator. Diagnostic only */ } VHDXFileIdentifier; @@ -114,8 +114,8 @@ typedef struct QEMU_PACKED VHDXHeader { there is no valid log. If non-zero, log entries with this guid are valid. */ - uint16_t log_version; /* version of the log format. Mustn't be - zero, unless log_guid is also zero */ + uint16_t log_version; /* version of the log format. Must be + set to zero */ uint16_t version; /* version of the vhdx file. Currently, only supported version is "1" */ uint32_t log_length; /* length of the log. Must be multiple @@ -281,7 +281,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskSize { } VHDXVirtualDiskSize; typedef struct QEMU_PACKED VHDXPage83Data { - MSGUID page_83_data[16]; /* unique id for scsi devices that + MSGUID page_83_data; /* unique id for scsi devices that support page 0x83 */ } VHDXPage83Data; @@ -296,7 +296,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { } VHDXVirtualDiskPhysicalSectorSize; typedef struct QEMU_PACKED VHDXParentLocatorHeader { - MSGUID locator_type[16]; /* type of the parent virtual disk. */ + MSGUID locator_type; /* type of the parent virtual disk. */ uint16_t reserved; uint16_t key_value_count; /* number of key/value pairs for this locator */ From 3412f7b1bd8f250c34c9f933767d06b9444bb821 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:53 -0400 Subject: [PATCH 33/37] block: vhdx - add .bdrv_create() support This adds support for VHDX image creation, for images of type "Fixed" and "Dynamic". "Differencing" types (i.e., VHDX images with backing files) are currently not supported. Options for image creation include: * log size: The size of the journaling log for VHDX. Minimum is 1MB, and it must be a multiple of 1MB. Invalid log sizes will be silently fixed by rounding up to the nearest MB. Default is 1MB. * block size: This is the size of a payload block. The range is 1MB to 256MB, inclusive, and must be a multiple of 1MB as well. Invalid sizes and multiples will be silently fixed. If '0' is passed, then a sane size is chosen (depending on virtual image size). Default is 0 (Auto-select). * subformat: - "dynamic" An image without data pre-allocated. - "fixed" An image with data pre-allocated. Default is "dynamic" When creating the image file, the lettered sections are created: -----------------------------------------------------------------. | (A) | (B) | (C) | (D) | (E) | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 | | | | | .-----------------------------------------------------------------. 0 64KB 128KB 192KB 256KB 320KB .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. | (F) | (G) | (H) | | Journal Log | BAT / Bitmap | Metadata | .... data ...... | | | | .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. 1MB (var.) (var.) (var.) Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- block/vhdx.c | 558 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/vhdx.h | 15 +- 2 files changed, 572 insertions(+), 1 deletion(-) diff --git a/block/vhdx.c b/block/vhdx.c index 7bf7cd69a..7d1af9663 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -23,6 +23,19 @@ #include "migration/migration.h" #include +#include + +/* Options for VHDX creation */ + +#define VHDX_BLOCK_OPT_LOG_SIZE "log_size" +#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size" +#define VHDX_BLOCK_OPT_ZERO "block_state_zero" + +typedef enum VHDXImageType { + VHDX_TYPE_DYNAMIC = 0, + VHDX_TYPE_FIXED, + VHDX_TYPE_DIFFERENCING, /* Currently unsupported */ +} VHDXImageType; /* Several metadata and region table data entries are identified by * guids in a MS-specific GUID format. */ @@ -1320,6 +1333,548 @@ exit: } + +/* + * Create VHDX Headers + * + * There are 2 headers, and the highest sequence number will represent + * the active header + */ +static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size, + uint32_t log_size) +{ + int ret = 0; + VHDXHeader *hdr = NULL; + + hdr = g_malloc0(sizeof(VHDXHeader)); + + hdr->signature = VHDX_HEADER_SIGNATURE; + hdr->sequence_number = g_random_int(); + hdr->log_version = 0; + hdr->version = 1; + hdr->log_length = log_size; + hdr->log_offset = VHDX_HEADER_SECTION_END; + vhdx_guid_generate(&hdr->file_write_guid); + vhdx_guid_generate(&hdr->data_write_guid); + + ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false); + if (ret < 0) { + goto exit; + } + hdr->sequence_number++; + ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false); + if (ret < 0) { + goto exit; + } + +exit: + g_free(hdr); + return ret; +} + + +/* + * Create the Metadata entries. + * + * For more details on the entries, see section 3.5 (pg 29) in the + * VHDX 1.00 specification. + * + * We support 5 metadata entries (all required by spec): + * File Parameters, + * Virtual Disk Size, + * Page 83 Data, + * Logical Sector Size, + * Physical Sector Size + * + * The first 64KB of the Metadata section is reserved for the metadata + * header and entries; beyond that, the metadata items themselves reside. + */ +static int vhdx_create_new_metadata(BlockDriverState *bs, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint64_t metadata_offset, + VHDXImageType type) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + void *entry_buffer; + VHDXMetadataTableHeader *md_table;; + VHDXMetadataTableEntry *md_table_entry; + + /* Metadata entries */ + VHDXFileParameters *mt_file_params; + VHDXVirtualDiskSize *mt_virtual_size; + VHDXPage83Data *mt_page83; + VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size; + VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size; + + entry_buffer = g_malloc0(sizeof(VHDXFileParameters) + + sizeof(VHDXVirtualDiskSize) + + sizeof(VHDXPage83Data) + + sizeof(VHDXVirtualDiskLogicalSectorSize) + + sizeof(VHDXVirtualDiskPhysicalSectorSize)); + + mt_file_params = entry_buffer; + offset += sizeof(VHDXFileParameters); + mt_virtual_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskSize); + mt_page83 = entry_buffer + offset; + offset += sizeof(VHDXPage83Data); + mt_log_sector_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskLogicalSectorSize); + mt_phys_sector_size = entry_buffer + offset; + + mt_file_params->block_size = cpu_to_le32(block_size); + if (type == VHDX_TYPE_FIXED) { + mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED; + cpu_to_le32s(&mt_file_params->data_bits); + } + + vhdx_guid_generate(&mt_page83->page_83_data); + cpu_to_leguids(&mt_page83->page_83_data); + mt_virtual_size->virtual_disk_size = cpu_to_le64(image_size); + mt_log_sector_size->logical_sector_size = cpu_to_le32(sector_size); + mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size); + + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + md_table = buffer; + + md_table->signature = VHDX_METADATA_SIGNATURE; + md_table->entry_count = 5; + vhdx_metadata_header_le_export(md_table); + + + /* This will reference beyond the reserved table portion */ + offset = 64 * KiB; + + md_table_entry = buffer + sizeof(VHDXMetadataTableHeader); + + md_table_entry[0].item_id = file_param_guid; + md_table_entry[0].offset = offset; + md_table_entry[0].length = sizeof(VHDXFileParameters); + md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED; + offset += md_table_entry[0].length; + vhdx_metadata_entry_le_export(&md_table_entry[0]); + + md_table_entry[1].item_id = virtual_size_guid; + md_table_entry[1].offset = offset; + md_table_entry[1].length = sizeof(VHDXVirtualDiskSize); + md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[1].length; + vhdx_metadata_entry_le_export(&md_table_entry[1]); + + md_table_entry[2].item_id = page83_guid; + md_table_entry[2].offset = offset; + md_table_entry[2].length = sizeof(VHDXPage83Data); + md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[2].length; + vhdx_metadata_entry_le_export(&md_table_entry[2]); + + md_table_entry[3].item_id = logical_sector_guid; + md_table_entry[3].offset = offset; + md_table_entry[3].length = sizeof(VHDXVirtualDiskLogicalSectorSize); + md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[3].length; + vhdx_metadata_entry_le_export(&md_table_entry[3]); + + md_table_entry[4].item_id = phys_sector_guid; + md_table_entry[4].offset = offset; + md_table_entry[4].length = sizeof(VHDXVirtualDiskPhysicalSectorSize); + md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + vhdx_metadata_entry_le_export(&md_table_entry[4]); + + ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + +exit: + g_free(buffer); + g_free(entry_buffer); + return ret; +} + +/* This create the actual BAT itself. We currently only support + * 'Dynamic' and 'Fixed' image types. + * + * Dynamic images: default state of the BAT is all zeroes. + * + * Fixed images: default state of the BAT is fully populated, with + * file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT. + */ +static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t image_size, VHDXImageType type, + bool use_zero_blocks, VHDXRegionTableEntry *rt_bat) +{ + int ret = 0; + uint64_t data_file_offset; + uint64_t total_sectors = 0; + uint64_t sector_num = 0; + uint64_t unused; + int block_state; + VHDXSectorInfo sinfo; + + assert(s->bat == NULL); + + /* this gives a data start after BAT/bitmap entries, and well + * past any metadata entries (with a 4 MB buffer for future + * expansion */ + data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB; + total_sectors = image_size >> s->logical_sector_size_bits; + + if (type == VHDX_TYPE_DYNAMIC) { + /* All zeroes, so we can just extend the file - the end of the BAT + * is the furthest thing we have written yet */ + ret = bdrv_truncate(bs, data_file_offset); + if (ret < 0) { + goto exit; + } + } else if (type == VHDX_TYPE_FIXED) { + ret = bdrv_truncate(bs, data_file_offset + image_size); + if (ret < 0) { + goto exit; + } + } else { + ret = -ENOTSUP; + goto exit; + } + + if (type == VHDX_TYPE_FIXED || + use_zero_blocks || + bdrv_has_zero_init(bs) == 0) { + /* for a fixed file, the default BAT entry is not zero */ + s->bat = g_malloc0(rt_bat->length); + block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT : + PAYLOAD_BLOCK_NOT_PRESENT; + block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state; + /* fill the BAT by emulating sector writes of sectors_per_block size */ + while (sector_num < total_sectors) { + vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo); + sinfo.file_offset = data_file_offset + + (sector_num << s->logical_sector_size_bits); + sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB); + vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused, + block_state); + cpu_to_le64s(&s->bat[sinfo.bat_idx]); + sector_num += s->sectors_per_block; + } + ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length); + if (ret < 0) { + goto exit; + } + } + + + +exit: + g_free(s->bat); + return ret; +} + +/* Creates the region table header, and region table entries. + * There are 2 supported region table entries: BAT, and Metadata/ + * + * As the calculations for the BAT region table are also needed + * to create the BAT itself, we will also cause the BAT to be + * created. + */ +static int vhdx_create_new_region_table(BlockDriverState *bs, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint32_t log_size, + bool use_zero_blocks, + VHDXImageType type, + uint64_t *metadata_offset) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + BDRVVHDXState *s = NULL; + VHDXRegionTableHeader *region_table; + VHDXRegionTableEntry *rt_bat; + VHDXRegionTableEntry *rt_metadata; + + assert(metadata_offset != NULL); + + /* Populate enough of the BDRVVHDXState to be able to use the + * pre-existing BAT calculation, translation, and update functions */ + s = g_malloc0(sizeof(BDRVVHDXState)); + + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * + (uint64_t) sector_size / (uint64_t) block_size; + + s->sectors_per_block = block_size / sector_size; + s->virtual_disk_size = image_size; + s->block_size = block_size; + s->logical_sector_size = sector_size; + + vhdx_set_shift_bits(s); + + vhdx_calc_bat_entries(s); + + /* At this point the VHDX state is populated enough for creation */ + + /* a single buffer is used so we can calculate the checksum over the + * entire 64KB block */ + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + region_table = buffer; + offset += sizeof(VHDXRegionTableHeader); + rt_bat = buffer + offset; + offset += sizeof(VHDXRegionTableEntry); + rt_metadata = buffer + offset; + + region_table->signature = VHDX_REGION_SIGNATURE; + region_table->entry_count = 2; /* BAT and Metadata */ + + rt_bat->guid = bat_guid; + rt_bat->length = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB); + rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB); + s->bat_offset = rt_bat->file_offset; + + rt_metadata->guid = metadata_guid; + rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length, + MiB); + rt_metadata->length = 1 * MiB; /* min size, and more than enough */ + *metadata_offset = rt_metadata->file_offset; + + vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE, + offsetof(VHDXRegionTableHeader, checksum)); + + + /* The region table gives us the data we need to create the BAT, + * so do that now */ + ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat); + + /* Now write out the region headers to disk */ + vhdx_region_header_le_export(region_table); + vhdx_region_entry_le_export(rt_bat); + vhdx_region_entry_le_export(rt_metadata); + + ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + +exit: + g_free(s); + g_free(buffer); + return ret; +} + +/* We need to create the following elements: + * + * .-----------------------------------------------------------------. + * | (A) | (B) | (C) | (D) | (E) | + * | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 | + * | | | | | | + * .-----------------------------------------------------------------. + * 0 64KB 128KB 192KB 256KB 320KB + * + * + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * | (F) | (G) | (H) | | + * | Journal Log | BAT / Bitmap | Metadata | .... data ...... | + * | | | | | + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * 1MB + */ +static int vhdx_create(const char *filename, QEMUOptionParameter *options, + Error **errp) +{ + int ret = 0; + uint64_t image_size = (uint64_t) 2 * GiB; + uint32_t log_size = 1 * MiB; + uint32_t block_size = 0; + uint64_t signature; + uint64_t metadata_offset; + bool use_zero_blocks = false; + + gunichar2 *creator = NULL; + glong creator_items; + BlockDriverState *bs; + const char *type = NULL; + VHDXImageType image_type; + Error *local_err = NULL; + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + image_size = options->value.n; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_LOG_SIZE)) { + log_size = options->value.n; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_BLOCK_SIZE)) { + block_size = options->value.n; + } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { + type = options->value.s; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_ZERO)) { + use_zero_blocks = options->value.n != 0; + } + options++; + } + + if (image_size > VHDX_MAX_IMAGE_SIZE) { + error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB"); + ret = -EINVAL; + goto exit; + } + + if (type == NULL) { + type = "dynamic"; + } + + if (!strcmp(type, "dynamic")) { + image_type = VHDX_TYPE_DYNAMIC; + } else if (!strcmp(type, "fixed")) { + image_type = VHDX_TYPE_FIXED; + } else if (!strcmp(type, "differencing")) { + error_setg_errno(errp, ENOTSUP, + "Differencing files not yet supported"); + ret = -ENOTSUP; + goto exit; + } else { + ret = -EINVAL; + goto exit; + } + + /* These are pretty arbitrary, and mainly designed to keep the BAT + * size reasonable to load into RAM */ + if (block_size == 0) { + if (image_size > 32 * TiB) { + block_size = 64 * MiB; + } else if (image_size > (uint64_t) 100 * GiB) { + block_size = 32 * MiB; + } else if (image_size > 1 * GiB) { + block_size = 16 * MiB; + } else { + block_size = 8 * MiB; + } + } + + + /* make the log size close to what was specified, but must be + * min 1MB, and multiple of 1MB */ + log_size = ROUND_UP(log_size, MiB); + + block_size = ROUND_UP(block_size, MiB); + block_size = block_size > VHDX_BLOCK_SIZE_MAX ? VHDX_BLOCK_SIZE_MAX : + block_size; + + ret = bdrv_create_file(filename, options, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; + } + + ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; + } + + /* Create (A) */ + + /* The creator field is optional, but may be useful for + * debugging / diagnostics */ + creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, + &creator_items, NULL); + signature = cpu_to_le64(VHDX_FILE_SIGNATURE); + bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); + if (ret < 0) { + goto delete_and_exit; + } + if (creator) { + bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator, + creator_items * sizeof(gunichar2)); + if (ret < 0) { + goto delete_and_exit; + } + } + + + /* Creates (B),(C) */ + ret = vhdx_create_new_headers(bs, image_size, log_size); + if (ret < 0) { + goto delete_and_exit; + } + + /* Creates (D),(E),(G) explicitly. (F) created as by-product */ + ret = vhdx_create_new_region_table(bs, image_size, block_size, 512, + log_size, use_zero_blocks, image_type, + &metadata_offset); + if (ret < 0) { + goto delete_and_exit; + } + + /* Creates (H) */ + ret = vhdx_create_new_metadata(bs, image_size, block_size, 512, + metadata_offset, image_type); + if (ret < 0) { + goto delete_and_exit; + } + + + +delete_and_exit: + bdrv_unref(bs); +exit: + g_free(creator); + return ret; +} + +static QEMUOptionParameter vhdx_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size; max of 64TB." + }, + { + .name = VHDX_BLOCK_OPT_LOG_SIZE, + .type = OPT_SIZE, + .value.n = 1 * MiB, + .help = "Log size; min 1MB." + }, + { + .name = VHDX_BLOCK_OPT_BLOCK_SIZE, + .type = OPT_SIZE, + .value.n = 0, + .help = "Block Size; min 1MB, max 256MB. " \ + "0 means auto-calculate based on image size." + }, + { + .name = BLOCK_OPT_SUBFMT, + .type = OPT_STRING, + .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\ + "Default is 'dynamic'." + }, + { + .name = VHDX_BLOCK_OPT_ZERO, + .type = OPT_FLAG, + .help = "Force use of payload blocks of type 'ZERO'. Non-standard." + }, + { NULL } +}; + static BlockDriver bdrv_vhdx = { .format_name = "vhdx", .instance_size = sizeof(BDRVVHDXState), @@ -1329,6 +1884,9 @@ static BlockDriver bdrv_vhdx = { .bdrv_reopen_prepare = vhdx_reopen_prepare, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, + .bdrv_create = vhdx_create, + + .create_options = vhdx_create_options, }; static void bdrv_vhdx_init(void) diff --git a/block/vhdx.h b/block/vhdx.h index f222d1876..51183b243 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -18,6 +18,11 @@ #ifndef BLOCK_VHDX_H #define BLOCK_VHDX_H +#define KiB (1 * 1024) +#define MiB (KiB * 1024) +#define GiB (MiB * 1024) +#define TiB ((uint64_t) GiB * 1024) + /* Structures and fields present in the VHDX file */ /* The header section has the following blocks, @@ -36,8 +41,9 @@ #define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) #define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) #define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) +#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4) - +#define VHDX_HEADER_SECTION_END (1 * MiB) /* * A note on the use of MS-GUID fields. For more details on the GUID, * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. @@ -55,6 +61,7 @@ /* These structures are ones that are defined in the VHDX specification * document */ +#define VHDX_FILE_SIGNATURE 0x656C696678646876 /* "vhdxfile" in ASCII */ typedef struct VHDXFileIdentifier { uint64_t signature; /* "vhdxfile" in ASCII */ uint16_t creator[256]; /* optional; utf-16 string to identify @@ -85,6 +92,7 @@ typedef struct QEMU_PACKED MSGUID { /* The full header is 4KB, although the actual header data is much smaller. * But for the checksum calculation, it is over the entire 4KB structure, * not just the defined portion of it */ +#define VHDX_HEADER_SIGNATURE 0x64616568 typedef struct QEMU_PACKED VHDXHeader { uint32_t signature; /* "head" in ASCII */ uint32_t checksum; /* CRC-32C hash of the whole header */ @@ -125,6 +133,7 @@ typedef struct QEMU_PACKED VHDXHeader { } VHDXHeader; /* Header for the region table block */ +#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */ typedef struct QEMU_PACKED VHDXRegionTableHeader { uint32_t signature; /* "regi" in ASCII */ uint32_t checksum; /* CRC-32C hash of the 64KB table */ @@ -238,6 +247,7 @@ typedef uint64_t VHDXBatEntry; #define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ #define VHDX_METADATA_TABLE_MAX_SIZE \ (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) +#define VHDX_METADATA_SIGNATURE 0x617461646174656D /* "metadata" in ASCII */ typedef struct QEMU_PACKED VHDXMetadataTableHeader { uint64_t signature; /* "metadata" in ASCII */ uint16_t reserved; @@ -267,6 +277,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry { If set indicates a fixed size VHDX file */ #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ +#define VHDX_BLOCK_SIZE_MIN (1 * MiB) +#define VHDX_BLOCK_SIZE_MAX (256 * MiB) typedef struct QEMU_PACKED VHDXFileParameters { uint32_t block_size; /* size of each payload block, always power of 2, <= 256MB and >= 1MB. */ @@ -274,6 +286,7 @@ typedef struct QEMU_PACKED VHDXFileParameters { the rest are reserved (see above) */ } VHDXFileParameters; +#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB) typedef struct QEMU_PACKED VHDXVirtualDiskSize { uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. Must be multiple of the sector size, From 228b234dd73383408743d749d29a6c37e9d99981 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:54 -0400 Subject: [PATCH 34/37] block: vhdx - update _make_test_img() to filter out vhdx options The non-global option output is suppresed in _make_test_img() for output verification in the 0?? tests. This adds suppression for the vhdx-unique options as well. This allows check -vhdx to run successfully. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/common.rc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index d24de2c95..7f6245770 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -157,7 +157,10 @@ _make_test_img() -e "s# zeroed_grain=\\(on\\|off\\)##g" \ -e "s# subformat='[^']*'##g" \ -e "s# adapter_type='[^']*'##g" \ - -e "s# lazy_refcounts=\\(on\\|off\\)##g" + -e "s# lazy_refcounts=\\(on\\|off\\)##g" \ + -e "s# block_size=[0-9]\\+##g" \ + -e "s# block_state_zero=\\(on\\|off\\)##g" \ + -e "s# log_size=[0-9]\\+##g" # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then From 751aec24cdcd4359b5927df5cebfe6b8d3e74515 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:55 -0400 Subject: [PATCH 35/37] block: qemu-iotests for vhdx, add write test support This removes the IMGFMT_GENERIC blocker for read-only, so existing iotests run read/write tests for vhdx images created by qemu-img (e.g. tests 001, 002, 003). In addition, this updates the sample image test for the Hyper-V created image, to verify we can write it as well. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/064 | 11 +++++++++++ tests/qemu-iotests/064.out | 14 ++++++++++++++ tests/qemu-iotests/common | 1 - 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/064 b/tests/qemu-iotests/064 index 6789aa6ee..1c74c31a1 100755 --- a/tests/qemu-iotests/064 +++ b/tests/qemu-iotests/064 @@ -56,6 +56,17 @@ echo echo "=== Verify pattern 0x00, 66M - 1024M ===" $QEMU_IO -r -c "read -pP 0x00 66M 958M" "$TEST_IMG" | _filter_qemu_io +echo +echo "=== Verify pattern write, 0xc3 99M-157M ===" +$QEMU_IO -c "write -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io +# first verify we didn't write where we should not have +$QEMU_IO -c "read -pP 0xa5 0 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x96 33M 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x00 66M 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x00 157MM 867MM" "$TEST_IMG" | _filter_qemu_io +# now verify what we should have actually written +$QEMU_IO -c "read -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/064.out b/tests/qemu-iotests/064.out index b9e8e4a87..5346a4e63 100644 --- a/tests/qemu-iotests/064.out +++ b/tests/qemu-iotests/064.out @@ -11,4 +11,18 @@ read 34603008/34603008 bytes at offset 34603008 === Verify pattern 0x00, 66M - 1024M === read 1004535808/1004535808 bytes at offset 69206016 958 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Verify pattern write, 0xc3 99M-157M === +wrote 60817408/60817408 bytes at offset 103809024 +58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 0 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 34603008 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 69206016 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 909115392/909115392 bytes at offset 164626432 +867 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 60817408/60817408 bytes at offset 103809024 +58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 2932e14e7..8cde7f11f 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -200,7 +200,6 @@ testlist options -vhdx) IMGFMT=vhdx xpand=false - IMGFMT_GENERIC=false ;; -rbd) From e78835b722eb26f5a56370166e99b69e9751ea2a Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Wed, 30 Oct 2013 10:44:56 -0400 Subject: [PATCH 36/37] block: vhdx qemu-iotest - log replay of data sector This tests the replay of a data sector in a VHDX image file. The image file is a 10G dynamic image, with 4MB block size. The image was created with qemu-img, and the log left unplayed by modification of the vhdx image format driver. It was verified under both QEMU and Hyper-V that the image file, post log replay, matched. Signed-off-by: Jeff Cody Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/070 | 67 ++++++++++++++++++ tests/qemu-iotests/070.out | 8 +++ tests/qemu-iotests/group | 1 + .../iotest-dirtylog-10G-4M.vhdx.bz2 | Bin 0 -> 4490 bytes 4 files changed, 76 insertions(+) create mode 100755 tests/qemu-iotests/070 create mode 100644 tests/qemu-iotests/070.out create mode 100644 tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070 new file mode 100755 index 000000000..41bf10070 --- /dev/null +++ b/tests/qemu-iotests/070 @@ -0,0 +1,67 @@ +#!/bin/bash +# +# Test VHDX log replay from an image with a journal that needs to be +# replayed +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt vhdx +_supported_proto generic +_supported_os Linux + +# With the log replayed, the pattern 0xa5 extends to 0xc025000 +# If the log was not replayed, it would only extend to 0xc000000 +# +# This image is a 10G dynamic image, with 4M block size, and 1 unplayed +# data sector in the log +# +# This image was created with qemu-img, however it was verified using +# Hyper-V to properly replay the logs and give the same post-replay +# image as qemu. +_use_sample_img iotest-dirtylog-10G-4M.vhdx.bz2 + +echo +echo "=== Verify open image read-only fails, due to dirty log ===" +$QEMU_IO -r -c "read -pP 0xa5 0 18M" "$TEST_IMG" 2>&1 | grep -o "Permission denied" + +echo "=== Verify open image replays log ===" +$QEMU_IO -c "read -pP 0xa5 0 18M" "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/070.out b/tests/qemu-iotests/070.out new file mode 100644 index 000000000..9db8ff265 --- /dev/null +++ b/tests/qemu-iotests/070.out @@ -0,0 +1,8 @@ +QA output created by 070 + +=== Verify open image read-only fails, due to dirty log === +Permission denied +=== Verify open image replays log === +read 18874368/18874368 bytes at offset 0 +18 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index c57ff3584..b18b241f8 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -75,3 +75,4 @@ 067 rw auto 068 rw auto 069 rw auto +070 rw auto diff --git a/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..4b91cfc654b9b8b46ff45918b6f9a2f88d47ecf0 GIT binary patch literal 4490 zcmV;55q0iDT4*^jL0KkKS$k&2jsZ0{fB*mg|NsC0|NsC0|NsC0|NsC0|NsC0|NsC0 z|NsC0|Nr0&KYaiIlYO_ZcRTI9zV`F0x$m=gzWd(y-zN9hz4y-D_uhSdA3nFvh^Y#D z3N-W+)gBsC)bz}ZrZlILew2Gu*e9xbn-M=#1j3#IpQ)+hVw(d~8mH(cr=uY@Nt$S7 zGgH${gz#w-)Y_X->Uoh(srnT2MvW5!o=m5hQ`E_($r-8*J*s&yO-Je&r;JJH1ZsJeHq%qlPgC_agrA@q z6ExL76+K45Pt`LfQ`6LHeyQqysguD*L)sMeF+D;krkb9oqt!jCHku&F^%{92Kxoj@ z)b%`}=s}G~l-gwUp{J%rpb|{#w7HflMNXmv@(xLr>T?F+91*DdQ8${XwYpy(8PL1 zhpFm%CWe|C14f2NsA3vuX_4s-8fXknGVA-VQ))b>#;C+&N$5sHH8B{}!5B0&42=`hO&SRGJx8gd)ICh~1j2f1Gej6P zCTZ%K8$@Wqrje#20i?i~(9s8}kCadV%PbH$Ws8vcdpP+(cp!SJv0 zBp9v^cVke1zziW%O?^-eCRNnX*IrRxO$vjsNH7Z7HcG2usbH1K3s|a)(!FR<0W=5& zi)b!R~~XYSpWIVMIYKrG^(U5Cjm_MLG z=zt-000D52L_t-ta;O%fBYZ)!P`C<&0IKPQmY^>rtdx=xk#6kRwPNnQh>{~CUJ8f` zA#4SlDVr+_1$xTR9o`6;hz1`PX{te8lEnce7Vwl1nbL*TMG_<+!Xp47@u;+Ov)sASi|W0?-$rWI)iaL~?173}6^7{k65VZOFU0uU@@+ z<$M2~p7@Jzjj?fR-wAPbZEDfk#WIk@MFzw)oh=>5OGQY)ts6!#Ac&GaI|>KioPr%F z)+B^cli}we(9Xv#P?-VHG5|Ulpg0o(B62#3F!*tX2W-Tg)oCKKp#V#Nxyc54qztBU zvcR7EuH>rA^BVvES;J%hA%UONcbup0!Myh{{Wk@6w{X+Y0~JsKzQJk&2*4x)5)Hm* zAAzUhY!)?oj+&tYwqJ&5T>=uc-Xq^1B zEe3rLH8Tv$m@)2n+)EmAAOH%V?zmpTjcjX1;rfvsGZ4N8*{CHWtyQ=iLHkIiM|A!% zL^56K$-sa!P~`3#E0TdiCt^S{-q!Q_yIgk5s_%9CXY?SY#6t3rUNR6qQzD_zMJ|> zuHG@ttFw|uWjHBanXyiL=2`eC=<;kN_l$K7E1t}tfN4)*tN51%%oM=bNfP0aDufSu z$$Phc=ffo>E!(B6JH-^wsPezOv(KG5y6p24 zewI|L{AH%B&8wmn&0xn1aGuq_XB-V*Z5y4YU7jn53RTohsr`P+~TGe`exgXyw^$ z(UkOdp4+se0#Up;H4_wgv>N5DM=!kOYeEElv`9}kSL_n-m;92yF1EQ}BUQBnASq51 zH{7l1km1i$t3g0M3*V=tJ8a|d&y^Rhb(!5W<<*eays2qigRhm3Yc9zHB!oq5kAEf9 zP~juz<*eUc?7nI?Z zGXVjB00Dq4bT4!WAY2ha+s65S$ISbTC1IZw0E|K<3r&+i%mKs@fmiR9kr`e)Na6$u zS%tKiMYqyNW=B1YHqB6!jU*$Y%7pkHYex_`EDdPM9mbA#C)h$ggmm4R2?Q_RLGMCxWDbQ{RP=z~^3P-- zgz;fm5CWQ^a^-6ge9J_anKF#jbtrgz)IrG84ktqQk(~4A;bqhrqh72FuDQ468D>Ts zX|P}+oYR^Oo7g>_!QE5sQV|`NOb6L8&ryAyS1-lX9_(qY_TzjM6wFEtc8JyBN;vFk zK|XM|08GHNk0W#4ab-Zk1te}mqz<}mI)!k*Cg1Wc8EFC<|pdOP@$2DBu$CyGw@ z^uJ`2T^A6)vu$xYS7^N7n($p@$u=trByXwHfhw|dyu%X#TmYwj3Bwb!AO`9*i8d9oZ{4V+*e5y zr4Iq>`N;nGP`WNiC8i=D19B75D_MT59a-Hvj;q^^Xj1{sx>d7qF!w5={bm7`G@Q5w0YHHQAsNhI>py1)xbGQWU!&x5SdDho$!*6-&b(hTFdyctQ2>BI z<@UH&QAJmmwAkBDYrldH6~>`DskSQXFr+CG(6Gml0J#_(F1vFHiXCFkn=psQL##7@ zVuh4_+L(_T;DRaK+UBXz;%}o&f41iX%HoKkV%9^<>Vqui3b$51H4S6Gk*8JrM+!i5AV?gsb{l%LQDBh-Yho1cCrX^4Op z(5=0HV0G*(XC*C-9n;mzg&EO$fb3%F5Kdo6D`;S-Wm{zjuN@ku=@yUn~1=X zBk5Pm4B}%37@$tM3ynOJX(bVtN^T;#h&Kjmr~RW!lfHiQ)Pdles=@kj$DOisG0LKe zT_l9#?1?G(cWe4PaAa^c9<^v|C*U|Hd;KqQ1U(6~-WOs*D_1SJM*B zihwUF&bkv<;0ROMxlAWXB|w+}0H>1{j0sHNo29BN^maob2Q_2wXp?u1`zLYWR%#?u z5H`LAqrFm?c+XhK=j!x;06TH^@ z=Y^;lt-N>c?~1T|HvB1`6y+*;0uB6}CtoL#c~yy1#@(AdewS^}Q2!&RRFh=8@PSZ( zG(i4|XX-XkY#kFnk#5c1@yBUHf1^zjT4#RPJTB53Oxxzq5Hbd?8Z8D7f8%{1>^>N0 z9lW$B>>^&R<@V`{$+67xy4TSXw{gQ#tPmhXK!HXW08~InM-@;NQ#qzVRTM!n8r>UO zd&E_7Hf6JqJd2TZme>sPUr@Jvf0sNXV z_V6jK^tCnDU_hq(q&lR$lx+g8{^N9ax;`Q&e`wk)a;48+TMsjf4{@C{>+!6C=lyuU=;W zg1VM$Zpd$AmhJ8WUsBJhYY9${Xa@#1jQ}cSa;7V+G0B`U}#`B ze821FVs(GlSEk%HbFSJu?gQ8wL0>OalmvhfgCPVA5tt(e2*M1Iw+NiP^F;z3xc3?e zW@iZ5=qq#%5~d#4VieM~AM1S{#9g2EYRTC&y|yxk0U`5;;}7s+N!|)3XQga7MpNCa zw`e;Zy@KPFb-|Id6J!Oxei=Q6GxOp5jztzkW8m29_ReF8l?hqluWO;(+?CylQyv?7 zApSFV4bQV+NF?)@NdDpsIcZ{#j?&v2#8|iZugcr0TuV%|MOb(MGe97sZib!$FyjVz zVFb#xh!7xG$%%hMNL0Y_mrq=5lC0YH zYhu$Mg}=+zKQ|*hdMbv3_FIRUD{qmJqHW{P;E@Dg;!oZ|r;y1)GN+WF!lzNn0_$^} zd&?gWd02wW^=qm)2wgj>NaPWzx6xTTazbGrT6ZS1GuAG#Oj_gS_7*4J)Yn{4u==&- zjed!WmN$T=mRi9!{&q1AhLqhhwDg_rW`;tMCo?oH5*H;i2YDhwL-RgaPW+9~!5i{Qr{SZbPd zIM$?7a<;DIh_uhMWDyq_>Sf@dI$!`9*oojiwhWnv6GLStINPYpy`fwfN;O1!6Bvi1 zBW0dnY9fguH_^_@d(R`x%qr)X6t~fvh?(&4GKKym~EH)G=|rvUJwCxb5ds7NA_N3OOmymrw&d^* Date: Wed, 6 Nov 2013 19:48:06 +0800 Subject: [PATCH 37/37] block: Round up total_sectors Since b94a2610, bdrv_getlength() is omitted when probing image. VMDK monolithicFlat is broken by that because a file < 512 bytes can't be read with its total_sectors truncated to 0. This patch round up the size to BDRV_SECTOR_SIZE, when a image size is not sector aligned. Signed-off-by: Fam Zheng Reviewed-by: Benoit Canet Signed-off-by: Stefan Hajnoczi --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 0e96a223a..6d5c80475 100644 --- a/block.c +++ b/block.c @@ -640,7 +640,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) if (length < 0) { return length; } - hint = length >> BDRV_SECTOR_BITS; + hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); } bs->total_sectors = hint;