From 72893756e0da74fcb626c52dfff1fc6e9de84f0c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 18 Oct 2010 16:53:53 +0100 Subject: [PATCH 01/11] qcow2: Support exact L1 table growth The L1 table grow operation includes a size calculation that bumps up the new L1 table size in order to anticipate the size needs of vmstate data. This helps reduce the number of times that the L1 table has to be grown when vmstate data is appended. This size overhead is not necessary during image creation, bdrv_truncate(), or snapshot goto operations. In fact, existing qemu-iotests that exercise table growth are no longer able to trigger it because image creation preallocates an L1 table that is too large after changes to qcow_create2(). This patch keeps the size calculation but also adds exact growth for callers that do not want to inflate the L1 table size unnecessarily. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 25 ++++++++++++++++--------- block/qcow2-snapshot.c | 2 +- block/qcow2.c | 2 +- block/qcow2.h | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index fb4224a66..4f7dc59b7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -28,7 +28,7 @@ #include "block_int.h" #include "block/qcow2.h" -int qcow2_grow_l1_table(BlockDriverState *bs, int min_size) +int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) { BDRVQcowState *s = bs->opaque; int new_l1_size, new_l1_size2, ret, i; @@ -36,15 +36,22 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size) int64_t new_l1_table_offset; uint8_t data[12]; - new_l1_size = s->l1_size; - if (min_size <= new_l1_size) + if (min_size <= s->l1_size) return 0; - if (new_l1_size == 0) { - new_l1_size = 1; - } - while (min_size > new_l1_size) { - new_l1_size = (new_l1_size * 3 + 1) / 2; + + if (exact_size) { + new_l1_size = min_size; + } else { + /* Bump size up to reduce the number of times we have to grow */ + new_l1_size = s->l1_size; + if (new_l1_size == 0) { + new_l1_size = 1; + } + while (min_size > new_l1_size) { + new_l1_size = (new_l1_size * 3 + 1) / 2; + } } + #ifdef DEBUG_ALLOC2 printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size); #endif @@ -550,7 +557,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, l1_index = offset >> (s->l2_bits + s->cluster_bits); if (l1_index >= s->l1_size) { - ret = qcow2_grow_l1_table(bs, l1_index + 1); + ret = qcow2_grow_l1_table(bs, l1_index + 1, false); if (ret < 0) { return ret; } diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index bbfcaaae2..8dd5df0bc 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -327,7 +327,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) goto fail; - if (qcow2_grow_l1_table(bs, sn->l1_size) < 0) + if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0) goto fail; s->l1_size = sn->l1_size; diff --git a/block/qcow2.c b/block/qcow2.c index ee3481b78..345d2e4c3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1154,7 +1154,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) } new_l1_size = size_to_l1(s, offset); - ret = qcow2_grow_l1_table(bs, new_l1_size); + ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index 356a34af4..add710ba7 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -188,7 +188,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res); /* qcow2-cluster.c functions */ -int qcow2_grow_l1_table(BlockDriverState *bs, int min_size); +int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size); void qcow2_l2_cache_reset(BlockDriverState *bs); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, From a9420734b617be43d075c55b980479411807512e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 11 Jun 2010 21:37:37 +0200 Subject: [PATCH 02/11] qcow2: Simplify image creation Instead of doing lots of magic for setting up initial refcount blocks and stuff create a minimal (inconsistent) image, open it and initialize the rest with regular qcow2 functions. This is a complete rewrite of the image creation function. The old implementating is #ifdef'd out and will be removed by the next patch (removing it here would have made the diff unreadable because diff tries to find similarities when it's really a rewrite) Signed-off-by: Kevin Wolf --- block/qcow2.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 345d2e4c3..3b2e38cb6 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -27,6 +27,7 @@ #include #include "aes.h" #include "block/qcow2.h" +#include "qemu-error.h" /* Differences with QCOW: @@ -794,6 +795,7 @@ static int qcow2_change_backing_file(BlockDriverState *bs, return qcow2_update_ext_header(bs, backing_file, backing_fmt); } +#if 0 static int get_bits_from_size(size_t size) { int res = 0; @@ -814,6 +816,7 @@ static int get_bits_from_size(size_t size) return res; } +#endif static int preallocate(BlockDriverState *bs) @@ -869,6 +872,7 @@ static int preallocate(BlockDriverState *bs) return 0; } +#if 0 static int qcow_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, int prealloc) @@ -1066,6 +1070,133 @@ exit: return ret; } +#else +static int qcow_create2(const char *filename, int64_t total_size, + const char *backing_file, const char *backing_format, + int flags, size_t cluster_size, int prealloc, + QEMUOptionParameter *options) +{ + /* Calulate cluster_bits */ + int cluster_bits; + cluster_bits = ffs(cluster_size) - 1; + if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || + (1 << cluster_bits) != cluster_size) + { + error_report( + "Cluster size must be a power of two between %d and %dk\n", + 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); + return -EINVAL; + } + + /* + * Open the image file and write a minimal qcow2 header. + * + * We keep things simple and start with a zero-sized image. We also + * do without refcount blocks or a L1 table for now. We'll fix the + * inconsistency later. + * + * We do need a refcount table because growing the refcount table means + * allocating two new refcount blocks - the seconds of which would be at + * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file + * size for any qcow2 image. + */ + BlockDriverState* bs; + QCowHeader header; + uint8_t* refcount_table; + int ret; + + ret = bdrv_create_file(filename, options); + if (ret < 0) { + return ret; + } + + ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR); + if (ret < 0) { + return ret; + } + + /* Write the header */ + memset(&header, 0, sizeof(header)); + header.magic = cpu_to_be32(QCOW_MAGIC); + header.version = cpu_to_be32(QCOW_VERSION); + header.cluster_bits = cpu_to_be32(cluster_bits); + header.size = cpu_to_be64(0); + header.l1_table_offset = cpu_to_be64(0); + header.l1_size = cpu_to_be32(0); + header.refcount_table_offset = cpu_to_be64(cluster_size); + header.refcount_table_clusters = cpu_to_be32(1); + + if (flags & BLOCK_FLAG_ENCRYPT) { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + } else { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + } + + ret = bdrv_pwrite(bs, 0, &header, sizeof(header)); + if (ret < 0) { + goto out; + } + + /* Write an empty refcount table */ + refcount_table = qemu_mallocz(cluster_size); + ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); + qemu_free(refcount_table); + + if (ret < 0) { + goto out; + } + + bdrv_close(bs); + + /* + * And now open the image and make it consistent first (i.e. increase the + * refcount of the cluster that is occupied by the header and the refcount + * table) + */ + BlockDriver* drv = bdrv_find_format("qcow2"); + assert(drv != NULL); + ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv); + if (ret < 0) { + goto out; + } + + ret = qcow2_alloc_clusters(bs, 2 * cluster_size); + if (ret < 0) { + goto out; + + } else if (ret != 0) { + error_report("Huh, first cluster in empty image is already in use?"); + abort(); + } + + /* Okay, now that we have a valid image, let's give it the right size */ + ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); + if (ret < 0) { + goto out; + } + + /* Want a backing file? There you go.*/ + if (backing_file) { + ret = bdrv_change_backing_file(bs, backing_file, backing_format); + if (ret < 0) { + goto out; + } + } + + /* And if we're supposed to preallocate metadata, do that now */ + if (prealloc) { + ret = preallocate(bs); + if (ret < 0) { + goto out; + } + } + + ret = 0; +out: + bdrv_delete(bs); + return ret; +} +#endif static int qcow_create(const char *filename, QEMUOptionParameter *options) { @@ -1111,7 +1242,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options) } return qcow_create2(filename, sectors, backing_file, backing_fmt, flags, - cluster_size, prealloc); + cluster_size, prealloc, options); } static int qcow_make_empty(BlockDriverState *bs) From 9b036055ef65d0ab3236dfdb0d07828cea961133 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 14 Jun 2010 15:15:03 +0200 Subject: [PATCH 03/11] qcow2: Remove old image creation function They have been #ifdef'd out by the previous patch. Signed-off-by: Kevin Wolf --- block/qcow2.c | 224 -------------------------------------------------- 1 file changed, 224 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 3b2e38cb6..45ed073db 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -795,30 +795,6 @@ static int qcow2_change_backing_file(BlockDriverState *bs, return qcow2_update_ext_header(bs, backing_file, backing_fmt); } -#if 0 -static int get_bits_from_size(size_t size) -{ - int res = 0; - - if (size == 0) { - return -1; - } - - while (size != 1) { - /* Not a power of two */ - if (size & 1) { - return -1; - } - - size >>= 1; - res++; - } - - return res; -} -#endif - - static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; @@ -872,205 +848,6 @@ static int preallocate(BlockDriverState *bs) return 0; } -#if 0 -static int qcow_create2(const char *filename, int64_t total_size, - const char *backing_file, const char *backing_format, - int flags, size_t cluster_size, int prealloc) -{ - - int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits; - int ref_clusters, reftable_clusters, backing_format_len = 0; - int rounded_ext_bf_len = 0; - QCowHeader header; - uint64_t tmp, offset; - uint64_t old_ref_clusters; - QCowCreateState s1, *s = &s1; - QCowExtension ext_bf = {0, 0}; - int ret; - - memset(s, 0, sizeof(*s)); - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -errno; - memset(&header, 0, sizeof(header)); - header.magic = cpu_to_be32(QCOW_MAGIC); - header.version = cpu_to_be32(QCOW_VERSION); - header.size = cpu_to_be64(total_size * 512); - header_size = sizeof(header); - backing_filename_len = 0; - if (backing_file) { - if (backing_format) { - ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT; - backing_format_len = strlen(backing_format); - ext_bf.len = backing_format_len; - rounded_ext_bf_len = (sizeof(ext_bf) + ext_bf.len + 7) & ~7; - header_size += rounded_ext_bf_len; - } - header.backing_file_offset = cpu_to_be64(header_size); - backing_filename_len = strlen(backing_file); - header.backing_file_size = cpu_to_be32(backing_filename_len); - header_size += backing_filename_len; - } - - /* Cluster size */ - s->cluster_bits = get_bits_from_size(cluster_size); - if (s->cluster_bits < MIN_CLUSTER_BITS || - s->cluster_bits > MAX_CLUSTER_BITS) - { - fprintf(stderr, "Cluster size must be a power of two between " - "%d and %dk\n", - 1 << MIN_CLUSTER_BITS, - 1 << (MAX_CLUSTER_BITS - 10)); - return -EINVAL; - } - s->cluster_size = 1 << s->cluster_bits; - - header.cluster_bits = cpu_to_be32(s->cluster_bits); - header_size = (header_size + 7) & ~7; - if (flags & BLOCK_FLAG_ENCRYPT) { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } - l2_bits = s->cluster_bits - 3; - shift = s->cluster_bits + l2_bits; - l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift); - offset = align_offset(header_size, s->cluster_size); - s->l1_table_offset = offset; - header.l1_table_offset = cpu_to_be64(s->l1_table_offset); - header.l1_size = cpu_to_be32(l1_size); - offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size); - - /* count how many refcount blocks needed */ - -#define NUM_CLUSTERS(bytes) \ - (((bytes) + (s->cluster_size) - 1) / (s->cluster_size)) - - ref_clusters = NUM_CLUSTERS(NUM_CLUSTERS(offset) * sizeof(uint16_t)); - - do { - uint64_t image_clusters; - old_ref_clusters = ref_clusters; - - /* Number of clusters used for the refcount table */ - reftable_clusters = NUM_CLUSTERS(ref_clusters * sizeof(uint64_t)); - - /* Number of clusters that the whole image will have */ - image_clusters = NUM_CLUSTERS(offset) + ref_clusters - + reftable_clusters; - - /* Number of refcount blocks needed for the image */ - ref_clusters = NUM_CLUSTERS(image_clusters * sizeof(uint16_t)); - - } while (ref_clusters != old_ref_clusters); - - s->refcount_table = qemu_mallocz(reftable_clusters * s->cluster_size); - - s->refcount_table_offset = offset; - header.refcount_table_offset = cpu_to_be64(offset); - header.refcount_table_clusters = cpu_to_be32(reftable_clusters); - offset += (reftable_clusters * s->cluster_size); - s->refcount_block_offset = offset; - - for (i=0; i < ref_clusters; i++) { - s->refcount_table[i] = cpu_to_be64(offset); - offset += s->cluster_size; - } - - s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size); - - /* update refcounts */ - qcow2_create_refcount_update(s, 0, header_size); - qcow2_create_refcount_update(s, s->l1_table_offset, - l1_size * sizeof(uint64_t)); - qcow2_create_refcount_update(s, s->refcount_table_offset, - reftable_clusters * s->cluster_size); - qcow2_create_refcount_update(s, s->refcount_block_offset, - ref_clusters * s->cluster_size); - - /* write all the data */ - ret = qemu_write_full(fd, &header, sizeof(header)); - if (ret != sizeof(header)) { - ret = -errno; - goto exit; - } - if (backing_file) { - if (backing_format_len) { - char zero[16]; - int padding = rounded_ext_bf_len - (ext_bf.len + sizeof(ext_bf)); - - memset(zero, 0, sizeof(zero)); - cpu_to_be32s(&ext_bf.magic); - cpu_to_be32s(&ext_bf.len); - ret = qemu_write_full(fd, &ext_bf, sizeof(ext_bf)); - if (ret != sizeof(ext_bf)) { - ret = -errno; - goto exit; - } - ret = qemu_write_full(fd, backing_format, backing_format_len); - if (ret != backing_format_len) { - ret = -errno; - goto exit; - } - if (padding > 0) { - ret = qemu_write_full(fd, zero, padding); - if (ret != padding) { - ret = -errno; - goto exit; - } - } - } - ret = qemu_write_full(fd, backing_file, backing_filename_len); - if (ret != backing_filename_len) { - ret = -errno; - goto exit; - } - } - lseek(fd, s->l1_table_offset, SEEK_SET); - tmp = 0; - for(i = 0;i < l1_size; i++) { - ret = qemu_write_full(fd, &tmp, sizeof(tmp)); - if (ret != sizeof(tmp)) { - ret = -errno; - goto exit; - } - } - lseek(fd, s->refcount_table_offset, SEEK_SET); - ret = qemu_write_full(fd, s->refcount_table, - reftable_clusters * s->cluster_size); - if (ret != reftable_clusters * s->cluster_size) { - ret = -errno; - goto exit; - } - - lseek(fd, s->refcount_block_offset, SEEK_SET); - ret = qemu_write_full(fd, s->refcount_block, - ref_clusters * s->cluster_size); - if (ret != ref_clusters * s->cluster_size) { - ret = -errno; - goto exit; - } - - ret = 0; -exit: - qemu_free(s->refcount_table); - qemu_free(s->refcount_block); - close(fd); - - /* Preallocate metadata */ - if (ret == 0 && prealloc) { - BlockDriverState *bs; - BlockDriver *drv = bdrv_find_format("qcow2"); - bs = bdrv_new(""); - bdrv_open(bs, filename, BDRV_O_CACHE_WB | BDRV_O_RDWR, drv); - ret = preallocate(bs); - bdrv_close(bs); - } - - return ret; -} -#else static int qcow_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, int prealloc, @@ -1196,7 +973,6 @@ out: bdrv_delete(bs); return ret; } -#endif static int qcow_create(const char *filename, QEMUOptionParameter *options) { From a58b8d5401b6064d52113f243456115d046bdd12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Oct 2010 15:29:41 +0200 Subject: [PATCH 04/11] ide: set WCACHE supported in IDENTIFY data ATA does not only have the WCACHE enabled bit in identify word 85, but also a WCACHE supported bit in word 82. While the Linux kernel is fine with the latter at least hdparm also needs the former before correctly displaying the cache settings. There's also a non-zero chance other operating systems are more picky in their volatile write cache detection. Signed-off-by: Christoph Hellwig Signed-off-by: Kevin Wolf --- hw/ide/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 06b6e14e5..5ccb09cfb 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -146,8 +146,8 @@ static void ide_identify(IDEState *s) put_le16(p + 68, 120); put_le16(p + 80, 0xf0); /* ata3 -> ata6 supported */ put_le16(p + 81, 0x16); /* conforms to ata5 */ - /* 14=NOP supported, 0=SMART supported */ - put_le16(p + 82, (1 << 14) | 1); + /* 14=NOP supported, 5=WCACHE supported, 0=SMART supported */ + put_le16(p + 82, (1 << 14) | (1 << 5) | 1); /* 13=flush_cache_ext,12=flush_cache,10=lba48 */ put_le16(p + 83, (1 << 14) | (1 << 13) | (1 <<12) | (1 << 10)); /* 14=set to 1, 1=SMART self test, 0=SMART error logging */ From 51ef67270b1d10e1fcf3de7368dccad1ba0bf9d1 Mon Sep 17 00:00:00 2001 From: edison Date: Tue, 21 Sep 2010 19:58:41 -0700 Subject: [PATCH 05/11] Copy snapshots out of QCOW2 disk In order to backup snapshots, created from QCOW2 iamge, we want to copy snapshots out of QCOW2 disk to a seperate storage. The following patch adds a new option in "qemu-img": qemu-img convert -f qcow2 -O qcow2 -s snapshot_name src_img bck_img. Right now, it only supports to copy the full snapshot, delta snapshot is on the way. Changes from V1: all the comments from Kevin are addressed: Add read-only checking Fix coding style Change the name from bdrv_snapshot_load to bdrv_snapshot_load_tmp Signed-off-by: Disheng Su Signed-off-by: Kevin Wolf --- block.c | 16 ++++++++++++++++ block.h | 2 ++ block/qcow2-snapshot.c | 31 +++++++++++++++++++++++++++++++ block/qcow2.c | 1 + block/qcow2.h | 1 + block_int.h | 2 ++ qemu-img-cmds.hx | 4 ++-- qemu-img.c | 19 ++++++++++++++++++- qemu-img.texi | 4 ++-- 9 files changed, 75 insertions(+), 5 deletions(-) diff --git a/block.c b/block.c index a19374dfc..985d0b7e3 100644 --- a/block.c +++ b/block.c @@ -1899,6 +1899,22 @@ int bdrv_snapshot_list(BlockDriverState *bs, return -ENOTSUP; } +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_name) +{ + BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; + } + if (!bs->read_only) { + return -EINVAL; + } + if (drv->bdrv_snapshot_load_tmp) { + return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); + } + return -ENOTSUP; +} + #define NB_SUFFIXES 4 char *get_human_readable_size(char *buf, int buf_size, int64_t size) diff --git a/block.h b/block.h index 5f6438010..a4facf2fa 100644 --- a/block.h +++ b/block.h @@ -211,6 +211,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id); int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_name); char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn); char *get_human_readable_size(char *buf, int buf_size, int64_t size); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 8dd5df0bc..aacf35782 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -418,3 +418,34 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) return s->nb_snapshots; } +int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) +{ + int i, snapshot_index, l1_size2; + BDRVQcowState *s = bs->opaque; + QCowSnapshot *sn; + + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name); + if (snapshot_index < 0) { + return -ENOENT; + } + + sn = &s->snapshots[snapshot_index]; + s->l1_size = sn->l1_size; + l1_size2 = s->l1_size * sizeof(uint64_t); + if (s->l1_table != NULL) { + qemu_free(s->l1_table); + } + + s->l1_table_offset = sn->l1_table_offset; + s->l1_table = qemu_mallocz(align_offset(l1_size2, 512)); + + if (bdrv_pread(bs->file, sn->l1_table_offset, + s->l1_table, l1_size2) != l1_size2) { + return -1; + } + + for(i = 0;i < s->l1_size; i++) { + be64_to_cpus(&s->l1_table[i]); + } + return 0; +} diff --git a/block/qcow2.c b/block/qcow2.c index 45ed073db..b816d8733 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1286,6 +1286,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_snapshot_goto = qcow2_snapshot_goto, .bdrv_snapshot_delete = qcow2_snapshot_delete, .bdrv_snapshot_list = qcow2_snapshot_list, + .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, .bdrv_get_info = qcow_get_info, .bdrv_save_vmstate = qcow_save_vmstate, diff --git a/block/qcow2.h b/block/qcow2.h index add710ba7..2d22e5ec4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -211,6 +211,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id); int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab); +int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name); void qcow2_free_snapshots(BlockDriverState *bs); int qcow2_read_snapshots(BlockDriverState *bs); diff --git a/block_int.h b/block_int.h index e8e7156c9..87e60b859 100644 --- a/block_int.h +++ b/block_int.h @@ -93,6 +93,8 @@ struct BlockDriver { int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id); int (*bdrv_snapshot_list)(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_name); int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); int (*bdrv_save_vmstate)(BlockDriverState *bs, const uint8_t *buf, diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 6d3e5f8e6..6c7176f8b 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -28,9 +28,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [-c] [-f fmt] [-O output_fmt] [-o options] filename [filename2 [...]] output_filename") + "convert [-c] [-f fmt] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename") STEXI -@item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("info", img_info, diff --git a/qemu-img.c b/qemu-img.c index 578b8ebe8..d4a3b4ebc 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -646,13 +646,14 @@ static int img_convert(int argc, char **argv) BlockDriverInfo bdi; QEMUOptionParameter *param = NULL, *create_options = NULL; char *options = NULL; + const char *snapshot_name = NULL; fmt = NULL; out_fmt = "raw"; out_baseimg = NULL; flags = 0; for(;;) { - c = getopt(argc, argv, "f:O:B:hce6o:"); + c = getopt(argc, argv, "f:O:B:s:hce6o:"); if (c == -1) break; switch(c) { @@ -680,6 +681,9 @@ static int img_convert(int argc, char **argv) case 'o': options = optarg; break; + case 's': + snapshot_name = optarg; + break; } } @@ -711,6 +715,19 @@ static int img_convert(int argc, char **argv) total_sectors += bs_sectors; } + if (snapshot_name != NULL) { + if (bs_n > 1) { + error("No support for concatenating multiple snapshot\n"); + ret = -1; + goto out; + } + if (bdrv_snapshot_load_tmp(bs[0], snapshot_name) < 0) { + error("Failed to load snapshot\n"); + ret = -1; + goto out; + } + } + /* Find driver and parse its options */ drv = bdrv_find_format(out_fmt); if (!drv) { diff --git a/qemu-img.texi b/qemu-img.texi index c1b1f2717..1b90ddbcf 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -77,9 +77,9 @@ it doesn't need to be specified separately in this case. Commit the changes recorded in @var{filename} in its base image. -@item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} -Convert the disk image @var{filename} to disk image @var{output_filename} +Convert the disk image @var{filename} or a snapshot @var{snapshot_name} to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} option) or use any format specific options like encryption (@code{-o} option). From 191c2890ddc9626d35c9e432494e1699a8181025 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 16 Sep 2010 13:18:08 +0200 Subject: [PATCH 06/11] qemu-io: New command map The new map command in qemu-io lists all allocated/unallocated areas in an image file. Signed-off-by: Kevin Wolf --- qemu-io.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/qemu-io.c b/qemu-io.c index b4e5cc8fe..ff353ebba 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -1442,6 +1442,44 @@ static const cmdinfo_t alloc_cmd = { .oneline = "checks if a sector is present in the file", }; +static int +map_f(int argc, char **argv) +{ + int64_t offset; + int64_t nb_sectors; + char s1[64]; + int num, num_checked; + int ret; + const char *retstr; + + offset = 0; + nb_sectors = bs->total_sectors; + + do { + num_checked = MIN(nb_sectors, INT_MAX); + ret = bdrv_is_allocated(bs, offset, num_checked, &num); + retstr = ret ? " allocated" : "not allocated"; + cvtstr(offset << 9ULL, s1, sizeof(s1)); + printf("[% 24" PRId64 "] % 8d/% 8d sectors %s at offset %s (%d)\n", + offset << 9ULL, num, num_checked, retstr, s1, ret); + + offset += num; + nb_sectors -= num; + } while(offset < bs->total_sectors); + + return 0; +} + +static const cmdinfo_t map_cmd = { + .name = "map", + .argmin = 0, + .argmax = 0, + .cfunc = map_f, + .args = "", + .oneline = "prints the allocated areas of a file", +}; + + static int close_f(int argc, char **argv) { @@ -1680,6 +1718,7 @@ int main(int argc, char **argv) add_command(&length_cmd); add_command(&info_cmd); add_command(&alloc_cmd); + add_command(&map_cmd); add_args_command(init_args_command); add_check_command(init_check_command); From a77cffe7e916f4dd28f2048982ea2e0d98143b11 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 24 Sep 2010 21:02:05 +0200 Subject: [PATCH 07/11] block: Use GCC_FMT_ATTR and fix a format error Adding the gcc format attribute detects a format bug which is fixed here. v2: Don't use type cast. BDRV_SECTOR_SIZE is unsigned long long, so %lld should be the correct format specifier. Cc: Blue Swirl Cc: Kevin Wolf Signed-off-by: Stefan Weil Signed-off-by: Kevin Wolf --- block/blkverify.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blkverify.c b/block/blkverify.c index 808346475..b2a12fe7f 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -53,7 +53,8 @@ static AIOPool blkverify_aio_pool = { .cancel = blkverify_aio_cancel, }; -static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...) +static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb, + const char *fmt, ...) { va_list ap; @@ -299,7 +300,7 @@ static void blkverify_verify_readv(BlkverifyAIOCB *acb) { ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov); if (offset != -1) { - blkverify_err(acb, "contents mismatch in sector %ld", + blkverify_err(acb, "contents mismatch in sector %lld", acb->sector_num + (offset / BDRV_SECTOR_SIZE)); } } From a18953fbe72ba553a564ba11fdda03aac41d4f38 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 14 Oct 2010 15:46:04 +0200 Subject: [PATCH 08/11] qemu-img: Fix qemu-img convert -obacking_file The old -B option caused a backing file to be used for the converted image and to avoid copying clusters from the old backing file. When replaced with -obacking_file, qemu-img convert does assign the backing file to the new image, but it doesn't realize that it should avoid copying clusters from the backing file. This patch checks the -o options for a backing_file and applies the same logic as for -B in this case. Signed-off-by: Kevin Wolf --- qemu-img.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qemu-img.c b/qemu-img.c index d4a3b4ebc..2864cb820 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -645,6 +645,7 @@ static int img_convert(int argc, char **argv) const uint8_t *buf1; BlockDriverInfo bdi; QEMUOptionParameter *param = NULL, *create_options = NULL; + QEMUOptionParameter *out_baseimg_param; char *options = NULL; const char *snapshot_name = NULL; @@ -769,6 +770,12 @@ static int img_convert(int argc, char **argv) goto out; } + /* Get backing file name if -o backing_file was used */ + out_baseimg_param = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); + if (out_baseimg_param) { + out_baseimg = out_baseimg_param->value.s; + } + /* Check if compression is supported */ if (flags & BLOCK_FLAG_COMPRESS) { QEMUOptionParameter *encryption = From 6bcb1a79a31563f2c414e1bd35d044d6ab385011 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 18 Oct 2010 17:10:49 +0200 Subject: [PATCH 09/11] ide: Factor ide_flush_cache out The next patch reuses this code, so put it in its own function. Signed-off-by: Kevin Wolf --- hw/ide/core.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 5ccb09cfb..6d8606e89 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -801,6 +801,15 @@ static void ide_flush_cb(void *opaque, int ret) ide_set_irq(s->bus); } +static void ide_flush_cache(IDEState *s) +{ + if (s->bs) { + bdrv_aio_flush(s->bs, ide_flush_cb, s); + } else { + ide_flush_cb(s, 0); + } +} + static inline void cpu_to_ube16(uint8_t *buf, int val) { buf[0] = val >> 8; @@ -2031,10 +2040,7 @@ void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val) break; case WIN_FLUSH_CACHE: case WIN_FLUSH_CACHE_EXT: - if (s->bs) - bdrv_aio_flush(s->bs, ide_flush_cb, s); - else - ide_flush_cb(s, 0); + ide_flush_cache(s); break; case WIN_STANDBY: case WIN_STANDBY2: From e2bcadadc35673223c1445152445d0c9b6887b9e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 18 Oct 2010 17:13:05 +0200 Subject: [PATCH 10/11] ide: Handle flush failure Instead of always assuming success for bdrv_aio_flush, actually do something with the error. This respects the werror option and accordingly ignores the error, reports it to the guest or stops the VM and retries after cont. Ignoring the error is trivial, obviously. For stopping the VM and retrying later old code can be reused, but we need to introduce a new status for "retry a flush". For reporting to the guest, fortunately the same action is required as for a failed read/write (status = DRDY | ERR, error = ABRT), so this code can be reused as well. Signed-off-by: Kevin Wolf --- hw/ide/core.c | 10 +++++++++- hw/ide/internal.h | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 6d8606e89..bc3e91658 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -65,6 +65,7 @@ static void ide_dma_start(IDEState *s, BlockDriverCompletionFunc *dma_cb); static void ide_dma_restart(IDEState *s, int is_read); static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret); static int ide_handle_rw_error(IDEState *s, int error, int op); +static void ide_flush_cache(IDEState *s); static void padstr(char *str, const char *src, int len) { @@ -688,6 +689,8 @@ static void ide_dma_restart_bh(void *opaque) } else { ide_sector_write(bmdma_active_if(bm)); } + } else if (bm->status & BM_STATUS_RETRY_FLUSH) { + ide_flush_cache(bmdma_active_if(bm)); } } @@ -795,7 +798,12 @@ static void ide_flush_cb(void *opaque, int ret) { IDEState *s = opaque; - /* XXX: how do we signal I/O errors here? */ + if (ret < 0) { + /* XXX: What sector number to set here? */ + if (ide_handle_rw_error(s, -ret, BM_STATUS_RETRY_FLUSH)) { + return; + } + } s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 416554324..d652e06c4 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -472,7 +472,8 @@ struct IDEDeviceInfo { #define BM_STATUS_INT 0x04 #define BM_STATUS_DMA_RETRY 0x08 #define BM_STATUS_PIO_RETRY 0x10 -#define BM_STATUS_RETRY_READ 0x20 +#define BM_STATUS_RETRY_READ 0x20 +#define BM_STATUS_RETRY_FLUSH 0x40 #define BM_CMD_START 0x01 #define BM_CMD_READ 0x08 From 8c269b542c5bbbcd74c1664959b223d941a2893b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 20 Oct 2010 13:17:30 +0200 Subject: [PATCH 11/11] virtio-blk: Respect werror option for flushes The werror option now affects not only write requests, but also flush requests. Previously, it was not possible to stop a VM on a failed flush. Signed-off-by: Kevin Wolf --- hw/virtio-blk.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index a1df26dbc..dbe207070 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -106,7 +106,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) { VirtIOBlockReq *req = opaque; - virtio_blk_req_complete(req, ret ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK); + if (ret) { + if (virtio_blk_handle_rw_error(req, -ret, 0)) { + return; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); } static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)