From 4a3f03ba8dbf53fce36d0c1dd5d0cc0f340fe5f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 11 Jan 2017 09:38:15 +0100 Subject: [PATCH 01/17] virtio-net: enable ioeventfd even if vhost=off virtio-net-pci does not enable ioeventfd for historical reasons (and nobody ever checked whether it should be revisited). Note that other backends do enable ioeventfd for virtio-net. However, it has a major effect on performance. On Windows, throughput is _multiplied_ by 2 or 3 on TCP_STREAM (on small packets it is "only" a 30% improvement) and a little less so on TCP_MAERTS albeit still very much statistically significant. Latency also has a single digit improvement. This is not visible when using vhost, which forces ioeventfd=on, but it is substantial without vhost. In addition, also on Windows and with the RHEL 7.3 kernel, APICv seems to slow down virtio-net performance a bit, but the penalty with this patch goes from -25% to -7%. Signed-off-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 854b8f22bf..8baaf2ba6d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2278,7 +2278,7 @@ static const TypeInfo virtio_serial_pci_info = { static Property virtio_net_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), DEFINE_PROP_END_OF_LIST(), }; From 332fa82d0963409fa14997a02639289afa226596 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 12 Jan 2017 11:46:10 +0000 Subject: [PATCH 02/17] Revert "virtio: turn vq->notification into a nested counter" This reverts commit aff8fd18f1786fc5af259a9bc0077727222f51ca. Both virtio-net and virtio-crypto do not balance virtio_queue_set_notification() enable and disable calls. This makes the notifications_disabled counter unreliable and Doug Goldstein reported the following assertion failure: #3 0x00007ffff44d1c62 in __GI___assert_fail ( assertion=assertion@entry=0x555555ae8e8a "vq->notification_disabled > 0", file=file@entry=0x555555ae89c0 "/home/doug/work/qemu/hw/virtio/virtio.c", line=line@entry=215, function=function@entry=0x555555ae9630 <__PRETTY_FUNCTION__.43707> "virtio_queue_set_notification") at assert.c:101 #4 0x00005555557f25d6 in virtio_queue_set_notification (vq=0x55555666aa90, enable=enable@entry=1) at /home/doug/work/qemu/hw/virtio/virtio.c:215 #5 0x00005555557dc311 in virtio_net_has_buffers (q=, q=, bufsize=102) at /home/doug/work/qemu/hw/net/virtio-net.c:1008 #6 virtio_net_receive (nc=, buf=0x555557386b88 "", size=102) at /home/doug/work/qemu/hw/net/virtio-net.c:1148 #7 0x00005555559cad33 in nc_sendv_compat (flags=, iovcnt=1, iov=0x7fffead746d0, nc=0x55555788b340) at net/net.c:705 #8 qemu_deliver_packet_iov (sender=, flags=, iov=0x7fffead746d0, iovcnt=1, opaque=0x55555788b340) at net/net.c:732 #9 0x00005555559cd929 in qemu_net_queue_deliver (size=, data=, flags=, sender=, queue=0x55555788b550) at net/queue.c:164 #10 qemu_net_queue_flush (queue=0x55555788b550) at net/queue.c:261 This patch is safe to revert since it's just an optimization for virtqueue polling. The next patch will improve the situation again without resorting to nesting. Reported-by: Doug Goldstein Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Richard Henderson Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index aa4f38f50a..f04ab7aa6d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -88,8 +88,8 @@ struct VirtQueue /* Last used index value we have signalled on */ bool signalled_used_valid; - /* Nested host->guest notification disabled counter */ - unsigned int notification_disabled; + /* Notification enabled? */ + bool notification; uint16_t queue_index; @@ -202,7 +202,7 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) { hwaddr pa; - if (vq->notification_disabled) { + if (!vq->notification) { return; } pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]); @@ -211,13 +211,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) void virtio_queue_set_notification(VirtQueue *vq, int enable) { - if (enable) { - assert(vq->notification_disabled > 0); - vq->notification_disabled--; - } else { - vq->notification_disabled++; - } - + vq->notification = enable; if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); } else if (enable) { @@ -1020,7 +1014,7 @@ void virtio_reset(void *opaque) virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; - vdev->vq[i].notification_disabled = 0; + vdev->vq[i].notification = true; vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; vdev->vq[i].inuse = 0; } @@ -1831,7 +1825,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->vq[i].vring.desc = qemu_get_be64(f); qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); vdev->vq[i].signalled_used_valid = false; - vdev->vq[i].notification_disabled = 0; + vdev->vq[i].notification = true; if (vdev->vq[i].vring.desc) { /* XXX virtio-1 devices */ From 1448c133e19372359d9de68626c06088ba79a34b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 12 Jan 2017 11:46:11 +0000 Subject: [PATCH 03/17] virtio: disable notifications again after poll succeeded While AioContext is in polling mode virtqueue notifications are not necessary. Some device virtqueue handlers enable notifications. Make sure they stay disabled to avoid unnecessary vmexits. Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Richard Henderson Tested-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f04ab7aa6d..34065c78f8 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2126,6 +2126,9 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) } virtio_queue_notify_aio_vq(vq); + + /* In case the handler function re-enabled notifications */ + virtio_queue_set_notification(vq, 0); return true; } From c471ad0e9bd46ca5f5c9c796e727230e043a091d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 11 Jan 2017 12:32:12 +0800 Subject: [PATCH 04/17] vhost_net: device IOTLB support This patches implements Device IOTLB support for vhost kernel. This is done through: 1) switch to use dma helpers when map/unmap vrings from vhost codes 2) introduce a set of VhostOps to: - setting up device IOTLB request callback - processing device IOTLB request - processing device IOTLB invalidation 2) kernel support for Device IOTLB API: - allow vhost-net to query the IOMMU IOTLB entry through eventfd - enable the ability for qemu to update a specified mapping of vhost - through ioctl. - enable the ability to invalidate a specified range of iova for the device IOTLB of vhost through ioctl. In x86/intel_iommu case this is triggered through iommu memory region notifier from device IOTLB invalidation descriptor processing routine. With all the above, kernel vhost_net can co-operate with userspace IOMMU. For vhost-user, the support could be easily done on top by implementing the VhostOps. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 1 + hw/virtio/vhost-backend.c | 99 ++++++++++++++++++ hw/virtio/vhost.c | 166 ++++++++++++++++++++++++++---- include/hw/virtio/vhost-backend.h | 13 +++ include/hw/virtio/vhost.h | 4 + net/tap.c | 1 + 6 files changed, 262 insertions(+), 22 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 6280422d02..22874a9777 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -52,6 +52,7 @@ static const int kernel_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 272a5ec584..be927b891e 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -185,6 +185,102 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) } #endif /* CONFIG_VHOST_VSOCK */ +static void vhost_kernel_iotlb_read(void *opaque) +{ + struct vhost_dev *dev = opaque; + struct vhost_msg msg; + ssize_t len; + + while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) { + struct vhost_iotlb_msg *imsg = &msg.iotlb; + if (len < sizeof msg) { + error_report("Wrong vhost message len: %d", (int)len); + break; + } + if (msg.type != VHOST_IOTLB_MSG) { + error_report("Unknown vhost iotlb message type"); + break; + } + switch (imsg->type) { + case VHOST_IOTLB_MISS: + vhost_device_iotlb_miss(dev, imsg->iova, + imsg->perm != VHOST_ACCESS_RO); + break; + case VHOST_IOTLB_UPDATE: + case VHOST_IOTLB_INVALIDATE: + error_report("Unexpected IOTLB message type"); + break; + case VHOST_IOTLB_ACCESS_FAIL: + /* FIXME: report device iotlb error */ + break; + default: + break; + } + } +} + +static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm) +{ + struct vhost_msg msg; + msg.type = VHOST_IOTLB_MSG; + msg.iotlb.iova = iova; + msg.iotlb.uaddr = uaddr; + msg.iotlb.size = len; + msg.iotlb.type = VHOST_IOTLB_UPDATE; + + switch (perm) { + case IOMMU_RO: + msg.iotlb.perm = VHOST_ACCESS_RO; + break; + case IOMMU_WO: + msg.iotlb.perm = VHOST_ACCESS_WO; + break; + case IOMMU_RW: + msg.iotlb.perm = VHOST_ACCESS_RW; + break; + default: + g_assert_not_reached(); + } + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to update device iotlb"); + return -EFAULT; + } + + return 0; +} + +static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len) +{ + struct vhost_msg msg; + + msg.type = VHOST_IOTLB_MSG; + msg.iotlb.iova = iova; + msg.iotlb.size = len; + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to invalidate device iotlb"); + return -EFAULT; + } + + return 0; +} + +static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, + int enabled) +{ + if (enabled) + qemu_set_fd_handler((uintptr_t)dev->opaque, + vhost_kernel_iotlb_read, NULL, dev); + else + qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -214,6 +310,9 @@ static const VhostOps kernel_ops = { .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, .vhost_vsock_set_running = vhost_kernel_vsock_set_running, #endif /* CONFIG_VHOST_VSOCK */ + .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, + .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb, + .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d396b22531..9cacf557f2 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -26,6 +26,7 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "migration/migration.h" +#include "sysemu/dma.h" /* enabled until disconnected backend stabilizes */ #define _VHOST_DEBUG 1 @@ -421,8 +422,36 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) dev->log_size = size; } +static int vhost_dev_has_iommu(struct vhost_dev *dev) +{ + VirtIODevice *vdev = dev->vdev; + AddressSpace *dma_as = vdev->dma_as; -static int vhost_verify_ring_part_mapping(void *part, + return memory_region_is_iommu(dma_as->root) && + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + +static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, + hwaddr *plen, int is_write) +{ + if (!vhost_dev_has_iommu(dev)) { + return cpu_physical_memory_map(addr, plen, is_write); + } else { + return (void *)(uintptr_t)addr; + } +} + +static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer, + hwaddr len, int is_write, + hwaddr access_len) +{ + if (!vhost_dev_has_iommu(dev)) { + cpu_physical_memory_unmap(buffer, len, is_write, access_len); + } +} + +static int vhost_verify_ring_part_mapping(struct vhost_dev *dev, + void *part, uint64_t part_addr, uint64_t part_size, uint64_t start_addr, @@ -436,14 +465,14 @@ static int vhost_verify_ring_part_mapping(void *part, return 0; } l = part_size; - p = cpu_physical_memory_map(part_addr, &l, 1); + p = vhost_memory_map(dev, part_addr, &l, 1); if (!p || l != part_size) { r = -ENOMEM; } if (p != part) { r = -EBUSY; } - cpu_physical_memory_unmap(p, l, 0, 0); + vhost_memory_unmap(dev, p, l, 0, 0); return r; } @@ -463,21 +492,21 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, struct vhost_virtqueue *vq = dev->vqs + i; j = 0; - r = vhost_verify_ring_part_mapping(vq->desc, vq->desc_phys, + r = vhost_verify_ring_part_mapping(dev, vq->desc, vq->desc_phys, vq->desc_size, start_addr, size); if (!r) { break; } j++; - r = vhost_verify_ring_part_mapping(vq->avail, vq->avail_phys, + r = vhost_verify_ring_part_mapping(dev, vq->avail, vq->avail_phys, vq->avail_size, start_addr, size); if (!r) { break; } j++; - r = vhost_verify_ring_part_mapping(vq->used, vq->used_phys, + r = vhost_verify_ring_part_mapping(dev, vq->used, vq->used_phys, vq->used_size, start_addr, size); if (!r) { break; @@ -715,7 +744,8 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, return 0; } -static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) +static int vhost_dev_set_features(struct vhost_dev *dev, + bool enable_log) { uint64_t features = dev->acked_features; int r; @@ -858,6 +888,56 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, return -errno; } +static int vhost_memory_region_lookup(struct vhost_dev *hdev, + uint64_t gpa, uint64_t *uaddr, + uint64_t *len) +{ + int i; + + for (i = 0; i < hdev->mem->nregions; i++) { + struct vhost_memory_region *reg = hdev->mem->regions + i; + + if (gpa >= reg->guest_phys_addr && + reg->guest_phys_addr + reg->memory_size > gpa) { + *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr; + *len = reg->guest_phys_addr + reg->memory_size - gpa; + return 0; + } + } + + return -EFAULT; +} + +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) +{ + IOMMUTLBEntry iotlb; + uint64_t uaddr, len; + + rcu_read_lock(); + + iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, + iova, write); + if (iotlb.target_as != NULL) { + if (vhost_memory_region_lookup(dev, iotlb.translated_addr, + &uaddr, &len)) { + error_report("Fail to lookup the translated address " + "%"PRIx64, iotlb.translated_addr); + goto out; + } + + len = MIN(iotlb.addr_mask + 1, len); + iova = iova & ~iotlb.addr_mask; + + if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, + len, iotlb.perm)) { + error_report("Fail to update device iotlb"); + goto out; + } + } +out: + rcu_read_unlock(); +} + static int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, @@ -903,21 +983,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx); vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx); - vq->desc = cpu_physical_memory_map(a, &l, 0); + vq->desc = vhost_memory_map(dev, a, &l, 0); if (!vq->desc || l != s) { r = -ENOMEM; goto fail_alloc_desc; } vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx); vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx); - vq->avail = cpu_physical_memory_map(a, &l, 0); + vq->avail = vhost_memory_map(dev, a, &l, 0); if (!vq->avail || l != s) { r = -ENOMEM; goto fail_alloc_avail; } vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx); vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx); - vq->used = cpu_physical_memory_map(a, &l, 1); + vq->used = vhost_memory_map(dev, a, &l, 1); if (!vq->used || l != s) { r = -ENOMEM; goto fail_alloc_used; @@ -963,14 +1043,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, fail_vector: fail_kick: fail_alloc: - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), + 0, 0); fail_alloc_used: - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, 0); fail_alloc_avail: - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, 0); + vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, 0); fail_alloc_desc: return r; } @@ -1004,12 +1084,12 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, vhost_vq_index); } - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 1, virtio_queue_get_used_size(vdev, idx)); - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, virtio_queue_get_avail_size(vdev, idx)); - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, virtio_queue_get_desc_size(vdev, idx)); + vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), + 1, virtio_queue_get_used_size(vdev, idx)); + vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, virtio_queue_get_avail_size(vdev, idx)); + vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, virtio_queue_get_desc_size(vdev, idx)); } static void vhost_eventfd_add(MemoryListener *listener, @@ -1066,6 +1146,9 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, r = -errno; goto fail_call; } + + vq->dev = dev; + return 0; fail_call: event_notifier_cleanup(&vq->masked_notifier); @@ -1077,12 +1160,24 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } +static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + struct vhost_dev *hdev = container_of(n, struct vhost_dev, n); + + if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, + iotlb->iova, + iotlb->addr_mask + 1)) { + error_report("Fail to invalidate device iotlb"); + } +} + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout) { uint64_t features; int i, r, n_initialized_vqs = 0; + hdev->vdev = NULL; hdev->migration_blocker = NULL; r = vhost_set_backend_type(hdev, backend_type); @@ -1147,6 +1242,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .priority = 10 }; + hdev->n.notify = vhost_iommu_unmap_notify; + hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; + if (hdev->migration_blocker == NULL) { if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, @@ -1342,11 +1440,18 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) assert(hdev->vhost_ops); hdev->started = true; + hdev->vdev = vdev; r = vhost_dev_set_features(hdev, hdev->log_enabled); if (r < 0) { goto fail_features; } + + if (vhost_dev_has_iommu(hdev)) { + memory_region_register_iommu_notifier(vdev->dma_as->root, + &hdev->n); + } + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { VHOST_OPS_DEBUG("vhost_set_mem_table failed"); @@ -1380,6 +1485,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } } + if (vhost_dev_has_iommu(hdev)) { + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); + + /* Update used ring information for IOTLB to work correctly, + * vhost-kernel code requires for this.*/ + for (i = 0; i < hdev->nvqs; ++i) { + struct vhost_virtqueue *vq = hdev->vqs + i; + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } return 0; fail_log: vhost_log_put(hdev, false); @@ -1391,6 +1506,7 @@ fail_vq: hdev->vq_index + i); } i = hdev->nvqs; + fail_mem: fail_features: @@ -1413,8 +1529,14 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->vq_index + i); } + if (vhost_dev_has_iommu(hdev)) { + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); + memory_region_unregister_iommu_notifier(vdev->dma_as->root, + &hdev->n); + } vhost_log_put(hdev, true); hdev->started = false; + hdev->vdev = NULL; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 30abc11cf1..c3cf4a72bc 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -11,6 +11,8 @@ #ifndef VHOST_BACKEND_H #define VHOST_BACKEND_H +#include "exec/memory.h" + typedef enum VhostBackendType { VHOST_BACKEND_TYPE_NONE = 0, VHOST_BACKEND_TYPE_KERNEL = 1, @@ -77,6 +79,14 @@ typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, uint64_t guest_cid); typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); +typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, + int enabled); +typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm); +typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t len); typedef struct VhostOps { VhostBackendType backend_type; @@ -109,6 +119,9 @@ typedef struct VhostOps { vhost_backend_can_merge_op vhost_backend_can_merge; vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; + vhost_set_iotlb_callback_op vhost_set_iotlb_callback; + vhost_update_device_iotlb_op vhost_update_device_iotlb; + vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 1fe5aadef5..52f633ec89 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -21,6 +21,7 @@ struct vhost_virtqueue { unsigned long long used_phys; unsigned used_size; EventNotifier masked_notifier; + struct vhost_dev *dev; }; typedef unsigned long vhost_log_chunk_t; @@ -38,6 +39,7 @@ struct vhost_log { struct vhost_memory; struct vhost_dev { + VirtIODevice *vdev; MemoryListener memory_listener; struct vhost_memory *mem; int n_mem_sections; @@ -62,6 +64,7 @@ struct vhost_dev { void *opaque; struct vhost_log *log; QLIST_ENTRY(vhost_dev) entry; + IOMMUNotifier n; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -91,4 +94,5 @@ bool vhost_has_free_slot(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); #endif diff --git a/net/tap.c b/net/tap.c index b6896a7b7c..86071b2659 100644 --- a/net/tap.c +++ b/net/tap.c @@ -696,6 +696,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, "tap: open vhost char device failed"); return; } + fcntl(vhostfd, F_SETFL, O_NONBLOCK); } options.opaque = (void *)(uintptr_t)vhostfd; From baf2d5bfbac015b27f4db74feab235e167df0c84 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 19:24:14 +0100 Subject: [PATCH 05/17] fw-cfg: support writeable blobs Useful to send guest data back to QEMU. Changes from Laszlo Ersek : - rebase the patch from Michael Tsirkin's original postings at [1] and [2] to the following patches: - loader: Allow a custom AddressSpace when loading ROMs - loader: Add AddressSpace loading support to uImages - loader: fix handling of custom address spaces when adding ROM blobs - reject such writes immediately that would exceed the end of the array, rather than performing a partial write before setting the error bit: see the (len != dma.length) condition - document the write interface [1] http://lists.nongnu.org/archive/html/qemu-devel/2016-02/msg04968.html [2] http://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg02735.html Cc: "Gabriel L. Somlo" Cc: "Michael S. Tsirkin" Cc: Gerd Hoffmann Cc: Igor Mammedov Cc: Michael Walle Cc: Paolo Bonzini Cc: Peter Maydell Cc: Shannon Zhao Cc: qemu-arm@nongnu.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Laszlo Ersek Reviewed-by: Marcel Apfelbaum Acked-by: Gabriel Somlo Tested-by: Gabriel Somlo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eduardo Habkost --- docs/specs/fw_cfg.txt | 32 +++++++++++++++++++++++++------- hw/arm/virt-acpi-build.c | 2 +- hw/core/loader.c | 18 +++++++++++------- hw/i386/acpi-build.c | 4 ++-- hw/lm32/lm32_hwsetup.h | 2 +- hw/nvram/fw_cfg.c | 37 +++++++++++++++++++++++++++++-------- include/hw/loader.h | 7 ++++--- include/hw/nvram/fw_cfg.h | 3 ++- 8 files changed, 75 insertions(+), 30 deletions(-) diff --git a/docs/specs/fw_cfg.txt b/docs/specs/fw_cfg.txt index 7a5f8c7824..a19e2adbe1 100644 --- a/docs/specs/fw_cfg.txt +++ b/docs/specs/fw_cfg.txt @@ -33,6 +33,10 @@ the selector value is between 0x4000-0x7fff or 0xc000-0xffff. NOTE: As of QEMU v2.4, writes to the fw_cfg data register are no longer supported, and will be ignored (treated as no-ops)! +NOTE: As of QEMU v2.9, writes are reinstated, but only through the DMA + interface (see below). Furthermore, writeability of any specific item is + governed independently of Bit14 in the selector key value. + Bit15 of the selector register indicates whether the configuration setting is architecture specific. A value of 0 means the item is a generic configuration item. A value of 1 means the item is specific @@ -43,7 +47,7 @@ value between 0x8000-0xffff. == Data Register == -* Read/Write (writes ignored as of QEMU v2.4) +* Read/Write (writes ignored as of QEMU v2.4, but see the DMA interface) * Location: platform dependent (IOport [*] or MMIO) * Width: 8-bit (if IOport), 8/16/32/64-bit (if MMIO) * Endianness: string-preserving @@ -134,8 +138,8 @@ struct FWCfgFile { /* an individual file entry, 64 bytes total */ === All Other Data Items === -Please consult the QEMU source for the most up-to-date and authoritative -list of selector keys and their respective items' purpose and format. +Please consult the QEMU source for the most up-to-date and authoritative list +of selector keys and their respective items' purpose, format and writeability. === Ranges === @@ -144,9 +148,11 @@ items, and up to 0x4000 architecturally specific ones. Selector Reg. Range Usage --------------- ----------- -0x0000 - 0x3fff Generic (0x0000 - 0x3fff, RO) +0x0000 - 0x3fff Generic (0x0000 - 0x3fff, generally RO, possibly RW through + the DMA interface in QEMU v2.9+) 0x4000 - 0x7fff Generic (0x0000 - 0x3fff, RW, ignored in QEMU v2.4+) -0x8000 - 0xbfff Arch. Specific (0x0000 - 0x3fff, RO) +0x8000 - 0xbfff Arch. Specific (0x0000 - 0x3fff, generally RO, possibly RW + through the DMA interface in QEMU v2.9+) 0xc000 - 0xffff Arch. Specific (0x0000 - 0x3fff, RW, ignored in v2.4+) In practice, the number of allowed firmware configuration items is given @@ -182,6 +188,7 @@ The "control" field has the following bits: - Bit 1: Read - Bit 2: Skip - Bit 3: Select. The upper 16 bits are the selected index. + - Bit 4: Write When an operation is triggered, if the "control" field has bit 3 set, the upper 16 bits are interpreted as an index of a firmware configuration item. @@ -191,8 +198,17 @@ If the "control" field has bit 1 set, a read operation will be performed. "length" bytes for the current selector and offset will be copied into the physical RAM address specified by the "address" field. -If the "control" field has bit 2 set (and not bit 1), a skip operation will be -performed. The offset for the current selector will be advanced "length" bytes. +If the "control" field has bit 4 set (and not bit 1), a write operation will be +performed. "length" bytes will be copied from the physical RAM address +specified by the "address" field to the current selector and offset. QEMU +prevents starting or finishing the write beyond the end of the item associated +with the current selector (i.e., the item cannot be resized). Truncated writes +are dropped entirely. Writes to read-only items are also rejected. All of these +write errors set bit 0 (the error bit) in the "control" field. + +If the "control" field has bit 2 set (and neither bit 1 nor bit 4), a skip +operation will be performed. The offset for the current selector will be +advanced "length" bytes. To check the result, read the "control" field: error bit set -> something went wrong. @@ -234,3 +250,5 @@ Prefix "opt/org.qemu/" is reserved for QEMU itself. Use of names not beginning with "opt/" is potentially dangerous and entirely unsupported. QEMU will warn if you try. + +All externally provided fw_cfg items are read-only to the guest. diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 085a611173..205d6268e2 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -818,7 +818,7 @@ static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, uint64_t max_size) { return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, - name, virt_acpi_build_update, build_state, NULL); + name, virt_acpi_build_update, build_state, NULL, true); } static const VMStateDescription vmstate_virt_acpi_build = { diff --git a/hw/core/loader.c b/hw/core/loader.c index 45742494e6..ee5abd6eb7 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -853,7 +853,7 @@ static void fw_cfg_resized(const char *id, uint64_t length, void *host) } } -static void *rom_set_mr(Rom *rom, Object *owner, const char *name) +static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro) { void *data; @@ -862,7 +862,7 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name) rom->datasize, rom->romsize, fw_cfg_resized, &error_fatal); - memory_region_set_readonly(rom->mr, true); + memory_region_set_readonly(rom->mr, ro); vmstate_register_ram_global(rom->mr); data = memory_region_get_ram_ptr(rom->mr); @@ -942,7 +942,7 @@ int rom_add_file(const char *file, const char *fw_dir, snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name); if ((!option_rom || mc->option_rom_has_mr) && mc->rom_file_has_mr) { - data = rom_set_mr(rom, OBJECT(fw_cfg), devpath); + data = rom_set_mr(rom, OBJECT(fw_cfg), devpath, true); } else { data = rom->data; } @@ -979,7 +979,7 @@ err: MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, size_t max_len, hwaddr addr, const char *fw_file_name, FWCfgReadCallback fw_callback, void *callback_opaque, - AddressSpace *as) + AddressSpace *as, bool read_only) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); Rom *rom; @@ -998,10 +998,14 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, char devpath[100]; void *data; - snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name); + if (read_only) { + snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name); + } else { + snprintf(devpath, sizeof(devpath), "/ram@%s", fw_file_name); + } if (mc->rom_file_has_mr) { - data = rom_set_mr(rom, OBJECT(fw_cfg), devpath); + data = rom_set_mr(rom, OBJECT(fw_cfg), devpath, read_only); mr = rom->mr; } else { data = rom->data; @@ -1009,7 +1013,7 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, fw_cfg_add_file_callback(fw_cfg, fw_file_name, fw_callback, callback_opaque, - data, rom->datasize); + data, rom->datasize, read_only); } return mr; } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 0c8912fd86..a1d781ae42 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2806,7 +2806,7 @@ static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, uint64_t max_size) { return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, - name, acpi_build_update, build_state, NULL); + name, acpi_build_update, build_state, NULL, true); } static const VMStateDescription vmstate_acpi_build = { @@ -2872,7 +2872,7 @@ void acpi_setup(void) build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); fw_cfg_add_file_callback(pcms->fw_cfg, ACPI_BUILD_RSDP_FILE, acpi_build_update, build_state, - build_state->rsdp, rsdp_size); + build_state->rsdp, rsdp_size, true); build_state->rsdp_mr = NULL; } else { build_state->rsdp = NULL; diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h index 23e18784df..a01f6bc5df 100644 --- a/hw/lm32/lm32_hwsetup.h +++ b/hw/lm32/lm32_hwsetup.h @@ -75,7 +75,7 @@ static inline void hwsetup_create_rom(HWSetup *hw, hwaddr base) { rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE, base, NULL, NULL, NULL, NULL); + TARGET_PAGE_SIZE, base, NULL, NULL, NULL, NULL, true); } static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u) diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 3ebecb2260..e0145c11a1 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -54,11 +54,13 @@ #define FW_CFG_DMA_CTL_READ 0x02 #define FW_CFG_DMA_CTL_SKIP 0x04 #define FW_CFG_DMA_CTL_SELECT 0x08 +#define FW_CFG_DMA_CTL_WRITE 0x10 #define FW_CFG_DMA_SIGNATURE 0x51454d5520434647ULL /* "QEMU CFG" */ typedef struct FWCfgEntry { uint32_t len; + bool allow_write; uint8_t *data; void *callback_opaque; FWCfgReadCallback read_callback; @@ -326,7 +328,7 @@ static void fw_cfg_dma_transfer(FWCfgState *s) FWCfgDmaAccess dma; int arch; FWCfgEntry *e; - int read; + int read = 0, write = 0; dma_addr_t dma_addr; /* Reset the address before the next access */ @@ -353,8 +355,13 @@ static void fw_cfg_dma_transfer(FWCfgState *s) if (dma.control & FW_CFG_DMA_CTL_READ) { read = 1; + write = 0; + } else if (dma.control & FW_CFG_DMA_CTL_WRITE) { + read = 0; + write = 1; } else if (dma.control & FW_CFG_DMA_CTL_SKIP) { read = 0; + write = 0; } else { dma.length = 0; } @@ -374,7 +381,9 @@ static void fw_cfg_dma_transfer(FWCfgState *s) dma.control |= FW_CFG_DMA_CTL_ERROR; } } - + if (write) { + dma.control |= FW_CFG_DMA_CTL_ERROR; + } } else { if (dma.length <= (e->len - s->cur_offset)) { len = dma.length; @@ -391,6 +400,14 @@ static void fw_cfg_dma_transfer(FWCfgState *s) dma.control |= FW_CFG_DMA_CTL_ERROR; } } + if (write) { + if (!e->allow_write || + len != dma.length || + dma_memory_read(s->dma_as, dma.address, + &e->data[s->cur_offset], len)) { + dma.control |= FW_CFG_DMA_CTL_ERROR; + } + } s->cur_offset += len; } @@ -586,7 +603,8 @@ static const VMStateDescription vmstate_fw_cfg = { static void fw_cfg_add_bytes_read_callback(FWCfgState *s, uint16_t key, FWCfgReadCallback callback, void *callback_opaque, - void *data, size_t len) + void *data, size_t len, + bool read_only) { int arch = !!(key & FW_CFG_ARCH_LOCAL); @@ -599,6 +617,7 @@ static void fw_cfg_add_bytes_read_callback(FWCfgState *s, uint16_t key, s->entries[arch][key].len = (uint32_t)len; s->entries[arch][key].read_callback = callback; s->entries[arch][key].callback_opaque = callback_opaque; + s->entries[arch][key].allow_write = !read_only; } static void *fw_cfg_modify_bytes_read(FWCfgState *s, uint16_t key, @@ -616,13 +635,14 @@ static void *fw_cfg_modify_bytes_read(FWCfgState *s, uint16_t key, s->entries[arch][key].data = data; s->entries[arch][key].len = len; s->entries[arch][key].callback_opaque = NULL; + s->entries[arch][key].allow_write = false; return ptr; } void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len) { - fw_cfg_add_bytes_read_callback(s, key, NULL, NULL, data, len); + fw_cfg_add_bytes_read_callback(s, key, NULL, NULL, data, len, true); } void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value) @@ -749,7 +769,7 @@ static int get_fw_cfg_order(FWCfgState *s, const char *name) void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, FWCfgReadCallback callback, void *callback_opaque, - void *data, size_t len) + void *data, size_t len, bool read_only) { int i, index, count; size_t dsize; @@ -811,7 +831,8 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, } fw_cfg_add_bytes_read_callback(s, FW_CFG_FILE_FIRST + index, - callback, callback_opaque, data, len); + callback, callback_opaque, data, len, + read_only); s->files->f[index].size = cpu_to_be32(len); s->files->f[index].select = cpu_to_be16(FW_CFG_FILE_FIRST + index); @@ -824,7 +845,7 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data, size_t len) { - fw_cfg_add_file_callback(s, filename, NULL, NULL, data, len); + fw_cfg_add_file_callback(s, filename, NULL, NULL, data, len, true); } void *fw_cfg_modify_file(FWCfgState *s, const char *filename, @@ -847,7 +868,7 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename, } } /* add new one */ - fw_cfg_add_file_callback(s, filename, NULL, NULL, data, len); + fw_cfg_add_file_callback(s, filename, NULL, NULL, data, len, true); return NULL; } diff --git a/include/hw/loader.h b/include/hw/loader.h index 0c864cfd60..0dbd8d6bf3 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -180,7 +180,8 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, size_t max_len, hwaddr addr, const char *fw_file_name, FWCfgReadCallback fw_callback, - void *callback_opaque, AddressSpace *as); + void *callback_opaque, AddressSpace *as, + bool read_only); int rom_add_elf_program(const char *name, void *data, size_t datasize, size_t romsize, hwaddr addr, AddressSpace *as); int rom_check_and_register_reset(void); @@ -194,7 +195,7 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_file_fixed(_f, _a, _i) \ rom_add_file(_f, NULL, _a, _i, false, NULL, NULL) #define rom_add_blob_fixed(_f, _b, _l, _a) \ - rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, NULL) + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, NULL, true) #define rom_add_file_mr(_f, _mr, _i) \ rom_add_file(_f, NULL, 0, _i, false, _mr, NULL) #define rom_add_file_as(_f, _as, _i) \ @@ -202,7 +203,7 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_file_fixed_as(_f, _a, _i, _as) \ rom_add_file(_f, NULL, _a, _i, false, NULL, _as) #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ - rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as) + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) #define PC_ROM_MIN_VGA 0xc0000 #define PC_ROM_MIN_OPTION 0xc8000 diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 5c27a1f0d5..b980cbaebf 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -136,6 +136,7 @@ void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data, * @callback_opaque: argument to be passed into callback function * @data: pointer to start of item data * @len: size of item data + * @read_only: is file read only * * Add a new NAMED fw_cfg item as a raw "blob" of the given size. The data * referenced by the starting pointer is only linked, NOT copied, into the @@ -151,7 +152,7 @@ void fw_cfg_add_file(FWCfgState *s, const char *filename, void *data, */ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, FWCfgReadCallback callback, void *callback_opaque, - void *data, size_t len); + void *data, size_t len, bool read_only); /** * fw_cfg_modify_file: From e12f3a13e2e134fe9b8d3d1a160a8e54de1e8fa7 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Thu, 12 Jan 2017 19:24:15 +0100 Subject: [PATCH 06/17] fw-cfg: turn FW_CFG_FILE_SLOTS into a device property We'd like to raise the value of FW_CFG_FILE_SLOTS. Doing it naively could lead to problems with backward migration: a more recent QEMU (running an older machine type) would allow the guest, in fw_cfg_select(), to select a high key value that is unavailable in the same machine type implemented by the older (target) QEMU. On the target host, fw_cfg_data_read() for example could dereference nonexistent entries. As first step, size the FWCfgState.entries[*] and FWCfgState.entry_order arrays dynamically. All three array sizes will be influenced by the new field FWCfgState.file_slots (and matching device property). Make the following changes: - Replace the FW_CFG_FILE_SLOTS macro with FW_CFG_FILE_SLOTS_MIN (minimum count of fw_cfg file slots) in the header file. The value remains 0x10. - Replace all uses of FW_CFG_FILE_SLOTS with a helper function called fw_cfg_file_slots(), returning the new property. - Eliminate the macro FW_CFG_MAX_ENTRY, and replace all its uses with a helper function called fw_cfg_max_entry(). - In the MMIO- and IO-mapped realize functions both, allocate all three arrays dynamically, based on the new property. - The new property defaults to FW_CFG_FILE_SLOTS_MIN. This is going to be customized in the following patches. Cc: "Gabriel L. Somlo" Cc: "Michael S. Tsirkin" Cc: Gerd Hoffmann Cc: Igor Mammedov Cc: Paolo Bonzini Signed-off-by: Laszlo Ersek Acked-by: Gabriel Somlo Tested-by: Gabriel Somlo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eduardo Habkost --- docs/specs/fw_cfg.txt | 2 +- hw/nvram/fw_cfg.c | 71 ++++++++++++++++++++++++++++++---- include/hw/nvram/fw_cfg_keys.h | 3 +- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/docs/specs/fw_cfg.txt b/docs/specs/fw_cfg.txt index a19e2adbe1..9373bbc647 100644 --- a/docs/specs/fw_cfg.txt +++ b/docs/specs/fw_cfg.txt @@ -156,7 +156,7 @@ Selector Reg. Range Usage 0xc000 - 0xffff Arch. Specific (0x0000 - 0x3fff, RW, ignored in v2.4+) In practice, the number of allowed firmware configuration items is given -by the value of FW_CFG_MAX_ENTRY (see fw_cfg.h). +by the value (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS_MIN) (see fw_cfg.h). = Guest-side DMA Interface = diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index e0145c11a1..bf75ef71d5 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -33,6 +33,7 @@ #include "qemu/error-report.h" #include "qemu/config-file.h" #include "qemu/cutils.h" +#include "qapi/error.h" #define FW_CFG_NAME "fw_cfg" #define FW_CFG_PATH "/machine/" FW_CFG_NAME @@ -71,8 +72,9 @@ struct FWCfgState { SysBusDevice parent_obj; /*< public >*/ - FWCfgEntry entries[2][FW_CFG_MAX_ENTRY]; - int entry_order[FW_CFG_MAX_ENTRY]; + uint16_t file_slots; + FWCfgEntry *entries[2]; + int *entry_order; FWCfgFiles *files; uint16_t cur_entry; uint32_t cur_offset; @@ -257,13 +259,24 @@ static void fw_cfg_write(FWCfgState *s, uint8_t value) /* nothing, write support removed in QEMU v2.4+ */ } +static inline uint16_t fw_cfg_file_slots(const FWCfgState *s) +{ + return s->file_slots; +} + +/* Note: this function returns an exclusive limit. */ +static inline uint32_t fw_cfg_max_entry(const FWCfgState *s) +{ + return FW_CFG_FILE_FIRST + fw_cfg_file_slots(s); +} + static int fw_cfg_select(FWCfgState *s, uint16_t key) { int arch, ret; FWCfgEntry *e; s->cur_offset = 0; - if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) { + if ((key & FW_CFG_ENTRY_MASK) >= fw_cfg_max_entry(s)) { s->cur_entry = FW_CFG_INVALID; ret = 0; } else { @@ -610,7 +623,7 @@ static void fw_cfg_add_bytes_read_callback(FWCfgState *s, uint16_t key, key &= FW_CFG_ENTRY_MASK; - assert(key < FW_CFG_MAX_ENTRY && len < UINT32_MAX); + assert(key < fw_cfg_max_entry(s) && len < UINT32_MAX); assert(s->entries[arch][key].data == NULL); /* avoid key conflict */ s->entries[arch][key].data = data; @@ -628,7 +641,7 @@ static void *fw_cfg_modify_bytes_read(FWCfgState *s, uint16_t key, key &= FW_CFG_ENTRY_MASK; - assert(key < FW_CFG_MAX_ENTRY && len < UINT32_MAX); + assert(key < fw_cfg_max_entry(s) && len < UINT32_MAX); /* return the old data to the function caller, avoid memory leak */ ptr = s->entries[arch][key].data; @@ -777,13 +790,13 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, int order = 0; if (!s->files) { - dsize = sizeof(uint32_t) + sizeof(FWCfgFile) * FW_CFG_FILE_SLOTS; + dsize = sizeof(uint32_t) + sizeof(FWCfgFile) * fw_cfg_file_slots(s); s->files = g_malloc0(dsize); fw_cfg_add_bytes(s, FW_CFG_FILE_DIR, s->files, dsize); } count = be32_to_cpu(s->files->count); - assert(count < FW_CFG_FILE_SLOTS); + assert(count < fw_cfg_file_slots(s)); /* Find the insertion point. */ if (mc->legacy_fw_cfg_order) { @@ -857,7 +870,7 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename, assert(s->files); index = be32_to_cpu(s->files->count); - assert(index < FW_CFG_FILE_SLOTS); + assert(index < fw_cfg_file_slots(s)); for (i = 0; i < index; i++) { if (strcmp(filename, s->files->f[i].name) == 0) { @@ -1014,12 +1027,38 @@ static const TypeInfo fw_cfg_info = { .class_init = fw_cfg_class_init, }; +static void fw_cfg_file_slots_allocate(FWCfgState *s, Error **errp) +{ + uint16_t file_slots_max; + + if (fw_cfg_file_slots(s) < FW_CFG_FILE_SLOTS_MIN) { + error_setg(errp, "\"file_slots\" must be at least 0x%x", + FW_CFG_FILE_SLOTS_MIN); + return; + } + + /* (UINT16_MAX & FW_CFG_ENTRY_MASK) is the highest inclusive selector value + * that we permit. The actual (exclusive) value coming from the + * configuration is (FW_CFG_FILE_FIRST + fw_cfg_file_slots(s)). */ + file_slots_max = (UINT16_MAX & FW_CFG_ENTRY_MASK) - FW_CFG_FILE_FIRST + 1; + if (fw_cfg_file_slots(s) > file_slots_max) { + error_setg(errp, "\"file_slots\" must not exceed 0x%" PRIx16, + file_slots_max); + return; + } + + s->entries[0] = g_new0(FWCfgEntry, fw_cfg_max_entry(s)); + s->entries[1] = g_new0(FWCfgEntry, fw_cfg_max_entry(s)); + s->entry_order = g_new0(int, fw_cfg_max_entry(s)); +} static Property fw_cfg_io_properties[] = { DEFINE_PROP_UINT32("iobase", FWCfgIoState, iobase, -1), DEFINE_PROP_UINT32("dma_iobase", FWCfgIoState, dma_iobase, -1), DEFINE_PROP_BOOL("dma_enabled", FWCfgIoState, parent_obj.dma_enabled, true), + DEFINE_PROP_UINT16("x-file-slots", FWCfgIoState, parent_obj.file_slots, + FW_CFG_FILE_SLOTS_MIN), DEFINE_PROP_END_OF_LIST(), }; @@ -1027,6 +1066,13 @@ static void fw_cfg_io_realize(DeviceState *dev, Error **errp) { FWCfgIoState *s = FW_CFG_IO(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + Error *local_err = NULL; + + fw_cfg_file_slots_allocate(FW_CFG(s), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } /* when using port i/o, the 8-bit data register ALWAYS overlaps * with half of the 16-bit control register. Hence, the total size @@ -1063,6 +1109,8 @@ static Property fw_cfg_mem_properties[] = { DEFINE_PROP_UINT32("data_width", FWCfgMemState, data_width, -1), DEFINE_PROP_BOOL("dma_enabled", FWCfgMemState, parent_obj.dma_enabled, true), + DEFINE_PROP_UINT16("x-file-slots", FWCfgMemState, parent_obj.file_slots, + FW_CFG_FILE_SLOTS_MIN), DEFINE_PROP_END_OF_LIST(), }; @@ -1071,6 +1119,13 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error **errp) FWCfgMemState *s = FW_CFG_MEM(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); const MemoryRegionOps *data_ops = &fw_cfg_data_mem_ops; + Error *local_err = NULL; + + fw_cfg_file_slots_allocate(FW_CFG(s), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } memory_region_init_io(&s->ctl_iomem, OBJECT(s), &fw_cfg_ctl_mem_ops, FW_CFG(s), "fwcfg.ctl", FW_CFG_CTL_SIZE); diff --git a/include/hw/nvram/fw_cfg_keys.h b/include/hw/nvram/fw_cfg_keys.h index 0f3e871884..b6919451f5 100644 --- a/include/hw/nvram/fw_cfg_keys.h +++ b/include/hw/nvram/fw_cfg_keys.h @@ -29,8 +29,7 @@ #define FW_CFG_FILE_DIR 0x19 #define FW_CFG_FILE_FIRST 0x20 -#define FW_CFG_FILE_SLOTS 0x10 -#define FW_CFG_MAX_ENTRY (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS) +#define FW_CFG_FILE_SLOTS_MIN 0x10 #define FW_CFG_WRITE_CHANNEL 0x4000 #define FW_CFG_ARCH_LOCAL 0x8000 From d580bd4b73f0462962fa952777e7ec8f41bb73bd Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 12 Jan 2017 19:24:16 +0100 Subject: [PATCH 07/17] pc: Add 2.9 machine-types Cc: "Michael S. Tsirkin" Cc: Laszlo Ersek Cc: Igor Mammedov Signed-off-by: Eduardo Habkost Reviewed-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Acked-by: Gabriel Somlo Tested-by: Gabriel Somlo Cc: Gabriel Somlo Signed-off-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eduardo Habkost --- hw/i386/pc_piix.c | 15 ++++++++++++--- hw/i386/pc_q35.c | 13 +++++++++++-- include/hw/i386/pc.h | 2 ++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 5e1adbe53c..9f102aa388 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -437,13 +437,24 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_8_machine_options(MachineClass *m) +static void pc_i440fx_2_9_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL, + pc_i440fx_2_9_machine_options); + +static void pc_i440fx_2_8_machine_options(MachineClass *m) +{ + pc_i440fx_2_9_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_8); +} + DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8", NULL, pc_i440fx_2_8_machine_options); @@ -451,8 +462,6 @@ DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8", NULL, static void pc_i440fx_2_7_machine_options(MachineClass *m) { pc_i440fx_2_8_machine_options(m); - m->is_default = 0; - m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index d042fe0843..dd792a8547 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -301,19 +301,28 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_2_8_machine_options(MachineClass *m) +static void pc_q35_2_9_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL, + pc_q35_2_9_machine_options); + +static void pc_q35_2_8_machine_options(MachineClass *m) +{ + pc_q35_2_9_machine_options(m); + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_8); +} + DEFINE_Q35_MACHINE(v2_8, "pc-q35-2.8", NULL, pc_q35_2_8_machine_options); static void pc_q35_2_7_machine_options(MachineClass *m) { pc_q35_2_8_machine_options(m); - m->alias = NULL; m->max_cpus = 255; SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index b22e699c46..230e9e70c5 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -375,6 +375,8 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_8 \ + HW_COMPAT_2_8 \ + #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ From a5b3ebfd23bc70fa68461dff1d7145ff65e07150 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Thu, 12 Jan 2017 19:24:17 +0100 Subject: [PATCH 08/17] fw-cfg: bump "x-file-slots" to 0x20 for 2.9+ machine types More precisely, the "x-file-slots" count is bumped for all machine types that: (a) use fw_cfg, and (b) are not versioned (hence migration is not expected to work for them across QEMU releases anyway), or have version 2.9. This affects machine types implemented in the following source files: - "hw/arm/virt.c". The "virt-*" machine type is versioned, and the <= 2.8 versions already depend on HW_COMPAT_2_8 (see commit e353aac51b944). Therefore adding the "x-file-slots" compat values to HW_COMPAT_2_8 suffices. - "hw/i386/pc.c". The "pc-i440fx-*" (including "pc-*") and "pc-q35-*" machine types are versioned. Modifying HW_COMPAT_2_8 is sufficient here too (see commit "pc: Add 2.9 machine-types"). The "isapc" machtype is not versioned. The "xenfv" machine type, which uses fw_cfg for direct kernel booting, is also not versioned. - "hw/ppc/mac_newworld.c". The "mac99" machine type is not versioned. - "hw/ppc/mac_oldworld.c". The "g3beige" machine type is not versioned. - "hw/sparc/sun4m.c". None of the 9 machine types defined in this file appear versioned. - "hw/sparc64/sun4u.c". None of the 3 machine types defined in this file appear versioned. Cc: "Gabriel L. Somlo" Cc: "Michael S. Tsirkin" Cc: Alexander Graf Cc: Anthony Perard Cc: Artyom Tarasenko Cc: David Gibson Cc: Eduardo Habkost Cc: Gerd Hoffmann Cc: Igor Mammedov Cc: Mark Cave-Ayland Cc: Paolo Bonzini Cc: Peter Maydell Cc: Stefano Stabellini Signed-off-by: Laszlo Ersek Acked-by: Gabriel Somlo Tested-by: Gabriel Somlo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eduardo Habkost --- docs/specs/fw_cfg.txt | 4 ++-- hw/nvram/fw_cfg.c | 6 ++++-- include/hw/compat.h | 10 +++++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/specs/fw_cfg.txt b/docs/specs/fw_cfg.txt index 9373bbc647..08c00bdf44 100644 --- a/docs/specs/fw_cfg.txt +++ b/docs/specs/fw_cfg.txt @@ -155,8 +155,8 @@ Selector Reg. Range Usage through the DMA interface in QEMU v2.9+) 0xc000 - 0xffff Arch. Specific (0x0000 - 0x3fff, RW, ignored in v2.4+) -In practice, the number of allowed firmware configuration items is given -by the value (FW_CFG_FILE_FIRST + FW_CFG_FILE_SLOTS_MIN) (see fw_cfg.h). +In practice, the number of allowed firmware configuration items depends on the +machine type/version. = Guest-side DMA Interface = diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index bf75ef71d5..523d585dcf 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -35,6 +35,8 @@ #include "qemu/cutils.h" #include "qapi/error.h" +#define FW_CFG_FILE_SLOTS_DFLT 0x20 + #define FW_CFG_NAME "fw_cfg" #define FW_CFG_PATH "/machine/" FW_CFG_NAME @@ -1058,7 +1060,7 @@ static Property fw_cfg_io_properties[] = { DEFINE_PROP_BOOL("dma_enabled", FWCfgIoState, parent_obj.dma_enabled, true), DEFINE_PROP_UINT16("x-file-slots", FWCfgIoState, parent_obj.file_slots, - FW_CFG_FILE_SLOTS_MIN), + FW_CFG_FILE_SLOTS_DFLT), DEFINE_PROP_END_OF_LIST(), }; @@ -1110,7 +1112,7 @@ static Property fw_cfg_mem_properties[] = { DEFINE_PROP_BOOL("dma_enabled", FWCfgMemState, parent_obj.dma_enabled, true), DEFINE_PROP_UINT16("x-file-slots", FWCfgMemState, parent_obj.file_slots, - FW_CFG_FILE_SLOTS_MIN), + FW_CFG_FILE_SLOTS_DFLT), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/compat.h b/include/hw/compat.h index 4fe44d1c7a..34e9b4a660 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,15 @@ #define HW_COMPAT_H #define HW_COMPAT_2_8 \ - /* empty */ + {\ + .driver = "fw_cfg_mem",\ + .property = "x-file-slots",\ + .value = stringify(0x10),\ + },{\ + .driver = "fw_cfg_io",\ + .property = "x-file-slots",\ + .value = stringify(0x10),\ + }, #define HW_COMPAT_2_7 \ {\ From 1aea7a5b7e604598066dda8919339dab83fd8089 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:19:40 +0200 Subject: [PATCH 09/17] virtio: drop an obsolete comment virtio core has code to revert queue number to maximum on reset. Drop TODO to add that. Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- hw/virtio/virtio-pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 8baaf2ba6d..09230c05df 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1316,7 +1316,6 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; case VIRTIO_PCI_COMMON_Q_ENABLE: - /* TODO: need a way to put num back on reset. */ virtio_queue_set_num(vdev, vdev->queue_sel, proxy->vqs[vdev->queue_sel].num); virtio_queue_set_rings(vdev, vdev->queue_sel, From 5878b13642ddf8da44186ef93ac91319ff53668b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:58:13 +0200 Subject: [PATCH 10/17] virtio_mmio: add standard header file Signed-off-by: Michael S. Tsirkin --- include/standard-headers/linux/virtio_mmio.h | 141 +++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 include/standard-headers/linux/virtio_mmio.h diff --git a/include/standard-headers/linux/virtio_mmio.h b/include/standard-headers/linux/virtio_mmio.h new file mode 100644 index 0000000000..c4b09689ab --- /dev/null +++ b/include/standard-headers/linux/virtio_mmio.h @@ -0,0 +1,141 @@ +/* + * Virtio platform device driver + * + * Copyright 2011, ARM Ltd. + * + * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007 + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MMIO_H +#define _LINUX_VIRTIO_MMIO_H + +/* + * Control registers + */ + +/* Magic value ("virt" string) - Read Only */ +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 + +/* Virtio device version - Read Only */ +#define VIRTIO_MMIO_VERSION 0x004 + +/* Virtio device ID - Read Only */ +#define VIRTIO_MMIO_DEVICE_ID 0x008 + +/* Virtio vendor ID - Read Only */ +#define VIRTIO_MMIO_VENDOR_ID 0x00c + +/* Bitmask of the features supported by the device (host) + * (32 bits per set) - Read Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 + +/* Device (host) features set selector - Write Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 + +/* Bitmask of features activated by the driver (guest) + * (32 bits per set) - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 + +/* Activated features set selector - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 + + +#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ + +/* Guest's memory page size in bytes - Write Only */ +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 + +#endif + + +/* Queue selector - Write Only */ +#define VIRTIO_MMIO_QUEUE_SEL 0x030 + +/* Maximum size of the currently selected queue - Read Only */ +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 + +/* Queue size for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_NUM 0x038 + + +#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ + +/* Used Ring alignment for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c + +/* Guest's PFN for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_PFN 0x040 + +#endif + + +/* Ready bit for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_READY 0x044 + +/* Queue notifier - Write Only */ +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 + +/* Interrupt status - Read Only */ +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 + +/* Interrupt acknowledge - Write Only */ +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 + +/* Device status register - Read Write */ +#define VIRTIO_MMIO_STATUS 0x070 + +/* Selected queue's Descriptor Table address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 + +/* Selected queue's Available Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 + +/* Selected queue's Used Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 +#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 + +/* Configuration atomicity value */ +#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc + +/* The config space is defined by each driver as + * the per-driver configuration space - Read Write */ +#define VIRTIO_MMIO_CONFIG 0x100 + + + +/* + * Interrupt flags (re: interrupt status & acknowledge registers) + */ + +#define VIRTIO_MMIO_INT_VRING (1 << 0) +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) + +#endif From 7e71da7f124fc47a2f2bc1fbfb32f0e8ee3e7d44 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Jan 2017 00:14:55 +0200 Subject: [PATCH 11/17] virtio-mmio: switch to linux headers Switch to virtio_mmio.h from Linux - will make it easier to implement virtio 1. Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-mmio.c | 95 ++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 60654dc19d..5807aa87fe 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -20,6 +20,7 @@ */ #include "qemu/osdep.h" +#include "standard-headers/linux/virtio_mmio.h" #include "hw/sysbus.h" #include "hw/virtio/virtio.h" #include "qemu/host-utils.h" @@ -52,28 +53,6 @@ do { printf("virtio_mmio: " fmt , ## __VA_ARGS__); } while (0) #define VIRTIO_MMIO(obj) \ OBJECT_CHECK(VirtIOMMIOProxy, (obj), TYPE_VIRTIO_MMIO) -/* Memory mapped register offsets */ -#define VIRTIO_MMIO_MAGIC 0x0 -#define VIRTIO_MMIO_VERSION 0x4 -#define VIRTIO_MMIO_DEVICEID 0x8 -#define VIRTIO_MMIO_VENDORID 0xc -#define VIRTIO_MMIO_HOSTFEATURES 0x10 -#define VIRTIO_MMIO_HOSTFEATURESSEL 0x14 -#define VIRTIO_MMIO_GUESTFEATURES 0x20 -#define VIRTIO_MMIO_GUESTFEATURESSEL 0x24 -#define VIRTIO_MMIO_GUESTPAGESIZE 0x28 -#define VIRTIO_MMIO_QUEUESEL 0x30 -#define VIRTIO_MMIO_QUEUENUMMAX 0x34 -#define VIRTIO_MMIO_QUEUENUM 0x38 -#define VIRTIO_MMIO_QUEUEALIGN 0x3c -#define VIRTIO_MMIO_QUEUEPFN 0x40 -#define VIRTIO_MMIO_QUEUENOTIFY 0x50 -#define VIRTIO_MMIO_INTERRUPTSTATUS 0x60 -#define VIRTIO_MMIO_INTERRUPTACK 0x64 -#define VIRTIO_MMIO_STATUS 0x70 -/* Device specific config space starts here */ -#define VIRTIO_MMIO_CONFIG 0x100 - #define VIRT_MAGIC 0x74726976 /* 'virt' */ #define VIRT_VERSION 1 #define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ @@ -104,10 +83,10 @@ static int virtio_mmio_ioeventfd_assign(DeviceState *d, VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); if (assign) { - memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + memory_region_add_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4, true, n, notifier); } else { - memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUENOTIFY, 4, + memory_region_del_eventfd(&proxy->iomem, VIRTIO_MMIO_QUEUE_NOTIFY, 4, true, n, notifier); } return 0; @@ -140,11 +119,11 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) * device ID of zero means no backend will claim it. */ switch (offset) { - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: return VIRT_VERSION; - case VIRTIO_MMIO_VENDORID: + case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; default: return 0; @@ -169,40 +148,40 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) return 0; } switch (offset) { - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: return VIRT_MAGIC; case VIRTIO_MMIO_VERSION: return VIRT_VERSION; - case VIRTIO_MMIO_DEVICEID: + case VIRTIO_MMIO_DEVICE_ID: return vdev->device_id; - case VIRTIO_MMIO_VENDORID: + case VIRTIO_MMIO_VENDOR_ID: return VIRT_VENDOR; - case VIRTIO_MMIO_HOSTFEATURES: + case VIRTIO_MMIO_DEVICE_FEATURES: if (proxy->host_features_sel) { return 0; } return vdev->host_features; - case VIRTIO_MMIO_QUEUENUMMAX: + case VIRTIO_MMIO_QUEUE_NUM_MAX: if (!virtio_queue_get_num(vdev, vdev->queue_sel)) { return 0; } return VIRTQUEUE_MAX_SIZE; - case VIRTIO_MMIO_QUEUEPFN: + case VIRTIO_MMIO_QUEUE_PFN: return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; - case VIRTIO_MMIO_INTERRUPTSTATUS: + case VIRTIO_MMIO_INTERRUPT_STATUS: return atomic_read(&vdev->isr); case VIRTIO_MMIO_STATUS: return vdev->status; - case VIRTIO_MMIO_HOSTFEATURESSEL: - case VIRTIO_MMIO_GUESTFEATURES: - case VIRTIO_MMIO_GUESTFEATURESSEL: - case VIRTIO_MMIO_GUESTPAGESIZE: - case VIRTIO_MMIO_QUEUESEL: - case VIRTIO_MMIO_QUEUENUM: - case VIRTIO_MMIO_QUEUEALIGN: - case VIRTIO_MMIO_QUEUENOTIFY: - case VIRTIO_MMIO_INTERRUPTACK: + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + case VIRTIO_MMIO_DRIVER_FEATURES: + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + case VIRTIO_MMIO_GUEST_PAGE_SIZE: + case VIRTIO_MMIO_QUEUE_SEL: + case VIRTIO_MMIO_QUEUE_NUM: + case VIRTIO_MMIO_QUEUE_ALIGN: + case VIRTIO_MMIO_QUEUE_NOTIFY: + case VIRTIO_MMIO_INTERRUPT_ACK: DPRINTF("read of write-only register\n"); return 0; default: @@ -251,18 +230,18 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, return; } switch (offset) { - case VIRTIO_MMIO_HOSTFEATURESSEL: + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: proxy->host_features_sel = value; break; - case VIRTIO_MMIO_GUESTFEATURES: + case VIRTIO_MMIO_DRIVER_FEATURES: if (!proxy->guest_features_sel) { virtio_set_features(vdev, value); } break; - case VIRTIO_MMIO_GUESTFEATURESSEL: + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: proxy->guest_features_sel = value; break; - case VIRTIO_MMIO_GUESTPAGESIZE: + case VIRTIO_MMIO_GUEST_PAGE_SIZE: proxy->guest_page_shift = ctz32(value); if (proxy->guest_page_shift > 31) { proxy->guest_page_shift = 0; @@ -270,22 +249,22 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, DPRINTF("guest page size %" PRIx64 " shift %d\n", value, proxy->guest_page_shift); break; - case VIRTIO_MMIO_QUEUESEL: + case VIRTIO_MMIO_QUEUE_SEL: if (value < VIRTIO_QUEUE_MAX) { vdev->queue_sel = value; } break; - case VIRTIO_MMIO_QUEUENUM: + case VIRTIO_MMIO_QUEUE_NUM: DPRINTF("mmio_queue write %d max %d\n", (int)value, VIRTQUEUE_MAX_SIZE); virtio_queue_set_num(vdev, vdev->queue_sel, value); /* Note: only call this function for legacy devices */ virtio_queue_update_rings(vdev, vdev->queue_sel); break; - case VIRTIO_MMIO_QUEUEALIGN: + case VIRTIO_MMIO_QUEUE_ALIGN: /* Note: this is only valid for legacy devices */ virtio_queue_set_align(vdev, vdev->queue_sel, value); break; - case VIRTIO_MMIO_QUEUEPFN: + case VIRTIO_MMIO_QUEUE_PFN: if (value == 0) { virtio_reset(vdev); } else { @@ -293,12 +272,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, value << proxy->guest_page_shift); } break; - case VIRTIO_MMIO_QUEUENOTIFY: + case VIRTIO_MMIO_QUEUE_NOTIFY: if (value < VIRTIO_QUEUE_MAX) { virtio_queue_notify(vdev, value); } break; - case VIRTIO_MMIO_INTERRUPTACK: + case VIRTIO_MMIO_INTERRUPT_ACK: atomic_and(&vdev->isr, ~value); virtio_update_irq(vdev); break; @@ -317,13 +296,13 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, virtio_reset(vdev); } break; - case VIRTIO_MMIO_MAGIC: + case VIRTIO_MMIO_MAGIC_VALUE: case VIRTIO_MMIO_VERSION: - case VIRTIO_MMIO_DEVICEID: - case VIRTIO_MMIO_VENDORID: - case VIRTIO_MMIO_HOSTFEATURES: - case VIRTIO_MMIO_QUEUENUMMAX: - case VIRTIO_MMIO_INTERRUPTSTATUS: + case VIRTIO_MMIO_DEVICE_ID: + case VIRTIO_MMIO_VENDOR_ID: + case VIRTIO_MMIO_DEVICE_FEATURES: + case VIRTIO_MMIO_QUEUE_NUM_MAX: + case VIRTIO_MMIO_INTERRUPT_STATUS: DPRINTF("write to readonly register\n"); break; From 0084248348c2e17b018f11e4a159bd2d68b0edc3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Jan 2017 18:12:44 +0200 Subject: [PATCH 12/17] pci_regs: update to latest linux this drops a duplicate definition of PCI_EXT_CAP_ATS_SIZEOF Signed-off-by: Michael S. Tsirkin --- include/standard-headers/linux/pci_regs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index be5b066aa4..e5a2e68b22 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -678,7 +678,6 @@ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM #define PCI_EXT_CAP_DSN_SIZEOF 12 -#define PCI_EXT_CAP_ATS_SIZEOF 8 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 /* Advanced Error Reporting */ From f43c0076054c3d340e5e9b69f7c46582aef9ae0c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Jan 2017 18:16:12 +0200 Subject: [PATCH 13/17] virtio_crypto: header update Update header from latest linux driver. Session creation structs gain padding to make them same size. Formatting cleanups. Signed-off-by: Michael S. Tsirkin Tested-by: Gonglei Reviewed-by: Gonglei --- .../standard-headers/linux/virtio_crypto.h | 469 +++++++++--------- 1 file changed, 245 insertions(+), 224 deletions(-) diff --git a/include/standard-headers/linux/virtio_crypto.h b/include/standard-headers/linux/virtio_crypto.h index 82275a84d8..5ff0b4ee59 100644 --- a/include/standard-headers/linux/virtio_crypto.h +++ b/include/standard-headers/linux/virtio_crypto.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VIRTIO_CRYPTO_H -#define _LINUX_VIRTIO_CRYPTO_H +#ifndef _VIRTIO_CRYPTO_H +#define _VIRTIO_CRYPTO_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -14,52 +14,54 @@ * 3. Neither the name of IBM nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. */ - + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include "standard-headers/linux/types.h" -#include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" #define VIRTIO_CRYPTO_SERVICE_CIPHER 0 -#define VIRTIO_CRYPTO_SERVICE_HASH 1 -#define VIRTIO_CRYPTO_SERVICE_MAC 2 -#define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) struct virtio_crypto_ctrl_header { #define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) #define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) #define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) #define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) #define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) #define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) #define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) - __virtio32 opcode; - __virtio32 algo; - __virtio32 flag; - /* data virtqueue id */ - __virtio32 queue_id; + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + uint32_t opcode; + uint32_t algo; + uint32_t flag; + /* data virtqueue id */ + uint32_t queue_id; }; struct virtio_crypto_cipher_session_para { @@ -78,26 +80,27 @@ struct virtio_crypto_cipher_session_para { #define VIRTIO_CRYPTO_CIPHER_AES_F8 12 #define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 #define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 - __virtio32 algo; - /* length of key */ - __virtio32 keylen; + uint32_t algo; + /* length of key */ + uint32_t keylen; #define VIRTIO_CRYPTO_OP_ENCRYPT 1 #define VIRTIO_CRYPTO_OP_DECRYPT 2 - /* encrypt or decrypt */ - __virtio32 op; - __virtio32 padding; + /* encrypt or decrypt */ + uint32_t op; + uint32_t padding; }; struct virtio_crypto_session_input { - /* Device-writable part */ - __virtio64 session_id; - __virtio32 status; - __virtio32 padding; + /* Device-writable part */ + uint64_t session_id; + uint32_t status; + uint32_t padding; }; struct virtio_crypto_cipher_session_req { - struct virtio_crypto_cipher_session_para para; + struct virtio_crypto_cipher_session_para para; + uint8_t padding[32]; }; struct virtio_crypto_hash_session_para { @@ -114,13 +117,15 @@ struct virtio_crypto_hash_session_para { #define VIRTIO_CRYPTO_HASH_SHA3_512 10 #define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 #define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 - __virtio32 algo; - /* hash result length */ - __virtio32 hash_result_len; + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + uint8_t padding[8]; }; struct virtio_crypto_hash_create_session_req { - struct virtio_crypto_hash_session_para para; + struct virtio_crypto_hash_session_para para; + uint8_t padding[40]; }; struct virtio_crypto_mac_session_para { @@ -140,16 +145,17 @@ struct virtio_crypto_mac_session_para { #define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 #define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 #define VIRTIO_CRYPTO_MAC_XCBC_AES 53 - __virtio32 algo; - /* hash result length */ - __virtio32 hash_result_len; - /* length of authenticated key */ - __virtio32 auth_key_len; - __virtio32 padding; + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + /* length of authenticated key */ + uint32_t auth_key_len; + uint32_t padding; }; struct virtio_crypto_mac_create_session_req { - struct virtio_crypto_mac_session_para para; + struct virtio_crypto_mac_session_para para; + uint8_t padding[40]; }; struct virtio_crypto_aead_session_para { @@ -157,273 +163,288 @@ struct virtio_crypto_aead_session_para { #define VIRTIO_CRYPTO_AEAD_GCM 1 #define VIRTIO_CRYPTO_AEAD_CCM 2 #define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 - __virtio32 algo; - /* length of key */ - __virtio32 key_len; - /* digest result length */ - __virtio32 digest_result_len; - /* length of the additional authenticated data (AAD) in bytes */ - __virtio32 aad_len; - /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ - __virtio32 op; - __virtio32 padding; + uint32_t algo; + /* length of key */ + uint32_t key_len; + /* hash result length */ + uint32_t hash_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + uint32_t op; + uint32_t padding; }; struct virtio_crypto_aead_create_session_req { - struct virtio_crypto_aead_session_para para; + struct virtio_crypto_aead_session_para para; + uint8_t padding[32]; }; struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 - __virtio32 alg_chain_order; + uint32_t alg_chain_order; /* Plain hash */ #define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 /* Authenticated hash (mac) */ #define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 /* Nested hash */ #define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 - __virtio32 hash_mode; - struct virtio_crypto_cipher_session_para cipher_param; - union { - struct virtio_crypto_hash_session_para hash_param; - struct virtio_crypto_mac_session_para mac_param; - } u; - /* length of the additional authenticated data (AAD) in bytes */ - __virtio32 aad_len; - __virtio32 padding; + uint32_t hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + uint8_t padding[16]; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + uint32_t padding; }; struct virtio_crypto_alg_chain_session_req { - struct virtio_crypto_alg_chain_session_para para; + struct virtio_crypto_alg_chain_session_para para; }; struct virtio_crypto_sym_create_session_req { - union { - struct virtio_crypto_cipher_session_req cipher; - struct virtio_crypto_alg_chain_session_req chain; - } u; + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + uint8_t padding[48]; + } u; - /* Device-readable part */ + /* Device-readable part */ /* No operation */ #define VIRTIO_CRYPTO_SYM_OP_NONE 0 /* Cipher only operation on the data */ #define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 -/* Chain any cipher with any hash or mac operation. The order - depends on the value of alg_chain_order param */ +/* + * Chain any cipher with any hash or mac operation. The order + * depends on the value of alg_chain_order param + */ #define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 - __virtio32 op_type; - __virtio32 padding; + uint32_t op_type; + uint32_t padding; }; struct virtio_crypto_destroy_session_req { - /* Device-readable part */ - __virtio64 session_id; + /* Device-readable part */ + uint64_t session_id; + uint8_t padding[48]; }; -/* The request of the control viritqueue's packet */ +/* The request of the control virtqueue's packet */ struct virtio_crypto_op_ctrl_req { - struct virtio_crypto_ctrl_header header; + struct virtio_crypto_ctrl_header header; - union { - struct virtio_crypto_sym_create_session_req sym_create_session; - struct virtio_crypto_hash_create_session_req hash_create_session; - struct virtio_crypto_mac_create_session_req mac_create_session; - struct virtio_crypto_aead_create_session_req aead_create_session; - struct virtio_crypto_destroy_session_req destroy_session; - } u; + union { + struct virtio_crypto_sym_create_session_req + sym_create_session; + struct virtio_crypto_hash_create_session_req + hash_create_session; + struct virtio_crypto_mac_create_session_req + mac_create_session; + struct virtio_crypto_aead_create_session_req + aead_create_session; + struct virtio_crypto_destroy_session_req + destroy_session; + uint8_t padding[56]; + } u; }; struct virtio_crypto_op_header { #define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) #define VIRTIO_CRYPTO_CIPHER_DECRYPT \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) #define VIRTIO_CRYPTO_HASH \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) #define VIRTIO_CRYPTO_MAC \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) #define VIRTIO_CRYPTO_AEAD_ENCRYPT \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ - VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) - __virtio32 opcode; - /* algo should be service-specific algorithms */ - __virtio32 algo; - /* session_id should be service-specific algorithms */ - __virtio64 session_id; - /* control flag to control the request */ - __virtio32 flag; - __virtio32 padding; + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + uint32_t opcode; + /* algo should be service-specific algorithms */ + uint32_t algo; + /* session_id should be service-specific algorithms */ + uint64_t session_id; + /* control flag to control the request */ + uint32_t flag; + uint32_t padding; }; struct virtio_crypto_cipher_para { - /* - * Byte Length of valid IV/Counter - * - * - For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for - * SNOW3G in UEA2 mode, this is the length of the IV (which - * must be the same as the block length of the cipher). - * - For block ciphers in CTR mode, this is the length of the counter - * (which must be the same as the block length of the cipher). - * - For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. - * - * The IV/Counter will be updated after every partial cryptographic - * operation. - */ - __virtio32 iv_len; - /* length of source data */ - __virtio32 src_data_len; - /* length of dst data */ - __virtio32 dst_data_len; - __virtio32 padding; + /* + * Byte Length of valid IV/Counter + * + * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + uint32_t iv_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; + uint32_t padding; }; struct virtio_crypto_hash_para { - /* length of source data */ - __virtio32 src_data_len; - /* hash result length */ - __virtio32 hash_result_len; + /* length of source data */ + uint32_t src_data_len; + /* hash result length */ + uint32_t hash_result_len; }; struct virtio_crypto_mac_para { - struct virtio_crypto_hash_para hash; + struct virtio_crypto_hash_para hash; }; struct virtio_crypto_aead_para { - /* - * Byte Length of valid IV data pointed to by the below iv_addr - * parameter. - * - * - For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which - * case iv_addr points to J0. - * - For CCM mode, this is the length of the nonce, which can be in the - * range 7 to 13 inclusive. - */ - __virtio32 iv_len; - /* length of additional auth data */ - __virtio32 aad_len; - /* length of source data */ - __virtio32 src_data_len; - /* length of dst data */ - __virtio32 dst_data_len; + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. + */ + uint32_t iv_len; + /* length of additional auth data */ + uint32_t aad_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; }; struct virtio_crypto_cipher_data_req { - /* Device-readable part */ - struct virtio_crypto_cipher_para para; + /* Device-readable part */ + struct virtio_crypto_cipher_para para; + uint8_t padding[24]; }; struct virtio_crypto_hash_data_req { - /* Device-readable part */ - struct virtio_crypto_hash_para para; + /* Device-readable part */ + struct virtio_crypto_hash_para para; + uint8_t padding[40]; }; struct virtio_crypto_mac_data_req { - /* Device-readable part */ - struct virtio_crypto_mac_para para; + /* Device-readable part */ + struct virtio_crypto_mac_para para; + uint8_t padding[40]; }; struct virtio_crypto_alg_chain_data_para { - __virtio32 iv_len; - /* Length of source data */ - __virtio32 src_data_len; - /* Length of destination data */ - __virtio32 dst_data_len; - /* Starting point for cipher processing in source data */ - __virtio32 cipher_start_src_offset; - /* Length of the source data that the cipher will be computed on */ - __virtio32 len_to_cipher; - /* Starting point for hash processing in source data */ - __virtio32 hash_start_src_offset; - /* Length of the source data that the hash will be computed on */ - __virtio32 len_to_hash; - /* Length of the additional auth data */ - __virtio32 aad_len; - /* Length of the hash result */ - __virtio32 hash_result_len; - __virtio32 reserved; + uint32_t iv_len; + /* Length of source data */ + uint32_t src_data_len; + /* Length of destination data */ + uint32_t dst_data_len; + /* Starting point for cipher processing in source data */ + uint32_t cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + uint32_t len_to_cipher; + /* Starting point for hash processing in source data */ + uint32_t hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + uint32_t len_to_hash; + /* Length of the additional auth data */ + uint32_t aad_len; + /* Length of the hash result */ + uint32_t hash_result_len; + uint32_t reserved; }; struct virtio_crypto_alg_chain_data_req { - /* Device-readable part */ - struct virtio_crypto_alg_chain_data_para para; + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; }; struct virtio_crypto_sym_data_req { - union { - struct virtio_crypto_cipher_data_req cipher; - struct virtio_crypto_alg_chain_data_req chain; - } u; + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + uint8_t padding[40]; + } u; - /* See above VIRTIO_CRYPTO_SYM_OP_* */ - __virtio32 op_type; - __virtio32 padding; + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + uint32_t op_type; + uint32_t padding; }; struct virtio_crypto_aead_data_req { - /* Device-readable part */ - struct virtio_crypto_aead_para para; + /* Device-readable part */ + struct virtio_crypto_aead_para para; + uint8_t padding[32]; }; -/* The request of the data viritqueue's packet */ +/* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { - struct virtio_crypto_op_header header; + struct virtio_crypto_op_header header; - union { - struct virtio_crypto_sym_data_req sym_req; - struct virtio_crypto_hash_data_req hash_req; - struct virtio_crypto_mac_data_req mac_req; - struct virtio_crypto_aead_data_req aead_req; - } u; + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + uint8_t padding[48]; + } u; }; #define VIRTIO_CRYPTO_OK 0 #define VIRTIO_CRYPTO_ERR 1 #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 -#define VIRTIO_CRYPTO_INVSESS 4 /* Invaild session id */ +#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) -#define VIRTIO_CRYPTO_S_STARTED (1 << 1) struct virtio_crypto_config { - /* See VIRTIO_CRYPTO_* above */ - __virtio32 status; + /* See VIRTIO_CRYPTO_OP_* above */ + uint32_t status; - /* - * Maximum number of data queue legal values are between 1 and 0x8000 - */ - __virtio32 max_dataqueues; + /* + * Maximum number of data queue + */ + uint32_t max_dataqueues; - /* Specifies the services mask which the devcie support, - see VIRTIO_CRYPTO_SERVICE_* above */ - __virtio32 crypto_services; + /* + * Specifies the services mask which the device support, + * see VIRTIO_CRYPTO_SERVICE_* above + */ + uint32_t crypto_services; - /* Detailed algorithms mask */ - __virtio32 cipher_algo_l; - __virtio32 cipher_algo_h; - __virtio32 hash_algo; - __virtio32 mac_algo_l; - __virtio32 mac_algo_h; - __virtio32 aead_algo; - - /* Maximum length of cipher key */ - uint32_t max_cipher_key_len; - /* Maximum length of authenticated key */ - uint32_t max_auth_key_len; - - __virtio32 reserve; - - /* The maximum size of per request's content */ - __virtio64 max_size; + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + uint32_t reserve; + /* Maximum size of each crypto request's content */ + uint64_t max_size; }; struct virtio_crypto_inhdr { - /* See VIRTIO_CRYPTO_* above */ - uint8_t status; + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; }; - -#endif /* _LINUX_VIRTIO_CRYPTO_H */ +#endif From ed219c40a376d5e0a09f8af0c4264e6f85e3cbde Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Jan 2017 18:18:35 +0200 Subject: [PATCH 14/17] update-linux-headers.sh: support __bitwise In 4.10, Linux is switching from __bitwise__ to use __bitwise exclusively. Update our script accordingly. Signed-off-by: Michael S. Tsirkin --- scripts/update-linux-headers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 08c4c4ae54..72cf1fbf0a 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -51,7 +51,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ - -e 's/__bitwise__//' \ + -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ -e '/sys\/ioctl.h/d' \ From 9568700563151a5ce68fd7e61a38b72fbba7273d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Jan 2017 18:27:21 +0200 Subject: [PATCH 15/17] vhost: drop VHOST_F_DEVICE_IOTLB Upstream does not have it, uses VIRTIO_F_IOMMU_PLATFORM to signal support instead. Signed-off-by: Michael S. Tsirkin --- linux-headers/linux/vhost.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index ac7a1f136a..1e86a3dd0d 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -172,8 +172,6 @@ struct vhost_memory { #define VHOST_F_LOG_ALL 26 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ #define VHOST_NET_F_VIRTIO_NET_HDR 27 -/* Vhost have device IOTLB */ -#define VHOST_F_DEVICE_IOTLB 63 /* VHOST_SCSI specific definitions */ From 6bdc21c050a2a7b92cbbd0b2a1f8934e9b5f896f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 18 Jan 2017 21:32:22 +0200 Subject: [PATCH 16/17] virtio: fix up max size checks Coverity reports that ARRAY_SIZE(elem->out_sg) (and all the others too) is wrong because elem->out_sg is a pointer. However, the check is not in the right place and the max_size argument of virtqueue_map_iovec can be removed. The check on in_num/out_num should be moved to qemu_get_virtqueue_element instead, before the call to virtqueue_alloc_element. Cc: qemu-stable@nongnu.org Reported-by: Paolo Bonzini Fixes: 3724650db07057333879484c8bc7d900b5c1bf8e ("virtio: introduce virtqueue_alloc_element") Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- hw/virtio/virtio.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 34065c78f8..6e34f0527d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -599,23 +599,11 @@ static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, hwaddr *addr, unsigned int *num_sg, - unsigned int max_size, int is_write) + int is_write) { unsigned int i; hwaddr len; - /* Note: this function MUST validate input, some callers - * are passing in num_sg values received over the network. - */ - /* TODO: teach all callers that this can fail, and return failure instead - * of asserting here. - * When we do, we might be able to re-enable NDEBUG below. - */ -#ifdef NDEBUG -#error building with NDEBUG is not supported -#endif - assert(*num_sg <= max_size); - for (i = 0; i < *num_sg; i++) { len = sg[i].iov_len; sg[i].iov_base = dma_memory_map(vdev->dma_as, @@ -635,13 +623,8 @@ static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg, void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem) { - virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, - MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)), - 1); - virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, - MIN(ARRAY_SIZE(elem->out_sg), - ARRAY_SIZE(elem->out_addr)), - 0); + virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, 1); + virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, 0); } static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) @@ -840,6 +823,16 @@ void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz) qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld)); + /* TODO: teach all callers that this can fail, and return failure instead + * of asserting here. + * When we do, we might be able to re-enable NDEBUG below. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif + assert(ARRAY_SIZE(data.in_addr) >= data.in_num); + assert(ARRAY_SIZE(data.out_addr) >= data.out_num); + elem = virtqueue_alloc_element(sz, data.out_num, data.in_num); elem->index = data.index; From 2943b53f682f54548e7ddcf2ebb6c6d12d8dc821 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 17 Jan 2017 12:01:00 +0800 Subject: [PATCH 17/17] virtio: force VIRTIO_F_IOMMU_PLATFORM We allow vhost to clear VIRITO_F_IOMMU_PLATFORM which is wrong since VIRTIO_F_IOMMU_PLATFORM is mandatory for security. Fixing this by enforce it after vdc->get_features(). Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-bus.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d31cc00e83..a886011e75 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -47,6 +47,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) VirtioBusState *bus = VIRTIO_BUS(qbus); VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); DPRINTF("%s: plug device.\n", qbus->name); @@ -63,8 +64,8 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) klass->device_plugged(qbus->parent, errp); } - if (klass->get_dma_as != NULL && - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + if (klass->get_dma_as != NULL && has_iommu) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); vdev->dma_as = klass->get_dma_as(qbus->parent); } else { vdev->dma_as = &address_space_memory;