From b87de24e6c568d40d4f231b10e1707aed1ebe92e Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Tue, 24 May 2011 14:34:20 +0100 Subject: [PATCH 01/11] xen: Add xc_domain_add_to_physmap to xen_interface. This function will be used to support sync dirty bitmap. This come with a check against every Xen release, and special implementation for Xen version that doesn't have this specific call. This function will not be usable with Xen 3.3 because the behavior is different. Signed-off-by: Anthony PERARD Signed-off-by: Alexander Graf --- configure | 29 ++++++++++++++++++++++++++++- hw/xen_common.h | 14 ++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 44c092a29..b63b49f1d 100755 --- a/configure +++ b/configure @@ -1210,6 +1210,7 @@ int main(void) { xc = xc_interface_open(0, 0, 0); xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); return 0; } EOF @@ -1228,10 +1229,14 @@ EOF # error HVM_MAX_VCPUS not defined #endif int main(void) { + struct xen_add_to_physmap xatp = { + .domid = 0, .space = XENMAPSPACE_gmfn, .idx = 0, .gpfn = 0, + }; xs_daemon_open(); xc_interface_open(); xc_gnttab_open(); xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_memory_op(0, XENMEM_add_to_physmap, &xatp); return 0; } EOF @@ -1240,7 +1245,29 @@ EOF xen_ctrl_version=400 xen=yes - # Xen 3.3.0, 3.4.0 + # Xen 3.4.0 + elif ( + cat > $TMPC < +#include +int main(void) { + struct xen_add_to_physmap xatp = { + .domid = 0, .space = XENMAPSPACE_gmfn, .idx = 0, .gpfn = 0, + }; + xs_daemon_open(); + xc_interface_open(); + xc_gnttab_open(); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_memory_op(0, XENMEM_add_to_physmap, &xatp); + return 0; +} +EOF + compile_prog "" "$xen_libs" + ) ; then + xen_ctrl_version=340 + xen=yes + + # Xen 3.3.0 elif ( cat > $TMPC < diff --git a/hw/xen_common.h b/hw/xen_common.h index a1958a0af..2c79af64d 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -71,6 +71,20 @@ static inline int xc_domain_populate_physmap_exact (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start); } +static inline int xc_domain_add_to_physmap(int xc_handle, uint32_t domid, + unsigned int space, unsigned long idx, + xen_pfn_t gpfn) +{ + struct xen_add_to_physmap xatp = { + .domid = domid, + .space = space, + .idx = idx, + .gpfn = gpfn, + }; + + return xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); +} + /* Xen 4.1 */ #else From b4dd7802cac7749de80ceadd622d4209fc31b507 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Tue, 24 May 2011 14:34:21 +0100 Subject: [PATCH 02/11] xen: Introduce VGA sync dirty bitmap support This patch introduces phys memory client for Xen. Only sync dirty_bitmap and set_memory are actually implemented. migration_log will stay empty for the moment. Xen can only log one range for bit change, so only the range in the first call will be synced. Signed-off-by: Anthony PERARD Signed-off-by: Alexander Graf --- trace-events | 1 + xen-all.c | 267 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) diff --git a/trace-events b/trace-events index f1230f1e3..46a19d372 100644 --- a/trace-events +++ b/trace-events @@ -396,6 +396,7 @@ disable milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr %08x v # xen-all.c disable xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx" +disable xen_client_set_memory(uint64_t start_addr, unsigned long size, unsigned long phys_offset, bool log_dirty) "%#"PRIx64" size %#lx, offset %#lx, log_dirty %i" # xen-mapcache.c disable qemu_map_cache(uint64_t phys_addr) "want %#"PRIx64"" diff --git a/xen-all.c b/xen-all.c index 0eac202d4..75a82c28f 100644 --- a/xen-all.c +++ b/xen-all.c @@ -13,6 +13,7 @@ #include "hw/xen_common.h" #include "hw/xen_backend.h" +#include "range.h" #include "xen-mapcache.h" #include "trace.h" @@ -54,6 +55,14 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) #define BUFFER_IO_MAX_DELAY 100 +typedef struct XenPhysmap { + target_phys_addr_t start_addr; + ram_addr_t size; + target_phys_addr_t phys_offset; + + QLIST_ENTRY(XenPhysmap) list; +} XenPhysmap; + typedef struct XenIOState { shared_iopage_t *shared_page; buffered_iopage_t *buffered_io_page; @@ -66,6 +75,9 @@ typedef struct XenIOState { int send_vcpu; struct xs_handle *xenstore; + CPUPhysMemoryClient client; + QLIST_HEAD(, XenPhysmap) physmap; + const XenPhysmap *log_for_dirtybit; Notifier exit; } XenIOState; @@ -178,6 +190,256 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) qemu_free(pfn_list); } +static XenPhysmap *get_physmapping(XenIOState *state, + target_phys_addr_t start_addr, ram_addr_t size) +{ + XenPhysmap *physmap = NULL; + + start_addr &= TARGET_PAGE_MASK; + + QLIST_FOREACH(physmap, &state->physmap, list) { + if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) { + return physmap; + } + } + return NULL; +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 340 +static int xen_add_to_physmap(XenIOState *state, + target_phys_addr_t start_addr, + ram_addr_t size, + target_phys_addr_t phys_offset) +{ + unsigned long i = 0; + int rc = 0; + XenPhysmap *physmap = NULL; + target_phys_addr_t pfn, start_gpfn; + + if (get_physmapping(state, start_addr, size)) { + return 0; + } + if (size <= 0) { + return -1; + } + + DPRINTF("mapping vram to %llx - %llx, from %llx\n", + start_addr, start_addr + size, phys_offset); + + pfn = phys_offset >> TARGET_PAGE_BITS; + start_gpfn = start_addr >> TARGET_PAGE_BITS; + for (i = 0; i < size >> TARGET_PAGE_BITS; i++) { + unsigned long idx = pfn + i; + xen_pfn_t gpfn = start_gpfn + i; + + rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + if (rc) { + DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %" + PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + return -rc; + } + } + + physmap = qemu_malloc(sizeof (XenPhysmap)); + + physmap->start_addr = start_addr; + physmap->size = size; + physmap->phys_offset = phys_offset; + + QLIST_INSERT_HEAD(&state->physmap, physmap, list); + + xc_domain_pin_memory_cacheattr(xen_xc, xen_domid, + start_addr >> TARGET_PAGE_BITS, + (start_addr + size) >> TARGET_PAGE_BITS, + XEN_DOMCTL_MEM_CACHEATTR_WB); + return 0; +} + +static int xen_remove_from_physmap(XenIOState *state, + target_phys_addr_t start_addr, + ram_addr_t size) +{ + unsigned long i = 0; + int rc = 0; + XenPhysmap *physmap = NULL; + target_phys_addr_t phys_offset = 0; + + physmap = get_physmapping(state, start_addr, size); + if (physmap == NULL) { + return -1; + } + + phys_offset = physmap->phys_offset; + size = physmap->size; + + DPRINTF("unmapping vram to %llx - %llx, from %llx\n", + phys_offset, phys_offset + size, start_addr); + + size >>= TARGET_PAGE_BITS; + start_addr >>= TARGET_PAGE_BITS; + phys_offset >>= TARGET_PAGE_BITS; + for (i = 0; i < size; i++) { + unsigned long idx = start_addr + i; + xen_pfn_t gpfn = phys_offset + i; + + rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn); + if (rc) { + fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %" + PRI_xen_pfn" failed: %d\n", idx, gpfn, rc); + return -rc; + } + } + + QLIST_REMOVE(physmap, list); + if (state->log_for_dirtybit == physmap) { + state->log_for_dirtybit = NULL; + } + free(physmap); + + return 0; +} + +#else +static int xen_add_to_physmap(XenIOState *state, + target_phys_addr_t start_addr, + ram_addr_t size, + target_phys_addr_t phys_offset) +{ + return -ENOSYS; +} + +static int xen_remove_from_physmap(XenIOState *state, + target_phys_addr_t start_addr, + ram_addr_t size) +{ + return -ENOSYS; +} +#endif + +static void xen_client_set_memory(struct CPUPhysMemoryClient *client, + target_phys_addr_t start_addr, + ram_addr_t size, + ram_addr_t phys_offset, + bool log_dirty) +{ + XenIOState *state = container_of(client, XenIOState, client); + ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; + hvmmem_type_t mem_type; + + if (!(start_addr != phys_offset + && ( (log_dirty && flags < IO_MEM_UNASSIGNED) + || (!log_dirty && flags == IO_MEM_UNASSIGNED)))) { + return; + } + + trace_xen_client_set_memory(start_addr, size, phys_offset, log_dirty); + + start_addr &= TARGET_PAGE_MASK; + size = TARGET_PAGE_ALIGN(size); + phys_offset &= TARGET_PAGE_MASK; + + switch (flags) { + case IO_MEM_RAM: + xen_add_to_physmap(state, start_addr, size, phys_offset); + break; + case IO_MEM_ROM: + mem_type = HVMMEM_ram_ro; + if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type, + start_addr >> TARGET_PAGE_BITS, + size >> TARGET_PAGE_BITS)) { + DPRINTF("xc_hvm_set_mem_type error, addr: "TARGET_FMT_plx"\n", + start_addr); + } + break; + case IO_MEM_UNASSIGNED: + if (xen_remove_from_physmap(state, start_addr, size) < 0) { + DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr); + } + break; + } +} + +static int xen_sync_dirty_bitmap(XenIOState *state, + target_phys_addr_t start_addr, + ram_addr_t size) +{ + target_phys_addr_t npages = size >> TARGET_PAGE_BITS; + target_phys_addr_t vram_offset = 0; + const int width = sizeof(unsigned long) * 8; + unsigned long bitmap[(npages + width - 1) / width]; + int rc, i, j; + const XenPhysmap *physmap = NULL; + + physmap = get_physmapping(state, start_addr, size); + if (physmap == NULL) { + /* not handled */ + return -1; + } + + if (state->log_for_dirtybit == NULL) { + state->log_for_dirtybit = physmap; + } else if (state->log_for_dirtybit != physmap) { + return -1; + } + vram_offset = physmap->phys_offset; + + rc = xc_hvm_track_dirty_vram(xen_xc, xen_domid, + start_addr >> TARGET_PAGE_BITS, npages, + bitmap); + if (rc) { + return rc; + } + + for (i = 0; i < ARRAY_SIZE(bitmap); i++) { + unsigned long map = bitmap[i]; + while (map != 0) { + j = ffsl(map) - 1; + map &= ~(1ul << j); + cpu_physical_memory_set_dirty(vram_offset + (i * width + j) * TARGET_PAGE_SIZE); + }; + } + + return 0; +} + +static int xen_log_start(CPUPhysMemoryClient *client, target_phys_addr_t phys_addr, ram_addr_t size) +{ + XenIOState *state = container_of(client, XenIOState, client); + + return xen_sync_dirty_bitmap(state, phys_addr, size); +} + +static int xen_log_stop(CPUPhysMemoryClient *client, target_phys_addr_t phys_addr, ram_addr_t size) +{ + XenIOState *state = container_of(client, XenIOState, client); + + state->log_for_dirtybit = NULL; + /* Disable dirty bit tracking */ + return xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL); +} + +static int xen_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client, + target_phys_addr_t start_addr, + target_phys_addr_t end_addr) +{ + XenIOState *state = container_of(client, XenIOState, client); + + return xen_sync_dirty_bitmap(state, start_addr, end_addr - start_addr); +} + +static int xen_client_migration_log(struct CPUPhysMemoryClient *client, + int enable) +{ + return 0; +} + +static CPUPhysMemoryClient xen_cpu_phys_memory_client = { + .set_memory = xen_client_set_memory, + .sync_dirty_bitmap = xen_client_sync_dirty_bitmap, + .migration_log = xen_client_migration_log, + .log_start = xen_log_start, + .log_stop = xen_log_stop, +}; /* VCPU Operations, MMIO, IO ring ... */ @@ -581,6 +843,11 @@ int xen_hvm_init(void) qemu_add_vm_change_state_handler(xen_vm_change_state_handler, state); + state->client = xen_cpu_phys_memory_client; + QLIST_INIT(&state->physmap); + cpu_register_phys_memory_client(&state->client); + state->log_for_dirtybit = NULL; + return 0; } From c13390cd384a9564e6dded127d01ef0627b6b1c5 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 May 2011 18:35:42 +0100 Subject: [PATCH 03/11] xen: fix qemu_map_cache with size != MCACHE_BUCKET_SIZE Fix the implementation of qemu_map_cache: correctly support size arguments different from 0 or MCACHE_BUCKET_SIZE. The new implementation supports locked mapcache entries with size multiple of MCACHE_BUCKET_SIZE. qemu_invalidate_entry can correctly find and unmap these "large" mapcache entries given that the virtual address passed to qemu_invalidate_entry is the same returned by qemu_map_cache when the locked mapcache entry was created. Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- xen-mapcache.c | 77 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/xen-mapcache.c b/xen-mapcache.c index 349cc6221..90fbd4983 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -43,14 +43,16 @@ typedef struct MapCacheEntry { target_phys_addr_t paddr_index; uint8_t *vaddr_base; - DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + unsigned long *valid_mapping; uint8_t lock; + target_phys_addr_t size; struct MapCacheEntry *next; } MapCacheEntry; typedef struct MapCacheRev { uint8_t *vaddr_req; target_phys_addr_t paddr_index; + target_phys_addr_t size; QTAILQ_ENTRY(MapCacheRev) next; } MapCacheRev; @@ -68,6 +70,15 @@ typedef struct MapCache { static MapCache *mapcache; +static inline int test_bits(int nr, int size, const unsigned long *addr) +{ + unsigned long res = find_next_zero_bit(addr, size + nr, nr); + if (res >= nr + size) + return 1; + else + return 0; +} + void qemu_map_cache_init(void) { unsigned long size; @@ -115,11 +126,15 @@ static void qemu_remap_bucket(MapCacheEntry *entry, err = qemu_mallocz(nb_pfn * sizeof (int)); if (entry->vaddr_base != NULL) { - if (munmap(entry->vaddr_base, size) != 0) { + if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); } } + if (entry->valid_mapping != NULL) { + qemu_free(entry->valid_mapping); + entry->valid_mapping = NULL; + } for (i = 0; i < nb_pfn; i++) { pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; @@ -134,6 +149,9 @@ static void qemu_remap_bucket(MapCacheEntry *entry, entry->vaddr_base = vaddr_base; entry->paddr_index = address_index; + entry->size = size; + entry->valid_mapping = (unsigned long *) qemu_mallocz(sizeof(unsigned long) * + BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); bitmap_zero(entry->valid_mapping, nb_pfn); for (i = 0; i < nb_pfn; i++) { @@ -151,32 +169,47 @@ uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, u MapCacheEntry *entry, *pentry = NULL; target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT; target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + target_phys_addr_t __size = size; trace_qemu_map_cache(phys_addr); - if (address_index == mapcache->last_address_index && !lock) { + if (address_index == mapcache->last_address_index && !lock && !__size) { trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset); return mapcache->last_address_vaddr + address_offset; } + /* size is always a multiple of MCACHE_BUCKET_SIZE */ + if ((address_offset + (__size % MCACHE_BUCKET_SIZE)) > MCACHE_BUCKET_SIZE) + __size += MCACHE_BUCKET_SIZE; + if (__size % MCACHE_BUCKET_SIZE) + __size += MCACHE_BUCKET_SIZE - (__size % MCACHE_BUCKET_SIZE); + if (!__size) + __size = MCACHE_BUCKET_SIZE; + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; - while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + while (entry && entry->lock && entry->vaddr_base && + (entry->paddr_index != address_index || entry->size != __size || + !test_bits(address_offset >> XC_PAGE_SHIFT, size >> XC_PAGE_SHIFT, + entry->valid_mapping))) { pentry = entry; entry = entry->next; } if (!entry) { entry = qemu_mallocz(sizeof (MapCacheEntry)); pentry->next = entry; - qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + qemu_remap_bucket(entry, __size, address_index); } else if (!entry->lock) { if (!entry->vaddr_base || entry->paddr_index != address_index || - !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { - qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + entry->size != __size || + !test_bits(address_offset >> XC_PAGE_SHIFT, size >> XC_PAGE_SHIFT, + entry->valid_mapping)) { + qemu_remap_bucket(entry, __size, address_index); } } - if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + if(!test_bits(address_offset >> XC_PAGE_SHIFT, size >> XC_PAGE_SHIFT, + entry->valid_mapping)) { mapcache->last_address_index = -1; trace_qemu_map_cache_return(NULL); return NULL; @@ -189,6 +222,7 @@ uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, u entry->lock++; reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; reventry->paddr_index = mapcache->last_address_index; + reventry->size = entry->size; QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); } @@ -231,13 +265,16 @@ void qemu_map_cache_unlock(void *buffer) ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) { + MapCacheEntry *entry = NULL, *pentry = NULL; MapCacheRev *reventry; target_phys_addr_t paddr_index; + target_phys_addr_t size; int found = 0; QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { if (reventry->vaddr_req == ptr) { paddr_index = reventry->paddr_index; + size = reventry->size; found = 1; break; } @@ -252,7 +289,17 @@ ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) return 0; } - return paddr_index << MCACHE_BUCKET_SHIFT; + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); + return 0; + } + return (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + + ((unsigned long) ptr - (unsigned long) entry->vaddr_base); } void qemu_invalidate_entry(uint8_t *buffer) @@ -260,6 +307,7 @@ void qemu_invalidate_entry(uint8_t *buffer) MapCacheEntry *entry = NULL, *pentry = NULL; MapCacheRev *reventry; target_phys_addr_t paddr_index; + target_phys_addr_t size; int found = 0; if (mapcache->last_address_vaddr == buffer) { @@ -269,6 +317,7 @@ void qemu_invalidate_entry(uint8_t *buffer) QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { if (reventry->vaddr_req == buffer) { paddr_index = reventry->paddr_index; + size = reventry->size; found = 1; break; } @@ -284,7 +333,7 @@ void qemu_invalidate_entry(uint8_t *buffer) qemu_free(reventry); entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; - while (entry && entry->paddr_index != paddr_index) { + while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { pentry = entry; entry = entry->next; } @@ -298,10 +347,11 @@ void qemu_invalidate_entry(uint8_t *buffer) } pentry->next = entry->next; - if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); } + qemu_free(entry->valid_mapping); qemu_free(entry); } @@ -328,13 +378,16 @@ void qemu_invalidate_map_cache(void) continue; } - if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); } entry->paddr_index = 0; entry->vaddr_base = NULL; + entry->size = 0; + qemu_free(entry->valid_mapping); + entry->valid_mapping = NULL; } mapcache->last_address_index = -1; From cd306087e5a9ea4091071a0a41c0ea99fac60ab0 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 May 2011 18:35:43 +0100 Subject: [PATCH 04/11] xen: remove qemu_map_cache_unlock There is no need for qemu_map_cache_unlock, just use qemu_invalidate_entry instead. Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- exec.c | 2 +- xen-mapcache-stub.c | 4 ---- xen-mapcache.c | 33 --------------------------------- xen-mapcache.h | 1 - 4 files changed, 1 insertion(+), 39 deletions(-) diff --git a/exec.c b/exec.c index 09928a3b3..01f33bb2b 100644 --- a/exec.c +++ b/exec.c @@ -3146,7 +3146,7 @@ void qemu_put_ram_ptr(void *addr) xen_unmap_block(block->host, block->length); block->host = NULL; } else { - qemu_map_cache_unlock(addr); + qemu_invalidate_entry(addr); } } } diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c index 7c14b3d14..60f712b22 100644 --- a/xen-mapcache-stub.c +++ b/xen-mapcache-stub.c @@ -22,10 +22,6 @@ uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, u return qemu_get_ram_ptr(phys_addr); } -void qemu_map_cache_unlock(void *buffer) -{ -} - ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) { return -1; diff --git a/xen-mapcache.c b/xen-mapcache.c index 90fbd4983..57fe24de8 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -230,39 +230,6 @@ uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, u return mapcache->last_address_vaddr + address_offset; } -void qemu_map_cache_unlock(void *buffer) -{ - MapCacheEntry *entry = NULL, *pentry = NULL; - MapCacheRev *reventry; - target_phys_addr_t paddr_index; - int found = 0; - - QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { - if (reventry->vaddr_req == buffer) { - paddr_index = reventry->paddr_index; - found = 1; - break; - } - } - if (!found) { - return; - } - QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); - qemu_free(reventry); - - entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; - while (entry && entry->paddr_index != paddr_index) { - pentry = entry; - entry = entry->next; - } - if (!entry) { - return; - } - if (entry->lock > 0) { - entry->lock--; - } -} - ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) { MapCacheEntry *entry = NULL, *pentry = NULL; diff --git a/xen-mapcache.h b/xen-mapcache.h index 339444c94..b89b8f965 100644 --- a/xen-mapcache.h +++ b/xen-mapcache.h @@ -14,7 +14,6 @@ void qemu_map_cache_init(void); uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); -void qemu_map_cache_unlock(void *phys_addr); ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); void qemu_invalidate_entry(uint8_t *buffer); void qemu_invalidate_map_cache(void); From 6506e4f995967b1a48cc34418c77b318df92ce35 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 May 2011 18:35:44 +0100 Subject: [PATCH 05/11] xen: remove xen_map_block and xen_unmap_block Replace xen_map_block with qemu_map_cache with the appropriate locking and size parameters. Replace xen_unmap_block with qemu_invalidate_entry. Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- exec.c | 19 ++++--------------- xen-mapcache-stub.c | 4 ---- xen-mapcache.c | 31 ------------------------------- xen-mapcache.h | 15 --------------- 4 files changed, 4 insertions(+), 65 deletions(-) diff --git a/exec.c b/exec.c index 01f33bb2b..e11c1dd97 100644 --- a/exec.c +++ b/exec.c @@ -53,6 +53,7 @@ #endif #else /* !CONFIG_USER_ONLY */ #include "xen-mapcache.h" +#include "trace.h" #endif //#define DEBUG_TB_INVALIDATE @@ -3088,7 +3089,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr) if (block->offset == 0) { return qemu_map_cache(addr, 0, 1); } else if (block->host == NULL) { - block->host = xen_map_block(block->offset, block->length); + block->host = qemu_map_cache(block->offset, block->length, 1); } } return block->host + (addr - block->offset); @@ -3117,7 +3118,7 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) if (block->offset == 0) { return qemu_map_cache(addr, 0, 1); } else if (block->host == NULL) { - block->host = xen_map_block(block->offset, block->length); + block->host = qemu_map_cache(block->offset, block->length, 1); } } return block->host + (addr - block->offset); @@ -3135,19 +3136,7 @@ void qemu_put_ram_ptr(void *addr) trace_qemu_put_ram_ptr(addr); if (xen_mapcache_enabled()) { - RAMBlock *block; - - QLIST_FOREACH(block, &ram_list.blocks, next) { - if (addr == block->host) { - break; - } - } - if (block && block->host) { - xen_unmap_block(block->host, block->length); - block->host = NULL; - } else { - qemu_invalidate_entry(addr); - } + qemu_invalidate_entry(block->host); } } diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c index 60f712b22..8a2380a15 100644 --- a/xen-mapcache-stub.c +++ b/xen-mapcache-stub.c @@ -34,7 +34,3 @@ void qemu_invalidate_map_cache(void) void qemu_invalidate_entry(uint8_t *buffer) { } -uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) -{ - return NULL; -} diff --git a/xen-mapcache.c b/xen-mapcache.c index 57fe24de8..fac47cd9b 100644 --- a/xen-mapcache.c +++ b/xen-mapcache.c @@ -362,34 +362,3 @@ void qemu_invalidate_map_cache(void) mapcache_unlock(); } - -uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) -{ - uint8_t *vaddr_base; - xen_pfn_t *pfns; - int *err; - unsigned int i; - target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; - - trace_xen_map_block(phys_addr, size); - phys_addr >>= XC_PAGE_SHIFT; - - pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); - err = qemu_mallocz(nb_pfn * sizeof (int)); - - for (i = 0; i < nb_pfn; i++) { - pfns[i] = phys_addr + i; - } - - vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, - pfns, err, nb_pfn); - if (vaddr_base == NULL) { - perror("xc_map_foreign_bulk"); - exit(-1); - } - - qemu_free(pfns); - qemu_free(err); - - return vaddr_base; -} diff --git a/xen-mapcache.h b/xen-mapcache.h index b89b8f965..6216cc3be 100644 --- a/xen-mapcache.h +++ b/xen-mapcache.h @@ -9,27 +9,12 @@ #ifndef XEN_MAPCACHE_H #define XEN_MAPCACHE_H -#include -#include "trace.h" - void qemu_map_cache_init(void); uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); void qemu_invalidate_entry(uint8_t *buffer); void qemu_invalidate_map_cache(void); -uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size); - -static inline void xen_unmap_block(void *addr, ram_addr_t size) -{ - trace_xen_unmap_block(addr, size); - - if (munmap(addr, size) != 0) { - hw_error("xen_unmap_block: %s", strerror(errno)); - } -} - - #define mapcache_lock() ((void)0) #define mapcache_unlock() ((void)0) From 38bee5dc94ee355640b030d28f311b03ee2f13d1 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 May 2011 18:35:45 +0100 Subject: [PATCH 06/11] exec.c: refactor cpu_physical_memory_map Introduce qemu_ram_ptr_length that takes an address and a size as parameters rather than just an address. Refactor cpu_physical_memory_map so that we call qemu_ram_ptr_length only once rather than calling qemu_get_ram_ptr one time per page. This is not only more efficient but also tries to simplify the logic of the function. Currently we are relying on the fact that all the pages are mapped contiguously in qemu's address space: we have a check to make sure that the virtual address returned by qemu_get_ram_ptr from the second call on is consecutive. Now we are making this more explicit replacing all the calls to qemu_get_ram_ptr with a single call to qemu_ram_ptr_length passing a size argument. Signed-off-by: Stefano Stabellini CC: agraf@suse.de CC: anthony@codemonkey.ws Signed-off-by: Alexander Graf --- cpu-common.h | 1 + exec.c | 51 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/cpu-common.h b/cpu-common.h index 9f5917224..b027e4308 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -65,6 +65,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr); void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); +void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size); /* Same but slower, to use for migration, where the order of * RAMBlocks must not change. */ void *qemu_safe_ram_ptr(ram_addr_t addr); diff --git a/exec.c b/exec.c index e11c1dd97..238c173de 100644 --- a/exec.c +++ b/exec.c @@ -3131,6 +3131,31 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) return NULL; } +/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr + * but takes a size argument */ +void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size) +{ + if (xen_mapcache_enabled()) + return qemu_map_cache(addr, *size, 1); + else { + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr - block->offset < block->length) { + if (addr - block->offset + *size > block->length) + *size = block->length - addr + block->offset; + return block->host + (addr - block->offset); + } + } + + fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr); + abort(); + + *size = 0; + return NULL; + } +} + void qemu_put_ram_ptr(void *addr) { trace_qemu_put_ram_ptr(addr); @@ -3992,14 +4017,12 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, int is_write) { target_phys_addr_t len = *plen; - target_phys_addr_t done = 0; + target_phys_addr_t todo = 0; int l; - uint8_t *ret = NULL; - uint8_t *ptr; target_phys_addr_t page; unsigned long pd; PhysPageDesc *p; - unsigned long addr1; + target_phys_addr_t addr1 = addr; while (len > 0) { page = addr & TARGET_PAGE_MASK; @@ -4014,7 +4037,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, } if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { - if (done || bounce.buffer) { + if (todo || bounce.buffer) { break; } bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); @@ -4023,23 +4046,17 @@ void *cpu_physical_memory_map(target_phys_addr_t addr, if (!is_write) { cpu_physical_memory_read(addr, bounce.buffer, l); } - ptr = bounce.buffer; - } else { - addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK); - ptr = qemu_get_ram_ptr(addr1); - } - if (!done) { - ret = ptr; - } else if (ret + done != ptr) { - break; + + *plen = l; + return bounce.buffer; } len -= l; addr += l; - done += l; + todo += l; } - *plen = done; - return ret; + *plen = todo; + return qemu_ram_ptr_length(addr1, plen); } /* Unmaps a memory region previously mapped by cpu_physical_memory_map(). From 712c2b41490a986a5dd99bec99b854bac4322201 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 May 2011 18:35:46 +0100 Subject: [PATCH 07/11] xen: mapcache performance improvements Use qemu_invalidate_entry in cpu_physical_memory_unmap. Do not lock mapcache entries in qemu_get_ram_ptr if the address falls in the ramblock with offset == 0. We don't need to do that because the callers of qemu_get_ram_ptr either try to map an entire block, other from the main ramblock, or until the end of a page to implement a single read or write in the main ramblock. If we don't lock mapcache entries in qemu_get_ram_ptr we don't need to call qemu_invalidate_entry in qemu_put_ram_ptr anymore because we can leave with few long lived block mappings requested by devices. Also move the call to qemu_ram_addr_from_mapcache at the beginning of qemu_ram_addr_from_host. Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- exec.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/exec.c b/exec.c index 238c173de..7f14332c8 100644 --- a/exec.c +++ b/exec.c @@ -3085,9 +3085,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr) if (xen_mapcache_enabled()) { /* We need to check if the requested address is in the RAM * because we don't want to map the entire memory in QEMU. + * In that case just map until the end of the page. */ if (block->offset == 0) { - return qemu_map_cache(addr, 0, 1); + return qemu_map_cache(addr, 0, 0); } else if (block->host == NULL) { block->host = qemu_map_cache(block->offset, block->length, 1); } @@ -3114,9 +3115,10 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) if (xen_mapcache_enabled()) { /* We need to check if the requested address is in the RAM * because we don't want to map the entire memory in QEMU. + * In that case just map until the end of the page. */ if (block->offset == 0) { - return qemu_map_cache(addr, 0, 1); + return qemu_map_cache(addr, 0, 0); } else if (block->host == NULL) { block->host = qemu_map_cache(block->offset, block->length, 1); } @@ -3159,10 +3161,6 @@ void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size) void qemu_put_ram_ptr(void *addr) { trace_qemu_put_ram_ptr(addr); - - if (xen_mapcache_enabled()) { - qemu_invalidate_entry(block->host); - } } int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) @@ -3170,6 +3168,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) RAMBlock *block; uint8_t *host = ptr; + if (xen_mapcache_enabled()) { + *ram_addr = qemu_ram_addr_from_mapcache(ptr); + return 0; + } + QLIST_FOREACH(block, &ram_list.blocks, next) { /* This case append when the block is not mapped. */ if (block->host == NULL) { @@ -3181,11 +3184,6 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) } } - if (xen_mapcache_enabled()) { - *ram_addr = qemu_ram_addr_from_mapcache(ptr); - return 0; - } - return -1; } @@ -4086,13 +4084,7 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, } } if (xen_mapcache_enabled()) { - uint8_t *buffer1 = buffer; - uint8_t *end_buffer = buffer + len; - - while (buffer1 < end_buffer) { - qemu_put_ram_ptr(buffer1); - buffer1 += TARGET_PAGE_SIZE; - } + qemu_invalidate_entry(buffer); } return; } From 45dcd36e1ebbb3c085bdd157bffaf9276ffd8c46 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 3 Jun 2011 16:56:54 +0100 Subject: [PATCH 08/11] cirrus_vga: reset lfb_addr after a pci config write if the BAR is unmapped If the cirrus_vga PCI BAR is unmapped than we should not only reset map_addr but also lfb_addr, otherwise we'll keep trying to map the old lfb_addr in map_linear_vram. Signed-off-by: Stefano Stabellini Acked-by: Jan Kiszka Signed-off-by: Alexander Graf --- hw/cirrus_vga.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index 722cac754..3c5043ecf 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -3088,8 +3088,11 @@ static void pci_cirrus_write_config(PCIDevice *d, CirrusVGAState *s = &pvs->cirrus_vga; pci_default_write_config(d, address, val, len); - if (s->vga.map_addr && d->io_regions[0].addr == PCI_BAR_UNMAPPED) + if (s->vga.map_addr && d->io_regions[0].addr == PCI_BAR_UNMAPPED) { s->vga.map_addr = 0; + s->vga.lfb_addr = 0; + s->vga.lfb_end = 0; + } cirrus_update_memory_access(s); } From ebed85058b6e89a5202112e9aa2abab3aa3804c3 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 15 Jun 2011 17:29:27 +0100 Subject: [PATCH 09/11] xen: only track the linear framebuffer Xen can only do dirty bit tracking for one memory region, so we should explicitly avoid trying to track anything but the vga vram region. Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- xen-all.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xen-all.c b/xen-all.c index 75a82c28f..fe75ddd83 100644 --- a/xen-all.c +++ b/xen-all.c @@ -215,6 +215,7 @@ static int xen_add_to_physmap(XenIOState *state, int rc = 0; XenPhysmap *physmap = NULL; target_phys_addr_t pfn, start_gpfn; + RAMBlock *block; if (get_physmapping(state, start_addr, size)) { return 0; @@ -223,6 +224,19 @@ static int xen_add_to_physmap(XenIOState *state, return -1; } + /* Xen can only handle a single dirty log region for now and we want + * the linear framebuffer to be that region. + * Avoid tracking any regions that is not videoram and avoid tracking + * the legacy vga region. */ + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (!strcmp(block->idstr, "vga.vram") && block->offset == phys_offset + && start_addr > 0xbffff) { + goto go_physmap; + } + } + return -1; + +go_physmap: DPRINTF("mapping vram to %llx - %llx, from %llx\n", start_addr, start_addr + size, phys_offset); From bf09551a6b2282a1c59bd8335c1ef6e58704de98 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 15 Jun 2011 17:36:56 +0100 Subject: [PATCH 10/11] xen: fix interrupt routing Compared to the last version I only added a comment to the code. - remove i440FX-xen and i440fx_write_config_xen we don't need to intercept pci config writes to i440FX anymore; - introduce PIIX3-xen and piix3_write_config_xen we do need to intercept pci config write to the PCI-ISA bridge to update the PCI link routing; - set the number of PIIX3-xen interrupts line to 128; Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- hw/pc.h | 1 - hw/pc_piix.c | 6 +---- hw/piix_pci.c | 66 ++++++++++++++++++++++++++------------------------- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/hw/pc.h b/hw/pc.h index 0dcbee7ca..6d5730b26 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -176,7 +176,6 @@ struct PCII440FXState; typedef struct PCII440FXState PCII440FXState; PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size); -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size); void i440fx_init_memory_mappings(PCII440FXState *d); /* piix4.c */ diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 9a22a8afc..ba198de8c 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -124,11 +124,7 @@ static void pc_init1(ram_addr_t ram_size, isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24); if (pci_enabled) { - if (!xen_enabled()) { - pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); - } else { - pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); - } + pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); } else { pci_bus = NULL; i440fx_state = NULL; diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 85a320e72..3e2698d99 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -40,6 +40,7 @@ typedef PCIHostState I440FXState; #define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ #define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */ +#define XEN_PIIX_NUM_PIRQS 128ULL #define PIIX_PIRQC 0x60 typedef struct PIIX3State { @@ -78,6 +79,8 @@ struct PCII440FXState { #define I440FX_SMRAM 0x72 static void piix3_set_irq(void *opaque, int pirq, int level); +static void piix3_write_config_xen(PCIDevice *dev, + uint32_t address, uint32_t val, int len); /* return the global irq number corresponding to a given device irq pin. We could also use the bus number to have a more precise @@ -173,13 +176,6 @@ static void i440fx_write_config(PCIDevice *dev, } } -static void i440fx_write_config_xen(PCIDevice *dev, - uint32_t address, uint32_t val, int len) -{ - xen_piix_pci_write_config_client(address, val, len); - i440fx_write_config(dev, address, val, len); -} - static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) { PCII440FXState *d = opaque; @@ -267,8 +263,21 @@ static PCIBus *i440fx_common_init(const char *device_name, d = pci_create_simple(b, 0, device_name); *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d); - piix3 = DO_UPCAST(PIIX3State, dev, - pci_create_simple_multifunction(b, -1, true, "PIIX3")); + /* Xen supports additional interrupt routes from the PCI devices to + * the IOAPIC: the four pins of each PCI device on the bus are also + * connected to the IOAPIC directly. + * These additional routes can be discovered through ACPI. */ + if (xen_enabled()) { + piix3 = DO_UPCAST(PIIX3State, dev, + pci_create_simple_multifunction(b, -1, true, "PIIX3-xen")); + pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq, + piix3, XEN_PIIX_NUM_PIRQS); + } else { + piix3 = DO_UPCAST(PIIX3State, dev, + pci_create_simple_multifunction(b, -1, true, "PIIX3")); + pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3, + PIIX_NUM_PIRQS); + } piix3->pic = pic; (*pi440fx_state)->piix3 = piix3; @@ -289,21 +298,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, PCIBus *b; b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, pic, ram_size); - pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, (*pi440fx_state)->piix3, - PIIX_NUM_PIRQS); - - return b; -} - -PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, - qemu_irq *pic, ram_addr_t ram_size) -{ - PCIBus *b; - - b = i440fx_common_init("i440FX-xen", pi440fx_state, piix3_devfn, pic, ram_size); - pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq, - (*pi440fx_state)->piix3, PIIX_NUM_PIRQS); - return b; } @@ -365,6 +359,13 @@ static void piix3_write_config(PCIDevice *dev, } } +static void piix3_write_config_xen(PCIDevice *dev, + uint32_t address, uint32_t val, int len) +{ + xen_piix_pci_write_config_client(address, val, len); + piix3_write_config(dev, address, val, len); +} + static void piix3_reset(void *opaque) { PIIX3State *d = opaque; @@ -464,14 +465,6 @@ static PCIDeviceInfo i440fx_info[] = { .no_hotplug = 1, .init = i440fx_initfn, .config_write = i440fx_write_config, - },{ - .qdev.name = "i440FX-xen", - .qdev.desc = "Host bridge", - .qdev.size = sizeof(PCII440FXState), - .qdev.vmsd = &vmstate_i440fx, - .qdev.no_user = 1, - .init = i440fx_initfn, - .config_write = i440fx_write_config_xen, },{ .qdev.name = "PIIX3", .qdev.desc = "ISA bridge", @@ -481,6 +474,15 @@ static PCIDeviceInfo i440fx_info[] = { .no_hotplug = 1, .init = piix3_initfn, .config_write = piix3_write_config, + },{ + .qdev.name = "PIIX3-xen", + .qdev.desc = "ISA bridge", + .qdev.size = sizeof(PIIX3State), + .qdev.vmsd = &vmstate_piix3, + .qdev.no_user = 1, + .no_hotplug = 1, + .init = piix3_initfn, + .config_write = piix3_write_config_xen, },{ /* end of list */ } From 01195b7347e341a49fa554959882b26c666a3616 Mon Sep 17 00:00:00 2001 From: Steven Smith Date: Thu, 16 Jun 2011 17:05:17 +0100 Subject: [PATCH 11/11] xen: Add the Xen platform pci device Introduce a new emulated PCI device, specific to fully virtualized Xen guests. The device is necessary for PV on HVM drivers to work. Signed-off-by: Steven Smith Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- Makefile.target | 2 + hw/hw.h | 3 + hw/pc_piix.c | 4 + hw/pci_ids.h | 2 + hw/xen_platform.c | 340 ++++++++++++++++++++++++++++++++++++++++++++++ trace-events | 3 + 6 files changed, 354 insertions(+) create mode 100644 hw/xen_platform.c diff --git a/Makefile.target b/Makefile.target index b1a0f6d28..760aa02d5 100644 --- a/Makefile.target +++ b/Makefile.target @@ -218,6 +218,8 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o +obj-i386-$(CONFIG_XEN) += xen_platform.o + # Inter-VM PCI shared memory CONFIG_IVSHMEM = ifeq ($(CONFIG_KVM), y) diff --git a/hw/hw.h b/hw/hw.h index 56447a735..9dd7096fc 100644 --- a/hw/hw.h +++ b/hw/hw.h @@ -780,6 +780,9 @@ extern const VMStateDescription vmstate_ptimer; #define VMSTATE_INT32_LE(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) +#define VMSTATE_UINT8_TEST(_f, _s, _t) \ + VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t) + #define VMSTATE_UINT16_TEST(_f, _s, _t) \ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t) diff --git a/hw/pc_piix.c b/hw/pc_piix.c index ba198de8c..8dbeb0c8a 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -136,6 +136,10 @@ static void pc_init1(ram_addr_t ram_size, pc_vga_init(pci_enabled? pci_bus: NULL); + if (xen_enabled()) { + pci_create_simple(pci_bus, -1, "xen-platform"); + } + /* init basic PC hardware */ pc_basic_device_init(isa_irq, &rtc_state, xen_enabled()); diff --git a/hw/pci_ids.h b/hw/pci_ids.h index d9457ed3f..d94578c87 100644 --- a/hw/pci_ids.h +++ b/hw/pci_ids.h @@ -109,3 +109,5 @@ #define PCI_DEVICE_ID_INTEL_82371AB 0x7111 #define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112 #define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113 + +#define PCI_VENDOR_ID_XENSOURCE 0x5853 diff --git a/hw/xen_platform.c b/hw/xen_platform.c new file mode 100644 index 000000000..b167eee1f --- /dev/null +++ b/hw/xen_platform.c @@ -0,0 +1,340 @@ +/* + * XEN platform pci device, formerly known as the event channel device + * + * Copyright (c) 2003-2004 Intel Corp. + * Copyright (c) 2006 XenSource + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include "hw.h" +#include "pc.h" +#include "pci.h" +#include "irq.h" +#include "xen_common.h" +#include "net.h" +#include "xen_backend.h" +#include "rwhandler.h" +#include "trace.h" + +#include + +//#define DEBUG_PLATFORM + +#ifdef DEBUG_PLATFORM +#define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ + +typedef struct PCIXenPlatformState { + PCIDevice pci_dev; + uint8_t flags; /* used only for version_id == 2 */ + int drivers_blacklisted; + uint16_t driver_product_version; + + /* Log from guest drivers */ + char log_buffer[4096]; + int log_buffer_off; +} PCIXenPlatformState; + +#define XEN_PLATFORM_IOPORT 0x10 + +/* Send bytes to syslog */ +static void log_writeb(PCIXenPlatformState *s, char val) +{ + if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) { + /* Flush buffer */ + s->log_buffer[s->log_buffer_off] = 0; + trace_xen_platform_log(s->log_buffer); + s->log_buffer_off = 0; + } else { + s->log_buffer[s->log_buffer_off++] = val; + } +} + +/* Xen Platform, Fixed IOPort */ + +static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val) +{ + PCIXenPlatformState *s = opaque; + + switch (addr - XEN_PLATFORM_IOPORT) { + case 0: + /* TODO: */ + /* Unplug devices. Value is a bitmask of which devices to + unplug, with bit 0 the IDE devices, bit 1 the network + devices, and bit 2 the non-primary-master IDE devices. */ + break; + case 2: + switch (val) { + case 1: + DPRINTF("Citrix Windows PV drivers loaded in guest\n"); + break; + case 0: + DPRINTF("Guest claimed to be running PV product 0?\n"); + break; + default: + DPRINTF("Unknown PV product %d loaded in guest\n", val); + break; + } + s->driver_product_version = val; + break; + } +} + +static void platform_fixed_ioport_writel(void *opaque, uint32_t addr, + uint32_t val) +{ + switch (addr - XEN_PLATFORM_IOPORT) { + case 0: + /* PV driver version */ + break; + } +} + +static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PCIXenPlatformState *s = opaque; + + switch (addr - XEN_PLATFORM_IOPORT) { + case 0: /* Platform flags */ { + hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ? + HVMMEM_ram_ro : HVMMEM_ram_rw; + if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type, 0xc0, 0x40)) { + DPRINTF("unable to change ro/rw state of ROM memory area!\n"); + } else { + s->flags = val & PFFLAG_ROM_LOCK; + DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n", + (mem_type == HVMMEM_ram_ro ? "ro":"rw")); + } + break; + } + case 2: + log_writeb(s, val); + break; + } +} + +static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr) +{ + PCIXenPlatformState *s = opaque; + + switch (addr - XEN_PLATFORM_IOPORT) { + case 0: + if (s->drivers_blacklisted) { + /* The drivers will recognise this magic number and refuse + * to do anything. */ + return 0xd249; + } else { + /* Magic value so that you can identify the interface. */ + return 0x49d2; + } + default: + return 0xffff; + } +} + +static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr) +{ + PCIXenPlatformState *s = opaque; + + switch (addr - XEN_PLATFORM_IOPORT) { + case 0: + /* Platform flags */ + return s->flags; + case 2: + /* Version number */ + return 1; + default: + return 0xff; + } +} + +static void platform_fixed_ioport_reset(void *opaque) +{ + PCIXenPlatformState *s = opaque; + + platform_fixed_ioport_writeb(s, XEN_PLATFORM_IOPORT, 0); +} + +static void platform_fixed_ioport_init(PCIXenPlatformState* s) +{ + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 4, platform_fixed_ioport_writel, s); + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 2, platform_fixed_ioport_writew, s); + register_ioport_write(XEN_PLATFORM_IOPORT, 16, 1, platform_fixed_ioport_writeb, s); + register_ioport_read(XEN_PLATFORM_IOPORT, 16, 2, platform_fixed_ioport_readw, s); + register_ioport_read(XEN_PLATFORM_IOPORT, 16, 1, platform_fixed_ioport_readb, s); +} + +/* Xen Platform PCI Device */ + +static uint32_t xen_platform_ioport_readb(void *opaque, uint32_t addr) +{ + addr &= 0xff; + + if (addr == 0) { + return platform_fixed_ioport_readb(opaque, XEN_PLATFORM_IOPORT); + } else { + return ~0u; + } +} + +static void xen_platform_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PCIXenPlatformState *s = opaque; + + addr &= 0xff; + val &= 0xff; + + switch (addr) { + case 0: /* Platform flags */ + platform_fixed_ioport_writeb(opaque, XEN_PLATFORM_IOPORT, val); + break; + case 8: + log_writeb(s, val); + break; + default: + break; + } +} + +static void platform_ioport_map(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type) +{ + PCIXenPlatformState *d = DO_UPCAST(PCIXenPlatformState, pci_dev, pci_dev); + + register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d); + register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d); +} + +static uint32_t platform_mmio_read(ReadWriteHandler *handler, pcibus_t addr, int len) +{ + DPRINTF("Warning: attempted read from physical address " + "0x" TARGET_FMT_plx " in xen platform mmio space\n", addr); + + return 0; +} + +static void platform_mmio_write(ReadWriteHandler *handler, pcibus_t addr, + uint32_t val, int len) +{ + DPRINTF("Warning: attempted write of 0x%x to physical " + "address 0x" TARGET_FMT_plx " in xen platform mmio space\n", + val, addr); +} + +static ReadWriteHandler platform_mmio_handler = { + .read = &platform_mmio_read, + .write = &platform_mmio_write, +}; + +static void platform_mmio_map(PCIDevice *d, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + int mmio_io_addr; + + mmio_io_addr = cpu_register_io_memory_simple(&platform_mmio_handler, + DEVICE_NATIVE_ENDIAN); + + cpu_register_physical_memory(addr, size, mmio_io_addr); +} + +static int xen_platform_post_load(void *opaque, int version_id) +{ + PCIXenPlatformState *s = opaque; + + platform_fixed_ioport_writeb(s, XEN_PLATFORM_IOPORT, s->flags); + + return 0; +} + +static const VMStateDescription vmstate_xen_platform = { + .name = "platform", + .version_id = 4, + .minimum_version_id = 4, + .minimum_version_id_old = 4, + .post_load = xen_platform_post_load, + .fields = (VMStateField []) { + VMSTATE_PCI_DEVICE(pci_dev, PCIXenPlatformState), + VMSTATE_UINT8(flags, PCIXenPlatformState), + VMSTATE_END_OF_LIST() + } +}; + +static int xen_platform_initfn(PCIDevice *dev) +{ + PCIXenPlatformState *d = DO_UPCAST(PCIXenPlatformState, pci_dev, dev); + uint8_t *pci_conf; + + pci_conf = d->pci_dev.config; + + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_XENSOURCE); + pci_config_set_device_id(pci_conf, 0x0001); + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, PCI_VENDOR_ID_XENSOURCE); + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0001); + + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + + pci_config_set_revision(pci_conf, 1); + pci_config_set_prog_interface(pci_conf, 0); + + pci_config_set_class(pci_conf, PCI_CLASS_OTHERS << 8 | 0x80); + + pci_conf[PCI_INTERRUPT_PIN] = 1; + + pci_register_bar(&d->pci_dev, 0, 0x100, + PCI_BASE_ADDRESS_SPACE_IO, platform_ioport_map); + + /* reserve 16MB mmio address for share memory*/ + pci_register_bar(&d->pci_dev, 1, 0x1000000, + PCI_BASE_ADDRESS_MEM_PREFETCH, platform_mmio_map); + + platform_fixed_ioport_init(d); + + return 0; +} + +static void platform_reset(DeviceState *dev) +{ + PCIXenPlatformState *s = DO_UPCAST(PCIXenPlatformState, pci_dev.qdev, dev); + + platform_fixed_ioport_reset(s); +} + +static PCIDeviceInfo xen_platform_info = { + .init = xen_platform_initfn, + .qdev.name = "xen-platform", + .qdev.desc = "XEN platform pci device", + .qdev.size = sizeof(PCIXenPlatformState), + .qdev.vmsd = &vmstate_xen_platform, + .qdev.reset = platform_reset, +}; + +static void xen_platform_register(void) +{ + pci_qdev_register(&xen_platform_info); +} + +device_init(xen_platform_register); diff --git a/trace-events b/trace-events index 46a19d372..bebf612fe 100644 --- a/trace-events +++ b/trace-events @@ -407,3 +407,6 @@ disable xen_unmap_block(void* addr, unsigned long size) "%p, size %#lx" # exec.c disable qemu_put_ram_ptr(void* addr) "%p" + +# hw/xen_platform.c +disable xen_platform_log(char *s) "xen platform: %s"